Total coverage: 390732 (19%)of 2096047
700 1 1 700 702 888 889 712 714 44 44 2 2 13 4 4 4 12 2 10 2 8 1 1 8 7 6 3 1 2 13 3 1 2 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 
505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628
// SPDX-License-Identifier: GPL-2.0
/*
 * shstk.c - Intel shadow stack support
 *
 * Copyright (c) 2021, Intel Corporation.
 * Yu-cheng Yu <yu-cheng.yu@intel.com>
 */

#include <linux/sched.h>
#include <linux/bitops.h>
#include <linux/types.h>
#include <linux/mm.h>
#include <linux/mman.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
#include <linux/sched/signal.h>
#include <linux/compat.h>
#include <linux/sizes.h>
#include <linux/user.h>
#include <linux/syscalls.h>
#include <asm/msr.h>
#include <asm/fpu/xstate.h>
#include <asm/fpu/types.h>
#include <asm/shstk.h>
#include <asm/special_insns.h>
#include <asm/fpu/api.h>
#include <asm/prctl.h>

/* Size in bytes of one shadow stack entry; used for every push/pop below. */
#define SS_FRAME_SIZE 8

/* Return true if any bit of @features is set in current's feature mask. */
static bool features_enabled(unsigned long features)
{
	return current->thread.features & features;
}

/* Set the bits of @features in current's feature mask. */
static void features_set(unsigned long features)
{
	current->thread.features |= features;
}

/* Clear the bits of @features in current's feature mask. */
static void features_clr(unsigned long features)
{
	current->thread.features &= ~features;
}

/*
 * Create a restore token on the shadow stack.  A token is always 8 bytes
 * and aligned to 8.
 *
 * The token is written at @ssp - SS_FRAME_SIZE.  If @token_addr is not
 * NULL, the address the token was written to is stored there.
 *
 * Returns 0 on success, -EINVAL if @ssp is not 8-byte aligned, or -EFAULT
 * if the shadow stack write fails.
 */
static int create_rstor_token(unsigned long ssp, unsigned long *token_addr)
{
	unsigned long addr;

	/* Token must be aligned */
	if (!IS_ALIGNED(ssp, 8))
		return -EINVAL;

	addr = ssp - SS_FRAME_SIZE;

	/*
	 * SSP is aligned, so the reserved bits and the mode bit are zero;
	 * just mark the token 64-bit.
	 */
	ssp |= BIT(0);

	if (write_user_shstk_64((u64 __user *)addr, (u64)ssp))
		return -EFAULT;

	if (token_addr)
		*token_addr = addr;

	return 0;
}

/*
 * VM_SHADOW_STACK will have a guard page. This helps userspace protect
 * itself from attacks. The reasoning is as follows:
 *
 * The shadow stack pointer (SSP) is moved by CALL, RET, and INCSSPQ. The
 * INCSSP instruction can increment the shadow stack pointer. It is the
 * shadow stack analog of an instruction like:
 *
 *   addq $0x80, %rsp
 *
 * However, there is one important difference between an ADD on %rsp
 * and INCSSP. In addition to modifying SSP, INCSSP also reads from the
 * memory of the first and last elements that were "popped". It can be
 * thought of as acting like this:
 *
 * READ_ONCE(ssp);       // read+discard top element on stack
 * ssp += nr_to_pop * 8; // move the shadow stack
 * READ_ONCE(ssp-8);     // read+discard last popped stack element
 *
 * The maximum distance INCSSP can move the SSP is 2040 bytes, before
 * it would read the memory. Therefore a single page gap will be enough
 * to prevent any operation from shifting the SSP to an adjacent stack,
 * since it would have to land in the gap at least once, causing a
 * fault.
 */

/*
 * Map a shadow stack of @size bytes at @addr via vm_mmap_shadow_stack().
 * When @set_res_tok is true and the mapping succeeded, a restore token is
 * created for the address mapped_addr + @token_offset; if that fails the
 * mapping is removed again and -EINVAL is returned.
 *
 * Returns the mapped address, or an error value (check with
 * IS_ERR_VALUE()).
 */
static unsigned long alloc_shstk(unsigned long addr, unsigned long size,
				 unsigned long token_offset, bool set_res_tok)
{
	unsigned long mapped_addr;

	mapped_addr = vm_mmap_shadow_stack(addr, size, MAP_ABOVE4G);

	if (!set_res_tok || IS_ERR_VALUE(mapped_addr))
		goto out;

	if (create_rstor_token(mapped_addr + token_offset, NULL)) {
		vm_munmap(mapped_addr, size);
		return -EINVAL;
	}

out:
	return mapped_addr;
}

/*
 * Pick the shadow stack size: a non-zero @size is page-aligned and used as
 * is; otherwise the size is min(RLIMIT_STACK, 4G), page-aligned.
 */
static unsigned long adjust_shstk_size(unsigned long size)
{
	if (size)
		return PAGE_ALIGN(size);

	return PAGE_ALIGN(min_t(unsigned long long, rlimit(RLIMIT_STACK), SZ_4G));
}

/* Unmap the shadow stack at @base/@size, warning once on unexpected errors. */
static void unmap_shadow_stack(u64 base, u64 size)
{
	int r;

	r = vm_munmap(base, size);

	/*
	 * mmap_write_lock_killable() failed with -EINTR. This means
	 * the process is about to die and have its MM cleaned up.
	 * This task shouldn't ever make it back to userspace. In this
	 * case it is ok to leak a shadow stack, so just exit out.
	 */
	if (r == -EINTR)
		return;

	/*
	 * For all other types of vm_munmap() failure, either the
	 * system is out of memory or there is a bug.
	 */
	WARN_ON_ONCE(r);
}

/*
 * Enable shadow stack for current.
 *
 * Allocates a default-sized shadow stack (no restore token), points
 * MSR_IA32_PL3_SSP at addr + size, writes CET_SHSTK_EN to MSR_IA32_U_CET,
 * then records base/size in the thread and sets ARCH_SHSTK_SHSTK.
 *
 * Returns 0 on success or if already enabled, -EOPNOTSUPP if the CPU
 * feature is not enabled or this is an ia32 syscall, or the allocation
 * error.
 */
static int shstk_setup(void)
{
	struct thread_shstk *shstk = &current->thread.shstk;
	unsigned long addr, size;

	/* Already enabled */
	if (features_enabled(ARCH_SHSTK_SHSTK))
		return 0;

	/* Also not supported for 32 bit */
	if (!cpu_feature_enabled(X86_FEATURE_USER_SHSTK) || in_ia32_syscall())
		return -EOPNOTSUPP;

	size = adjust_shstk_size(0);
	addr = alloc_shstk(0, size, 0, false);
	if (IS_ERR_VALUE(addr))
		return PTR_ERR((void *)addr);

	fpregs_lock_and_load();
	wrmsrq(MSR_IA32_PL3_SSP, addr + size);
	wrmsrq(MSR_IA32_U_CET, CET_SHSTK_EN);
	fpregs_unlock();

	shstk->base = addr;
	shstk->size = size;
	features_set(ARCH_SHSTK_SHSTK);

	return 0;
}

/* Zero current's shadow stack tracking, feature mask and locked mask. */
void reset_thread_features(void)
{
	memset(&current->thread.shstk, 0, sizeof(struct thread_shstk));
	current->thread.features = 0;
	current->thread.features_locked = 0;
}

/*
 * Allocate a shadow stack for the new task @tsk if one is needed.
 *
 * Returns 0 when no new stack is set up (shadow stack disabled in current,
 * CLONE_VFORK, or !CLONE_VM), the error value from alloc_shstk() on
 * failure, and otherwise addr + size of the new mapping.
 */
unsigned long shstk_alloc_thread_stack(struct task_struct *tsk, u64 clone_flags,
				       unsigned long stack_size)
{
	struct thread_shstk *shstk = &tsk->thread.shstk;
	unsigned long addr, size;

	/*
	 * If shadow stack is not enabled on the new thread, skip any
	 * switch to a new shadow stack.
	 */
	if (!features_enabled(ARCH_SHSTK_SHSTK))
		return 0;

	/*
	 * For CLONE_VFORK the child will share the parent's shadow stack.
	 * Make sure to clear the internal tracking of the thread shadow
	 * stack so the freeing logic run for the child knows to leave it
	 * alone.
	 */
	if (clone_flags & CLONE_VFORK) {
		shstk->base = 0;
		shstk->size = 0;
		return 0;
	}

	/*
	 * For !CLONE_VM the child will use a copy of the parent's shadow
	 * stack.
	 */
	if (!(clone_flags & CLONE_VM))
		return 0;

	size = adjust_shstk_size(stack_size);
	addr = alloc_shstk(0, size, 0, false);
	if (IS_ERR_VALUE(addr))
		return addr;

	shstk->base = addr;
	shstk->size = size;

	return addr + size;
}

/* Read and return the user shadow stack pointer from MSR_IA32_PL3_SSP. */
static unsigned long get_user_shstk_addr(void)
{
	unsigned long long ssp;

	fpregs_lock_and_load();
	rdmsrq(MSR_IA32_PL3_SSP, ssp);
	fpregs_unlock();

	return ssp;
}

/*
 * Pop one entry from the user shadow stack.
 *
 * If @val is not NULL the entry at the current SSP is read into it first;
 * the SSP is advanced by SS_FRAME_SIZE only when that read succeeded (or
 * when @val is NULL).
 *
 * Returns 0 on success, -ENOTSUPP if shadow stack is not enabled, or
 * -EFAULT if the read fails.
 */
int shstk_pop(u64 *val)
{
	int ret = 0;
	u64 ssp;

	if (!features_enabled(ARCH_SHSTK_SHSTK))
		return -ENOTSUPP;

	fpregs_lock_and_load();

	rdmsrq(MSR_IA32_PL3_SSP, ssp);
	if (val && get_user(*val, (__user u64 *)ssp))
		ret = -EFAULT;
	else
		wrmsrq(MSR_IA32_PL3_SSP, ssp + SS_FRAME_SIZE);
	fpregs_unlock();

	return ret;
}

/*
 * Push @val onto the user shadow stack.
 *
 * The value is written at SSP - SS_FRAME_SIZE; the SSP MSR is only updated
 * when the write succeeded.
 *
 * Returns 0 on success, -ENOTSUPP if shadow stack is not enabled, or the
 * error from write_user_shstk_64().
 */
int shstk_push(u64 val)
{
	u64 ssp;
	int ret;

	if (!features_enabled(ARCH_SHSTK_SHSTK))
		return -ENOTSUPP;

	fpregs_lock_and_load();

	rdmsrq(MSR_IA32_PL3_SSP, ssp);
	ssp -= SS_FRAME_SIZE;
	ret = write_user_shstk_64((__user void *)ssp, val);
	if (!ret)
		wrmsrq(MSR_IA32_PL3_SSP, ssp);
	fpregs_unlock();

	return ret;
}

/* Marker bit set on shadow stack entries that carry data, not an address. */
#define SHSTK_DATA_BIT BIT(63)

/*
 * Write @data to the shadow stack at @addr with SHSTK_DATA_BIT set.
 * Returns -EINVAL (with a one-time warning) if @data already has that bit
 * set, -EFAULT if the write fails, 0 otherwise.
 */
static int put_shstk_data(u64 __user *addr, u64 data)
{
	if (WARN_ON_ONCE(data & SHSTK_DATA_BIT))
		return -EINVAL;

	/*
	 * Mark the high bit so that the sigframe can't be processed as a
	 * return address.
	 */
	if (write_user_shstk_64(addr, data | SHSTK_DATA_BIT))
		return -EFAULT;
	return 0;
}

/*
 * Read a data entry from @addr into @data with SHSTK_DATA_BIT stripped.
 * Returns -EFAULT if the read fails, -EINVAL if the entry does not have
 * SHSTK_DATA_BIT set, 0 otherwise.
 */
static int get_shstk_data(unsigned long *data, unsigned long __user *addr)
{
	unsigned long ldata;

	if (unlikely(get_user(ldata, addr)))
		return -EFAULT;

	if (!(ldata & SHSTK_DATA_BIT))
		return -EINVAL;

	*data = ldata & ~SHSTK_DATA_BIT;

	return 0;
}

/*
 * Push the current SSP value as a data entry and move *@ssp down by one
 * frame.  Returns 0, -EINVAL if *@ssp is misaligned, or -EFAULT if the
 * entry could not be written.  Note *@ssp is already decremented when
 * -EFAULT is returned.
 */
static int shstk_push_sigframe(unsigned long *ssp)
{
	unsigned long target_ssp = *ssp;

	/* Token must be aligned */
	if (!IS_ALIGNED(target_ssp, 8))
		return -EINVAL;

	*ssp -= SS_FRAME_SIZE;
	if (put_shstk_data((void __user *)*ssp, target_ssp))
		return -EFAULT;

	return 0;
}

/*
 * Read the data entry at *@ssp and, if it is a valid 8-byte aligned user
 * address, store it back into *@ssp as the SSP to restore.
 *
 * Returns 0 on success; -EINVAL for a misaligned SSP, a missing data bit,
 * or a misaligned/out-of-range restored value; -EFAULT if the read fails
 * or the page is not a shadow stack VMA; -EINTR if taking the mmap lock
 * was interrupted.
 */
static int shstk_pop_sigframe(unsigned long *ssp)
{
	struct vm_area_struct *vma;
	unsigned long token_addr;
	bool need_to_check_vma;
	int err = 1;

	/*
	 * It is possible for the SSP to be off the end of a shadow stack by 4
	 * or 8 bytes. If the shadow stack is at the start of a page or 4 bytes
	 * before it, it might be this case, so check that the address being
	 * read is actually shadow stack.
	 */
	if (!IS_ALIGNED(*ssp, 8))
		return -EINVAL;

	/* Only a page-aligned SSP needs the VMA lookup below. */
	need_to_check_vma = PAGE_ALIGN(*ssp) == *ssp;

	if (need_to_check_vma)
		if (mmap_read_lock_killable(current->mm))
			return -EINTR;

	err = get_shstk_data(&token_addr, (unsigned long __user *)*ssp);
	if (unlikely(err))
		goto out_err;

	if (need_to_check_vma) {
		vma = find_vma(current->mm, *ssp);
		if (!vma || !(vma->vm_flags & VM_SHADOW_STACK)) {
			err = -EFAULT;
			goto out_err;
		}

		mmap_read_unlock(current->mm);
	}

	/* Restore SSP aligned? */
	if (unlikely(!IS_ALIGNED(token_addr, 8)))
		return -EINVAL;

	/* SSP in userspace? */
	if (unlikely(token_addr >= TASK_SIZE_MAX))
		return -EINVAL;

	*ssp = token_addr;

	return 0;
out_err:
	/* The mmap lock is only held when the VMA check was requested. */
	if (need_to_check_vma)
		mmap_read_unlock(current->mm);
	return err;
}

/*
 * Prepare the user shadow stack for signal delivery: push a data entry
 * holding the current SSP, push the restorer address from @ksig, then
 * update MSR_IA32_PL3_SSP.
 *
 * Returns 0 on success or when shadow stack is not in use, -EINVAL if
 * there is no restorer or the SSP reads as zero, or the push error.
 */
int setup_signal_shadow_stack(struct ksignal *ksig)
{
	void __user *restorer = ksig->ka.sa.sa_restorer;
	unsigned long ssp;
	int err;

	if (!cpu_feature_enabled(X86_FEATURE_USER_SHSTK) ||
	    !features_enabled(ARCH_SHSTK_SHSTK))
		return 0;

	if (!restorer)
		return -EINVAL;

	ssp = get_user_shstk_addr();
	if (unlikely(!ssp))
		return -EINVAL;

	err = shstk_push_sigframe(&ssp);
	if (unlikely(err))
		return err;

	/* Push restorer address */
	ssp -= SS_FRAME_SIZE;
	err = write_user_shstk_64((u64 __user *)ssp, (u64)restorer);
	if (unlikely(err))
		return -EFAULT;

	fpregs_lock_and_load();
	wrmsrq(MSR_IA32_PL3_SSP, ssp);
	fpregs_unlock();

	return 0;
}

/*
 * Undo setup_signal_shadow_stack(): pop the saved-SSP data entry and write
 * the restored value to MSR_IA32_PL3_SSP.
 *
 * Returns 0 on success or when shadow stack is not in use, -EINVAL if the
 * SSP reads as zero, or the error from shstk_pop_sigframe().
 */
int restore_signal_shadow_stack(void)
{
	unsigned long ssp;
	int err;

	if (!cpu_feature_enabled(X86_FEATURE_USER_SHSTK) ||
	    !features_enabled(ARCH_SHSTK_SHSTK))
		return 0;

	ssp = get_user_shstk_addr();
	if (unlikely(!ssp))
		return -EINVAL;

	err = shstk_pop_sigframe(&ssp);
	if (unlikely(err))
		return err;

	fpregs_lock_and_load();
	wrmsrq(MSR_IA32_PL3_SSP, ssp);
	fpregs_unlock();

	return 0;
}

/*
 * Free the thread shadow stack tracked for @tsk, if there is one and @tsk
 * shares current's mm.  Note the feature check is made against current,
 * not @tsk.
 */
void shstk_free(struct task_struct *tsk)
{
	struct thread_shstk *shstk = &tsk->thread.shstk;

	if (!cpu_feature_enabled(X86_FEATURE_USER_SHSTK) ||
	    !features_enabled(ARCH_SHSTK_SHSTK))
		return;

	/*
	 * When fork() with CLONE_VM fails, the child (tsk) already has a
	 * shadow stack allocated, and exit_thread() calls this function to
	 * free it.  In this case the parent (current) and the child share
	 * the same mm struct.
	 */
	if (!tsk->mm || tsk->mm != current->mm)
		return;

	/*
	 * If shstk->base is NULL, then this task is not managing its
	 * own shadow stack (CLONE_VFORK). So skip freeing it.
	 */
	if (!shstk->base)
		return;

	/*
	 * shstk->base is NULL for CLONE_VFORK child tasks, and so is
	 * normal. But size = 0 on a shstk->base is not normal and
	 * indicates an attempt to free the thread shadow stack twice.
	 * Warn about it.
	 */
	if (WARN_ON(!shstk->size))
		return;

	unmap_shadow_stack(shstk->base, shstk->size);

	/* Mark the stack as freed so a second call hits the warning above. */
	shstk->size = 0;
}

/*
 * Enable or disable WRSS for current.
 *
 * Returns 0 on success or if already in the requested state, -EOPNOTSUPP
 * if the CPU feature is not enabled, -EPERM if shadow stack itself is not
 * enabled.
 */
static int wrss_control(bool enable)
{
	u64 msrval;

	if (!cpu_feature_enabled(X86_FEATURE_USER_SHSTK))
		return -EOPNOTSUPP;

	/*
	 * Only enable WRSS if shadow stack is enabled. If shadow stack is not
	 * enabled, WRSS will already be disabled, so don't bother clearing it
	 * when disabling.
	 */
	if (!features_enabled(ARCH_SHSTK_SHSTK))
		return -EPERM;

	/* Already enabled/disabled? */
	if (features_enabled(ARCH_SHSTK_WRSS) == enable)
		return 0;

	fpregs_lock_and_load();
	rdmsrq(MSR_IA32_U_CET, msrval);

	if (enable) {
		features_set(ARCH_SHSTK_WRSS);
		msrval |= CET_WRSS_EN;
	} else {
		features_clr(ARCH_SHSTK_WRSS);
		/* Bit already clear in the MSR: skip the write. */
		if (!(msrval & CET_WRSS_EN))
			goto unlock;

		msrval &= ~CET_WRSS_EN;
	}

	wrmsrq(MSR_IA32_U_CET, msrval);

unlock:
	fpregs_unlock();

	return 0;
}

/*
 * Disable shadow stack (and WRSS) for current: zero both CET MSRs, free
 * the thread shadow stack and clear both feature bits.
 *
 * Returns 0 on success or if already disabled, -EOPNOTSUPP if the CPU
 * feature is not enabled.
 */
static int shstk_disable(void)
{
	if (!cpu_feature_enabled(X86_FEATURE_USER_SHSTK))
		return -EOPNOTSUPP;

	/* Already disabled? */
	if (!features_enabled(ARCH_SHSTK_SHSTK))
		return 0;

	fpregs_lock_and_load();
	/* Disable WRSS too when disabling shadow stack */
	wrmsrq(MSR_IA32_U_CET, 0);
	wrmsrq(MSR_IA32_PL3_SSP, 0);
	fpregs_unlock();

	/* Must run before the feature bit is cleared: shstk_free() checks it. */
	shstk_free(current);
	features_clr(ARCH_SHSTK_SHSTK | ARCH_SHSTK_WRSS);

	return 0;
}

/*
 * map_shadow_stack() - map a new shadow stack for userspace.
 *
 * Returns the mapped address or a negative error: -EOPNOTSUPP without CPU
 * support, -EINVAL for unknown @flags, -ENOSPC if a token is requested but
 * @size < 8, -ERANGE for a non-zero @addr below 4G, -EOVERFLOW if page
 * aligning @size wraps, or the error from alloc_shstk().
 */
SYSCALL_DEFINE3(map_shadow_stack, unsigned long, addr, unsigned long, size, unsigned int, flags)
{
	bool set_tok = flags & SHADOW_STACK_SET_TOKEN;
	unsigned long aligned_size;

	if (!cpu_feature_enabled(X86_FEATURE_USER_SHSTK))
		return -EOPNOTSUPP;

	if (flags & ~SHADOW_STACK_SET_TOKEN)
		return -EINVAL;

	/* If there isn't space for a token */
	if (set_tok && size < 8)
		return -ENOSPC;

	if (addr && addr < SZ_4G)
		return -ERANGE;

	/*
	 * An overflow would result in attempting to write the restore token
	 * to the wrong location. Not catastrophic, but just return the right
	 * error code and block it.
	 */
	aligned_size = PAGE_ALIGN(size);
	if (aligned_size < size)
		return -EOVERFLOW;

	/* The token offset is the unaligned @size requested by the caller. */
	return alloc_shstk(addr, aligned_size, size, set_tok);
}

/*
 * Handle the ARCH_SHSTK_* prctl options for @task.
 *
 * STATUS and LOCK are accepted for any @task.  For a @task other than
 * current only UNLOCK is handled (and only with CONFIG_CHECKPOINT_RESTORE);
 * everything else returns -EINVAL.  For current, ENABLE/DISABLE accept
 * exactly one unlocked feature bit in @arg2.
 */
long shstk_prctl(struct task_struct *task, int option, unsigned long arg2)
{
	unsigned long features = arg2;

	if (option == ARCH_SHSTK_STATUS) {
		/* Here arg2 is a user pointer that receives the feature mask. */
		return put_user(task->thread.features, (unsigned long __user *)arg2);
	}

	if (option == ARCH_SHSTK_LOCK) {
		task->thread.features_locked |= features;
		return 0;
	}

	/* Only allow via ptrace */
	if (task != current) {
		if (option == ARCH_SHSTK_UNLOCK && IS_ENABLED(CONFIG_CHECKPOINT_RESTORE)) {
			task->thread.features_locked &= ~features;
			return 0;
		}
		return -EINVAL;
	}

	/* Do not allow to change locked features */
	if (features & task->thread.features_locked)
		return -EPERM;

	/* Only support enabling/disabling one feature at a time. */
	if (hweight_long(features) > 1)
		return -EINVAL;

	if (option == ARCH_SHSTK_DISABLE) {
		if (features & ARCH_SHSTK_WRSS)
			return wrss_control(false);
		if (features & ARCH_SHSTK_SHSTK)
			return shstk_disable();
		return -EINVAL;
	}

	/* Handle ARCH_SHSTK_ENABLE */
	if (features & ARCH_SHSTK_SHSTK)
		return shstk_setup();
	if (features & ARCH_SHSTK_WRSS)
		return wrss_control(true);
	return -EINVAL;
}

/*
 * Overwrite the entry at the current user SSP with @val.  Returns 0 when
 * shadow stack is not enabled, otherwise the result of the shadow stack
 * write.
 */
int shstk_update_last_frame(unsigned long val)
{
	unsigned long ssp;

	if (!features_enabled(ARCH_SHSTK_SHSTK))
		return 0;

	ssp = get_user_shstk_addr();
	return write_user_shstk_64((u64 __user *)ssp, (u64)val);
}

/* Report whether ARCH_SHSTK_SHSTK is set for current. */
bool shstk_is_enabled(void)
{
	return features_enabled(ARCH_SHSTK_SHSTK);
}
29 29 29 29 29 29 29 29 29 29 15 15 15 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 /* SPDX-License-Identifier: GPL-2.0 */ #include <linux/ceph/ceph_debug.h> #include <linux/types.h> #include <linux/percpu_counter.h> #include <linux/math64.h> #include "metric.h" #include "mds_client.h" static void ktime_to_ceph_timespec(struct ceph_timespec *ts, ktime_t val) { struct timespec64 t = ktime_to_timespec64(val); ceph_encode_timespec64(ts, &t); } static bool ceph_mdsc_send_metrics(struct ceph_mds_client *mdsc, struct ceph_mds_session *s) { struct ceph_metric_head *head; struct ceph_metric_cap *cap; struct ceph_metric_read_latency *read; struct ceph_metric_write_latency *write; struct 
ceph_metric_metadata_latency *meta; struct ceph_metric_dlease *dlease; struct ceph_opened_files *files; struct ceph_pinned_icaps *icaps; struct ceph_opened_inodes *inodes; struct ceph_read_io_size *rsize; struct ceph_write_io_size *wsize; struct ceph_client_metric *m = &mdsc->metric; u64 nr_caps = atomic64_read(&m->total_caps); u32 header_len = sizeof(struct ceph_metric_header); struct ceph_client *cl = mdsc->fsc->client; struct ceph_msg *msg; s64 sum; s32 items = 0; s32 len; /* Do not send the metrics until the MDS rank is ready */ mutex_lock(&mdsc->mutex); if (ceph_mdsmap_get_state(mdsc->mdsmap, s->s_mds) != CEPH_MDS_STATE_ACTIVE) { mutex_unlock(&mdsc->mutex); return false; } mutex_unlock(&mdsc->mutex); len = sizeof(*head) + sizeof(*cap) + sizeof(*read) + sizeof(*write) + sizeof(*meta) + sizeof(*dlease) + sizeof(*files) + sizeof(*icaps) + sizeof(*inodes) + sizeof(*rsize) + sizeof(*wsize); msg = ceph_msg_new(CEPH_MSG_CLIENT_METRICS, len, GFP_NOFS, true); if (!msg) { pr_err_client(cl, "to mds%d, failed to allocate message\n", s->s_mds); return false; } head = msg->front.iov_base; /* encode the cap metric */ cap = (struct ceph_metric_cap *)(head + 1); cap->header.type = cpu_to_le32(CLIENT_METRIC_TYPE_CAP_INFO); cap->header.ver = 1; cap->header.compat = 1; cap->header.data_len = cpu_to_le32(sizeof(*cap) - header_len); cap->hit = cpu_to_le64(percpu_counter_sum(&m->i_caps_hit)); cap->mis = cpu_to_le64(percpu_counter_sum(&m->i_caps_mis)); cap->total = cpu_to_le64(nr_caps); items++; /* encode the read latency metric */ read = (struct ceph_metric_read_latency *)(cap + 1); read->header.type = cpu_to_le32(CLIENT_METRIC_TYPE_READ_LATENCY); read->header.ver = 2; read->header.compat = 1; read->header.data_len = cpu_to_le32(sizeof(*read) - header_len); sum = m->metric[METRIC_READ].latency_sum; ktime_to_ceph_timespec(&read->lat, sum); ktime_to_ceph_timespec(&read->avg, m->metric[METRIC_READ].latency_avg); read->sq_sum = cpu_to_le64(m->metric[METRIC_READ].latency_sq_sum); 
read->count = cpu_to_le64(m->metric[METRIC_READ].total); items++; /* encode the write latency metric */ write = (struct ceph_metric_write_latency *)(read + 1); write->header.type = cpu_to_le32(CLIENT_METRIC_TYPE_WRITE_LATENCY); write->header.ver = 2; write->header.compat = 1; write->header.data_len = cpu_to_le32(sizeof(*write) - header_len); sum = m->metric[METRIC_WRITE].latency_sum; ktime_to_ceph_timespec(&write->lat, sum); ktime_to_ceph_timespec(&write->avg, m->metric[METRIC_WRITE].latency_avg); write->sq_sum = cpu_to_le64(m->metric[METRIC_WRITE].latency_sq_sum); write->count = cpu_to_le64(m->metric[METRIC_WRITE].total); items++; /* encode the metadata latency metric */ meta = (struct ceph_metric_metadata_latency *)(write + 1); meta->header.type = cpu_to_le32(CLIENT_METRIC_TYPE_METADATA_LATENCY); meta->header.ver = 2; meta->header.compat = 1; meta->header.data_len = cpu_to_le32(sizeof(*meta) - header_len); sum = m->metric[METRIC_METADATA].latency_sum; ktime_to_ceph_timespec(&meta->lat, sum); ktime_to_ceph_timespec(&meta->avg, m->metric[METRIC_METADATA].latency_avg); meta->sq_sum = cpu_to_le64(m->metric[METRIC_METADATA].latency_sq_sum); meta->count = cpu_to_le64(m->metric[METRIC_METADATA].total); items++; /* encode the dentry lease metric */ dlease = (struct ceph_metric_dlease *)(meta + 1); dlease->header.type = cpu_to_le32(CLIENT_METRIC_TYPE_DENTRY_LEASE); dlease->header.ver = 1; dlease->header.compat = 1; dlease->header.data_len = cpu_to_le32(sizeof(*dlease) - header_len); dlease->hit = cpu_to_le64(percpu_counter_sum(&m->d_lease_hit)); dlease->mis = cpu_to_le64(percpu_counter_sum(&m->d_lease_mis)); dlease->total = cpu_to_le64(atomic64_read(&m->total_dentries)); items++; sum = percpu_counter_sum(&m->total_inodes); /* encode the opened files metric */ files = (struct ceph_opened_files *)(dlease + 1); files->header.type = cpu_to_le32(CLIENT_METRIC_TYPE_OPENED_FILES); files->header.ver = 1; files->header.compat = 1; files->header.data_len = 
cpu_to_le32(sizeof(*files) - header_len); files->opened_files = cpu_to_le64(atomic64_read(&m->opened_files)); files->total = cpu_to_le64(sum); items++; /* encode the pinned icaps metric */ icaps = (struct ceph_pinned_icaps *)(files + 1); icaps->header.type = cpu_to_le32(CLIENT_METRIC_TYPE_PINNED_ICAPS); icaps->header.ver = 1; icaps->header.compat = 1; icaps->header.data_len = cpu_to_le32(sizeof(*icaps) - header_len); icaps->pinned_icaps = cpu_to_le64(nr_caps); icaps->total = cpu_to_le64(sum); items++; /* encode the opened inodes metric */ inodes = (struct ceph_opened_inodes *)(icaps + 1); inodes->header.type = cpu_to_le32(CLIENT_METRIC_TYPE_OPENED_INODES); inodes->header.ver = 1; inodes->header.compat = 1; inodes->header.data_len = cpu_to_le32(sizeof(*inodes) - header_len); inodes->opened_inodes = cpu_to_le64(percpu_counter_sum(&m->opened_inodes)); inodes->total = cpu_to_le64(sum); items++; /* encode the read io size metric */ rsize = (struct ceph_read_io_size *)(inodes + 1); rsize->header.type = cpu_to_le32(CLIENT_METRIC_TYPE_READ_IO_SIZES); rsize->header.ver = 1; rsize->header.compat = 1; rsize->header.data_len = cpu_to_le32(sizeof(*rsize) - header_len); rsize->total_ops = cpu_to_le64(m->metric[METRIC_READ].total); rsize->total_size = cpu_to_le64(m->metric[METRIC_READ].size_sum); items++; /* encode the write io size metric */ wsize = (struct ceph_write_io_size *)(rsize + 1); wsize->header.type = cpu_to_le32(CLIENT_METRIC_TYPE_WRITE_IO_SIZES); wsize->header.ver = 1; wsize->header.compat = 1; wsize->header.data_len = cpu_to_le32(sizeof(*wsize) - header_len); wsize->total_ops = cpu_to_le64(m->metric[METRIC_WRITE].total); wsize->total_size = cpu_to_le64(m->metric[METRIC_WRITE].size_sum); items++; put_unaligned_le32(items, &head->num); msg->front.iov_len = len; msg->hdr.version = cpu_to_le16(1); msg->hdr.compat_version = cpu_to_le16(1); msg->hdr.front_len = cpu_to_le32(msg->front.iov_len); ceph_con_send(&s->s_con, msg); return true; } static void 
metric_get_session(struct ceph_mds_client *mdsc) { struct ceph_mds_session *s; int i; mutex_lock(&mdsc->mutex); for (i = 0; i < mdsc->max_sessions; i++) { s = __ceph_lookup_mds_session(mdsc, i); if (!s) continue; /* * Skip it if MDS doesn't support the metric collection, * or the MDS will close the session's socket connection * directly when it get this message. */ if (check_session_state(s) && test_bit(CEPHFS_FEATURE_METRIC_COLLECT, &s->s_features)) { mdsc->metric.session = s; break; } ceph_put_mds_session(s); } mutex_unlock(&mdsc->mutex); } static void metric_delayed_work(struct work_struct *work) { struct ceph_client_metric *m = container_of(work, struct ceph_client_metric, delayed_work.work); struct ceph_mds_client *mdsc = container_of(m, struct ceph_mds_client, metric); if (mdsc->stopping || disable_send_metrics) return; if (!m->session || !check_session_state(m->session)) { if (m->session) { ceph_put_mds_session(m->session); m->session = NULL; } metric_get_session(mdsc); } if (m->session) { ceph_mdsc_send_metrics(mdsc, m->session); metric_schedule_delayed(m); } } int ceph_metric_init(struct ceph_client_metric *m) { struct ceph_metric *metric; int ret, i; if (!m) return -EINVAL; atomic64_set(&m->total_dentries, 0); ret = percpu_counter_init(&m->d_lease_hit, 0, GFP_KERNEL); if (ret) return ret; ret = percpu_counter_init(&m->d_lease_mis, 0, GFP_KERNEL); if (ret) goto err_d_lease_mis; atomic64_set(&m->total_caps, 0); ret = percpu_counter_init(&m->i_caps_hit, 0, GFP_KERNEL); if (ret) goto err_i_caps_hit; ret = percpu_counter_init(&m->i_caps_mis, 0, GFP_KERNEL); if (ret) goto err_i_caps_mis; for (i = 0; i < METRIC_MAX; i++) { metric = &m->metric[i]; spin_lock_init(&metric->lock); metric->size_sum = 0; metric->size_min = U64_MAX; metric->size_max = 0; metric->total = 0; metric->latency_sum = 0; metric->latency_avg = 0; metric->latency_sq_sum = 0; metric->latency_min = KTIME_MAX; metric->latency_max = 0; } atomic64_set(&m->opened_files, 0); ret = 
percpu_counter_init(&m->opened_inodes, 0, GFP_KERNEL); if (ret) goto err_opened_inodes; ret = percpu_counter_init(&m->total_inodes, 0, GFP_KERNEL); if (ret) goto err_total_inodes; m->session = NULL; INIT_DELAYED_WORK(&m->delayed_work, metric_delayed_work); return 0; err_total_inodes: percpu_counter_destroy(&m->opened_inodes); err_opened_inodes: percpu_counter_destroy(&m->i_caps_mis); err_i_caps_mis: percpu_counter_destroy(&m->i_caps_hit); err_i_caps_hit: percpu_counter_destroy(&m->d_lease_mis); err_d_lease_mis: percpu_counter_destroy(&m->d_lease_hit); return ret; } void ceph_metric_destroy(struct ceph_client_metric *m) { if (!m) return; cancel_delayed_work_sync(&m->delayed_work); percpu_counter_destroy(&m->total_inodes); percpu_counter_destroy(&m->opened_inodes); percpu_counter_destroy(&m->i_caps_mis); percpu_counter_destroy(&m->i_caps_hit); percpu_counter_destroy(&m->d_lease_mis); percpu_counter_destroy(&m->d_lease_hit); ceph_put_mds_session(m->session); } #define METRIC_UPDATE_MIN_MAX(min, max, new) \ { \ if (unlikely(new < min)) \ min = new; \ if (unlikely(new > max)) \ max = new; \ } static inline void __update_mean_and_stdev(ktime_t total, ktime_t *lavg, ktime_t *sq_sump, ktime_t lat) { ktime_t avg; if (unlikely(total == 1)) { *lavg = lat; } else { /* the sq is (lat - old_avg) * (lat - new_avg) */ avg = *lavg + div64_s64(lat - *lavg, total); *sq_sump += (lat - *lavg)*(lat - avg); *lavg = avg; } } void ceph_update_metrics(struct ceph_metric *m, ktime_t r_start, ktime_t r_end, unsigned int size, int rc) { ktime_t lat = ktime_sub(r_end, r_start); ktime_t total; if (unlikely(rc < 0 && rc != -ENOENT && rc != -ETIMEDOUT)) return; spin_lock(&m->lock); total = ++m->total; m->size_sum += size; METRIC_UPDATE_MIN_MAX(m->size_min, m->size_max, size); m->latency_sum += lat; METRIC_UPDATE_MIN_MAX(m->latency_min, m->latency_max, lat); __update_mean_and_stdev(total, &m->latency_avg, &m->latency_sq_sum, lat); spin_unlock(&m->lock); }
17 15 1 20 1 1 2 1 27 27 8 9 14 4 20 27 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 // SPDX-License-Identifier: GPL-2.0 #include <asm/ioctls.h> #include <linux/io_uring/net.h> #include <linux/errqueue.h> #include <net/sock.h> #include "uring_cmd.h" #include "io_uring.h" static int io_uring_cmd_get_sock_ioctl(struct socket *sock, int op) { struct sock *sk = sock->sk; struct proto *prot = READ_ONCE(sk->sk_prot); int ret, arg = 0; if (!prot || !prot->ioctl) return -EOPNOTSUPP; ret = prot->ioctl(sk, op, &arg); if (ret) return ret; return arg; } static inline int io_uring_cmd_getsockopt(struct socket *sock, struct io_uring_cmd *cmd, unsigned int issue_flags) { const struct io_uring_sqe *sqe = cmd->sqe; bool compat = !!(issue_flags & IO_URING_F_COMPAT); int optlen, optname, level, err; void __user *optval; level = READ_ONCE(sqe->level); if (level != SOL_SOCKET) return -EOPNOTSUPP; optval = u64_to_user_ptr(READ_ONCE(sqe->optval)); optname = READ_ONCE(sqe->optname); optlen = READ_ONCE(sqe->optlen); err = do_sock_getsockopt(sock, compat, level, optname, USER_SOCKPTR(optval), KERNEL_SOCKPTR(&optlen)); if (err) return err; /* On success, return optlen */ return optlen; } static inline int io_uring_cmd_setsockopt(struct socket *sock, struct io_uring_cmd *cmd, unsigned int issue_flags) { const struct io_uring_sqe *sqe = cmd->sqe; bool compat = !!(issue_flags & 
IO_URING_F_COMPAT); int optname, optlen, level; void __user *optval; sockptr_t optval_s; optval = u64_to_user_ptr(READ_ONCE(sqe->optval)); optname = READ_ONCE(sqe->optname); optlen = READ_ONCE(sqe->optlen); level = READ_ONCE(sqe->level); optval_s = USER_SOCKPTR(optval); return do_sock_setsockopt(sock, compat, level, optname, optval_s, optlen); } static bool io_process_timestamp_skb(struct io_uring_cmd *cmd, struct sock *sk, struct sk_buff *skb, unsigned issue_flags) { struct sock_exterr_skb *serr = SKB_EXT_ERR(skb); struct io_uring_cqe cqe[2]; struct io_timespec *iots; struct timespec64 ts; u32 tstype, tskey; int ret; BUILD_BUG_ON(sizeof(struct io_uring_cqe) != sizeof(struct io_timespec)); ret = skb_get_tx_timestamp(skb, sk, &ts); if (ret < 0) return false; tskey = serr->ee.ee_data; tstype = serr->ee.ee_info; cqe->user_data = 0; cqe->res = tskey; cqe->flags = IORING_CQE_F_MORE | ctx_cqe32_flags(cmd_to_io_kiocb(cmd)->ctx); cqe->flags |= tstype << IORING_TIMESTAMP_TYPE_SHIFT; if (ret == SOF_TIMESTAMPING_TX_HARDWARE) cqe->flags |= IORING_CQE_F_TSTAMP_HW; iots = (struct io_timespec *)&cqe[1]; iots->tv_sec = ts.tv_sec; iots->tv_nsec = ts.tv_nsec; return io_uring_cmd_post_mshot_cqe32(cmd, issue_flags, cqe); } static int io_uring_cmd_timestamp(struct socket *sock, struct io_uring_cmd *cmd, unsigned int issue_flags) { struct sock *sk = sock->sk; struct sk_buff_head *q = &sk->sk_error_queue; struct sk_buff *skb, *tmp; struct sk_buff_head list; int ret; if (!(issue_flags & IO_URING_F_CQE32)) return -EINVAL; ret = io_cmd_poll_multishot(cmd, issue_flags, EPOLLERR); if (unlikely(ret)) return ret; if (skb_queue_empty_lockless(q)) return -EAGAIN; __skb_queue_head_init(&list); scoped_guard(spinlock_irq, &q->lock) { skb_queue_walk_safe(q, skb, tmp) { /* don't support skbs with payload */ if (!skb_has_tx_timestamp(skb, sk) || skb->len) continue; __skb_unlink(skb, q); __skb_queue_tail(&list, skb); } } while (1) { skb = skb_peek(&list); if (!skb) break; if 
(!io_process_timestamp_skb(cmd, sk, skb, issue_flags)) break; __skb_dequeue(&list); consume_skb(skb); } if (!unlikely(skb_queue_empty(&list))) { scoped_guard(spinlock_irqsave, &q->lock) skb_queue_splice(&list, q); } return -EAGAIN; } static int io_uring_cmd_getsockname(struct socket *sock, struct io_uring_cmd *cmd, unsigned int issue_flags) { const struct io_uring_sqe *sqe = cmd->sqe; struct sockaddr __user *uaddr; unsigned int peer; int __user *ulen; if (sqe->ioprio || sqe->__pad1 || sqe->len || sqe->rw_flags) return -EINVAL; uaddr = u64_to_user_ptr(READ_ONCE(sqe->addr)); ulen = u64_to_user_ptr(READ_ONCE(sqe->addr3)); peer = READ_ONCE(sqe->optlen); if (peer > 1) return -EINVAL; return do_getsockname(sock, peer, uaddr, ulen); } int io_uring_cmd_sock(struct io_uring_cmd *cmd, unsigned int issue_flags) { struct socket *sock = cmd->file->private_data; switch (cmd->cmd_op) { case SOCKET_URING_OP_SIOCINQ: return io_uring_cmd_get_sock_ioctl(sock, SIOCINQ); case SOCKET_URING_OP_SIOCOUTQ: return io_uring_cmd_get_sock_ioctl(sock, SIOCOUTQ); case SOCKET_URING_OP_GETSOCKOPT: return io_uring_cmd_getsockopt(sock, cmd, issue_flags); case SOCKET_URING_OP_SETSOCKOPT: return io_uring_cmd_setsockopt(sock, cmd, issue_flags); case SOCKET_URING_OP_TX_TIMESTAMP: return io_uring_cmd_timestamp(sock, cmd, issue_flags); case SOCKET_URING_OP_GETSOCKNAME: return io_uring_cmd_getsockname(sock, cmd, issue_flags); default: return -EOPNOTSUPP; } } EXPORT_SYMBOL_GPL(io_uring_cmd_sock);
15 1238 15 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _ASM_X86_LOCAL_H
#define _ASM_X86_LOCAL_H

#include <linux/percpu.h>

#include <linux/atomic.h>
#include <asm/asm.h>

/* local_t wraps a single atomic_long_t; every helper below operates on it. */
typedef struct {
	atomic_long_t a;
} local_t;

/* Static initializer for a local_t holding the value @i. */
#define LOCAL_INIT(i)	{ ATOMIC_LONG_INIT(i) }

/* Read / write the counter through the atomic_long accessors. */
#define local_read(l)	atomic_long_read(&(l)->a)
#define local_set(l, i)	atomic_long_set(&(l)->a, (i))

/**
 * local_inc - increment a local_t
 * @l: pointer to type local_t
 *
 * Applies _ASM_INC directly to the counter as a read-write memory operand.
 */
static inline void local_inc(local_t *l)
{
	asm volatile(_ASM_INC "%0"
		     : "+m" (l->a.counter));
}

/**
 * local_dec - decrement a local_t
 * @l: pointer to type local_t
 *
 * Applies _ASM_DEC directly to the counter as a read-write memory operand.
 */
static inline void local_dec(local_t *l)
{
	asm volatile(_ASM_DEC "%0"
		     : "+m" (l->a.counter));
}

/**
 * local_add - add a value to a local_t
 * @i: value to add (passed as an immediate or register operand)
 * @l: pointer to type local_t
 */
static inline void local_add(long i, local_t *l)
{
	asm volatile(_ASM_ADD "%1,%0"
		     : "+m" (l->a.counter)
		     : "ir" (i));
}

/**
 * local_sub - subtract a value from a local_t
 * @i: value to subtract (passed as an immediate or register operand)
 * @l: pointer to type local_t
 */
static inline void local_sub(long i, local_t *l)
{
	asm volatile(_ASM_SUB "%1,%0"
		     : "+m" (l->a.counter)
		     : "ir" (i));
}

/**
 * local_sub_and_test - subtract value from variable and test result
 * @i: integer value to subtract
 * @l: pointer to type local_t
 *
 * Atomically subtracts @i from @l and returns
 * true if the result is zero, or false for all
 * other cases.
 */
static inline bool local_sub_and_test(long i, local_t *l)
{
	return GEN_BINARY_RMWcc(_ASM_SUB, l->a.counter, e, "er", i);
}

/**
 * local_dec_and_test - decrement and test
 * @l: pointer to type local_t
 *
 * Atomically decrements @l by 1 and
 * returns true if the result is 0, or false for all other
 * cases.
 */
static inline bool local_dec_and_test(local_t *l)
{
	return GEN_UNARY_RMWcc(_ASM_DEC, l->a.counter, e);
}

/**
 * local_inc_and_test - increment and test
 * @l: pointer to type local_t
 *
 * Atomically increments @l by 1
 * and returns true if the result is zero, or false for all
 * other cases.
 */
static inline bool local_inc_and_test(local_t *l)
{
	return GEN_UNARY_RMWcc(_ASM_INC, l->a.counter, e);
}

/**
 * local_add_negative - add and test if negative
 * @i: integer value to add
 * @l: pointer to type local_t
 *
 * Atomically adds @i to @l and returns true
 * if the result is negative, or false when
 * result is greater than or equal to zero.
 */
static inline bool local_add_negative(long i, local_t *l)
{
	return GEN_BINARY_RMWcc(_ASM_ADD, l->a.counter, s, "er", i);
}

/**
 * local_add_return - add and return
 * @i: integer value to add
 * @l: pointer to type local_t
 *
 * Atomically adds @i to @l and returns @i + @l
 */
static inline long local_add_return(long i, local_t *l)
{
	/* Keep the addend: the asm below overwrites i (it is a "+r" operand). */
	long __i = i;
	asm volatile(_ASM_XADD "%0, %1"
		     : "+r" (i), "+m" (l->a.counter)
		     : : "memory");
	/* NOTE(review): i presumably holds the pre-add counter here (XADD semantics) */
	return i + __i;
}

/**
 * local_sub_return - subtract and return
 * @i: integer value to subtract
 * @l: pointer to type local_t
 *
 * Implemented as local_add_return() with the negated value.
 */
static inline long local_sub_return(long i, local_t *l)
{
	return local_add_return(-i, l);
}

/* Add/subtract 1 and return the result, via the helpers above. */
#define local_inc_return(l)  (local_add_return(1, l))
#define local_dec_return(l)  (local_sub_return(1, l))

/**
 * local_cmpxchg - compare and exchange via cmpxchg_local()
 * @l: pointer to type local_t
 * @old: value the counter is compared against
 * @new: value to store
 *
 * Returns whatever cmpxchg_local() returns.
 */
static inline long local_cmpxchg(local_t *l, long old, long new)
{
	return cmpxchg_local(&l->a.counter, old, new);
}

/**
 * local_try_cmpxchg - compare and exchange via try_cmpxchg_local()
 * @l: pointer to type local_t
 * @old: pointer to the expected value
 * @new: value to store
 *
 * Returns whatever try_cmpxchg_local() returns; @old is cast to a pointer
 * to the counter's own type before being passed on.
 */
static inline bool local_try_cmpxchg(local_t *l, long *old, long new)
{
	return try_cmpxchg_local(&l->a.counter,
				 (typeof(l->a.counter) *) old, new);
}

/*
 * Implement local_xchg using CMPXCHG instruction without the LOCK prefix.
 * XCHG is expensive due to the implied LOCK prefix.  The processor
 * cannot prefetch cachelines if XCHG is used.
 */
static __always_inline long local_xchg(local_t *l, long n)
{
	long c = local_read(l);

	/*
	 * Empty body on purpose: retry until the compare-exchange succeeds.
	 * NOTE(review): relies on local_try_cmpxchg() refreshing c on
	 * failure - confirm against try_cmpxchg_local().
	 */
	do { } while (!local_try_cmpxchg(l, &c, n));

	return c;
}

/**
 * local_add_unless - add unless the number is already a given value
 * @l: pointer of type local_t
 * @a: the amount to add to l...
 * @u: ...unless l is equal to u.
 *
 * Atomically adds @a to @l, if @l was not already @u.
 * Returns true if the addition was done.
 */
static __always_inline bool
local_add_unless(local_t *l, long a, long u)
{
	long c = local_read(l);

	do {
		if (unlikely(c == u))
			return false;
	} while (!local_try_cmpxchg(l, &c, c + a));

	return true;
}

/* Increment @l unless it is currently zero; true if the increment was done. */
#define local_inc_not_zero(l) local_add_unless((l), 1, 0)

/* On x86_32, these are no better than the atomic variants.
 * On x86-64 these are better than the atomic variants on SMP kernels
 * because they don't use a lock prefix.
 */
#define __local_inc(l)		local_inc(l)
#define __local_dec(l)		local_dec(l)
#define __local_add(i, l)	local_add((i), (l))
#define __local_sub(i, l)	local_sub((i), (l))

#endif /* _ASM_X86_LOCAL_H */
23 23 23 23 5 23 23 13 13 12 12 23 23 23 13 12 23 23 23 23 23 23 23 3 23 23 23 23 20 1 1 1 20 19 19 19 5 5 5 5 5 5 5 5 13 5 5 5 13 13 13 5 18 5 5 5 5 5 5 5 21 21 21 21 21 7 7 7 3 5 5 2 2 2 2 2 2 2 19 19 19 19 2 2 2 19 19 19 19 19 19 19 19 19 23 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 2 5 5 5 5 21 5 21 21 21 21 21 21 21 21 21 23 21 22 23 23 23 21 21 21 21 21 21 21 21 21 21 21 3 21 21 21 11 11 11 20 20 17 20 17 20 20 20 20 17 20 21 21 21 19 15 15 13 8 13 7 2 1 1 22 23 3 3 20 1 24 24 24 23 21 3 21 15 13 8 7 8 7 6 15 24 15 13 4 12 9 4 13 1 1 1 1 1 1 1 1 7 2 7 1 2 1 2 1 2 2 2 4 4 4 1 1 1 1 4 4 1 1 1 1 1 4 68 67 66 66 66 65 44 43 42 42 42 42 109 110 68 44 110 109 108 108 108 13 13 108 110 13 8 11 10 8 3 120 120 116 111 111 2 112 110 110 2 103 12 11 101 82 91 105 3 6 4 7 11 11 6 7 14 21 26 4 26 7 20 19 19 19 7 3 2 2 2 18 12 5 5 3 2 1 4 2 1 1 1 1 14 5 2 4 4 8 8 5 5 5 8 9 8 9 1 1 25 25 25 25 4 62 25 25 5 5 3 1 1 2 2 2 5 2 5 3 5 3 4 3 4 8 8 5 4 3 8 8 8 8 8 2 1 7 6 2 3 5 8 7 7 7 7 6 5 4 3 9 8 2 1 7 9 23 20 5 16 2 16 2 15 3 14 2 14 5 10 4 8 4 66 66 65 65 3 1 1 1 1 62 61 61 61 61 61 61 61 2 60 3 59 1 59 3 57 3 57 2 6 1 55 55 51 10 19 21 66 90 1 90 87 90 77 85 78 11 9 85 74 73 66 66 21 51 5 3 51 51 51 24 23 8 8 24 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 1 1 1 1 1 2 2 2 2 2 2 5 3 3 2 2 2 2 10 7 10 2 3 3 1 2 1 2 1 1 1 4 3 3 4 1 1 1 1 3 3 3 2 1 6 5 4 3 3 6 3 3 3 1 1 1 1 1 1 1 1 1 1 1 1 1 85 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 
175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 
675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 
1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 
1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 
1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282 2283 2284 2285 2286 2287 2288 2289 2290 2291 2292 2293 2294 2295 2296 2297 2298 2299 2300 2301 2302 2303 2304 2305 2306 2307 2308 2309 2310 2311 2312 2313 2314 2315 2316 2317 2318 2319 2320 2321 2322 2323 2324 2325 2326 2327 2328 2329 2330 2331 2332 2333 2334 2335 2336 2337 2338 2339 
2340 2341 2342 2343 2344 2345 2346 2347 2348 2349 2350 2351 2352 2353 2354 2355 2356 2357 2358 2359 2360 2361 2362 2363 2364 2365 2366 2367 2368 2369 2370 2371 2372 2373 2374 2375 2376 2377 2378 2379 2380 2381 2382 2383 2384 2385 2386 2387 2388 2389 2390 2391 2392 2393 2394 2395 2396 2397 2398 2399 2400 2401 2402 2403 2404 2405 2406 2407 2408 2409 2410 2411 2412 2413 2414 2415 2416 2417 2418 2419 2420 2421 2422 2423 2424 2425 2426 2427 2428 2429 2430 2431 2432 2433 2434 2435 2436 2437 2438 2439 2440 2441 2442 2443 2444 2445 2446 2447 2448 2449 2450 2451 2452 2453 2454 2455 2456 2457 2458 2459 2460 2461 2462 2463 2464 2465 2466 2467 2468 2469 2470 2471 2472 2473 2474 2475 2476 2477 2478 2479 2480 2481 2482 2483 2484 2485 2486 2487 2488 2489 2490 2491 2492 2493 2494 2495 2496 2497 2498 2499 2500 2501 2502 2503 2504 2505 2506 2507 2508 2509 2510 2511 2512 2513 2514 2515 2516 2517 2518 2519 2520 2521 2522 2523 2524 2525 2526 2527 2528 2529 2530 2531 2532 2533 2534 2535 2536 2537 2538 2539 2540 2541 2542 2543 2544 2545 2546 2547 2548 2549 2550 2551 2552 2553 2554 2555 2556 2557 2558 2559 2560 2561 2562 2563 2564 2565 2566 2567 2568 2569 2570 2571 2572 2573 2574 2575 2576 2577 2578 2579 2580 2581 2582 2583 2584 2585 2586 2587 2588 2589 2590 2591 2592 2593 2594 2595 2596 2597 2598 2599 2600 2601 2602 2603 2604 2605 2606 2607 2608 2609 2610 2611 2612 2613 2614 2615 2616 2617 2618 2619 2620 2621 2622 2623 2624 2625 2626 2627 2628 2629 2630 2631 2632 2633 2634 2635 2636 2637 2638 2639 2640 2641 2642 2643 2644 2645 2646 2647 2648 2649 2650 2651 2652 2653 2654 2655 2656 2657 2658 2659 2660 2661 2662 2663 2664 2665 2666 2667 2668 2669 2670 2671 2672 2673 2674 2675 2676 2677 2678 2679 2680 2681 2682 2683 2684 2685 2686 2687 2688 2689 2690 2691 2692 2693 2694 2695 2696 2697 2698 2699 2700 2701 2702 2703 2704 2705 2706 2707 2708 2709 2710 2711 2712 2713 2714 2715 2716 2717 2718 2719 2720 2721 2722 2723 2724 2725 2726 2727 2728 2729 2730 2731 2732 2733 2734 2735 2736 2737 2738 2739 
2740 2741 2742 2743 2744 2745 2746 2747 2748 2749 2750 2751 2752 2753 2754 2755 2756 2757 2758 2759 2760 2761 2762 2763 2764 2765 2766 2767 2768 2769 2770 2771 2772 2773 2774 2775 2776 2777 2778 2779 2780 2781 2782 2783 2784 2785 2786 2787 2788 2789 2790 2791 2792 2793 2794 2795 2796 2797 2798 2799 2800 2801 2802 2803 2804 2805 2806 2807 2808 2809 2810 2811 2812 2813 2814 2815 2816 2817 2818 2819 2820 2821 2822 2823 2824 2825 2826 2827 2828 2829 2830 2831 2832 2833 2834 2835 2836 2837 2838 2839 2840 2841 2842 2843 2844 2845 2846 2847 2848 2849 2850 2851 2852 2853 2854 2855 2856 2857 2858 2859 2860 2861 2862 2863 2864 2865 2866 2867 2868 2869 2870 2871 2872 2873 2874 2875 2876 2877 2878 2879 2880 2881 2882 2883 2884 2885 2886 2887 2888 2889 2890 2891 2892 2893 2894 2895 2896 2897 2898 2899 2900 2901 2902 2903 2904 2905 2906 2907 2908 2909 2910 2911 2912 2913 2914 2915 2916 2917 2918 2919 2920 2921 2922 2923 2924 2925 2926 2927 2928 2929 2930 2931 2932 2933 2934 2935 2936 2937 2938 2939 2940 2941 2942 2943 2944 2945 2946 2947 2948 2949 2950 2951 2952 2953 2954 2955 2956 2957 2958 2959 2960 2961 2962 2963 2964 2965 2966 2967 2968 2969 2970 2971 2972 2973 2974 2975 2976 2977 2978 2979 2980 2981 2982 2983 2984 2985 2986 2987 2988 2989 2990 2991 2992 2993 2994 2995 2996 2997 2998 2999 3000 3001 3002 3003 3004 3005 3006 3007 3008 3009 3010 3011 3012 3013 3014 3015 3016 3017 3018 3019 3020 3021 3022 3023 3024 3025 3026 3027 3028 3029 3030 3031 3032 3033 3034 3035 3036 3037 3038 3039 3040 3041 3042 3043 3044 3045 3046 3047 3048 3049 3050 3051 3052 3053 3054 3055 3056 3057 3058 3059 3060 3061 3062 3063 3064 3065 3066 3067 3068 3069 3070 3071 3072 3073 3074 3075 3076 3077 3078 3079 3080 3081 3082 3083 3084 3085 3086 3087 3088 3089 3090 3091 3092 3093 3094 3095 3096 3097 3098 3099 3100 3101 3102 3103 3104 3105 3106 3107 3108 3109 3110 3111 3112 3113 3114 3115 3116 3117 3118 3119 3120 3121 3122 3123 3124 3125 3126 3127 3128 3129 3130 3131 3132 3133 3134 3135 3136 3137 3138 3139 
3140 3141 3142 3143 3144 3145 3146 3147 3148 3149 3150 3151 3152 3153 3154 3155 3156 3157 3158 3159 3160 3161 3162 3163 3164 3165 3166 3167 3168 3169 3170 3171 3172 3173 3174 3175 3176 3177 3178 3179 3180 3181 3182 3183 3184 3185 3186 3187 3188 3189 3190 3191 3192 3193 3194 3195 3196 3197 3198 3199 3200 3201 3202 3203 3204 3205 3206 3207 3208 3209 3210 3211 3212 3213 3214 3215 3216 3217 3218 3219 3220 3221 3222 3223 3224 3225 3226 3227 3228 3229 3230 3231 3232 3233 3234 3235 3236 3237 3238 3239 3240 3241 3242 3243 3244 3245 3246 3247 3248 3249 3250 3251 3252 3253 3254 3255 3256 3257 3258 3259 3260 3261 3262 3263 3264 3265 3266 3267 3268 3269 3270 3271 3272 3273 3274 3275 3276 3277 3278 3279 3280 3281 3282 3283 3284 3285 3286 3287 3288 3289 3290 3291 3292 3293 3294 3295 3296 3297 3298 3299 3300 3301 3302 3303 3304 3305 3306 3307 3308 3309 3310 3311 3312 3313 3314 3315 3316 3317 3318 3319 3320 3321 3322 3323 3324 3325 3326 3327 3328 3329 3330 3331 3332 3333 3334 3335 3336 3337 3338 3339 3340 3341 3342 3343 3344 3345 3346 3347 3348 3349 3350 3351 3352 3353 3354 3355 3356 3357 3358 3359 3360 3361 3362 3363 3364 3365 3366 3367 3368 3369 3370 3371 3372 3373 3374 3375 3376 3377 3378 3379 3380 3381 3382 3383 3384 3385 3386 3387 3388 3389 3390 3391 3392 3393 3394 3395 3396 3397 3398 3399 3400 3401 3402 3403 3404 3405 3406 3407 3408 3409 3410 3411 3412 3413 3414 3415 3416 3417 3418 3419 3420 3421 3422 3423 3424 3425 3426 3427 3428 3429 3430 3431 3432 3433 3434 3435 3436 3437 3438 3439 3440 3441 3442 3443 3444 3445 3446 3447 3448 3449 3450 3451 3452 3453 3454 3455 3456 3457 3458 3459 3460 3461 3462 3463 3464 3465 3466 3467 3468 3469 3470 3471 3472 3473 3474 3475 3476 3477 3478 3479 3480 3481 3482 3483 3484 3485 3486 3487 3488 3489 3490 3491 3492 3493 3494 3495 3496 3497 3498 3499 3500 3501 3502 3503 3504 3505 3506 3507 3508 3509 3510 3511 3512 3513 3514 3515 3516 3517 3518 3519 3520 3521 3522 3523 3524 3525 3526 3527 3528 3529 3530 3531 3532 3533 3534 3535 3536 3537 3538 3539 
3540 3541 3542 3543 3544 3545 3546 3547 3548 3549 3550 3551 3552 3553 3554 3555 3556 3557 3558 3559 3560 3561 3562 3563 3564 3565 3566 3567 3568 3569 3570 3571 3572 3573 3574 3575 3576 3577 3578 3579 3580 3581 3582 3583 3584 3585 3586 3587 3588 3589 3590 3591 3592 3593 3594 3595 3596 3597 3598 3599 3600 3601 3602 3603 3604 3605 3606 3607 3608 3609 3610 3611 3612 3613 3614 3615 3616 3617 3618 3619 3620 3621 3622 3623 3624 3625 3626 3627 3628 3629 3630 3631 3632 3633 3634 3635 3636 3637 3638 3639 3640 3641 3642 3643 3644 3645 3646 3647 3648 3649 3650 3651 3652 3653 3654 3655 3656 3657 3658 3659 3660 3661 3662 3663 3664 3665 3666 3667 3668 3669 3670 3671 3672 3673 3674 3675 3676 3677 3678 3679 3680 3681 3682 3683 3684 3685 3686 3687 3688 3689 3690 3691 3692 3693 3694 3695 3696 3697 3698 3699 3700 3701 3702 3703 3704 3705 3706 3707 3708 3709 3710 3711 3712 3713 3714 3715 3716 3717 3718 3719 3720 3721 3722 3723 3724 3725 3726 3727 3728 3729 3730 3731 3732 3733 3734 3735 3736 3737 3738 3739 3740 3741 3742 3743 3744 3745 3746 3747 3748 3749 3750 3751 3752 3753 3754 3755 3756 3757 3758 3759 3760 3761 3762 3763 3764 3765 3766 3767 3768 3769 3770 3771 3772 3773 3774 3775 3776 3777 3778 3779 3780 3781 3782 3783 3784 3785 3786 3787 3788 3789 3790 3791 3792 3793 3794 3795 3796 3797 3798 3799 3800 3801 3802 3803 3804 3805 3806 3807 3808 3809 3810 3811 3812 3813 3814 3815 3816 3817 3818 3819 3820 3821 3822 3823 3824 3825 3826 3827 3828 3829 3830 3831 3832 3833 3834 3835 3836 3837 3838 3839 3840 3841 3842 3843 3844 3845 3846 3847 3848 3849 3850 3851 3852 3853 3854 3855 3856 3857 3858 3859 3860 3861 3862 3863 3864 3865 3866 3867 3868 3869 3870 3871 3872 3873 3874 3875 3876 3877 3878 3879 3880 3881 3882 3883 3884 3885 3886 3887 3888 3889 3890 3891 3892 3893 3894 3895 3896 3897 3898 3899 3900 3901 3902 3903 3904 3905 3906 3907 3908 3909 3910 3911 3912 3913 3914 3915 3916 // SPDX-License-Identifier: GPL-2.0 /* Connection tracking via netlink socket. 
Allows for user space * protocol helpers and general trouble making from userspace. * * (C) 2001 by Jay Schulist <jschlst@samba.org> * (C) 2002-2006 by Harald Welte <laforge@gnumonks.org> * (C) 2003 by Patrick Mchardy <kaber@trash.net> * (C) 2005-2012 by Pablo Neira Ayuso <pablo@netfilter.org> * * Initial connection tracking via netlink development funded and * generally made possible by Network Robots, Inc. (www.networkrobots.com) * * Further development of this code funded by Astaro AG (http://www.astaro.com) */ #include <linux/init.h> #include <linux/module.h> #include <linux/kernel.h> #include <linux/rculist.h> #include <linux/rculist_nulls.h> #include <linux/types.h> #include <linux/timer.h> #include <linux/security.h> #include <linux/skbuff.h> #include <linux/errno.h> #include <linux/netlink.h> #include <linux/spinlock.h> #include <linux/interrupt.h> #include <linux/slab.h> #include <linux/siphash.h> #include <linux/netfilter.h> #include <net/ipv6.h> #include <net/netlink.h> #include <net/sock.h> #include <net/netfilter/nf_conntrack.h> #include <net/netfilter/nf_conntrack_core.h> #include <net/netfilter/nf_conntrack_expect.h> #include <net/netfilter/nf_conntrack_helper.h> #include <net/netfilter/nf_conntrack_seqadj.h> #include <net/netfilter/nf_conntrack_l4proto.h> #include <net/netfilter/nf_conntrack_tuple.h> #include <net/netfilter/nf_conntrack_acct.h> #include <net/netfilter/nf_conntrack_zones.h> #include <net/netfilter/nf_conntrack_timestamp.h> #include <net/netfilter/nf_conntrack_labels.h> #include <net/netfilter/nf_conntrack_synproxy.h> #if IS_ENABLED(CONFIG_NF_NAT) #include <net/netfilter/nf_nat.h> #include <net/netfilter/nf_nat_helper.h> #endif #include <linux/netfilter/nfnetlink.h> #include <linux/netfilter/nfnetlink_conntrack.h> #include "nf_internals.h" MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("List and change connection tracking table"); struct ctnetlink_list_dump_ctx { unsigned long last_id; unsigned int cpu; bool done; }; static int 
ctnetlink_dump_tuples_proto(struct sk_buff *skb, const struct nf_conntrack_tuple *tuple, const struct nf_conntrack_l4proto *l4proto) { int ret = 0; struct nlattr *nest_parms; nest_parms = nla_nest_start(skb, CTA_TUPLE_PROTO); if (!nest_parms) goto nla_put_failure; if (nla_put_u8(skb, CTA_PROTO_NUM, tuple->dst.protonum)) goto nla_put_failure; if (likely(l4proto->tuple_to_nlattr)) ret = l4proto->tuple_to_nlattr(skb, tuple); nla_nest_end(skb, nest_parms); return ret; nla_put_failure: return -1; } static int ipv4_tuple_to_nlattr(struct sk_buff *skb, const struct nf_conntrack_tuple *tuple) { if (nla_put_in_addr(skb, CTA_IP_V4_SRC, tuple->src.u3.ip) || nla_put_in_addr(skb, CTA_IP_V4_DST, tuple->dst.u3.ip)) return -EMSGSIZE; return 0; } static int ipv6_tuple_to_nlattr(struct sk_buff *skb, const struct nf_conntrack_tuple *tuple) { if (nla_put_in6_addr(skb, CTA_IP_V6_SRC, &tuple->src.u3.in6) || nla_put_in6_addr(skb, CTA_IP_V6_DST, &tuple->dst.u3.in6)) return -EMSGSIZE; return 0; } static int ctnetlink_dump_tuples_ip(struct sk_buff *skb, const struct nf_conntrack_tuple *tuple) { int ret = 0; struct nlattr *nest_parms; nest_parms = nla_nest_start(skb, CTA_TUPLE_IP); if (!nest_parms) goto nla_put_failure; switch (tuple->src.l3num) { case NFPROTO_IPV4: ret = ipv4_tuple_to_nlattr(skb, tuple); break; case NFPROTO_IPV6: ret = ipv6_tuple_to_nlattr(skb, tuple); break; } nla_nest_end(skb, nest_parms); return ret; nla_put_failure: return -1; } static int ctnetlink_dump_tuples(struct sk_buff *skb, const struct nf_conntrack_tuple *tuple) { const struct nf_conntrack_l4proto *l4proto; int ret; rcu_read_lock(); ret = ctnetlink_dump_tuples_ip(skb, tuple); if (ret >= 0) { l4proto = nf_ct_l4proto_find(tuple->dst.protonum); ret = ctnetlink_dump_tuples_proto(skb, tuple, l4proto); } rcu_read_unlock(); return ret; } static int ctnetlink_dump_zone_id(struct sk_buff *skb, int attrtype, const struct nf_conntrack_zone *zone, int dir) { if (zone->id == NF_CT_DEFAULT_ZONE_ID || zone->dir != dir) return 
0; if (nla_put_be16(skb, attrtype, htons(zone->id))) goto nla_put_failure; return 0; nla_put_failure: return -1; } static int ctnetlink_dump_status(struct sk_buff *skb, const struct nf_conn *ct) { if (nla_put_be32(skb, CTA_STATUS, htonl(ct->status))) goto nla_put_failure; return 0; nla_put_failure: return -1; } static int ctnetlink_dump_timeout(struct sk_buff *skb, const struct nf_conn *ct, bool skip_zero) { long timeout; if (nf_ct_is_confirmed(ct)) timeout = nf_ct_expires(ct) / HZ; else timeout = ct->timeout / HZ; if (skip_zero && timeout == 0) return 0; if (nla_put_be32(skb, CTA_TIMEOUT, htonl(timeout))) goto nla_put_failure; return 0; nla_put_failure: return -1; } static int ctnetlink_dump_protoinfo(struct sk_buff *skb, struct nf_conn *ct, bool destroy) { const struct nf_conntrack_l4proto *l4proto; struct nlattr *nest_proto; int ret; l4proto = nf_ct_l4proto_find(nf_ct_protonum(ct)); if (!l4proto->to_nlattr) return 0; nest_proto = nla_nest_start(skb, CTA_PROTOINFO); if (!nest_proto) goto nla_put_failure; ret = l4proto->to_nlattr(skb, nest_proto, ct, destroy); nla_nest_end(skb, nest_proto); return ret; nla_put_failure: return -1; } static int ctnetlink_dump_helpinfo(struct sk_buff *skb, const struct nf_conn *ct) { struct nlattr *nest_helper; const struct nf_conn_help *help = nfct_help(ct); struct nf_conntrack_helper *helper; if (!help) return 0; rcu_read_lock(); helper = rcu_dereference(help->helper); if (!helper) goto out; nest_helper = nla_nest_start(skb, CTA_HELP); if (!nest_helper) goto nla_put_failure; if (nla_put_string(skb, CTA_HELP_NAME, helper->name)) goto nla_put_failure; if (helper->to_nlattr) helper->to_nlattr(skb, ct); nla_nest_end(skb, nest_helper); out: rcu_read_unlock(); return 0; nla_put_failure: rcu_read_unlock(); return -1; } static int dump_counters(struct sk_buff *skb, struct nf_conn_acct *acct, enum ip_conntrack_dir dir, int type) { enum ctattr_type attr = dir ? 
CTA_COUNTERS_REPLY: CTA_COUNTERS_ORIG; struct nf_conn_counter *counter = acct->counter; struct nlattr *nest_count; u64 pkts, bytes; if (type == IPCTNL_MSG_CT_GET_CTRZERO) { pkts = atomic64_xchg(&counter[dir].packets, 0); bytes = atomic64_xchg(&counter[dir].bytes, 0); } else { pkts = atomic64_read(&counter[dir].packets); bytes = atomic64_read(&counter[dir].bytes); } nest_count = nla_nest_start(skb, attr); if (!nest_count) goto nla_put_failure; if (nla_put_be64(skb, CTA_COUNTERS_PACKETS, cpu_to_be64(pkts), CTA_COUNTERS_PAD) || nla_put_be64(skb, CTA_COUNTERS_BYTES, cpu_to_be64(bytes), CTA_COUNTERS_PAD)) goto nla_put_failure; nla_nest_end(skb, nest_count); return 0; nla_put_failure: return -1; } static int ctnetlink_dump_acct(struct sk_buff *skb, const struct nf_conn *ct, int type) { struct nf_conn_acct *acct = nf_conn_acct_find(ct); if (!acct) return 0; if (dump_counters(skb, acct, IP_CT_DIR_ORIGINAL, type) < 0) return -1; if (dump_counters(skb, acct, IP_CT_DIR_REPLY, type) < 0) return -1; return 0; } static int ctnetlink_dump_timestamp(struct sk_buff *skb, const struct nf_conn *ct) { struct nlattr *nest_count; const struct nf_conn_tstamp *tstamp; tstamp = nf_conn_tstamp_find(ct); if (!tstamp) return 0; nest_count = nla_nest_start(skb, CTA_TIMESTAMP); if (!nest_count) goto nla_put_failure; if (nla_put_be64(skb, CTA_TIMESTAMP_START, cpu_to_be64(tstamp->start), CTA_TIMESTAMP_PAD) || (tstamp->stop != 0 && nla_put_be64(skb, CTA_TIMESTAMP_STOP, cpu_to_be64(tstamp->stop), CTA_TIMESTAMP_PAD))) goto nla_put_failure; nla_nest_end(skb, nest_count); return 0; nla_put_failure: return -1; } #ifdef CONFIG_NF_CONNTRACK_MARK static int ctnetlink_dump_mark(struct sk_buff *skb, const struct nf_conn *ct, bool dump) { u32 mark = READ_ONCE(ct->mark); if (!mark && !dump) return 0; if (nla_put_be32(skb, CTA_MARK, htonl(mark))) goto nla_put_failure; return 0; nla_put_failure: return -1; } #else #define ctnetlink_dump_mark(a, b, c) (0) #endif #ifdef CONFIG_NF_CONNTRACK_SECMARK static int 
ctnetlink_dump_secctx(struct sk_buff *skb, const struct nf_conn *ct) { struct nlattr *nest_secctx; struct lsm_context ctx; int ret; ret = security_secid_to_secctx(ct->secmark, &ctx); if (ret < 0) return 0; ret = -1; nest_secctx = nla_nest_start(skb, CTA_SECCTX); if (!nest_secctx) goto nla_put_failure; if (nla_put_string(skb, CTA_SECCTX_NAME, ctx.context)) goto nla_put_failure; nla_nest_end(skb, nest_secctx); ret = 0; nla_put_failure: security_release_secctx(&ctx); return ret; } #else #define ctnetlink_dump_secctx(a, b) (0) #endif #ifdef CONFIG_NF_CONNTRACK_EVENTS static int ctnetlink_dump_event_timestamp(struct sk_buff *skb, const struct nf_conn *ct) { #ifdef CONFIG_NF_CONNTRACK_TIMESTAMP const struct nf_conntrack_ecache *e = nf_ct_ecache_find(ct); if (e) { u64 ts = local64_read(&e->timestamp); if (ts) return nla_put_be64(skb, CTA_TIMESTAMP_EVENT, cpu_to_be64(ts), CTA_TIMESTAMP_PAD); } #endif return 0; } static inline int ctnetlink_label_size(const struct nf_conn *ct) { struct nf_conn_labels *labels = nf_ct_labels_find(ct); if (!labels) return 0; return nla_total_size(sizeof(labels->bits)); } #endif static int ctnetlink_dump_labels(struct sk_buff *skb, const struct nf_conn *ct) { struct nf_conn_labels *labels = nf_ct_labels_find(ct); unsigned int i; if (!labels) return 0; i = 0; do { if (labels->bits[i] != 0) return nla_put(skb, CTA_LABELS, sizeof(labels->bits), labels->bits); i++; } while (i < ARRAY_SIZE(labels->bits)); return 0; } #define master_tuple(ct) &(ct->master->tuplehash[IP_CT_DIR_ORIGINAL].tuple) static int ctnetlink_dump_master(struct sk_buff *skb, const struct nf_conn *ct) { struct nlattr *nest_parms; if (!(ct->status & IPS_EXPECTED)) return 0; nest_parms = nla_nest_start(skb, CTA_TUPLE_MASTER); if (!nest_parms) goto nla_put_failure; if (ctnetlink_dump_tuples(skb, master_tuple(ct)) < 0) goto nla_put_failure; nla_nest_end(skb, nest_parms); return 0; nla_put_failure: return -1; } static int dump_ct_seq_adj(struct sk_buff *skb, const struct nf_ct_seqadj 
*seq, int type) { struct nlattr *nest_parms; nest_parms = nla_nest_start(skb, type); if (!nest_parms) goto nla_put_failure; if (nla_put_be32(skb, CTA_SEQADJ_CORRECTION_POS, htonl(seq->correction_pos)) || nla_put_be32(skb, CTA_SEQADJ_OFFSET_BEFORE, htonl(seq->offset_before)) || nla_put_be32(skb, CTA_SEQADJ_OFFSET_AFTER, htonl(seq->offset_after))) goto nla_put_failure; nla_nest_end(skb, nest_parms); return 0; nla_put_failure: return -1; } static int ctnetlink_dump_ct_seq_adj(struct sk_buff *skb, struct nf_conn *ct) { struct nf_conn_seqadj *seqadj = nfct_seqadj(ct); struct nf_ct_seqadj *seq; if (!(ct->status & IPS_SEQ_ADJUST) || !seqadj) return 0; spin_lock_bh(&ct->lock); seq = &seqadj->seq[IP_CT_DIR_ORIGINAL]; if (dump_ct_seq_adj(skb, seq, CTA_SEQ_ADJ_ORIG) == -1) goto err; seq = &seqadj->seq[IP_CT_DIR_REPLY]; if (dump_ct_seq_adj(skb, seq, CTA_SEQ_ADJ_REPLY) == -1) goto err; spin_unlock_bh(&ct->lock); return 0; err: spin_unlock_bh(&ct->lock); return -1; } static int ctnetlink_dump_ct_synproxy(struct sk_buff *skb, struct nf_conn *ct) { struct nf_conn_synproxy *synproxy = nfct_synproxy(ct); struct nlattr *nest_parms; if (!synproxy) return 0; nest_parms = nla_nest_start(skb, CTA_SYNPROXY); if (!nest_parms) goto nla_put_failure; if (nla_put_be32(skb, CTA_SYNPROXY_ISN, htonl(synproxy->isn)) || nla_put_be32(skb, CTA_SYNPROXY_ITS, htonl(synproxy->its)) || nla_put_be32(skb, CTA_SYNPROXY_TSOFF, htonl(synproxy->tsoff))) goto nla_put_failure; nla_nest_end(skb, nest_parms); return 0; nla_put_failure: return -1; } static int ctnetlink_dump_id(struct sk_buff *skb, const struct nf_conn *ct) { __be32 id = (__force __be32)nf_ct_get_id(ct); if (nla_put_be32(skb, CTA_ID, id)) goto nla_put_failure; return 0; nla_put_failure: return -1; } static int ctnetlink_dump_use(struct sk_buff *skb, const struct nf_conn *ct) { if (nla_put_be32(skb, CTA_USE, htonl(refcount_read(&ct->ct_general.use)))) goto nla_put_failure; return 0; nla_put_failure: return -1; } /* all these functions access 
ct->ext. Caller must either hold a reference * on ct or prevent its deletion by holding either the bucket spinlock or * pcpu dying list lock. */ static int ctnetlink_dump_extinfo(struct sk_buff *skb, struct nf_conn *ct, u32 type) { if (ctnetlink_dump_acct(skb, ct, type) < 0 || ctnetlink_dump_timestamp(skb, ct) < 0 || ctnetlink_dump_helpinfo(skb, ct) < 0 || ctnetlink_dump_labels(skb, ct) < 0 || ctnetlink_dump_ct_seq_adj(skb, ct) < 0 || ctnetlink_dump_ct_synproxy(skb, ct) < 0) return -1; return 0; } static int ctnetlink_dump_info(struct sk_buff *skb, struct nf_conn *ct) { if (ctnetlink_dump_status(skb, ct) < 0 || ctnetlink_dump_mark(skb, ct, true) < 0 || ctnetlink_dump_secctx(skb, ct) < 0 || ctnetlink_dump_id(skb, ct) < 0 || ctnetlink_dump_use(skb, ct) < 0 || ctnetlink_dump_master(skb, ct) < 0) return -1; if (!test_bit(IPS_OFFLOAD_BIT, &ct->status) && (ctnetlink_dump_timeout(skb, ct, false) < 0 || ctnetlink_dump_protoinfo(skb, ct, false) < 0)) return -1; return 0; } static int ctnetlink_fill_info(struct sk_buff *skb, u32 portid, u32 seq, u32 type, struct nf_conn *ct, bool extinfo, unsigned int flags) { const struct nf_conntrack_zone *zone; struct nlmsghdr *nlh; struct nlattr *nest_parms; unsigned int event; if (portid) flags |= NLM_F_MULTI; event = nfnl_msg_type(NFNL_SUBSYS_CTNETLINK, IPCTNL_MSG_CT_NEW); nlh = nfnl_msg_put(skb, portid, seq, event, flags, nf_ct_l3num(ct), NFNETLINK_V0, 0); if (!nlh) goto nlmsg_failure; zone = nf_ct_zone(ct); nest_parms = nla_nest_start(skb, CTA_TUPLE_ORIG); if (!nest_parms) goto nla_put_failure; if (ctnetlink_dump_tuples(skb, nf_ct_tuple(ct, IP_CT_DIR_ORIGINAL)) < 0) goto nla_put_failure; if (ctnetlink_dump_zone_id(skb, CTA_TUPLE_ZONE, zone, NF_CT_ZONE_DIR_ORIG) < 0) goto nla_put_failure; nla_nest_end(skb, nest_parms); nest_parms = nla_nest_start(skb, CTA_TUPLE_REPLY); if (!nest_parms) goto nla_put_failure; if (ctnetlink_dump_tuples(skb, nf_ct_tuple(ct, IP_CT_DIR_REPLY)) < 0) goto nla_put_failure; if (ctnetlink_dump_zone_id(skb, 
CTA_TUPLE_ZONE, zone, NF_CT_ZONE_DIR_REPL) < 0) goto nla_put_failure; nla_nest_end(skb, nest_parms); if (ctnetlink_dump_zone_id(skb, CTA_ZONE, zone, NF_CT_DEFAULT_ZONE_DIR) < 0) goto nla_put_failure; if (ctnetlink_dump_info(skb, ct) < 0) goto nla_put_failure; if (extinfo && ctnetlink_dump_extinfo(skb, ct, type) < 0) goto nla_put_failure; nlmsg_end(skb, nlh); return skb->len; nlmsg_failure: nla_put_failure: nlmsg_cancel(skb, nlh); return -1; } static const struct nla_policy cta_ip_nla_policy[CTA_IP_MAX + 1] = { [CTA_IP_V4_SRC] = { .type = NLA_U32 }, [CTA_IP_V4_DST] = { .type = NLA_U32 }, [CTA_IP_V6_SRC] = { .len = sizeof(__be32) * 4 }, [CTA_IP_V6_DST] = { .len = sizeof(__be32) * 4 }, }; #if defined(CONFIG_NETFILTER_NETLINK_GLUE_CT) || defined(CONFIG_NF_CONNTRACK_EVENTS) static size_t ctnetlink_proto_size(const struct nf_conn *ct) { const struct nf_conntrack_l4proto *l4proto; size_t len, len4 = 0; len = nla_policy_len(cta_ip_nla_policy, CTA_IP_MAX + 1); len *= 3u; /* ORIG, REPLY, MASTER */ l4proto = nf_ct_l4proto_find(nf_ct_protonum(ct)); len += l4proto->nlattr_size; if (l4proto->nlattr_tuple_size) { len4 = l4proto->nlattr_tuple_size(); len4 *= 3u; /* ORIG, REPLY, MASTER */ } return len + len4; } static inline size_t ctnetlink_acct_size(const struct nf_conn *ct) { if (!nf_ct_ext_exist(ct, NF_CT_EXT_ACCT)) return 0; return 2 * nla_total_size(0) /* CTA_COUNTERS_ORIG|REPL */ + 2 * nla_total_size_64bit(sizeof(uint64_t)) /* CTA_COUNTERS_PACKETS */ + 2 * nla_total_size_64bit(sizeof(uint64_t)) /* CTA_COUNTERS_BYTES */ ; } static inline int ctnetlink_secctx_size(const struct nf_conn *ct) { #ifdef CONFIG_NF_CONNTRACK_SECMARK int ret; ret = security_secid_to_secctx(ct->secmark, NULL); if (ret < 0) return 0; return nla_total_size(0) /* CTA_SECCTX */ + nla_total_size(sizeof(char) * ret); /* CTA_SECCTX_NAME */ #else return 0; #endif } static inline size_t ctnetlink_timestamp_size(const struct nf_conn *ct) { #ifdef CONFIG_NF_CONNTRACK_TIMESTAMP if (!nf_ct_ext_exist(ct, 
NF_CT_EXT_TSTAMP)) return 0; return nla_total_size(0) + 2 * nla_total_size_64bit(sizeof(uint64_t)); #else return 0; #endif } #endif #ifdef CONFIG_NF_CONNTRACK_EVENTS static size_t ctnetlink_nlmsg_size(const struct nf_conn *ct) { return NLMSG_ALIGN(sizeof(struct nfgenmsg)) + 3 * nla_total_size(0) /* CTA_TUPLE_ORIG|REPL|MASTER */ + 3 * nla_total_size(0) /* CTA_TUPLE_IP */ + 3 * nla_total_size(0) /* CTA_TUPLE_PROTO */ + 3 * nla_total_size(sizeof(u_int8_t)) /* CTA_PROTO_NUM */ + nla_total_size(sizeof(u_int32_t)) /* CTA_ID */ + nla_total_size(sizeof(u_int32_t)) /* CTA_STATUS */ + ctnetlink_acct_size(ct) + ctnetlink_timestamp_size(ct) + nla_total_size(sizeof(u_int32_t)) /* CTA_TIMEOUT */ + nla_total_size(0) /* CTA_PROTOINFO */ + nla_total_size(0) /* CTA_HELP */ + nla_total_size(NF_CT_HELPER_NAME_LEN) /* CTA_HELP_NAME */ + ctnetlink_secctx_size(ct) #if IS_ENABLED(CONFIG_NF_NAT) + 2 * nla_total_size(0) /* CTA_NAT_SEQ_ADJ_ORIG|REPL */ + 6 * nla_total_size(sizeof(u_int32_t)) /* CTA_NAT_SEQ_OFFSET */ #endif #ifdef CONFIG_NF_CONNTRACK_MARK + nla_total_size(sizeof(u_int32_t)) /* CTA_MARK */ #endif #ifdef CONFIG_NF_CONNTRACK_ZONES + nla_total_size(sizeof(u_int16_t)) /* CTA_ZONE|CTA_TUPLE_ZONE */ #endif + ctnetlink_proto_size(ct) + ctnetlink_label_size(ct) #ifdef CONFIG_NF_CONNTRACK_TIMESTAMP + nla_total_size(sizeof(u64)) /* CTA_TIMESTAMP_EVENT */ #endif ; } static int ctnetlink_conntrack_event(unsigned int events, const struct nf_ct_event *item) { const struct nf_conntrack_zone *zone; struct net *net; struct nlmsghdr *nlh; struct nlattr *nest_parms; struct nf_conn *ct = item->ct; struct sk_buff *skb; unsigned int type; unsigned int flags = 0, group; int err; if (events & (1 << IPCT_DESTROY)) { type = IPCTNL_MSG_CT_DELETE; group = NFNLGRP_CONNTRACK_DESTROY; } else if (events & ((1 << IPCT_NEW) | (1 << IPCT_RELATED))) { type = IPCTNL_MSG_CT_NEW; flags = NLM_F_CREATE|NLM_F_EXCL; group = NFNLGRP_CONNTRACK_NEW; } else if (events) { type = IPCTNL_MSG_CT_NEW; group = 
NFNLGRP_CONNTRACK_UPDATE; } else return 0; net = nf_ct_net(ct); if (!item->report && !nfnetlink_has_listeners(net, group)) return 0; skb = nlmsg_new(ctnetlink_nlmsg_size(ct), GFP_ATOMIC); if (skb == NULL) goto errout; type = nfnl_msg_type(NFNL_SUBSYS_CTNETLINK, type); nlh = nfnl_msg_put(skb, item->portid, 0, type, flags, nf_ct_l3num(ct), NFNETLINK_V0, 0); if (!nlh) goto nlmsg_failure; zone = nf_ct_zone(ct); nest_parms = nla_nest_start(skb, CTA_TUPLE_ORIG); if (!nest_parms) goto nla_put_failure; if (ctnetlink_dump_tuples(skb, nf_ct_tuple(ct, IP_CT_DIR_ORIGINAL)) < 0) goto nla_put_failure; if (ctnetlink_dump_zone_id(skb, CTA_TUPLE_ZONE, zone, NF_CT_ZONE_DIR_ORIG) < 0) goto nla_put_failure; nla_nest_end(skb, nest_parms); nest_parms = nla_nest_start(skb, CTA_TUPLE_REPLY); if (!nest_parms) goto nla_put_failure; if (ctnetlink_dump_tuples(skb, nf_ct_tuple(ct, IP_CT_DIR_REPLY)) < 0) goto nla_put_failure; if (ctnetlink_dump_zone_id(skb, CTA_TUPLE_ZONE, zone, NF_CT_ZONE_DIR_REPL) < 0) goto nla_put_failure; nla_nest_end(skb, nest_parms); if (ctnetlink_dump_zone_id(skb, CTA_ZONE, zone, NF_CT_DEFAULT_ZONE_DIR) < 0) goto nla_put_failure; if (ctnetlink_dump_id(skb, ct) < 0) goto nla_put_failure; if (ctnetlink_dump_status(skb, ct) < 0) goto nla_put_failure; if (events & (1 << IPCT_DESTROY)) { if (ctnetlink_dump_timeout(skb, ct, true) < 0) goto nla_put_failure; if (ctnetlink_dump_acct(skb, ct, type) < 0 || ctnetlink_dump_timestamp(skb, ct) < 0 || ctnetlink_dump_protoinfo(skb, ct, true) < 0) goto nla_put_failure; } else { if (ctnetlink_dump_timeout(skb, ct, false) < 0) goto nla_put_failure; if (events & (1 << IPCT_PROTOINFO) && ctnetlink_dump_protoinfo(skb, ct, false) < 0) goto nla_put_failure; if ((events & (1 << IPCT_HELPER) || nfct_help(ct)) && ctnetlink_dump_helpinfo(skb, ct) < 0) goto nla_put_failure; #ifdef CONFIG_NF_CONNTRACK_SECMARK if ((events & (1 << IPCT_SECMARK) || ct->secmark) && ctnetlink_dump_secctx(skb, ct) < 0) goto nla_put_failure; #endif if (events & (1 << 
IPCT_LABEL) && ctnetlink_dump_labels(skb, ct) < 0) goto nla_put_failure; if (events & (1 << IPCT_RELATED) && ctnetlink_dump_master(skb, ct) < 0) goto nla_put_failure; if (events & (1 << IPCT_SEQADJ) && ctnetlink_dump_ct_seq_adj(skb, ct) < 0) goto nla_put_failure; if (events & (1 << IPCT_SYNPROXY) && ctnetlink_dump_ct_synproxy(skb, ct) < 0) goto nla_put_failure; } #ifdef CONFIG_NF_CONNTRACK_MARK if (ctnetlink_dump_mark(skb, ct, events & (1 << IPCT_MARK))) goto nla_put_failure; #endif if (ctnetlink_dump_event_timestamp(skb, ct)) goto nla_put_failure; nlmsg_end(skb, nlh); err = nfnetlink_send(skb, net, item->portid, group, item->report, GFP_ATOMIC); if (err == -ENOBUFS || err == -EAGAIN) return -ENOBUFS; return 0; nla_put_failure: nlmsg_cancel(skb, nlh); nlmsg_failure: kfree_skb(skb); errout: if (nfnetlink_set_err(net, 0, group, -ENOBUFS) > 0) return -ENOBUFS; return 0; } #endif /* CONFIG_NF_CONNTRACK_EVENTS */ static int ctnetlink_done(struct netlink_callback *cb) { kfree(cb->data); return 0; } struct ctnetlink_filter_u32 { u32 val; u32 mask; }; struct ctnetlink_filter { u8 family; bool zone_filter; u_int32_t orig_flags; u_int32_t reply_flags; struct nf_conntrack_tuple orig; struct nf_conntrack_tuple reply; struct nf_conntrack_zone zone; struct ctnetlink_filter_u32 mark; struct ctnetlink_filter_u32 status; }; static const struct nla_policy cta_filter_nla_policy[CTA_FILTER_MAX + 1] = { [CTA_FILTER_ORIG_FLAGS] = NLA_POLICY_MASK(NLA_U32, CTA_FILTER_F_ALL), [CTA_FILTER_REPLY_FLAGS] = NLA_POLICY_MASK(NLA_U32, CTA_FILTER_F_ALL), }; static int ctnetlink_parse_filter(const struct nlattr *attr, struct ctnetlink_filter *filter) { struct nlattr *tb[CTA_FILTER_MAX + 1]; int ret = 0; ret = nla_parse_nested(tb, CTA_FILTER_MAX, attr, cta_filter_nla_policy, NULL); if (ret) return ret; if (tb[CTA_FILTER_ORIG_FLAGS]) filter->orig_flags = nla_get_u32(tb[CTA_FILTER_ORIG_FLAGS]); if (tb[CTA_FILTER_REPLY_FLAGS]) filter->reply_flags = nla_get_u32(tb[CTA_FILTER_REPLY_FLAGS]); return 0; } 
static int ctnetlink_parse_zone(const struct nlattr *attr, struct nf_conntrack_zone *zone); static int ctnetlink_parse_tuple_filter(const struct nlattr * const cda[], struct nf_conntrack_tuple *tuple, u32 type, u_int8_t l3num, struct nf_conntrack_zone *zone, u_int32_t flags); static int ctnetlink_filter_parse_mark(struct ctnetlink_filter_u32 *mark, const struct nlattr * const cda[]) { #ifdef CONFIG_NF_CONNTRACK_MARK if (cda[CTA_MARK]) { mark->val = ntohl(nla_get_be32(cda[CTA_MARK])); if (cda[CTA_MARK_MASK]) mark->mask = ntohl(nla_get_be32(cda[CTA_MARK_MASK])); else mark->mask = 0xffffffff; } else if (cda[CTA_MARK_MASK]) { return -EINVAL; } #endif return 0; } static int ctnetlink_filter_parse_status(struct ctnetlink_filter_u32 *status, const struct nlattr * const cda[]) { if (cda[CTA_STATUS]) { status->val = ntohl(nla_get_be32(cda[CTA_STATUS])); if (cda[CTA_STATUS_MASK]) status->mask = ntohl(nla_get_be32(cda[CTA_STATUS_MASK])); else status->mask = status->val; /* status->val == 0? always true, else always false. 
*/ if (status->mask == 0) return -EINVAL; } else if (cda[CTA_STATUS_MASK]) { return -EINVAL; } /* CTA_STATUS is NLA_U32, if this fires UAPI needs to be extended */ BUILD_BUG_ON(__IPS_MAX_BIT >= 32); return 0; } static struct ctnetlink_filter * ctnetlink_alloc_filter(const struct nlattr * const cda[], u8 family) { struct ctnetlink_filter *filter; int err; #ifndef CONFIG_NF_CONNTRACK_MARK if (cda[CTA_MARK] || cda[CTA_MARK_MASK]) return ERR_PTR(-EOPNOTSUPP); #endif filter = kzalloc_obj(*filter); if (filter == NULL) return ERR_PTR(-ENOMEM); filter->family = family; err = ctnetlink_filter_parse_mark(&filter->mark, cda); if (err) goto err_filter; err = ctnetlink_filter_parse_status(&filter->status, cda); if (err) goto err_filter; if (cda[CTA_ZONE]) { err = ctnetlink_parse_zone(cda[CTA_ZONE], &filter->zone); if (err < 0) goto err_filter; filter->zone_filter = true; } if (!cda[CTA_FILTER]) return filter; err = ctnetlink_parse_filter(cda[CTA_FILTER], filter); if (err < 0) goto err_filter; if (filter->orig_flags) { if (!cda[CTA_TUPLE_ORIG]) { err = -EINVAL; goto err_filter; } err = ctnetlink_parse_tuple_filter(cda, &filter->orig, CTA_TUPLE_ORIG, filter->family, &filter->zone, filter->orig_flags); if (err < 0) goto err_filter; } if (filter->reply_flags) { if (!cda[CTA_TUPLE_REPLY]) { err = -EINVAL; goto err_filter; } err = ctnetlink_parse_tuple_filter(cda, &filter->reply, CTA_TUPLE_REPLY, filter->family, &filter->zone, filter->reply_flags); if (err < 0) goto err_filter; } return filter; err_filter: kfree(filter); return ERR_PTR(err); } static bool ctnetlink_needs_filter(u8 family, const struct nlattr * const *cda) { return family || cda[CTA_MARK] || cda[CTA_FILTER] || cda[CTA_STATUS] || cda[CTA_ZONE]; } static int ctnetlink_start(struct netlink_callback *cb) { const struct nlattr * const *cda = cb->data; struct ctnetlink_filter *filter = NULL; struct nfgenmsg *nfmsg = nlmsg_data(cb->nlh); u8 family = nfmsg->nfgen_family; if (ctnetlink_needs_filter(family, cda)) { filter = 
ctnetlink_alloc_filter(cda, family); if (IS_ERR(filter)) return PTR_ERR(filter); } cb->data = filter; return 0; } static int ctnetlink_filter_match_tuple(struct nf_conntrack_tuple *filter_tuple, struct nf_conntrack_tuple *ct_tuple, u_int32_t flags, int family) { switch (family) { case NFPROTO_IPV4: if ((flags & CTA_FILTER_FLAG(CTA_IP_SRC)) && filter_tuple->src.u3.ip != ct_tuple->src.u3.ip) return 0; if ((flags & CTA_FILTER_FLAG(CTA_IP_DST)) && filter_tuple->dst.u3.ip != ct_tuple->dst.u3.ip) return 0; break; case NFPROTO_IPV6: if ((flags & CTA_FILTER_FLAG(CTA_IP_SRC)) && !ipv6_addr_cmp(&filter_tuple->src.u3.in6, &ct_tuple->src.u3.in6)) return 0; if ((flags & CTA_FILTER_FLAG(CTA_IP_DST)) && !ipv6_addr_cmp(&filter_tuple->dst.u3.in6, &ct_tuple->dst.u3.in6)) return 0; break; } if ((flags & CTA_FILTER_FLAG(CTA_PROTO_NUM)) && filter_tuple->dst.protonum != ct_tuple->dst.protonum) return 0; switch (ct_tuple->dst.protonum) { case IPPROTO_TCP: case IPPROTO_UDP: if ((flags & CTA_FILTER_FLAG(CTA_PROTO_SRC_PORT)) && filter_tuple->src.u.tcp.port != ct_tuple->src.u.tcp.port) return 0; if ((flags & CTA_FILTER_FLAG(CTA_PROTO_DST_PORT)) && filter_tuple->dst.u.tcp.port != ct_tuple->dst.u.tcp.port) return 0; break; case IPPROTO_ICMP: if ((flags & CTA_FILTER_FLAG(CTA_PROTO_ICMP_TYPE)) && filter_tuple->dst.u.icmp.type != ct_tuple->dst.u.icmp.type) return 0; if ((flags & CTA_FILTER_FLAG(CTA_PROTO_ICMP_CODE)) && filter_tuple->dst.u.icmp.code != ct_tuple->dst.u.icmp.code) return 0; if ((flags & CTA_FILTER_FLAG(CTA_PROTO_ICMP_ID)) && filter_tuple->src.u.icmp.id != ct_tuple->src.u.icmp.id) return 0; break; case IPPROTO_ICMPV6: if ((flags & CTA_FILTER_FLAG(CTA_PROTO_ICMPV6_TYPE)) && filter_tuple->dst.u.icmp.type != ct_tuple->dst.u.icmp.type) return 0; if ((flags & CTA_FILTER_FLAG(CTA_PROTO_ICMPV6_CODE)) && filter_tuple->dst.u.icmp.code != ct_tuple->dst.u.icmp.code) return 0; if ((flags & CTA_FILTER_FLAG(CTA_PROTO_ICMPV6_ID)) && filter_tuple->src.u.icmp.id != ct_tuple->src.u.icmp.id) return 0; 
break; } return 1; } static int ctnetlink_filter_match(struct nf_conn *ct, void *data) { struct ctnetlink_filter *filter = data; struct nf_conntrack_tuple *tuple; u32 status; if (filter == NULL) goto out; /* Match entries of a given L3 protocol number. * If it is not specified, ie. l3proto == 0, * then match everything. */ if (filter->family && nf_ct_l3num(ct) != filter->family) goto ignore_entry; if (filter->zone_filter && !nf_ct_zone_equal_any(ct, &filter->zone)) goto ignore_entry; if (filter->orig_flags) { tuple = nf_ct_tuple(ct, IP_CT_DIR_ORIGINAL); if (!ctnetlink_filter_match_tuple(&filter->orig, tuple, filter->orig_flags, filter->family)) goto ignore_entry; } if (filter->reply_flags) { tuple = nf_ct_tuple(ct, IP_CT_DIR_REPLY); if (!ctnetlink_filter_match_tuple(&filter->reply, tuple, filter->reply_flags, filter->family)) goto ignore_entry; } #ifdef CONFIG_NF_CONNTRACK_MARK if ((READ_ONCE(ct->mark) & filter->mark.mask) != filter->mark.val) goto ignore_entry; #endif status = (u32)READ_ONCE(ct->status); if ((status & filter->status.mask) != filter->status.val) goto ignore_entry; out: return 1; ignore_entry: return 0; } static unsigned long ctnetlink_get_id(const struct nf_conn *ct) { unsigned long id = nf_ct_get_id(ct); return id ? id : 1; } static int ctnetlink_dump_table(struct sk_buff *skb, struct netlink_callback *cb) { unsigned int flags = cb->data ? 
NLM_F_DUMP_FILTERED : 0; struct net *net = sock_net(skb->sk); unsigned long last_id = cb->args[1]; struct nf_conntrack_tuple_hash *h; struct hlist_nulls_node *n; struct nf_conn *nf_ct_evict[8]; struct nf_conn *ct; int res, i; spinlock_t *lockp; i = 0; local_bh_disable(); for (; cb->args[0] < nf_conntrack_htable_size; cb->args[0]++) { restart: while (i) { i--; if (nf_ct_should_gc(nf_ct_evict[i])) nf_ct_kill(nf_ct_evict[i]); nf_ct_put(nf_ct_evict[i]); } lockp = &nf_conntrack_locks[cb->args[0] % CONNTRACK_LOCKS]; nf_conntrack_lock(lockp); if (cb->args[0] >= nf_conntrack_htable_size) { spin_unlock(lockp); goto out; } hlist_nulls_for_each_entry(h, n, &nf_conntrack_hash[cb->args[0]], hnnode) { ct = nf_ct_tuplehash_to_ctrack(h); if (nf_ct_is_expired(ct)) { /* need to defer nf_ct_kill() until lock is released */ if (i < ARRAY_SIZE(nf_ct_evict) && refcount_inc_not_zero(&ct->ct_general.use)) nf_ct_evict[i++] = ct; continue; } if (!net_eq(net, nf_ct_net(ct))) continue; if (NF_CT_DIRECTION(h) != IP_CT_DIR_ORIGINAL) continue; if (cb->args[1]) { if (ctnetlink_get_id(ct) != last_id) continue; cb->args[1] = 0; } if (!ctnetlink_filter_match(ct, cb->data)) continue; res = ctnetlink_fill_info(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NFNL_MSG_TYPE(cb->nlh->nlmsg_type), ct, true, flags); if (res < 0) { cb->args[1] = ctnetlink_get_id(ct); spin_unlock(lockp); goto out; } } spin_unlock(lockp); if (cb->args[1]) { cb->args[1] = 0; goto restart; } } out: local_bh_enable(); if (last_id) { /* nf ct hash resize happened, now clear the leftover. 
*/ if (cb->args[1] == last_id) cb->args[1] = 0; } while (i) { i--; if (nf_ct_should_gc(nf_ct_evict[i])) nf_ct_kill(nf_ct_evict[i]); nf_ct_put(nf_ct_evict[i]); } return skb->len; } static int ipv4_nlattr_to_tuple(struct nlattr *tb[], struct nf_conntrack_tuple *t, u_int32_t flags) { if (flags & CTA_FILTER_FLAG(CTA_IP_SRC)) { if (!tb[CTA_IP_V4_SRC]) return -EINVAL; t->src.u3.ip = nla_get_in_addr(tb[CTA_IP_V4_SRC]); } if (flags & CTA_FILTER_FLAG(CTA_IP_DST)) { if (!tb[CTA_IP_V4_DST]) return -EINVAL; t->dst.u3.ip = nla_get_in_addr(tb[CTA_IP_V4_DST]); } return 0; } static int ipv6_nlattr_to_tuple(struct nlattr *tb[], struct nf_conntrack_tuple *t, u_int32_t flags) { if (flags & CTA_FILTER_FLAG(CTA_IP_SRC)) { if (!tb[CTA_IP_V6_SRC]) return -EINVAL; t->src.u3.in6 = nla_get_in6_addr(tb[CTA_IP_V6_SRC]); } if (flags & CTA_FILTER_FLAG(CTA_IP_DST)) { if (!tb[CTA_IP_V6_DST]) return -EINVAL; t->dst.u3.in6 = nla_get_in6_addr(tb[CTA_IP_V6_DST]); } return 0; } static int ctnetlink_parse_tuple_ip(struct nlattr *attr, struct nf_conntrack_tuple *tuple, u_int32_t flags) { struct nlattr *tb[CTA_IP_MAX+1]; int ret = 0; ret = nla_parse_nested_deprecated(tb, CTA_IP_MAX, attr, cta_ip_nla_policy, NULL); if (ret < 0) return ret; switch (tuple->src.l3num) { case NFPROTO_IPV4: ret = ipv4_nlattr_to_tuple(tb, tuple, flags); break; case NFPROTO_IPV6: ret = ipv6_nlattr_to_tuple(tb, tuple, flags); break; } return ret; } static const struct nla_policy proto_nla_policy[CTA_PROTO_MAX+1] = { [CTA_PROTO_NUM] = { .type = NLA_U8 }, }; static int ctnetlink_parse_tuple_proto(struct nlattr *attr, struct nf_conntrack_tuple *tuple, u_int32_t flags) { const struct nf_conntrack_l4proto *l4proto; struct nlattr *tb[CTA_PROTO_MAX+1]; int ret = 0; ret = nla_parse_nested_deprecated(tb, CTA_PROTO_MAX, attr, proto_nla_policy, NULL); if (ret < 0) return ret; if (!(flags & CTA_FILTER_FLAG(CTA_PROTO_NUM))) return 0; if (!tb[CTA_PROTO_NUM]) return -EINVAL; tuple->dst.protonum = nla_get_u8(tb[CTA_PROTO_NUM]); rcu_read_lock(); 
l4proto = nf_ct_l4proto_find(tuple->dst.protonum); if (likely(l4proto->nlattr_to_tuple)) { ret = nla_validate_nested_deprecated(attr, CTA_PROTO_MAX, l4proto->nla_policy, NULL); if (ret == 0) ret = l4proto->nlattr_to_tuple(tb, tuple, flags); } rcu_read_unlock(); return ret; } static int ctnetlink_parse_zone(const struct nlattr *attr, struct nf_conntrack_zone *zone) { nf_ct_zone_init(zone, NF_CT_DEFAULT_ZONE_ID, NF_CT_DEFAULT_ZONE_DIR, 0); #ifdef CONFIG_NF_CONNTRACK_ZONES if (attr) zone->id = ntohs(nla_get_be16(attr)); #else if (attr) return -EOPNOTSUPP; #endif return 0; } static int ctnetlink_parse_tuple_zone(struct nlattr *attr, enum ctattr_type type, struct nf_conntrack_zone *zone) { int ret; if (zone->id != NF_CT_DEFAULT_ZONE_ID) return -EINVAL; ret = ctnetlink_parse_zone(attr, zone); if (ret < 0) return ret; if (type == CTA_TUPLE_REPLY) zone->dir = NF_CT_ZONE_DIR_REPL; else zone->dir = NF_CT_ZONE_DIR_ORIG; return 0; } static const struct nla_policy tuple_nla_policy[CTA_TUPLE_MAX+1] = { [CTA_TUPLE_IP] = { .type = NLA_NESTED }, [CTA_TUPLE_PROTO] = { .type = NLA_NESTED }, [CTA_TUPLE_ZONE] = { .type = NLA_U16 }, }; #define CTA_FILTER_F_ALL_CTA_PROTO \ (CTA_FILTER_F_CTA_PROTO_SRC_PORT | \ CTA_FILTER_F_CTA_PROTO_DST_PORT | \ CTA_FILTER_F_CTA_PROTO_ICMP_TYPE | \ CTA_FILTER_F_CTA_PROTO_ICMP_CODE | \ CTA_FILTER_F_CTA_PROTO_ICMP_ID | \ CTA_FILTER_F_CTA_PROTO_ICMPV6_TYPE | \ CTA_FILTER_F_CTA_PROTO_ICMPV6_CODE | \ CTA_FILTER_F_CTA_PROTO_ICMPV6_ID) static int ctnetlink_parse_tuple_filter(const struct nlattr * const cda[], struct nf_conntrack_tuple *tuple, u32 type, u_int8_t l3num, struct nf_conntrack_zone *zone, u_int32_t flags) { struct nlattr *tb[CTA_TUPLE_MAX+1]; int err; memset(tuple, 0, sizeof(*tuple)); err = nla_parse_nested_deprecated(tb, CTA_TUPLE_MAX, cda[type], tuple_nla_policy, NULL); if (err < 0) return err; if (l3num != NFPROTO_IPV4 && l3num != NFPROTO_IPV6) return -EOPNOTSUPP; tuple->src.l3num = l3num; if (flags & CTA_FILTER_FLAG(CTA_IP_DST) || flags & 
CTA_FILTER_FLAG(CTA_IP_SRC)) { if (!tb[CTA_TUPLE_IP]) return -EINVAL; err = ctnetlink_parse_tuple_ip(tb[CTA_TUPLE_IP], tuple, flags); if (err < 0) return err; } if (flags & CTA_FILTER_FLAG(CTA_PROTO_NUM)) { if (!tb[CTA_TUPLE_PROTO]) return -EINVAL; err = ctnetlink_parse_tuple_proto(tb[CTA_TUPLE_PROTO], tuple, flags); if (err < 0) return err; } else if (flags & CTA_FILTER_FLAG(ALL_CTA_PROTO)) { /* Can't manage proto flags without a protonum */ return -EINVAL; } if ((flags & CTA_FILTER_FLAG(CTA_TUPLE_ZONE)) && tb[CTA_TUPLE_ZONE]) { if (!zone) return -EINVAL; err = ctnetlink_parse_tuple_zone(tb[CTA_TUPLE_ZONE], type, zone); if (err < 0) return err; } /* orig and expect tuples get DIR_ORIGINAL */ if (type == CTA_TUPLE_REPLY) tuple->dst.dir = IP_CT_DIR_REPLY; else tuple->dst.dir = IP_CT_DIR_ORIGINAL; return 0; } static int ctnetlink_parse_tuple(const struct nlattr * const cda[], struct nf_conntrack_tuple *tuple, u32 type, u_int8_t l3num, struct nf_conntrack_zone *zone) { return ctnetlink_parse_tuple_filter(cda, tuple, type, l3num, zone, CTA_FILTER_FLAG(ALL)); } static const struct nla_policy help_nla_policy[CTA_HELP_MAX+1] = { [CTA_HELP_NAME] = { .type = NLA_NUL_STRING, .len = NF_CT_HELPER_NAME_LEN - 1 }, }; static int ctnetlink_parse_help(const struct nlattr *attr, char **helper_name, struct nlattr **helpinfo) { int err; struct nlattr *tb[CTA_HELP_MAX+1]; err = nla_parse_nested_deprecated(tb, CTA_HELP_MAX, attr, help_nla_policy, NULL); if (err < 0) return err; if (!tb[CTA_HELP_NAME]) return -EINVAL; *helper_name = nla_data(tb[CTA_HELP_NAME]); if (tb[CTA_HELP_INFO]) *helpinfo = tb[CTA_HELP_INFO]; return 0; } static const struct nla_policy ct_nla_policy[CTA_MAX+1] = { [CTA_TUPLE_ORIG] = { .type = NLA_NESTED }, [CTA_TUPLE_REPLY] = { .type = NLA_NESTED }, [CTA_STATUS] = { .type = NLA_U32 }, [CTA_PROTOINFO] = { .type = NLA_NESTED }, [CTA_HELP] = { .type = NLA_NESTED }, [CTA_NAT_SRC] = { .type = NLA_NESTED }, [CTA_TIMEOUT] = { .type = NLA_U32 }, [CTA_MARK] = { .type = 
NLA_U32 }, [CTA_ID] = { .type = NLA_U32 }, [CTA_NAT_DST] = { .type = NLA_NESTED }, [CTA_TUPLE_MASTER] = { .type = NLA_NESTED }, [CTA_NAT_SEQ_ADJ_ORIG] = { .type = NLA_NESTED }, [CTA_NAT_SEQ_ADJ_REPLY] = { .type = NLA_NESTED }, [CTA_ZONE] = { .type = NLA_U16 }, [CTA_MARK_MASK] = { .type = NLA_U32 }, [CTA_LABELS] = { .type = NLA_BINARY, .len = NF_CT_LABELS_MAX_SIZE }, [CTA_LABELS_MASK] = { .type = NLA_BINARY, .len = NF_CT_LABELS_MAX_SIZE }, [CTA_FILTER] = { .type = NLA_NESTED }, [CTA_STATUS_MASK] = { .type = NLA_U32 }, [CTA_TIMESTAMP_EVENT] = { .type = NLA_REJECT }, }; static int ctnetlink_flush_iterate(struct nf_conn *ct, void *data) { return ctnetlink_filter_match(ct, data); } static int ctnetlink_flush_conntrack(struct net *net, const struct nlattr * const cda[], u32 portid, int report, u8 family) { struct ctnetlink_filter *filter = NULL; struct nf_ct_iter_data iter = { .net = net, .portid = portid, .report = report, }; if (ctnetlink_needs_filter(family, cda)) { filter = ctnetlink_alloc_filter(cda, family); if (IS_ERR(filter)) return PTR_ERR(filter); iter.data = filter; } nf_ct_iterate_cleanup_net(ctnetlink_flush_iterate, &iter); kfree(filter); return 0; } static int ctnetlink_del_conntrack(struct sk_buff *skb, const struct nfnl_info *info, const struct nlattr * const cda[]) { u8 family = info->nfmsg->nfgen_family; struct nf_conntrack_tuple_hash *h; struct nf_conntrack_tuple tuple; struct nf_conntrack_zone zone; struct nf_conn *ct; int err; err = ctnetlink_parse_zone(cda[CTA_ZONE], &zone); if (err < 0) return err; if (cda[CTA_TUPLE_ORIG] && !cda[CTA_FILTER]) err = ctnetlink_parse_tuple(cda, &tuple, CTA_TUPLE_ORIG, family, &zone); else if (cda[CTA_TUPLE_REPLY] && !cda[CTA_FILTER]) err = ctnetlink_parse_tuple(cda, &tuple, CTA_TUPLE_REPLY, family, &zone); else { u8 u3 = info->nfmsg->version || cda[CTA_FILTER] ? 
family : AF_UNSPEC; return ctnetlink_flush_conntrack(info->net, cda, NETLINK_CB(skb).portid, nlmsg_report(info->nlh), u3); } if (err < 0) return err; h = nf_conntrack_find_get(info->net, &zone, &tuple); if (!h) return -ENOENT; ct = nf_ct_tuplehash_to_ctrack(h); if (cda[CTA_ID]) { __be32 id = nla_get_be32(cda[CTA_ID]); if (id != (__force __be32)nf_ct_get_id(ct)) { nf_ct_put(ct); return -ENOENT; } } nf_ct_delete(ct, NETLINK_CB(skb).portid, nlmsg_report(info->nlh)); nf_ct_put(ct); return 0; } static int ctnetlink_get_conntrack(struct sk_buff *skb, const struct nfnl_info *info, const struct nlattr * const cda[]) { u_int8_t u3 = info->nfmsg->nfgen_family; struct nf_conntrack_tuple_hash *h; struct nf_conntrack_tuple tuple; struct nf_conntrack_zone zone; struct sk_buff *skb2; struct nf_conn *ct; int err; if (info->nlh->nlmsg_flags & NLM_F_DUMP) { struct netlink_dump_control c = { .start = ctnetlink_start, .dump = ctnetlink_dump_table, .done = ctnetlink_done, .data = (void *)cda, }; return netlink_dump_start(info->sk, skb, info->nlh, &c); } err = ctnetlink_parse_zone(cda[CTA_ZONE], &zone); if (err < 0) return err; if (cda[CTA_TUPLE_ORIG]) err = ctnetlink_parse_tuple(cda, &tuple, CTA_TUPLE_ORIG, u3, &zone); else if (cda[CTA_TUPLE_REPLY]) err = ctnetlink_parse_tuple(cda, &tuple, CTA_TUPLE_REPLY, u3, &zone); else return -EINVAL; if (err < 0) return err; h = nf_conntrack_find_get(info->net, &zone, &tuple); if (!h) return -ENOENT; ct = nf_ct_tuplehash_to_ctrack(h); skb2 = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!skb2) { nf_ct_put(ct); return -ENOMEM; } err = ctnetlink_fill_info(skb2, NETLINK_CB(skb).portid, info->nlh->nlmsg_seq, NFNL_MSG_TYPE(info->nlh->nlmsg_type), ct, true, 0); nf_ct_put(ct); if (err <= 0) { kfree_skb(skb2); return -ENOMEM; } return nfnetlink_unicast(skb2, info->net, NETLINK_CB(skb).portid); } #ifdef CONFIG_NF_CONNTRACK_EVENTS static int ctnetlink_dump_one_entry(struct sk_buff *skb, struct netlink_callback *cb, struct nf_conn *ct, bool dying) { struct 
ctnetlink_list_dump_ctx *ctx = (void *)cb->ctx; struct nfgenmsg *nfmsg = nlmsg_data(cb->nlh); u8 l3proto = nfmsg->nfgen_family; int res; if (l3proto && nf_ct_l3num(ct) != l3proto) return 0; if (ctx->last_id) { if (ctnetlink_get_id(ct) != ctx->last_id) return 0; ctx->last_id = 0; } /* We can't dump extension info for the unconfirmed * list because unconfirmed conntracks can have * ct->ext reallocated (and thus freed). * * In the dying list case ct->ext can't be free'd * until after we drop pcpu->lock. */ res = ctnetlink_fill_info(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NFNL_MSG_TYPE(cb->nlh->nlmsg_type), ct, dying, 0); if (res < 0) ctx->last_id = ctnetlink_get_id(ct); return res; } #endif static int ctnetlink_dump_unconfirmed(struct sk_buff *skb, struct netlink_callback *cb) { return 0; } static int ctnetlink_dump_dying(struct sk_buff *skb, struct netlink_callback *cb) { struct ctnetlink_list_dump_ctx *ctx = (void *)cb->ctx; #ifdef CONFIG_NF_CONNTRACK_EVENTS const struct net *net = sock_net(skb->sk); struct nf_conntrack_net_ecache *ecache_net; unsigned long last_id = ctx->last_id; struct nf_conntrack_tuple_hash *h; struct hlist_nulls_node *n; #endif if (ctx->done) return 0; ctx->last_id = 0; #ifdef CONFIG_NF_CONNTRACK_EVENTS ecache_net = nf_conn_pernet_ecache(net); spin_lock_bh(&ecache_net->dying_lock); hlist_nulls_for_each_entry(h, n, &ecache_net->dying_list, hnnode) { struct nf_conn *ct; int res; ct = nf_ct_tuplehash_to_ctrack(h); if (last_id && last_id != ctnetlink_get_id(ct)) continue; res = ctnetlink_dump_one_entry(skb, cb, ct, true); if (res < 0) { spin_unlock_bh(&ecache_net->dying_lock); return skb->len; } last_id = 0; } spin_unlock_bh(&ecache_net->dying_lock); #endif ctx->done = true; return skb->len; } static int ctnetlink_get_ct_dying(struct sk_buff *skb, const struct nfnl_info *info, const struct nlattr * const cda[]) { if (info->nlh->nlmsg_flags & NLM_F_DUMP) { struct netlink_dump_control c = { .dump = ctnetlink_dump_dying, }; return 
netlink_dump_start(info->sk, skb, info->nlh, &c); } return -EOPNOTSUPP; } static int ctnetlink_get_ct_unconfirmed(struct sk_buff *skb, const struct nfnl_info *info, const struct nlattr * const cda[]) { if (info->nlh->nlmsg_flags & NLM_F_DUMP) { struct netlink_dump_control c = { .dump = ctnetlink_dump_unconfirmed, }; return netlink_dump_start(info->sk, skb, info->nlh, &c); } return -EOPNOTSUPP; } #if IS_ENABLED(CONFIG_NF_NAT) static int ctnetlink_parse_nat_setup(struct nf_conn *ct, enum nf_nat_manip_type manip, const struct nlattr *attr) __must_hold(RCU) { const struct nf_nat_hook *nat_hook; int err; nat_hook = rcu_dereference(nf_nat_hook); if (!nat_hook) { #ifdef CONFIG_MODULES rcu_read_unlock(); nfnl_unlock(NFNL_SUBSYS_CTNETLINK); if (request_module("nf-nat") < 0) { nfnl_lock(NFNL_SUBSYS_CTNETLINK); rcu_read_lock(); return -EOPNOTSUPP; } nfnl_lock(NFNL_SUBSYS_CTNETLINK); rcu_read_lock(); nat_hook = rcu_dereference(nf_nat_hook); if (nat_hook) return -EAGAIN; #endif return -EOPNOTSUPP; } err = nat_hook->parse_nat_setup(ct, manip, attr); if (err == -EAGAIN) { #ifdef CONFIG_MODULES rcu_read_unlock(); nfnl_unlock(NFNL_SUBSYS_CTNETLINK); if (request_module("nf-nat-%u", nf_ct_l3num(ct)) < 0) { nfnl_lock(NFNL_SUBSYS_CTNETLINK); rcu_read_lock(); return -EOPNOTSUPP; } nfnl_lock(NFNL_SUBSYS_CTNETLINK); rcu_read_lock(); #else err = -EOPNOTSUPP; #endif } return err; } #endif static int ctnetlink_change_status(struct nf_conn *ct, const struct nlattr * const cda[]) { return nf_ct_change_status_common(ct, ntohl(nla_get_be32(cda[CTA_STATUS]))); } static int ctnetlink_setup_nat(struct nf_conn *ct, const struct nlattr * const cda[]) { #if IS_ENABLED(CONFIG_NF_NAT) int ret; if (!cda[CTA_NAT_DST] && !cda[CTA_NAT_SRC]) return 0; ret = ctnetlink_parse_nat_setup(ct, NF_NAT_MANIP_DST, cda[CTA_NAT_DST]); if (ret < 0) return ret; return ctnetlink_parse_nat_setup(ct, NF_NAT_MANIP_SRC, cda[CTA_NAT_SRC]); #else if (!cda[CTA_NAT_DST] && !cda[CTA_NAT_SRC]) return 0; return -EOPNOTSUPP; #endif } 
static int ctnetlink_change_helper(struct nf_conn *ct, const struct nlattr * const cda[]) { struct nf_conntrack_helper *helper; struct nf_conn_help *help = nfct_help(ct); char *helpname = NULL; struct nlattr *helpinfo = NULL; int err; err = ctnetlink_parse_help(cda[CTA_HELP], &helpname, &helpinfo); if (err < 0) return err; /* don't change helper of sibling connections */ if (ct->master) { /* If we try to change the helper to the same thing twice, * treat the second attempt as a no-op instead of returning * an error. */ err = -EBUSY; if (help) { rcu_read_lock(); helper = rcu_dereference(help->helper); if (helper && !strcmp(helper->name, helpname)) err = 0; rcu_read_unlock(); } return err; } if (!strcmp(helpname, "")) { if (help && help->helper) { /* we had a helper before ... */ nf_ct_remove_expectations(ct); RCU_INIT_POINTER(help->helper, NULL); } return 0; } rcu_read_lock(); helper = __nf_conntrack_helper_find(helpname, nf_ct_l3num(ct), nf_ct_protonum(ct)); if (helper == NULL) { rcu_read_unlock(); return -EOPNOTSUPP; } if (help) { if (rcu_access_pointer(help->helper) == helper) { /* update private helper data if allowed. 
*/ if (helper->from_nlattr) helper->from_nlattr(helpinfo, ct); err = 0; } else err = -EBUSY; } else { /* we cannot set a helper for an existing conntrack */ err = -EOPNOTSUPP; } rcu_read_unlock(); return err; } static int ctnetlink_change_timeout(struct nf_conn *ct, const struct nlattr * const cda[]) { return __nf_ct_change_timeout(ct, (u64)ntohl(nla_get_be32(cda[CTA_TIMEOUT])) * HZ); } #if defined(CONFIG_NF_CONNTRACK_MARK) static void ctnetlink_change_mark(struct nf_conn *ct, const struct nlattr * const cda[]) { u32 mark, newmark, mask = 0; if (cda[CTA_MARK_MASK]) mask = ~ntohl(nla_get_be32(cda[CTA_MARK_MASK])); mark = ntohl(nla_get_be32(cda[CTA_MARK])); newmark = (READ_ONCE(ct->mark) & mask) ^ mark; if (newmark != READ_ONCE(ct->mark)) WRITE_ONCE(ct->mark, newmark); } #endif static const struct nla_policy protoinfo_policy[CTA_PROTOINFO_MAX+1] = { [CTA_PROTOINFO_TCP] = { .type = NLA_NESTED }, [CTA_PROTOINFO_SCTP] = { .type = NLA_NESTED }, }; static int ctnetlink_change_protoinfo(struct nf_conn *ct, const struct nlattr * const cda[]) { const struct nlattr *attr = cda[CTA_PROTOINFO]; const struct nf_conntrack_l4proto *l4proto; struct nlattr *tb[CTA_PROTOINFO_MAX+1]; int err = 0; err = nla_parse_nested_deprecated(tb, CTA_PROTOINFO_MAX, attr, protoinfo_policy, NULL); if (err < 0) return err; l4proto = nf_ct_l4proto_find(nf_ct_protonum(ct)); if (l4proto->from_nlattr) err = l4proto->from_nlattr(tb, ct); return err; } static const struct nla_policy seqadj_policy[CTA_SEQADJ_MAX+1] = { [CTA_SEQADJ_CORRECTION_POS] = { .type = NLA_U32 }, [CTA_SEQADJ_OFFSET_BEFORE] = { .type = NLA_U32 }, [CTA_SEQADJ_OFFSET_AFTER] = { .type = NLA_U32 }, }; static int change_seq_adj(struct nf_ct_seqadj *seq, const struct nlattr * const attr) { int err; struct nlattr *cda[CTA_SEQADJ_MAX+1]; err = nla_parse_nested_deprecated(cda, CTA_SEQADJ_MAX, attr, seqadj_policy, NULL); if (err < 0) return err; if (!cda[CTA_SEQADJ_CORRECTION_POS]) return -EINVAL; seq->correction_pos = 
ntohl(nla_get_be32(cda[CTA_SEQADJ_CORRECTION_POS])); if (!cda[CTA_SEQADJ_OFFSET_BEFORE]) return -EINVAL; seq->offset_before = ntohl(nla_get_be32(cda[CTA_SEQADJ_OFFSET_BEFORE])); if (!cda[CTA_SEQADJ_OFFSET_AFTER]) return -EINVAL; seq->offset_after = ntohl(nla_get_be32(cda[CTA_SEQADJ_OFFSET_AFTER])); return 0; } static int ctnetlink_change_seq_adj(struct nf_conn *ct, const struct nlattr * const cda[]) { struct nf_conn_seqadj *seqadj = nfct_seqadj(ct); int ret = 0; if (!seqadj) return 0; spin_lock_bh(&ct->lock); if (cda[CTA_SEQ_ADJ_ORIG]) { ret = change_seq_adj(&seqadj->seq[IP_CT_DIR_ORIGINAL], cda[CTA_SEQ_ADJ_ORIG]); if (ret < 0) goto err; set_bit(IPS_SEQ_ADJUST_BIT, &ct->status); } if (cda[CTA_SEQ_ADJ_REPLY]) { ret = change_seq_adj(&seqadj->seq[IP_CT_DIR_REPLY], cda[CTA_SEQ_ADJ_REPLY]); if (ret < 0) goto err; set_bit(IPS_SEQ_ADJUST_BIT, &ct->status); } spin_unlock_bh(&ct->lock); return 0; err: spin_unlock_bh(&ct->lock); return ret; } static const struct nla_policy synproxy_policy[CTA_SYNPROXY_MAX + 1] = { [CTA_SYNPROXY_ISN] = { .type = NLA_U32 }, [CTA_SYNPROXY_ITS] = { .type = NLA_U32 }, [CTA_SYNPROXY_TSOFF] = { .type = NLA_U32 }, }; static int ctnetlink_change_synproxy(struct nf_conn *ct, const struct nlattr * const cda[]) { struct nf_conn_synproxy *synproxy = nfct_synproxy(ct); struct nlattr *tb[CTA_SYNPROXY_MAX + 1]; int err; if (!synproxy) return 0; err = nla_parse_nested_deprecated(tb, CTA_SYNPROXY_MAX, cda[CTA_SYNPROXY], synproxy_policy, NULL); if (err < 0) return err; if (!tb[CTA_SYNPROXY_ISN] || !tb[CTA_SYNPROXY_ITS] || !tb[CTA_SYNPROXY_TSOFF]) return -EINVAL; synproxy->isn = ntohl(nla_get_be32(tb[CTA_SYNPROXY_ISN])); synproxy->its = ntohl(nla_get_be32(tb[CTA_SYNPROXY_ITS])); synproxy->tsoff = ntohl(nla_get_be32(tb[CTA_SYNPROXY_TSOFF])); return 0; } static int ctnetlink_attach_labels(struct nf_conn *ct, const struct nlattr * const cda[]) { #ifdef CONFIG_NF_CONNTRACK_LABELS size_t len = nla_len(cda[CTA_LABELS]); const void *mask = cda[CTA_LABELS_MASK]; if 
(len & (sizeof(u32)-1)) /* must be multiple of u32 */
		return -EINVAL;

	if (mask) {
		/* a mask, when given, must be non-empty and as long as the labels */
		if (nla_len(cda[CTA_LABELS_MASK]) == 0 ||
		    nla_len(cda[CTA_LABELS_MASK]) != len)
			return -EINVAL;
		mask = nla_data(cda[CTA_LABELS_MASK]);
	}

	/* convert the byte length into a count of u32 words */
	len /= sizeof(u32);

	return nf_connlabels_replace(ct, nla_data(cda[CTA_LABELS]), mask, len);
#else
	return -EOPNOTSUPP;
#endif
}

/* Apply the attributes of an update request to an existing conntrack.
 *
 * NAT and master attributes are refused with -EOPNOTSUPP.  The remaining
 * attributes are applied in a fixed order (helper, timeout, status,
 * protoinfo, mark, seqadj, synproxy, labels); processing stops at the first
 * handler that returns a negative value, leaving earlier changes in place.
 */
static int
ctnetlink_change_conntrack(struct nf_conn *ct,
			   const struct nlattr * const cda[])
{
	int err;

	/* only allow NAT changes and master assignation for new conntracks */
	if (cda[CTA_NAT_SRC] || cda[CTA_NAT_DST] || cda[CTA_TUPLE_MASTER])
		return -EOPNOTSUPP;

	if (cda[CTA_HELP]) {
		err = ctnetlink_change_helper(ct, cda);
		if (err < 0)
			return err;
	}

	if (cda[CTA_TIMEOUT]) {
		err = ctnetlink_change_timeout(ct, cda);
		if (err < 0)
			return err;
	}

	if (cda[CTA_STATUS]) {
		err = ctnetlink_change_status(ct, cda);
		if (err < 0)
			return err;
	}

	if (cda[CTA_PROTOINFO]) {
		err = ctnetlink_change_protoinfo(ct, cda);
		if (err < 0)
			return err;
	}

#if defined(CONFIG_NF_CONNTRACK_MARK)
	/* the mark update has no failure path */
	if (cda[CTA_MARK])
		ctnetlink_change_mark(ct, cda);
#endif

	if (cda[CTA_SEQ_ADJ_ORIG] || cda[CTA_SEQ_ADJ_REPLY]) {
		err = ctnetlink_change_seq_adj(ct, cda);
		if (err < 0)
			return err;
	}

	if (cda[CTA_SYNPROXY]) {
		err = ctnetlink_change_synproxy(ct, cda);
		if (err < 0)
			return err;
	}

	if (cda[CTA_LABELS]) {
		err = ctnetlink_attach_labels(ct, cda);
		if (err < 0)
			return err;
	}

	return 0;
}

/* Allocate a conntrack for the given tuples and populate it from the netlink
 * attributes.  Returns the new conntrack or an ERR_PTR().  CTA_TIMEOUT is
 * mandatory: without it the allocation is released and -EINVAL returned.
 */
static struct nf_conn *
ctnetlink_create_conntrack(struct net *net,
			   const struct nf_conntrack_zone *zone,
			   const struct nlattr * const cda[],
			   struct nf_conntrack_tuple *otuple,
			   struct nf_conntrack_tuple *rtuple,
			   u8 u3)
{
	struct nf_conn *ct;
	int err = -EINVAL;
	struct nf_conntrack_helper *helper;
	struct nf_conn_tstamp *tstamp;
	u64 timeout;

	ct = nf_conntrack_alloc(net, zone, otuple, rtuple, GFP_ATOMIC);
	if (IS_ERR(ct))
		return ERR_PTR(-ENOMEM);

	if (!cda[CTA_TIMEOUT])
		goto err1;

	/* err1 expects the RCU read lock released, err2/err3 expect it held */
	rcu_read_lock();
	if (cda[CTA_HELP]) {
		char *helpname = NULL;
		struct nlattr *helpinfo = NULL;

		err =
ctnetlink_parse_help(cda[CTA_HELP], &helpname, &helpinfo);
		if (err < 0)
			goto err2;

		helper = __nf_conntrack_helper_find(helpname, nf_ct_l3num(ct),
						    nf_ct_protonum(ct));
		if (helper == NULL) {
			/* drop the RCU read lock before request_module() */
			rcu_read_unlock();
#ifdef CONFIG_MODULES
			if (request_module("nfct-helper-%s", helpname) < 0) {
				err = -EOPNOTSUPP;
				goto err1;
			}

			rcu_read_lock();
			helper = __nf_conntrack_helper_find(helpname,
							    nf_ct_l3num(ct),
							    nf_ct_protonum(ct));
			if (helper) {
				/* helper is available now: report -EAGAIN */
				err = -EAGAIN;
				goto err2;
			}
			rcu_read_unlock();
#endif
			err = -EOPNOTSUPP;
			goto err1;
		} else {
			struct nf_conn_help *help;

			help = nf_ct_helper_ext_add(ct, GFP_ATOMIC);
			if (help == NULL) {
				err = -ENOMEM;
				goto err2;
			}
			/* set private helper data if allowed. */
			if (helper->from_nlattr)
				helper->from_nlattr(helpinfo, ct);

			/* disable helper auto-assignment for this entry */
			ct->status |= IPS_HELPER;
			RCU_INIT_POINTER(help->helper, helper);
		}
	}

	err = ctnetlink_setup_nat(ct, cda);
	if (err < 0)
		goto err2;

	/* return values of the extension helpers below are not checked */
	nf_ct_acct_ext_add(ct, GFP_ATOMIC);
	nf_ct_tstamp_ext_add(ct, GFP_ATOMIC);
	nf_ct_ecache_ext_add(ct, 0, 0, GFP_ATOMIC);
	nf_ct_labels_ext_add(ct);
	nfct_seqadj_ext_add(ct);
	nfct_synproxy_ext_add(ct);

	/* we must add conntrack extensions before confirmation. */
	ct->status |= IPS_CONFIRMED;

	timeout = (u64)ntohl(nla_get_be32(cda[CTA_TIMEOUT])) * HZ;
	__nf_ct_set_timeout(ct, timeout);

	if (cda[CTA_STATUS]) {
		err = ctnetlink_change_status(ct, cda);
		if (err < 0)
			goto err2;
	}

	if (cda[CTA_SEQ_ADJ_ORIG] || cda[CTA_SEQ_ADJ_REPLY]) {
		err = ctnetlink_change_seq_adj(ct, cda);
		if (err < 0)
			goto err2;
	}

	/* start from zeroed protocol state before applying CTA_PROTOINFO */
	memset(&ct->proto, 0, sizeof(ct->proto));
	if (cda[CTA_PROTOINFO]) {
		err = ctnetlink_change_protoinfo(ct, cda);
		if (err < 0)
			goto err2;
	}

	if (cda[CTA_SYNPROXY]) {
		err = ctnetlink_change_synproxy(ct, cda);
		if (err < 0)
			goto err2;
	}

#if defined(CONFIG_NF_CONNTRACK_MARK)
	if (cda[CTA_MARK])
		ctnetlink_change_mark(ct, cda);
#endif

	/* setup master conntrack: this is a confirmed expectation */
	if (cda[CTA_TUPLE_MASTER]) {
		struct nf_conntrack_tuple master;
		struct nf_conntrack_tuple_hash *master_h;
		struct nf_conn *master_ct;

		err = ctnetlink_parse_tuple(cda, &master, CTA_TUPLE_MASTER,
					    u3, NULL);
		if (err < 0)
			goto err2;

		master_h = nf_conntrack_find_get(net, zone, &master);
		if (master_h == NULL) {
			err = -ENOENT;
			goto err2;
		}
		master_ct = nf_ct_tuplehash_to_ctrack(master_h);
		__set_bit(IPS_EXPECTED_BIT, &ct->status);
		/* the reference obtained by the lookup is kept in ct->master;
		 * err3 drops it again if insertion fails
		 */
		ct->master = master_ct;
	}
	tstamp = nf_conn_tstamp_find(ct);
	if (tstamp)
		tstamp->start = ktime_get_real_ns();

	err = nf_conntrack_hash_check_insert(ct);
	if (err < 0)
		goto err3;

	rcu_read_unlock();

	return ct;

err3:
	if (ct->master)
		nf_ct_put(ct->master);
err2:
	rcu_read_unlock();
err1:
	nf_conntrack_free(ct);
	return ERR_PTR(err);
}

/* Handler for a "new conntrack" message: create the entry when it does not
 * exist and NLM_F_CREATE is set, otherwise update the existing entry unless
 * NLM_F_EXCL is set.
 */
static int ctnetlink_new_conntrack(struct sk_buff *skb,
				   const struct nfnl_info *info,
				   const struct nlattr * const cda[])
{
	struct nf_conntrack_tuple otuple, rtuple;
	struct nf_conntrack_tuple_hash *h = NULL;
	u_int8_t u3 = info->nfmsg->nfgen_family;
	struct nf_conntrack_zone zone;
	struct nf_conn *ct;
	int err;

	err = ctnetlink_parse_zone(cda[CTA_ZONE], &zone);
	if (err < 0)
		return err;

	/* otuple/rtuple are only initialised when their attribute is present */
	if (cda[CTA_TUPLE_ORIG]) {
		err = ctnetlink_parse_tuple(cda, &otuple, CTA_TUPLE_ORIG,
					    u3, &zone);
		if (err < 0)
			return err;
	}

	if
(cda[CTA_TUPLE_REPLY]) {
		err = ctnetlink_parse_tuple(cda, &rtuple, CTA_TUPLE_REPLY,
					    u3, &zone);
		if (err < 0)
			return err;
	}

	/* look the entry up by the original tuple if given, else by the reply */
	if (cda[CTA_TUPLE_ORIG])
		h = nf_conntrack_find_get(info->net, &zone, &otuple);
	else if (cda[CTA_TUPLE_REPLY])
		h = nf_conntrack_find_get(info->net, &zone, &rtuple);

	if (h == NULL) {
		err = -ENOENT;
		if (info->nlh->nlmsg_flags & NLM_F_CREATE) {
			enum ip_conntrack_events events;

			/* creation needs both tuples, with matching protocol */
			if (!cda[CTA_TUPLE_ORIG] || !cda[CTA_TUPLE_REPLY])
				return -EINVAL;
			if (otuple.dst.protonum != rtuple.dst.protonum)
				return -EINVAL;

			ct = ctnetlink_create_conntrack(info->net, &zone, cda,
							&otuple, &rtuple, u3);
			if (IS_ERR(ct))
				return PTR_ERR(ct);

			err = 0;
			if (test_bit(IPS_EXPECTED_BIT, &ct->status))
				events = 1 << IPCT_RELATED;
			else
				events = 1 << IPCT_NEW;

			/* a failure to attach labels is not reported to the
			 * caller; it only suppresses the IPCT_LABEL event
			 */
			if (cda[CTA_LABELS] &&
			    ctnetlink_attach_labels(ct, cda) == 0)
				events |= (1 << IPCT_LABEL);

			nf_conntrack_eventmask_report((1 << IPCT_REPLY) |
						      (1 << IPCT_ASSURED) |
						      (1 << IPCT_HELPER) |
						      (1 << IPCT_PROTOINFO) |
						      (1 << IPCT_SEQADJ) |
						      (1 << IPCT_MARK) |
						      (1 << IPCT_SYNPROXY) |
						      events,
						      ct, NETLINK_CB(skb).portid,
						      nlmsg_report(info->nlh));
			/* drop this function's reference on the new entry */
			nf_ct_put(ct);
		}

		return err;
	}
	/* implicit 'else' */

	err = -EEXIST;
	ct = nf_ct_tuplehash_to_ctrack(h);
	if (!(info->nlh->nlmsg_flags & NLM_F_EXCL)) {
		err = ctnetlink_change_conntrack(ct, cda);
		if (err == 0) {
			nf_conntrack_eventmask_report((1 << IPCT_REPLY) |
						      (1 << IPCT_ASSURED) |
						      (1 << IPCT_HELPER) |
						      (1 << IPCT_LABEL) |
						      (1 << IPCT_PROTOINFO) |
						      (1 << IPCT_SEQADJ) |
						      (1 << IPCT_MARK) |
						      (1 << IPCT_SYNPROXY),
						      ct, NETLINK_CB(skb).portid,
						      nlmsg_report(info->nlh));
		}
	}

	/* release the reference taken by nf_conntrack_find_get() */
	nf_ct_put(ct);
	return err;
}

/* Emit one IPCTNL_MSG_CT_GET_STATS_CPU message with the counters of @st.
 * Returns skb->len on success and -1 when the message does not fit.
 */
static int
ctnetlink_ct_stat_cpu_fill_info(struct sk_buff *skb, u32 portid, u32 seq,
				__u16 cpu, const struct ip_conntrack_stat *st)
{
	struct nlmsghdr *nlh;
	unsigned int flags = portid ?
NLM_F_MULTI : 0, event; event = nfnl_msg_type(NFNL_SUBSYS_CTNETLINK, IPCTNL_MSG_CT_GET_STATS_CPU); nlh = nfnl_msg_put(skb, portid, seq, event, flags, AF_UNSPEC, NFNETLINK_V0, htons(cpu)); if (!nlh) goto nlmsg_failure; if (nla_put_be32(skb, CTA_STATS_FOUND, htonl(st->found)) || nla_put_be32(skb, CTA_STATS_INVALID, htonl(st->invalid)) || nla_put_be32(skb, CTA_STATS_INSERT, htonl(st->insert)) || nla_put_be32(skb, CTA_STATS_INSERT_FAILED, htonl(st->insert_failed)) || nla_put_be32(skb, CTA_STATS_DROP, htonl(st->drop)) || nla_put_be32(skb, CTA_STATS_EARLY_DROP, htonl(st->early_drop)) || nla_put_be32(skb, CTA_STATS_ERROR, htonl(st->error)) || nla_put_be32(skb, CTA_STATS_SEARCH_RESTART, htonl(st->search_restart)) || nla_put_be32(skb, CTA_STATS_CLASH_RESOLVE, htonl(st->clash_resolve)) || nla_put_be32(skb, CTA_STATS_CHAIN_TOOLONG, htonl(st->chaintoolong))) goto nla_put_failure; nlmsg_end(skb, nlh); return skb->len; nla_put_failure: nlmsg_failure: nlmsg_cancel(skb, nlh); return -1; } static int ctnetlink_ct_stat_cpu_dump(struct sk_buff *skb, struct netlink_callback *cb) { int cpu; struct net *net = sock_net(skb->sk); if (cb->args[0] == nr_cpu_ids) return 0; for (cpu = cb->args[0]; cpu < nr_cpu_ids; cpu++) { const struct ip_conntrack_stat *st; if (!cpu_possible(cpu)) continue; st = per_cpu_ptr(net->ct.stat, cpu); if (ctnetlink_ct_stat_cpu_fill_info(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, cpu, st) < 0) break; } cb->args[0] = cpu; return skb->len; } static int ctnetlink_stat_ct_cpu(struct sk_buff *skb, const struct nfnl_info *info, const struct nlattr * const cda[]) { if (info->nlh->nlmsg_flags & NLM_F_DUMP) { struct netlink_dump_control c = { .dump = ctnetlink_ct_stat_cpu_dump, }; return netlink_dump_start(info->sk, skb, info->nlh, &c); } return 0; } static int ctnetlink_stat_ct_fill_info(struct sk_buff *skb, u32 portid, u32 seq, u32 type, struct net *net) { unsigned int flags = portid ? 
NLM_F_MULTI : 0, event; unsigned int nr_conntracks; struct nlmsghdr *nlh; event = nfnl_msg_type(NFNL_SUBSYS_CTNETLINK, IPCTNL_MSG_CT_GET_STATS); nlh = nfnl_msg_put(skb, portid, seq, event, flags, AF_UNSPEC, NFNETLINK_V0, 0); if (!nlh) goto nlmsg_failure; nr_conntracks = nf_conntrack_count(net); if (nla_put_be32(skb, CTA_STATS_GLOBAL_ENTRIES, htonl(nr_conntracks))) goto nla_put_failure; if (nla_put_be32(skb, CTA_STATS_GLOBAL_MAX_ENTRIES, htonl(nf_conntrack_max))) goto nla_put_failure; nlmsg_end(skb, nlh); return skb->len; nla_put_failure: nlmsg_failure: nlmsg_cancel(skb, nlh); return -1; } static int ctnetlink_stat_ct(struct sk_buff *skb, const struct nfnl_info *info, const struct nlattr * const cda[]) { struct sk_buff *skb2; int err; skb2 = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (skb2 == NULL) return -ENOMEM; err = ctnetlink_stat_ct_fill_info(skb2, NETLINK_CB(skb).portid, info->nlh->nlmsg_seq, NFNL_MSG_TYPE(info->nlh->nlmsg_type), sock_net(skb->sk)); if (err <= 0) { kfree_skb(skb2); return -ENOMEM; } return nfnetlink_unicast(skb2, info->net, NETLINK_CB(skb).portid); } static const struct nla_policy exp_nla_policy[CTA_EXPECT_MAX+1] = { [CTA_EXPECT_MASTER] = { .type = NLA_NESTED }, [CTA_EXPECT_TUPLE] = { .type = NLA_NESTED }, [CTA_EXPECT_MASK] = { .type = NLA_NESTED }, [CTA_EXPECT_TIMEOUT] = { .type = NLA_U32 }, [CTA_EXPECT_ID] = { .type = NLA_U32 }, [CTA_EXPECT_HELP_NAME] = { .type = NLA_NUL_STRING, .len = NF_CT_HELPER_NAME_LEN - 1 }, [CTA_EXPECT_ZONE] = { .type = NLA_U16 }, [CTA_EXPECT_FLAGS] = NLA_POLICY_MASK(NLA_BE32, NF_CT_EXPECT_MASK), [CTA_EXPECT_CLASS] = { .type = NLA_U32 }, [CTA_EXPECT_NAT] = { .type = NLA_NESTED }, [CTA_EXPECT_FN] = { .type = NLA_NUL_STRING }, }; static struct nf_conntrack_expect * ctnetlink_alloc_expect(const struct nlattr *const cda[], struct nf_conn *ct, struct nf_conntrack_tuple *tuple, struct nf_conntrack_tuple *mask); #ifdef CONFIG_NETFILTER_NETLINK_GLUE_CT static size_t ctnetlink_glue_build_size(const struct nf_conn *ct) { 
	/* sum of the attribute sizes emitted for one conntrack; terms guarded
	 * by #if/#ifdef are only counted when the feature is built in
	 */
return 3 * nla_total_size(0) /* CTA_TUPLE_ORIG|REPL|MASTER */
	       + 3 * nla_total_size(0) /* CTA_TUPLE_IP */
	       + 3 * nla_total_size(0) /* CTA_TUPLE_PROTO */
	       + 3 * nla_total_size(sizeof(u_int8_t)) /* CTA_PROTO_NUM */
	       + nla_total_size(sizeof(u_int32_t)) /* CTA_ID */
	       + nla_total_size(sizeof(u_int32_t)) /* CTA_STATUS */
	       + nla_total_size(sizeof(u_int32_t)) /* CTA_TIMEOUT */
	       + nla_total_size(0) /* CTA_PROTOINFO */
	       + nla_total_size(0) /* CTA_HELP */
	       + nla_total_size(NF_CT_HELPER_NAME_LEN) /* CTA_HELP_NAME */
	       + ctnetlink_secctx_size(ct)
	       + ctnetlink_acct_size(ct)
	       + ctnetlink_timestamp_size(ct)
#if IS_ENABLED(CONFIG_NF_NAT)
	       + 2 * nla_total_size(0) /* CTA_NAT_SEQ_ADJ_ORIG|REPL */
	       + 6 * nla_total_size(sizeof(u_int32_t)) /* CTA_NAT_SEQ_OFFSET */
#endif
#ifdef CONFIG_NF_CONNTRACK_MARK
	       + nla_total_size(sizeof(u_int32_t)) /* CTA_MARK */
#endif
#ifdef CONFIG_NF_CONNTRACK_ZONES
	       + nla_total_size(sizeof(u_int16_t)) /* CTA_ZONE|CTA_TUPLE_ZONE */
#endif
	       + ctnetlink_proto_size(ct)
	       ;
}

/* Serialise @ct into @skb: original and reply tuples (each with its zone id)
 * followed by the remaining conntrack attributes.  Returns 0 on success and
 * -ENOSPC as soon as any attribute does not fit.
 */
static int __ctnetlink_glue_build(struct sk_buff *skb, struct nf_conn *ct)
{
	const struct nf_conntrack_zone *zone;
	struct nlattr *nest_parms;

	zone = nf_ct_zone(ct);

	/* original-direction tuple, nested under CTA_TUPLE_ORIG */
	nest_parms = nla_nest_start(skb, CTA_TUPLE_ORIG);
	if (!nest_parms)
		goto nla_put_failure;
	if (ctnetlink_dump_tuples(skb, nf_ct_tuple(ct, IP_CT_DIR_ORIGINAL)) < 0)
		goto nla_put_failure;
	if (ctnetlink_dump_zone_id(skb, CTA_TUPLE_ZONE, zone,
				   NF_CT_ZONE_DIR_ORIG) < 0)
		goto nla_put_failure;
	nla_nest_end(skb, nest_parms);

	/* reply-direction tuple, nested under CTA_TUPLE_REPLY */
	nest_parms = nla_nest_start(skb, CTA_TUPLE_REPLY);
	if (!nest_parms)
		goto nla_put_failure;
	if (ctnetlink_dump_tuples(skb, nf_ct_tuple(ct, IP_CT_DIR_REPLY)) < 0)
		goto nla_put_failure;
	if (ctnetlink_dump_zone_id(skb, CTA_TUPLE_ZONE, zone,
				   NF_CT_ZONE_DIR_REPL) < 0)
		goto nla_put_failure;
	nla_nest_end(skb, nest_parms);

	if (ctnetlink_dump_zone_id(skb, CTA_ZONE, zone,
				   NF_CT_DEFAULT_ZONE_DIR) < 0)
		goto nla_put_failure;

	if (ctnetlink_dump_id(skb, ct) < 0)
		goto nla_put_failure;

	if (ctnetlink_dump_status(skb, ct) < 0)
		goto nla_put_failure;

	if
(ctnetlink_dump_timeout(skb, ct, false) < 0) goto nla_put_failure; if (ctnetlink_dump_protoinfo(skb, ct, false) < 0) goto nla_put_failure; if (ctnetlink_dump_acct(skb, ct, IPCTNL_MSG_CT_GET) < 0 || ctnetlink_dump_timestamp(skb, ct) < 0) goto nla_put_failure; if (ctnetlink_dump_helpinfo(skb, ct) < 0) goto nla_put_failure; #ifdef CONFIG_NF_CONNTRACK_SECMARK if (ct->secmark && ctnetlink_dump_secctx(skb, ct) < 0) goto nla_put_failure; #endif if (ct->master && ctnetlink_dump_master(skb, ct) < 0) goto nla_put_failure; if ((ct->status & IPS_SEQ_ADJUST) && ctnetlink_dump_ct_seq_adj(skb, ct) < 0) goto nla_put_failure; if (ctnetlink_dump_ct_synproxy(skb, ct) < 0) goto nla_put_failure; #ifdef CONFIG_NF_CONNTRACK_MARK if (ctnetlink_dump_mark(skb, ct, true) < 0) goto nla_put_failure; #endif if (ctnetlink_dump_labels(skb, ct) < 0) goto nla_put_failure; return 0; nla_put_failure: return -ENOSPC; } static int ctnetlink_glue_build(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, u_int16_t ct_attr, u_int16_t ct_info_attr) { struct nlattr *nest_parms; nest_parms = nla_nest_start(skb, ct_attr); if (!nest_parms) goto nla_put_failure; if (__ctnetlink_glue_build(skb, ct) < 0) goto nla_put_failure; nla_nest_end(skb, nest_parms); if (nla_put_be32(skb, ct_info_attr, htonl(ctinfo))) goto nla_put_failure; return 0; nla_put_failure: return -ENOSPC; } static int ctnetlink_update_status(struct nf_conn *ct, const struct nlattr * const cda[]) { unsigned int status = ntohl(nla_get_be32(cda[CTA_STATUS])); unsigned long d = ct->status ^ status; if (d & IPS_SEEN_REPLY && !(status & IPS_SEEN_REPLY)) /* SEEN_REPLY bit can only be set */ return -EBUSY; if (d & IPS_ASSURED && !(status & IPS_ASSURED)) /* ASSURED bit can only be set */ return -EBUSY; /* This check is less strict than ctnetlink_change_status() * because callers often flip IPS_EXPECTED bits when sending * an NFQA_CT attribute to the kernel. So ignore the * unchangeable bits but do not error out. 
Also user programs * are allowed to clear the bits that they are allowed to change. */ __nf_ct_change_status(ct, status, ~status); return 0; } static int ctnetlink_glue_parse_ct(const struct nlattr *cda[], struct nf_conn *ct) { int err; if (cda[CTA_TIMEOUT]) { err = ctnetlink_change_timeout(ct, cda); if (err < 0) return err; } if (cda[CTA_STATUS]) { err = ctnetlink_update_status(ct, cda); if (err < 0) return err; } if (cda[CTA_HELP]) { err = ctnetlink_change_helper(ct, cda); if (err < 0) return err; } if (cda[CTA_LABELS]) { err = ctnetlink_attach_labels(ct, cda); if (err < 0) return err; } #if defined(CONFIG_NF_CONNTRACK_MARK) if (cda[CTA_MARK]) { ctnetlink_change_mark(ct, cda); } #endif return 0; } static int ctnetlink_glue_parse(const struct nlattr *attr, struct nf_conn *ct) { struct nlattr *cda[CTA_MAX+1]; int ret; ret = nla_parse_nested_deprecated(cda, CTA_MAX, attr, ct_nla_policy, NULL); if (ret < 0) return ret; return ctnetlink_glue_parse_ct((const struct nlattr **)cda, ct); } static int ctnetlink_glue_exp_parse(const struct nlattr * const *cda, const struct nf_conn *ct, struct nf_conntrack_tuple *tuple, struct nf_conntrack_tuple *mask) { int err; err = ctnetlink_parse_tuple(cda, tuple, CTA_EXPECT_TUPLE, nf_ct_l3num(ct), NULL); if (err < 0) return err; return ctnetlink_parse_tuple(cda, mask, CTA_EXPECT_MASK, nf_ct_l3num(ct), NULL); } static int ctnetlink_glue_attach_expect(const struct nlattr *attr, struct nf_conn *ct, u32 portid, u32 report) { struct nlattr *cda[CTA_EXPECT_MAX+1]; struct nf_conntrack_tuple tuple, mask; struct nf_conntrack_expect *exp; int err; err = nla_parse_nested_deprecated(cda, CTA_EXPECT_MAX, attr, exp_nla_policy, NULL); if (err < 0) return err; err = ctnetlink_glue_exp_parse((const struct nlattr * const *)cda, ct, &tuple, &mask); if (err < 0) return err; exp = ctnetlink_alloc_expect((const struct nlattr * const *)cda, ct, &tuple, &mask); if (IS_ERR(exp)) return PTR_ERR(exp); err = nf_ct_expect_related_report(exp, portid, report, 0); 
nf_ct_expect_put(exp); return err; } static void ctnetlink_glue_seqadj(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, int diff) { if (!(ct->status & IPS_NAT_MASK)) return; nf_ct_tcp_seqadj_set(skb, ct, ctinfo, diff); } static const struct nfnl_ct_hook ctnetlink_glue_hook = { .build_size = ctnetlink_glue_build_size, .build = ctnetlink_glue_build, .parse = ctnetlink_glue_parse, .attach_expect = ctnetlink_glue_attach_expect, .seq_adjust = ctnetlink_glue_seqadj, }; #endif /* CONFIG_NETFILTER_NETLINK_GLUE_CT */ /*********************************************************************** * EXPECT ***********************************************************************/ static int ctnetlink_exp_dump_tuple(struct sk_buff *skb, const struct nf_conntrack_tuple *tuple, u32 type) { struct nlattr *nest_parms; nest_parms = nla_nest_start(skb, type); if (!nest_parms) goto nla_put_failure; if (ctnetlink_dump_tuples(skb, tuple) < 0) goto nla_put_failure; nla_nest_end(skb, nest_parms); return 0; nla_put_failure: return -1; } static int ctnetlink_exp_dump_mask(struct sk_buff *skb, const struct nf_conntrack_tuple *tuple, const struct nf_conntrack_tuple_mask *mask) { const struct nf_conntrack_l4proto *l4proto; struct nf_conntrack_tuple m; struct nlattr *nest_parms; int ret; memset(&m, 0xFF, sizeof(m)); memcpy(&m.src.u3, &mask->src.u3, sizeof(m.src.u3)); m.src.u.all = mask->src.u.all; m.src.l3num = tuple->src.l3num; m.dst.protonum = tuple->dst.protonum; nest_parms = nla_nest_start(skb, CTA_EXPECT_MASK); if (!nest_parms) goto nla_put_failure; rcu_read_lock(); ret = ctnetlink_dump_tuples_ip(skb, &m); if (ret >= 0) { l4proto = nf_ct_l4proto_find(tuple->dst.protonum); ret = ctnetlink_dump_tuples_proto(skb, &m, l4proto); } rcu_read_unlock(); if (unlikely(ret < 0)) goto nla_put_failure; nla_nest_end(skb, nest_parms); return 0; nla_put_failure: return -1; } #if IS_ENABLED(CONFIG_NF_NAT) static const union nf_inet_addr any_addr; #endif static __be32 nf_expect_get_id(const 
struct nf_conntrack_expect *exp)
{
	/* key is filled with random bytes once, on first use */
	static siphash_aligned_key_t exp_id_seed;
	unsigned long a, b, c, d;

	net_get_random_once(&exp_id_seed, sizeof(exp_id_seed));

	/* the id is a keyed hash over the expectation, helper and master
	 * pointers plus a hash of the tuple
	 */
	a = (unsigned long)exp;
	b = (unsigned long)exp->helper;
	c = (unsigned long)exp->master;
	d = (unsigned long)siphash(&exp->tuple, sizeof(exp->tuple), &exp_id_seed);

#ifdef CONFIG_64BIT
	return (__force __be32)siphash_4u64((u64)a, (u64)b, (u64)c, (u64)d, &exp_id_seed);
#else
	return (__force __be32)siphash_4u32((u32)a, (u32)b, (u32)c, (u32)d, &exp_id_seed);
#endif
}

/* Write the attributes describing @exp into @skb: tuple, mask, master tuple,
 * optional NAT data, timeout, id, flags, class, helper name and expectfn
 * name.  Returns 0 on success and -1 when an attribute does not fit.
 */
static int
ctnetlink_exp_dump_expect(struct sk_buff *skb,
			  const struct nf_conntrack_expect *exp)
{
	struct nf_conn *master = exp->master;
	/* remaining lifetime in seconds; clamped to 0 below if already past */
	long timeout = ((long)exp->timeout.expires - (long)jiffies) / HZ;
	struct nf_conntrack_helper *helper;
#if IS_ENABLED(CONFIG_NF_NAT)
	struct nlattr *nest_parms;
	struct nf_conntrack_tuple nat_tuple = {};
#endif
	struct nf_ct_helper_expectfn *expfn;

	if (timeout < 0)
		timeout = 0;

	if (ctnetlink_exp_dump_tuple(skb, &exp->tuple, CTA_EXPECT_TUPLE) < 0)
		goto nla_put_failure;
	if (ctnetlink_exp_dump_mask(skb, &exp->tuple, &exp->mask) < 0)
		goto nla_put_failure;
	if (ctnetlink_exp_dump_tuple(skb,
				 &master->tuplehash[IP_CT_DIR_ORIGINAL].tuple,
				 CTA_EXPECT_MASTER) < 0)
		goto nla_put_failure;

#if IS_ENABLED(CONFIG_NF_NAT)
	/* NAT data is only emitted when a saved address or proto is set */
	if (!nf_inet_addr_cmp(&exp->saved_addr, &any_addr) ||
	    exp->saved_proto.all) {
		nest_parms = nla_nest_start(skb, CTA_EXPECT_NAT);
		if (!nest_parms)
			goto nla_put_failure;

		if (nla_put_be32(skb, CTA_EXPECT_NAT_DIR, htonl(exp->dir)))
			goto nla_put_failure;

		nat_tuple.src.l3num = nf_ct_l3num(master);
		nat_tuple.src.u3 = exp->saved_addr;
		nat_tuple.dst.protonum = nf_ct_protonum(master);
		nat_tuple.src.u = exp->saved_proto;

		if (ctnetlink_exp_dump_tuple(skb, &nat_tuple,
						CTA_EXPECT_NAT_TUPLE) < 0)
			goto nla_put_failure;
		nla_nest_end(skb, nest_parms);
	}
#endif
	if (nla_put_be32(skb, CTA_EXPECT_TIMEOUT, htonl(timeout)) ||
	    nla_put_be32(skb, CTA_EXPECT_ID, nf_expect_get_id(exp)) ||
	    nla_put_be32(skb, CTA_EXPECT_FLAGS, htonl(exp->flags)) ||
nla_put_be32(skb, CTA_EXPECT_CLASS, htonl(exp->class)))
		goto nla_put_failure;
	/* helper name and expectfn name are optional attributes */
	helper = rcu_dereference(exp->helper);
	if (helper &&
	    nla_put_string(skb, CTA_EXPECT_HELP_NAME, helper->name))
		goto nla_put_failure;
	expfn = nf_ct_helper_expectfn_find_by_symbol(exp->expectfn);
	if (expfn != NULL &&
	    nla_put_string(skb, CTA_EXPECT_FN, expfn->name))
		goto nla_put_failure;

	return 0;

nla_put_failure:
	return -1;
}

/* Build one complete expectation message (netlink header plus attributes)
 * in @skb.  Returns skb->len on success; on failure the partial message is
 * cancelled and -1 returned.
 */
static int
ctnetlink_exp_fill_info(struct sk_buff *skb, u32 portid, u32 seq,
			int event, const struct nf_conntrack_expect *exp)
{
	struct nlmsghdr *nlh;
	unsigned int flags = portid ? NLM_F_MULTI : 0;

	event = nfnl_msg_type(NFNL_SUBSYS_CTNETLINK_EXP, event);
	nlh = nfnl_msg_put(skb, portid, seq, event, flags,
			   exp->tuple.src.l3num, NFNETLINK_V0, 0);
	if (!nlh)
		goto nlmsg_failure;

	if (ctnetlink_exp_dump_expect(skb, exp) < 0)
		goto nla_put_failure;

	nlmsg_end(skb, nlh);
	return skb->len;

nlmsg_failure:
nla_put_failure:
	nlmsg_cancel(skb, nlh);
	return -1;
}

#ifdef CONFIG_NF_CONNTRACK_EVENTS
/* Expectation event notifier: turns IPEXP_DESTROY / IPEXP_NEW events into
 * netlink messages for the matching multicast group.  Always returns 0.
 */
static int
ctnetlink_expect_event(unsigned int events, const struct nf_exp_event *item)
{
	struct nf_conntrack_expect *exp = item->exp;
	struct net *net = nf_ct_exp_net(exp);
	struct nlmsghdr *nlh;
	struct sk_buff *skb;
	unsigned int type, group;
	int flags = 0;

	/* IPEXP_DESTROY is tested first; other events are ignored */
	if (events & (1 << IPEXP_DESTROY)) {
		type = IPCTNL_MSG_EXP_DELETE;
		group = NFNLGRP_CONNTRACK_EXP_DESTROY;
	} else if (events & (1 << IPEXP_NEW)) {
		type = IPCTNL_MSG_EXP_NEW;
		flags = NLM_F_CREATE|NLM_F_EXCL;
		group = NFNLGRP_CONNTRACK_EXP_NEW;
	} else
		return 0;

	/* nothing to do unless a report was requested or someone listens */
	if (!item->report && !nfnetlink_has_listeners(net, group))
		return 0;

	skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC);
	if (skb == NULL)
		goto errout;

	type = nfnl_msg_type(NFNL_SUBSYS_CTNETLINK_EXP, type);
	nlh = nfnl_msg_put(skb, item->portid, 0, type, flags,
			   exp->tuple.src.l3num, NFNETLINK_V0, 0);
	if (!nlh)
		goto nlmsg_failure;

	if (ctnetlink_exp_dump_expect(skb, exp) < 0)
		goto nla_put_failure;

	nlmsg_end(skb, nlh);
	nfnetlink_send(skb, net, item->portid, group, item->report, GFP_ATOMIC);
	return 0;
nla_put_failure:
	nlmsg_cancel(skb, nlh);
nlmsg_failure:
	kfree_skb(skb);
errout:
	/* signal the failed delivery with -ENOBUFS */
	nfnetlink_set_err(net, 0, 0, -ENOBUFS);
	return 0;
}
#endif

/* Resume cookie for expectation dumps: derived from the expectation's
 * address, its master's conntrack id and its class; never 0, because 0 in
 * cb->args[1] means "no resume point".
 */
static unsigned long ctnetlink_exp_id(const struct nf_conntrack_expect *exp)
{
	unsigned long id = (unsigned long)exp;

	id += nf_ct_get_id(exp->master);
	id += exp->class;

	return id ? id : 1;
}

/* Dump callback walking the global expectation hash.
 *
 * cb->args[0] is the current hash bucket; cb->args[1] is the cookie of the
 * expectation that did not fit into the previous skb (0 when not resuming).
 * If the remembered expectation is no longer found in its bucket, the bucket
 * is walked again from its start.
 */
static int
ctnetlink_exp_dump_table(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct net *net = sock_net(skb->sk);
	struct nfgenmsg *nfmsg = nlmsg_data(cb->nlh);
	u_int8_t l3proto = nfmsg->nfgen_family;
	unsigned long last_id = cb->args[1];
	struct nf_conntrack_expect *exp;

	rcu_read_lock();
	for (; cb->args[0] < nf_ct_expect_hsize; cb->args[0]++) {
restart:
		hlist_for_each_entry_rcu(exp, &nf_ct_expect_hash[cb->args[0]],
					 hnode) {
			/* a zero family acts as a wildcard */
			if (l3proto && exp->tuple.src.l3num != l3proto)
				continue;

			if (!net_eq(nf_ct_net(exp->master), net))
				continue;

			if (cb->args[1]) {
				/* skip entries until the resume point */
				if (ctnetlink_exp_id(exp) != last_id)
					continue;
				cb->args[1] = 0;
			}
			if (ctnetlink_exp_fill_info(skb,
						    NETLINK_CB(cb->skb).portid,
						    cb->nlh->nlmsg_seq,
						    IPCTNL_MSG_EXP_NEW,
						    exp) < 0) {
				/* skb full: remember where to resume */
				cb->args[1] = ctnetlink_exp_id(exp);
				goto out;
			}
		}
		if (cb->args[1]) {
			/* resume point not found: rewalk this bucket */
			cb->args[1] = 0;
			goto restart;
		}
	}
out:
	rcu_read_unlock();
	return skb->len;
}

/* Dump callback listing the expectations attached to one conntrack, which
 * is passed in cb->data.  cb->args[0] becomes 1 once the list is complete;
 * cb->args[1] is the resume cookie, as in ctnetlink_exp_dump_table().
 */
static int
ctnetlink_exp_ct_dump_table(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct nfgenmsg *nfmsg = nlmsg_data(cb->nlh);
	struct nf_conn *ct = cb->data;
	struct nf_conn_help *help;
	u_int8_t l3proto = nfmsg->nfgen_family;
	unsigned long last_id = cb->args[1];
	struct nf_conntrack_expect *exp;

	if (cb->args[0])
		return 0;

	help = nfct_help(ct);
	if (!help)
		return 0;

	rcu_read_lock();

restart:
	hlist_for_each_entry_rcu(exp, &help->expectations, lnode) {
		if (l3proto && exp->tuple.src.l3num != l3proto)
			continue;
		if (cb->args[1]) {
			if (ctnetlink_exp_id(exp) != last_id)
				continue;
			cb->args[1] = 0;
		}
		if (ctnetlink_exp_fill_info(skb, NETLINK_CB(cb->skb).portid,
					    cb->nlh->nlmsg_seq,
					    IPCTNL_MSG_EXP_NEW,
					    exp) < 0) {
			cb->args[1] = ctnetlink_exp_id(exp);
			goto out;
		}
	}
	if
(cb->args[1]) {
		/* resume point not found: walk the list again from the start */
		cb->args[1] = 0;
		goto restart;
	}
	cb->args[0] = 1;
out:
	rcu_read_unlock();
	return skb->len;
}

/* Dump start hook: take a reference on the conntrack held in cb->data for
 * the duration of the dump; -ENOENT if its refcount already dropped to zero.
 */
static int ctnetlink_dump_exp_ct_start(struct netlink_callback *cb)
{
	struct nf_conn *ct = cb->data;

	if (!refcount_inc_not_zero(&ct->ct_general.use))
		return -ENOENT;
	return 0;
}

/* Dump done hook: drop the conntrack held in cb->data, if any. */
static int ctnetlink_dump_exp_ct_done(struct netlink_callback *cb)
{
	struct nf_conn *ct = cb->data;

	if (ct)
		nf_ct_put(ct);
	return 0;
}

/* Start a dump of the expectations belonging to the conntrack identified by
 * CTA_EXPECT_MASTER (and the optional CTA_EXPECT_ZONE).
 *
 * Returns -ENOENT when no such conntrack exists and 0 without dumping when
 * it has no helper extension.
 */
static int ctnetlink_dump_exp_ct(struct net *net, struct sock *ctnl,
				 struct sk_buff *skb,
				 const struct nlmsghdr *nlh,
				 const struct nlattr * const cda[],
				 struct netlink_ext_ack *extack)
{
	int err;
	struct nfgenmsg *nfmsg = nlmsg_data(nlh);
	u_int8_t u3 = nfmsg->nfgen_family;
	struct nf_conntrack_tuple tuple;
	struct nf_conntrack_tuple_hash *h;
	struct nf_conn *ct;
	struct nf_conntrack_zone zone;
	struct netlink_dump_control c = {
		.dump = ctnetlink_exp_ct_dump_table,
		.start = ctnetlink_dump_exp_ct_start,
		.done = ctnetlink_dump_exp_ct_done,
	};

	err = ctnetlink_parse_tuple(cda, &tuple, CTA_EXPECT_MASTER,
				    u3, NULL);
	if (err < 0)
		return err;

	err = ctnetlink_parse_zone(cda[CTA_EXPECT_ZONE], &zone);
	if (err < 0)
		return err;

	h = nf_conntrack_find_get(net, &zone, &tuple);
	if (!h)
		return -ENOENT;

	ct = nf_ct_tuplehash_to_ctrack(h);
	/* No expectation linked to this connection tracking. */
	if (!nfct_help(ct)) {
		nf_ct_put(ct);
		return 0;
	}

	c.data = ct;

	err = netlink_dump_start(ctnl, skb, nlh, &c);
	/* release the lookup reference; the .start hook takes a separate one */
	nf_ct_put(ct);

	return err;
}

/* Handler for expectation "get" requests.
 *
 * With NLM_F_DUMP it dumps either the expectations of one master conntrack
 * (CTA_EXPECT_MASTER present) or the whole table.  Otherwise it looks up a
 * single expectation by tuple (or master tuple), optionally checks
 * CTA_EXPECT_ID against it, and unicasts the result to the requester.
 */
static int ctnetlink_get_expect(struct sk_buff *skb,
				const struct nfnl_info *info,
				const struct nlattr * const cda[])
{
	u_int8_t u3 = info->nfmsg->nfgen_family;
	struct nf_conntrack_tuple tuple;
	struct nf_conntrack_expect *exp;
	struct nf_conntrack_zone zone;
	struct sk_buff *skb2;
	int err;

	if (info->nlh->nlmsg_flags & NLM_F_DUMP) {
		if (cda[CTA_EXPECT_MASTER])
			return ctnetlink_dump_exp_ct(info->net, info->sk, skb,
						     info->nlh, cda,
						     info->extack);
		else {
			struct netlink_dump_control c = {
				.dump = ctnetlink_exp_dump_table,
			};
			return netlink_dump_start(info->sk, skb, info->nlh, &c);
		}
	}

	err = ctnetlink_parse_zone(cda[CTA_EXPECT_ZONE], &zone);
	if (err < 0)
		return err;

	if (cda[CTA_EXPECT_TUPLE])
		err = ctnetlink_parse_tuple(cda, &tuple, CTA_EXPECT_TUPLE,
					    u3, NULL);
	else if (cda[CTA_EXPECT_MASTER])
		err = ctnetlink_parse_tuple(cda, &tuple, CTA_EXPECT_MASTER,
					    u3, NULL);
	else
		return -EINVAL;

	if (err < 0)
		return err;

	/* allocate the reply with GFP_KERNEL before taking the spinlock */
	skb2 = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
	if (!skb2)
		return -ENOMEM;

	spin_lock_bh(&nf_conntrack_expect_lock);
	exp = nf_ct_expect_find_get(info->net, &zone, &tuple);
	if (!exp) {
		spin_unlock_bh(&nf_conntrack_expect_lock);
		kfree_skb(skb2);
		return -ENOENT;
	}

	if (cda[CTA_EXPECT_ID]) {
		__be32 id = nla_get_be32(cda[CTA_EXPECT_ID]);

		/* a mismatching id is reported as "not found" */
		if (id != nf_expect_get_id(exp)) {
			nf_ct_expect_put(exp);
			spin_unlock_bh(&nf_conntrack_expect_lock);
			kfree_skb(skb2);
			return -ENOENT;
		}
	}

	rcu_read_lock();
	err = ctnetlink_exp_fill_info(skb2, NETLINK_CB(skb).portid,
				      info->nlh->nlmsg_seq, IPCTNL_MSG_EXP_NEW,
				      exp);
	rcu_read_unlock();
	nf_ct_expect_put(exp);
	spin_unlock_bh(&nf_conntrack_expect_lock);

	if (err <= 0) {
		kfree_skb(skb2);
		return -ENOMEM;
	}

	return nfnetlink_unicast(skb2, info->net, NETLINK_CB(skb).portid);
}

/* Iterator predicate: true for expectations whose helper is named @data. */
static bool expect_iter_name(struct nf_conntrack_expect *exp, void *data)
{
	struct nf_conntrack_helper *helper;
	const char *name = data;

helper = rcu_dereference(exp->helper); if (!helper) return false; return strcmp(helper->name, name) == 0; } static bool expect_iter_all(struct nf_conntrack_expect *exp, void *data) { return true; } static int ctnetlink_del_expect(struct sk_buff *skb, const struct nfnl_info *info, const struct nlattr * const cda[]) { u_int8_t u3 = info->nfmsg->nfgen_family; struct nf_conntrack_expect *exp; struct nf_conntrack_tuple tuple; struct nf_conntrack_zone zone; int err; if (cda[CTA_EXPECT_TUPLE]) { /* delete a single expect by tuple */ err = ctnetlink_parse_zone(cda[CTA_EXPECT_ZONE], &zone); if (err < 0) return err; err = ctnetlink_parse_tuple(cda, &tuple, CTA_EXPECT_TUPLE, u3, NULL); if (err < 0) return err; spin_lock_bh(&nf_conntrack_expect_lock); /* bump usage count to 2 */ exp = nf_ct_expect_find_get(info->net, &zone, &tuple); if (!exp) { spin_unlock_bh(&nf_conntrack_expect_lock); return -ENOENT; } if (cda[CTA_EXPECT_ID]) { __be32 id = nla_get_be32(cda[CTA_EXPECT_ID]); if (id != nf_expect_get_id(exp)) { nf_ct_expect_put(exp); spin_unlock_bh(&nf_conntrack_expect_lock); return -ENOENT; } } /* after list removal, usage count == 1 */ if (timer_delete(&exp->timeout)) { nf_ct_unlink_expect_report(exp, NETLINK_CB(skb).portid, nlmsg_report(info->nlh)); nf_ct_expect_put(exp); } spin_unlock_bh(&nf_conntrack_expect_lock); /* have to put what we 'get' above. 
* after this line usage count == 0 */ nf_ct_expect_put(exp); } else if (cda[CTA_EXPECT_HELP_NAME]) { char *name = nla_data(cda[CTA_EXPECT_HELP_NAME]); nf_ct_expect_iterate_net(info->net, expect_iter_name, name, NETLINK_CB(skb).portid, nlmsg_report(info->nlh)); } else { /* This basically means we have to flush everything*/ nf_ct_expect_iterate_net(info->net, expect_iter_all, NULL, NETLINK_CB(skb).portid, nlmsg_report(info->nlh)); } return 0; } static int ctnetlink_change_expect(struct nf_conntrack_expect *x, const struct nlattr * const cda[]) { if (cda[CTA_EXPECT_TIMEOUT]) { if (!timer_delete(&x->timeout)) return -ETIME; x->timeout.expires = jiffies + ntohl(nla_get_be32(cda[CTA_EXPECT_TIMEOUT])) * HZ; add_timer(&x->timeout); } return 0; } #if IS_ENABLED(CONFIG_NF_NAT) static const struct nla_policy exp_nat_nla_policy[CTA_EXPECT_NAT_MAX+1] = { [CTA_EXPECT_NAT_DIR] = NLA_POLICY_MAX(NLA_BE32, IP_CT_DIR_REPLY), [CTA_EXPECT_NAT_TUPLE] = { .type = NLA_NESTED }, }; #endif static int ctnetlink_parse_expect_nat(const struct nlattr *attr, struct nf_conntrack_expect *exp, u_int8_t u3) { #if IS_ENABLED(CONFIG_NF_NAT) struct nlattr *tb[CTA_EXPECT_NAT_MAX+1]; struct nf_conntrack_tuple nat_tuple = {}; int err; err = nla_parse_nested_deprecated(tb, CTA_EXPECT_NAT_MAX, attr, exp_nat_nla_policy, NULL); if (err < 0) return err; if (!tb[CTA_EXPECT_NAT_DIR] || !tb[CTA_EXPECT_NAT_TUPLE]) return -EINVAL; err = ctnetlink_parse_tuple((const struct nlattr * const *)tb, &nat_tuple, CTA_EXPECT_NAT_TUPLE, u3, NULL); if (err < 0) return err; exp->saved_addr = nat_tuple.src.u3; exp->saved_proto = nat_tuple.src.u; exp->dir = ntohl(nla_get_be32(tb[CTA_EXPECT_NAT_DIR])); return 0; #else return -EOPNOTSUPP; #endif } static struct nf_conntrack_expect * ctnetlink_alloc_expect(const struct nlattr * const cda[], struct nf_conn *ct, struct nf_conntrack_tuple *tuple, struct nf_conntrack_tuple *mask) { struct net *net = read_pnet(&ct->ct_net); struct nf_conntrack_helper *helper; struct nf_conntrack_expect 
*exp; struct nf_conn_help *help; u32 class = 0; int err; help = nfct_help(ct); if (!help) return ERR_PTR(-EOPNOTSUPP); helper = rcu_dereference(help->helper); if (!helper) return ERR_PTR(-EOPNOTSUPP); if (cda[CTA_EXPECT_CLASS]) { class = ntohl(nla_get_be32(cda[CTA_EXPECT_CLASS])); if (class > helper->expect_class_max) return ERR_PTR(-EINVAL); } exp = nf_ct_expect_alloc(ct); if (!exp) return ERR_PTR(-ENOMEM); if (cda[CTA_EXPECT_FLAGS]) { exp->flags = ntohl(nla_get_be32(cda[CTA_EXPECT_FLAGS])); exp->flags &= ~NF_CT_EXPECT_USERSPACE; } else { exp->flags = 0; } if (cda[CTA_EXPECT_FN]) { const char *name = nla_data(cda[CTA_EXPECT_FN]); struct nf_ct_helper_expectfn *expfn; expfn = nf_ct_helper_expectfn_find_by_name(name); if (expfn == NULL) { err = -EINVAL; goto err_out; } exp->expectfn = expfn->expectfn; } else exp->expectfn = NULL; exp->class = class; exp->master = ct; write_pnet(&exp->net, net); #ifdef CONFIG_NF_CONNTRACK_ZONES exp->zone = ct->zone; #endif rcu_assign_pointer(exp->helper, helper); exp->tuple = *tuple; exp->mask.src.u3 = mask->src.u3; exp->mask.src.u.all = mask->src.u.all; if (cda[CTA_EXPECT_NAT]) { err = ctnetlink_parse_expect_nat(cda[CTA_EXPECT_NAT], exp, nf_ct_l3num(ct)); if (err < 0) goto err_out; #if IS_ENABLED(CONFIG_NF_NAT) } else { memset(&exp->saved_addr, 0, sizeof(exp->saved_addr)); memset(&exp->saved_proto, 0, sizeof(exp->saved_proto)); exp->dir = 0; #endif } return exp; err_out: nf_ct_expect_put(exp); return ERR_PTR(err); } static int ctnetlink_create_expect(struct net *net, const struct nf_conntrack_zone *zone, const struct nlattr * const cda[], u_int8_t u3, u32 portid, int report) { struct nf_conntrack_tuple tuple, mask, master_tuple; struct nf_conntrack_tuple_hash *h = NULL; struct nf_conntrack_expect *exp; struct nf_conn *ct; int err; /* caller guarantees that those three CTA_EXPECT_* exist */ err = ctnetlink_parse_tuple(cda, &tuple, CTA_EXPECT_TUPLE, u3, NULL); if (err < 0) return err; err = ctnetlink_parse_tuple(cda, &mask, 
CTA_EXPECT_MASK, u3, NULL); if (err < 0) return err; err = ctnetlink_parse_tuple(cda, &master_tuple, CTA_EXPECT_MASTER, u3, NULL); if (err < 0) return err; /* Look for master conntrack of this expectation */ h = nf_conntrack_find_get(net, zone, &master_tuple); if (!h) return -ENOENT; ct = nf_ct_tuplehash_to_ctrack(h); rcu_read_lock(); exp = ctnetlink_alloc_expect(cda, ct, &tuple, &mask); if (IS_ERR(exp)) { err = PTR_ERR(exp); goto err_rcu; } err = nf_ct_expect_related_report(exp, portid, report, 0); nf_ct_expect_put(exp); err_rcu: rcu_read_unlock(); nf_ct_put(ct); return err; } static int ctnetlink_new_expect(struct sk_buff *skb, const struct nfnl_info *info, const struct nlattr * const cda[]) { u_int8_t u3 = info->nfmsg->nfgen_family; struct nf_conntrack_tuple tuple; struct nf_conntrack_expect *exp; struct nf_conntrack_zone zone; int err; if (!cda[CTA_EXPECT_TUPLE] || !cda[CTA_EXPECT_MASK] || !cda[CTA_EXPECT_MASTER]) return -EINVAL; err = ctnetlink_parse_zone(cda[CTA_EXPECT_ZONE], &zone); if (err < 0) return err; err = ctnetlink_parse_tuple(cda, &tuple, CTA_EXPECT_TUPLE, u3, NULL); if (err < 0) return err; spin_lock_bh(&nf_conntrack_expect_lock); exp = __nf_ct_expect_find(info->net, &zone, &tuple); if (!exp) { spin_unlock_bh(&nf_conntrack_expect_lock); err = -ENOENT; if (info->nlh->nlmsg_flags & NLM_F_CREATE) { err = ctnetlink_create_expect(info->net, &zone, cda, u3, NETLINK_CB(skb).portid, nlmsg_report(info->nlh)); } return err; } err = -EEXIST; if (!(info->nlh->nlmsg_flags & NLM_F_EXCL)) err = ctnetlink_change_expect(exp, cda); spin_unlock_bh(&nf_conntrack_expect_lock); return err; } static int ctnetlink_exp_stat_fill_info(struct sk_buff *skb, u32 portid, u32 seq, int cpu, const struct ip_conntrack_stat *st) { struct nlmsghdr *nlh; unsigned int flags = portid ? 
NLM_F_MULTI : 0, event; event = nfnl_msg_type(NFNL_SUBSYS_CTNETLINK, IPCTNL_MSG_EXP_GET_STATS_CPU); nlh = nfnl_msg_put(skb, portid, seq, event, flags, AF_UNSPEC, NFNETLINK_V0, htons(cpu)); if (!nlh) goto nlmsg_failure; if (nla_put_be32(skb, CTA_STATS_EXP_NEW, htonl(st->expect_new)) || nla_put_be32(skb, CTA_STATS_EXP_CREATE, htonl(st->expect_create)) || nla_put_be32(skb, CTA_STATS_EXP_DELETE, htonl(st->expect_delete))) goto nla_put_failure; nlmsg_end(skb, nlh); return skb->len; nla_put_failure: nlmsg_failure: nlmsg_cancel(skb, nlh); return -1; } static int ctnetlink_exp_stat_cpu_dump(struct sk_buff *skb, struct netlink_callback *cb) { int cpu; struct net *net = sock_net(skb->sk); if (cb->args[0] == nr_cpu_ids) return 0; for (cpu = cb->args[0]; cpu < nr_cpu_ids; cpu++) { const struct ip_conntrack_stat *st; if (!cpu_possible(cpu)) continue; st = per_cpu_ptr(net->ct.stat, cpu); if (ctnetlink_exp_stat_fill_info(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, cpu, st) < 0) break; } cb->args[0] = cpu; return skb->len; } static int ctnetlink_stat_exp_cpu(struct sk_buff *skb, const struct nfnl_info *info, const struct nlattr * const cda[]) { if (info->nlh->nlmsg_flags & NLM_F_DUMP) { struct netlink_dump_control c = { .dump = ctnetlink_exp_stat_cpu_dump, }; return netlink_dump_start(info->sk, skb, info->nlh, &c); } return 0; } #ifdef CONFIG_NF_CONNTRACK_EVENTS static struct nf_ct_event_notifier ctnl_notifier = { .ct_event = ctnetlink_conntrack_event, .exp_event = ctnetlink_expect_event, }; #endif static const struct nfnl_callback ctnl_cb[IPCTNL_MSG_MAX] = { [IPCTNL_MSG_CT_NEW] = { .call = ctnetlink_new_conntrack, .type = NFNL_CB_MUTEX, .attr_count = CTA_MAX, .policy = ct_nla_policy }, [IPCTNL_MSG_CT_GET] = { .call = ctnetlink_get_conntrack, .type = NFNL_CB_MUTEX, .attr_count = CTA_MAX, .policy = ct_nla_policy }, [IPCTNL_MSG_CT_DELETE] = { .call = ctnetlink_del_conntrack, .type = NFNL_CB_MUTEX, .attr_count = CTA_MAX, .policy = ct_nla_policy }, [IPCTNL_MSG_CT_GET_CTRZERO] 
= { .call = ctnetlink_get_conntrack, .type = NFNL_CB_MUTEX, .attr_count = CTA_MAX, .policy = ct_nla_policy }, [IPCTNL_MSG_CT_GET_STATS_CPU] = { .call = ctnetlink_stat_ct_cpu, .type = NFNL_CB_MUTEX, }, [IPCTNL_MSG_CT_GET_STATS] = { .call = ctnetlink_stat_ct, .type = NFNL_CB_MUTEX, }, [IPCTNL_MSG_CT_GET_DYING] = { .call = ctnetlink_get_ct_dying, .type = NFNL_CB_MUTEX, }, [IPCTNL_MSG_CT_GET_UNCONFIRMED] = { .call = ctnetlink_get_ct_unconfirmed, .type = NFNL_CB_MUTEX, }, }; static const struct nfnl_callback ctnl_exp_cb[IPCTNL_MSG_EXP_MAX] = { [IPCTNL_MSG_EXP_GET] = { .call = ctnetlink_get_expect, .type = NFNL_CB_MUTEX, .attr_count = CTA_EXPECT_MAX, .policy = exp_nla_policy }, [IPCTNL_MSG_EXP_NEW] = { .call = ctnetlink_new_expect, .type = NFNL_CB_MUTEX, .attr_count = CTA_EXPECT_MAX, .policy = exp_nla_policy }, [IPCTNL_MSG_EXP_DELETE] = { .call = ctnetlink_del_expect, .type = NFNL_CB_MUTEX, .attr_count = CTA_EXPECT_MAX, .policy = exp_nla_policy }, [IPCTNL_MSG_EXP_GET_STATS_CPU] = { .call = ctnetlink_stat_exp_cpu, .type = NFNL_CB_MUTEX, }, }; static const struct nfnetlink_subsystem ctnl_subsys = { .name = "conntrack", .subsys_id = NFNL_SUBSYS_CTNETLINK, .cb_count = IPCTNL_MSG_MAX, .cb = ctnl_cb, }; static const struct nfnetlink_subsystem ctnl_exp_subsys = { .name = "conntrack_expect", .subsys_id = NFNL_SUBSYS_CTNETLINK_EXP, .cb_count = IPCTNL_MSG_EXP_MAX, .cb = ctnl_exp_cb, }; MODULE_ALIAS("ip_conntrack_netlink"); MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_CTNETLINK); MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_CTNETLINK_EXP); static int __net_init ctnetlink_net_init(struct net *net) { #ifdef CONFIG_NF_CONNTRACK_EVENTS nf_conntrack_register_notifier(net, &ctnl_notifier); #endif return 0; } static void ctnetlink_net_pre_exit(struct net *net) { #ifdef CONFIG_NF_CONNTRACK_EVENTS nf_conntrack_unregister_notifier(net); #endif } static struct pernet_operations ctnetlink_net_ops = { .init = ctnetlink_net_init, .pre_exit = ctnetlink_net_pre_exit, }; static int __init ctnetlink_init(void) { 
int ret; NL_ASSERT_CTX_FITS(struct ctnetlink_list_dump_ctx); ret = nfnetlink_subsys_register(&ctnl_subsys); if (ret < 0) { pr_err("ctnetlink_init: cannot register with nfnetlink.\n"); goto err_out; } ret = nfnetlink_subsys_register(&ctnl_exp_subsys); if (ret < 0) { pr_err("ctnetlink_init: cannot register exp with nfnetlink.\n"); goto err_unreg_subsys; } ret = register_pernet_subsys(&ctnetlink_net_ops); if (ret < 0) { pr_err("ctnetlink_init: cannot register pernet operations\n"); goto err_unreg_exp_subsys; } #ifdef CONFIG_NETFILTER_NETLINK_GLUE_CT /* setup interaction between nf_queue and nf_conntrack_netlink. */ RCU_INIT_POINTER(nfnl_ct_hook, &ctnetlink_glue_hook); #endif return 0; err_unreg_exp_subsys: nfnetlink_subsys_unregister(&ctnl_exp_subsys); err_unreg_subsys: nfnetlink_subsys_unregister(&ctnl_subsys); err_out: return ret; } static void __exit ctnetlink_exit(void) { unregister_pernet_subsys(&ctnetlink_net_ops); nfnetlink_subsys_unregister(&ctnl_exp_subsys); nfnetlink_subsys_unregister(&ctnl_subsys); #ifdef CONFIG_NETFILTER_NETLINK_GLUE_CT RCU_INIT_POINTER(nfnl_ct_hook, NULL); #endif synchronize_rcu(); } module_init(ctnetlink_init); module_exit(ctnetlink_exit);
85 85 25 25 25 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 // SPDX-License-Identifier: GPL-2.0 /* xfrm_hash.c: Common hash table code. * * Copyright (C) 2006 David S. Miller (davem@davemloft.net) */ #include <linux/kernel.h> #include <linux/mm.h> #include <linux/memblock.h> #include <linux/vmalloc.h> #include <linux/slab.h> #include <linux/xfrm.h> #include "xfrm_hash.h" struct hlist_head *xfrm_hash_alloc(unsigned int sz) { struct hlist_head *n; if (sz <= PAGE_SIZE) n = kzalloc(sz, GFP_KERNEL); else if (hashdist) n = vzalloc(sz); else n = (struct hlist_head *) __get_free_pages(GFP_KERNEL | __GFP_NOWARN | __GFP_ZERO, get_order(sz)); return n; } void xfrm_hash_free(struct hlist_head *n, unsigned int sz) { if (sz <= PAGE_SIZE) kfree(n); else if (hashdist) vfree(n); else free_pages((unsigned long)n, get_order(sz)); }
469 11 8 8 3 56 1 4 23 4 5 468 24 290 443 94 208 10 1 25 4 37 32 20 354 68 239 644 232 3 3 1 2 4 2 1 1 6 278 13 2 35 3 2 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 
497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 
997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 
1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 
1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 /* SPDX-License-Identifier: GPL-2.0 */ #if !defined(_TRACE_KVM_H) || defined(TRACE_HEADER_MULTI_READ) #define _TRACE_KVM_H #include <linux/tracepoint.h> #include <asm/vmx.h> #include <asm/svm.h> #include <asm/clocksource.h> #include <asm/pvclock-abi.h> #undef TRACE_SYSTEM #define TRACE_SYSTEM kvm #ifdef CREATE_TRACE_POINTS #define tracing_kvm_rip_read(vcpu) ({ \ typeof(vcpu) __vcpu = vcpu; \ __vcpu->arch.guest_state_protected ? 0 : kvm_rip_read(__vcpu); \ }) #endif /* * Tracepoint for guest mode entry. 
*/
/*
 * kvm_entry: one record per guest entry.
 *
 * Captures the vCPU id, the guest RIP as returned by tracing_kvm_rip_read()
 * (which yields 0 when vcpu->arch.guest_state_protected is set), the
 * force_immediate_exit flag, and the interrupt info / error code filled in
 * by the vendor get_entry_info hook.
 */
TRACE_EVENT(kvm_entry,
	TP_PROTO(struct kvm_vcpu *vcpu, bool force_immediate_exit),
	TP_ARGS(vcpu, force_immediate_exit),

	TP_STRUCT__entry(
		__field( unsigned int, vcpu_id )
		__field( unsigned long, rip )
		__field( bool, immediate_exit )
		__field( u32, intr_info )
		__field( u32, error_code )
	),

	TP_fast_assign(
		__entry->vcpu_id = vcpu->vcpu_id;
		__entry->rip = tracing_kvm_rip_read(vcpu);
		__entry->immediate_exit = force_immediate_exit;

		/* intr_info and error_code are written by the callee. */
		kvm_x86_call(get_entry_info)(vcpu, &__entry->intr_info,
					     &__entry->error_code);
	),

	/* The trailing %s is "[immediate exit]" or empty. */
	TP_printk("vcpu %u, rip 0x%lx intr_info 0x%08x error_code 0x%08x%s",
		  __entry->vcpu_id, __entry->rip,
		  __entry->intr_info, __entry->error_code,
		  __entry->immediate_exit ? "[immediate exit]" : "")
);

/*
 * Tracepoint for hypercall.
 *
 * Records the hypercall number and its four arguments a0..a3; all five
 * values are printed in hexadecimal.
 */
TRACE_EVENT(kvm_hypercall,
	TP_PROTO(unsigned long nr, unsigned long a0, unsigned long a1,
		 unsigned long a2, unsigned long a3),
	TP_ARGS(nr, a0, a1, a2, a3),

	TP_STRUCT__entry(
		__field( unsigned long, nr )
		__field( unsigned long, a0 )
		__field( unsigned long, a1 )
		__field( unsigned long, a2 )
		__field( unsigned long, a3 )
	),

	TP_fast_assign(
		__entry->nr = nr;
		__entry->a0 = a0;
		__entry->a1 = a1;
		__entry->a2 = a2;
		__entry->a3 = a3;
	),

	TP_printk("nr 0x%lx a0 0x%lx a1 0x%lx a2 0x%lx a3 0x%lx",
		  __entry->nr, __entry->a0, __entry->a1, __entry->a2,
		  __entry->a3)
);

/*
 * Tracepoint for hypercall.
*/ TRACE_EVENT(kvm_hv_hypercall, TP_PROTO(__u16 code, bool fast, __u16 var_cnt, __u16 rep_cnt, __u16 rep_idx, __u64 ingpa, __u64 outgpa), TP_ARGS(code, fast, var_cnt, rep_cnt, rep_idx, ingpa, outgpa), TP_STRUCT__entry( __field( __u16, rep_cnt ) __field( __u16, rep_idx ) __field( __u64, ingpa ) __field( __u64, outgpa ) __field( __u16, code ) __field( __u16, var_cnt ) __field( bool, fast ) ), TP_fast_assign( __entry->rep_cnt = rep_cnt; __entry->rep_idx = rep_idx; __entry->ingpa = ingpa; __entry->outgpa = outgpa; __entry->code = code; __entry->var_cnt = var_cnt; __entry->fast = fast; ), TP_printk("code 0x%x %s var_cnt 0x%x rep_cnt 0x%x idx 0x%x in 0x%llx out 0x%llx", __entry->code, __entry->fast ? "fast" : "slow", __entry->var_cnt, __entry->rep_cnt, __entry->rep_idx, __entry->ingpa, __entry->outgpa) ); TRACE_EVENT(kvm_hv_hypercall_done, TP_PROTO(u64 result), TP_ARGS(result), TP_STRUCT__entry( __field(__u64, result) ), TP_fast_assign( __entry->result = result; ), TP_printk("result 0x%llx", __entry->result) ); /* * Tracepoint for Xen hypercall. */ TRACE_EVENT(kvm_xen_hypercall, TP_PROTO(u8 cpl, unsigned long nr, unsigned long a0, unsigned long a1, unsigned long a2, unsigned long a3, unsigned long a4, unsigned long a5), TP_ARGS(cpl, nr, a0, a1, a2, a3, a4, a5), TP_STRUCT__entry( __field(u8, cpl) __field(unsigned long, nr) __field(unsigned long, a0) __field(unsigned long, a1) __field(unsigned long, a2) __field(unsigned long, a3) __field(unsigned long, a4) __field(unsigned long, a5) ), TP_fast_assign( __entry->cpl = cpl; __entry->nr = nr; __entry->a0 = a0; __entry->a1 = a1; __entry->a2 = a2; __entry->a3 = a3; __entry->a4 = a4; __entry->a4 = a5; ), TP_printk("cpl %d nr 0x%lx a0 0x%lx a1 0x%lx a2 0x%lx a3 0x%lx a4 0x%lx a5 %lx", __entry->cpl, __entry->nr, __entry->a0, __entry->a1, __entry->a2, __entry->a3, __entry->a4, __entry->a5) ); /* * Tracepoint for PIO. 
*/

/* Values for the kvm_pio 'rw' argument; a non-zero rw is printed as "write". */
#define KVM_PIO_IN 0
#define KVM_PIO_OUT 1

/*
 * kvm_pio: records direction, port, access size and repeat count of a port
 * I/O access, plus a single sample value read from the start of @data.
 */
TRACE_EVENT(kvm_pio,
	TP_PROTO(unsigned int rw, unsigned int port, unsigned int size,
		 unsigned int count, const void *data),
	TP_ARGS(rw, port, size, count, data),

	TP_STRUCT__entry(
		__field( unsigned int, rw )
		__field( unsigned int, port )
		__field( unsigned int, size )
		__field( unsigned int, count )
		__field( unsigned int, val )
	),

	TP_fast_assign(
		__entry->rw = rw;
		__entry->port = port;
		__entry->size = size;
		__entry->count = count;

		/*
		 * Read the sample with a width matching the access size:
		 * 1 byte, 2 bytes, or unsigned int for any other size.
		 */
		if (size == 1)
			__entry->val = *(unsigned char *)data;
		else if (size == 2)
			__entry->val = *(unsigned short *)data;
		else
			__entry->val = *(unsigned int *)data;
	),

	/* "(...)" marks that count > 1 while only one value is shown. */
	TP_printk("pio_%s at 0x%x size %d count %d val 0x%x %s",
		  __entry->rw ? "write" : "read",
		  __entry->port, __entry->size, __entry->count, __entry->val,
		  __entry->count > 1 ? "(...)" : "")
);

/*
 * Tracepoint for fast mmio.
 *
 * Records only the guest physical address of the access.
 */
TRACE_EVENT(kvm_fast_mmio,
	TP_PROTO(u64 gpa),
	TP_ARGS(gpa),

	TP_STRUCT__entry(
		__field(u64, gpa)
	),

	TP_fast_assign(
		__entry->gpa = gpa;
	),

	TP_printk("fast mmio at gpa 0x%llx", __entry->gpa)
);

/*
 * Tracepoint for cpuid.
*/ TRACE_EVENT(kvm_cpuid, TP_PROTO(unsigned int function, unsigned int index, unsigned long rax, unsigned long rbx, unsigned long rcx, unsigned long rdx, bool found, bool used_max_basic), TP_ARGS(function, index, rax, rbx, rcx, rdx, found, used_max_basic), TP_STRUCT__entry( __field( unsigned int, function ) __field( unsigned int, index ) __field( unsigned long, rax ) __field( unsigned long, rbx ) __field( unsigned long, rcx ) __field( unsigned long, rdx ) __field( bool, found ) __field( bool, used_max_basic ) ), TP_fast_assign( __entry->function = function; __entry->index = index; __entry->rax = rax; __entry->rbx = rbx; __entry->rcx = rcx; __entry->rdx = rdx; __entry->found = found; __entry->used_max_basic = used_max_basic; ), TP_printk("func %x idx %x rax %lx rbx %lx rcx %lx rdx %lx, cpuid entry %s%s", __entry->function, __entry->index, __entry->rax, __entry->rbx, __entry->rcx, __entry->rdx, __entry->found ? "found" : "not found", __entry->used_max_basic ? ", used max basic" : "") ); #define kvm_deliver_mode \ {0x0, "Fixed"}, \ {0x1, "LowPrio"}, \ {0x2, "SMI"}, \ {0x3, "Res3"}, \ {0x4, "NMI"}, \ {0x5, "INIT"}, \ {0x6, "SIPI"}, \ {0x7, "ExtINT"} #ifdef CONFIG_KVM_IOAPIC TRACE_EVENT(kvm_ioapic_set_irq, TP_PROTO(__u64 e, int pin, bool coalesced), TP_ARGS(e, pin, coalesced), TP_STRUCT__entry( __field( __u64, e ) __field( int, pin ) __field( bool, coalesced ) ), TP_fast_assign( __entry->e = e; __entry->pin = pin; __entry->coalesced = coalesced; ), TP_printk("pin %u dst %x vec %u (%s|%s|%s%s)%s", __entry->pin, (u8)(__entry->e >> 56), (u8)__entry->e, __print_symbolic((__entry->e >> 8 & 0x7), kvm_deliver_mode), (__entry->e & (1<<11)) ? "logical" : "physical", (__entry->e & (1<<15)) ? "level" : "edge", (__entry->e & (1<<16)) ? "|masked" : "", __entry->coalesced ? 
" (coalesced)" : "") ); TRACE_EVENT(kvm_ioapic_delayed_eoi_inj, TP_PROTO(__u64 e), TP_ARGS(e), TP_STRUCT__entry( __field( __u64, e ) ), TP_fast_assign( __entry->e = e; ), TP_printk("dst %x vec %u (%s|%s|%s%s)", (u8)(__entry->e >> 56), (u8)__entry->e, __print_symbolic((__entry->e >> 8 & 0x7), kvm_deliver_mode), (__entry->e & (1<<11)) ? "logical" : "physical", (__entry->e & (1<<15)) ? "level" : "edge", (__entry->e & (1<<16)) ? "|masked" : "") ); #endif TRACE_EVENT(kvm_msi_set_irq, TP_PROTO(__u64 address, __u64 data), TP_ARGS(address, data), TP_STRUCT__entry( __field( __u64, address ) __field( __u64, data ) ), TP_fast_assign( __entry->address = address; __entry->data = data; ), TP_printk("dst %llx vec %u (%s|%s|%s%s)", (u8)(__entry->address >> 12) | ((__entry->address >> 32) & 0xffffff00), (u8)__entry->data, __print_symbolic((__entry->data >> 8 & 0x7), kvm_deliver_mode), (__entry->address & (1<<2)) ? "logical" : "physical", (__entry->data & (1<<15)) ? "level" : "edge", (__entry->address & (1<<3)) ? "|rh" : "") ); #define AREG(x) { APIC_##x, "APIC_" #x } #define kvm_trace_symbol_apic \ AREG(ID), AREG(LVR), AREG(TASKPRI), AREG(ARBPRI), AREG(PROCPRI), \ AREG(EOI), AREG(RRR), AREG(LDR), AREG(DFR), AREG(SPIV), AREG(ISR), \ AREG(TMR), AREG(IRR), AREG(ESR), AREG(ICR), AREG(ICR2), AREG(LVTT), \ AREG(LVTTHMR), AREG(LVTPC), AREG(LVT0), AREG(LVT1), AREG(LVTERR), \ AREG(TMICT), AREG(TMCCT), AREG(TDCR), AREG(SELF_IPI), AREG(EFEAT), \ AREG(ECTRL) /* * Tracepoint for apic access. */ TRACE_EVENT(kvm_apic, TP_PROTO(unsigned int rw, unsigned int reg, u64 val), TP_ARGS(rw, reg, val), TP_STRUCT__entry( __field( unsigned int, rw ) __field( unsigned int, reg ) __field( u64, val ) ), TP_fast_assign( __entry->rw = rw; __entry->reg = reg; __entry->val = val; ), TP_printk("apic_%s %s = 0x%llx", __entry->rw ? 
"write" : "read", __print_symbolic(__entry->reg, kvm_trace_symbol_apic), __entry->val) ); #define trace_kvm_apic_read(reg, val) trace_kvm_apic(0, reg, val) #define trace_kvm_apic_write(reg, val) trace_kvm_apic(1, reg, val) #define KVM_ISA_VMX 1 #define KVM_ISA_SVM 2 #define kvm_print_exit_reason(exit_reason, isa) \ (isa == KVM_ISA_VMX) ? \ __print_symbolic(exit_reason & 0xffff, VMX_EXIT_REASONS) : \ __print_symbolic_u64(exit_reason, SVM_EXIT_REASONS), \ (isa == KVM_ISA_VMX && exit_reason & ~0xffff) ? " " : "", \ (isa == KVM_ISA_VMX) ? \ __print_flags_u64(exit_reason & ~0xffff, " ", VMX_EXIT_REASON_FLAGS) : "" #define TRACE_EVENT_KVM_EXIT(name) \ TRACE_EVENT(name, \ TP_PROTO(struct kvm_vcpu *vcpu, u32 isa), \ TP_ARGS(vcpu, isa), \ \ TP_STRUCT__entry( \ __field( unsigned int, exit_reason ) \ __field( unsigned long, guest_rip ) \ __field( u32, isa ) \ __field( u64, info1 ) \ __field( u64, info2 ) \ __field( u32, intr_info ) \ __field( u32, error_code ) \ __field( unsigned int, vcpu_id ) \ __field( u64, requests ) \ ), \ \ TP_fast_assign( \ __entry->guest_rip = tracing_kvm_rip_read(vcpu); \ __entry->isa = isa; \ __entry->vcpu_id = vcpu->vcpu_id; \ __entry->requests = READ_ONCE(vcpu->requests); \ kvm_x86_call(get_exit_info)(vcpu, \ &__entry->exit_reason, \ &__entry->info1, \ &__entry->info2, \ &__entry->intr_info, \ &__entry->error_code); \ ), \ \ TP_printk("vcpu %u reason %s%s%s rip 0x%lx info1 0x%016llx " \ "info2 0x%016llx intr_info 0x%08x error_code 0x%08x " \ "requests 0x%016llx", \ __entry->vcpu_id, \ kvm_print_exit_reason(__entry->exit_reason, __entry->isa), \ __entry->guest_rip, __entry->info1, __entry->info2, \ __entry->intr_info, __entry->error_code, \ __entry->requests) \ ) /* * Tracepoint for kvm guest exit: */ TRACE_EVENT_KVM_EXIT(kvm_exit); /* * Tracepoint for kvm interrupt injection: */ TRACE_EVENT(kvm_inj_virq, TP_PROTO(unsigned int vector, bool soft, bool reinjected), TP_ARGS(vector, soft, reinjected), TP_STRUCT__entry( __field( unsigned int, vector ) 
__field( bool, soft ) __field( bool, reinjected ) ), TP_fast_assign( __entry->vector = vector; __entry->soft = soft; __entry->reinjected = reinjected; ), TP_printk("%s 0x%x%s", __entry->soft ? "Soft/INTn" : "IRQ", __entry->vector, __entry->reinjected ? " [reinjected]" : "") ); #define EXS(x) { x##_VECTOR, "#" #x } #define kvm_trace_sym_exc \ EXS(DE), EXS(DB), EXS(BP), EXS(OF), EXS(BR), EXS(UD), EXS(NM), \ EXS(DF), EXS(TS), EXS(NP), EXS(SS), EXS(GP), EXS(PF), EXS(MF), \ EXS(AC), EXS(MC), EXS(XM), EXS(VE), EXS(CP), \ EXS(HV), EXS(VC), EXS(SX) /* * Tracepoint for kvm exception injection: records the exception vector, whether an error code is present, the error code itself and whether the exception is being reinjected. */ TRACE_EVENT(kvm_inj_exception, TP_PROTO(unsigned exception, bool has_error, unsigned error_code, bool reinjected), TP_ARGS(exception, has_error, error_code, reinjected), TP_STRUCT__entry( __field( u8, exception ) __field( u8, has_error ) __field( u32, error_code ) __field( bool, reinjected ) ), TP_fast_assign( __entry->exception = exception; __entry->has_error = has_error; __entry->error_code = error_code; __entry->reinjected = reinjected; ), TP_printk("%s%s%s%s%s", __print_symbolic(__entry->exception, kvm_trace_sym_exc), !__entry->has_error ? "" : " (", !__entry->has_error ? "" : __print_symbolic(__entry->error_code, { }), !__entry->has_error ? "" : ")", __entry->reinjected ? " [reinjected]" : "") ); /* * Tracepoint for page fault. */ TRACE_EVENT(kvm_page_fault, TP_PROTO(struct kvm_vcpu *vcpu, u64 fault_address, u64 error_code), TP_ARGS(vcpu, fault_address, error_code), TP_STRUCT__entry( __field( unsigned int, vcpu_id ) __field( unsigned long, guest_rip ) __field( u64, fault_address ) __field( u64, error_code ) ), TP_fast_assign( __entry->vcpu_id = vcpu->vcpu_id; __entry->guest_rip = tracing_kvm_rip_read(vcpu); __entry->fault_address = fault_address; __entry->error_code = error_code; ), TP_printk("vcpu %u rip 0x%lx address 0x%016llx error_code 0x%llx", __entry->vcpu_id, __entry->guest_rip, __entry->fault_address, __entry->error_code) ); /* * Tracepoint for guest MSR access. 
*/ TRACE_EVENT(kvm_msr, TP_PROTO(unsigned write, u32 ecx, u64 data, bool exception), TP_ARGS(write, ecx, data, exception), TP_STRUCT__entry( __field( unsigned, write ) __field( u32, ecx ) __field( u64, data ) __field( u8, exception ) ), TP_fast_assign( __entry->write = write; __entry->ecx = ecx; __entry->data = data; __entry->exception = exception; ), TP_printk("msr_%s %x = 0x%llx%s", __entry->write ? "write" : "read", __entry->ecx, __entry->data, __entry->exception ? " (#GP)" : "") ); #define trace_kvm_msr_read(ecx, data) trace_kvm_msr(0, ecx, data, false) #define trace_kvm_msr_write(ecx, data) trace_kvm_msr(1, ecx, data, false) #define trace_kvm_msr_read_ex(ecx) trace_kvm_msr(0, ecx, 0, true) #define trace_kvm_msr_write_ex(ecx, data) trace_kvm_msr(1, ecx, data, true) /* * Tracepoint for guest CR access. */ TRACE_EVENT(kvm_cr, TP_PROTO(unsigned int rw, unsigned int cr, unsigned long val), TP_ARGS(rw, cr, val), TP_STRUCT__entry( __field( unsigned int, rw ) __field( unsigned int, cr ) __field( unsigned long, val ) ), TP_fast_assign( __entry->rw = rw; __entry->cr = cr; __entry->val = val; ), TP_printk("cr_%s %x = 0x%lx", __entry->rw ? "write" : "read", __entry->cr, __entry->val) ); #define trace_kvm_cr_read(cr, val) trace_kvm_cr(0, cr, val) #define trace_kvm_cr_write(cr, val) trace_kvm_cr(1, cr, val) TRACE_EVENT(kvm_pic_set_irq, TP_PROTO(__u8 chip, __u8 pin, __u8 elcr, __u8 imr, bool coalesced), TP_ARGS(chip, pin, elcr, imr, coalesced), TP_STRUCT__entry( __field( __u8, chip ) __field( __u8, pin ) __field( __u8, elcr ) __field( __u8, imr ) __field( bool, coalesced ) ), TP_fast_assign( __entry->chip = chip; __entry->pin = pin; __entry->elcr = elcr; __entry->imr = imr; __entry->coalesced = coalesced; ), TP_printk("chip %u pin %u (%s%s)%s", __entry->chip, __entry->pin, (__entry->elcr & (1 << __entry->pin)) ? "level":"edge", (__entry->imr & (1 << __entry->pin)) ? "|masked":"", __entry->coalesced ? 
" (coalesced)" : "") ); #define kvm_apic_dst_shorthand \ {0x0, "dst"}, \ {0x1, "self"}, \ {0x2, "all"}, \ {0x3, "all-but-self"} TRACE_EVENT(kvm_apic_ipi, TP_PROTO(__u32 icr_low, __u32 dest_id), TP_ARGS(icr_low, dest_id), TP_STRUCT__entry( __field( __u32, icr_low ) __field( __u32, dest_id ) ), TP_fast_assign( __entry->icr_low = icr_low; __entry->dest_id = dest_id; ), TP_printk("dst %x vec %u (%s|%s|%s|%s|%s)", __entry->dest_id, (u8)__entry->icr_low, __print_symbolic((__entry->icr_low >> 8 & 0x7), kvm_deliver_mode), (__entry->icr_low & (1<<11)) ? "logical" : "physical", (__entry->icr_low & (1<<14)) ? "assert" : "de-assert", (__entry->icr_low & (1<<15)) ? "level" : "edge", __print_symbolic((__entry->icr_low >> 18 & 0x3), kvm_apic_dst_shorthand)) ); TRACE_EVENT(kvm_apic_accept_irq, TP_PROTO(__u32 apicid, __u16 dm, __u16 tm, __u8 vec), TP_ARGS(apicid, dm, tm, vec), TP_STRUCT__entry( __field( __u32, apicid ) __field( __u16, dm ) __field( __u16, tm ) __field( __u8, vec ) ), TP_fast_assign( __entry->apicid = apicid; __entry->dm = dm; __entry->tm = tm; __entry->vec = vec; ), TP_printk("apicid %x vec %u (%s|%s)", __entry->apicid, __entry->vec, __print_symbolic((__entry->dm >> 8 & 0x7), kvm_deliver_mode), __entry->tm ? 
"level" : "edge") ); TRACE_EVENT(kvm_eoi, TP_PROTO(struct kvm_lapic *apic, int vector), TP_ARGS(apic, vector), TP_STRUCT__entry( __field( __u32, apicid ) __field( int, vector ) ), TP_fast_assign( __entry->apicid = apic->vcpu->vcpu_id; __entry->vector = vector; ), TP_printk("apicid %x vector %d", __entry->apicid, __entry->vector) ); TRACE_EVENT(kvm_pv_eoi, TP_PROTO(struct kvm_lapic *apic, int vector), TP_ARGS(apic, vector), TP_STRUCT__entry( __field( __u32, apicid ) __field( int, vector ) ), TP_fast_assign( __entry->apicid = apic->vcpu->vcpu_id; __entry->vector = vector; ), TP_printk("apicid %x vector %d", __entry->apicid, __entry->vector) ); /* * Tracepoint for nested VMRUN */ TRACE_EVENT(kvm_nested_vmenter, TP_PROTO(__u64 rip, __u64 vmcb, __u64 nested_rip, __u32 int_ctl, __u32 event_inj, bool tdp_enabled, __u64 guest_tdp_pgd, __u64 guest_cr3, __u32 isa), TP_ARGS(rip, vmcb, nested_rip, int_ctl, event_inj, tdp_enabled, guest_tdp_pgd, guest_cr3, isa), TP_STRUCT__entry( __field( __u64, rip ) __field( __u64, vmcb ) __field( __u64, nested_rip ) __field( __u32, int_ctl ) __field( __u32, event_inj ) __field( bool, tdp_enabled ) __field( __u64, guest_pgd ) __field( __u32, isa ) ), TP_fast_assign( __entry->rip = rip; __entry->vmcb = vmcb; __entry->nested_rip = nested_rip; __entry->int_ctl = int_ctl; __entry->event_inj = event_inj; __entry->tdp_enabled = tdp_enabled; __entry->guest_pgd = tdp_enabled ? guest_tdp_pgd : guest_cr3; __entry->isa = isa; ), TP_printk("rip: 0x%016llx %s: 0x%016llx nested_rip: 0x%016llx " "int_ctl: 0x%08x event_inj: 0x%08x nested_%s=%s %s: 0x%016llx", __entry->rip, __entry->isa == KVM_ISA_VMX ? "vmcs" : "vmcb", __entry->vmcb, __entry->nested_rip, __entry->int_ctl, __entry->event_inj, __entry->isa == KVM_ISA_VMX ? "ept" : "npt", __entry->tdp_enabled ? "y" : "n", !__entry->tdp_enabled ? "guest_cr3" : __entry->isa == KVM_ISA_VMX ? 
"nested_eptp" : "nested_cr3", __entry->guest_pgd) ); TRACE_EVENT(kvm_nested_intercepts, TP_PROTO(__u16 cr_read, __u16 cr_write, __u32 exceptions, __u32 intercept1, __u32 intercept2, __u32 intercept3), TP_ARGS(cr_read, cr_write, exceptions, intercept1, intercept2, intercept3), TP_STRUCT__entry( __field( __u16, cr_read ) __field( __u16, cr_write ) __field( __u32, exceptions ) __field( __u32, intercept1 ) __field( __u32, intercept2 ) __field( __u32, intercept3 ) ), TP_fast_assign( __entry->cr_read = cr_read; __entry->cr_write = cr_write; __entry->exceptions = exceptions; __entry->intercept1 = intercept1; __entry->intercept2 = intercept2; __entry->intercept3 = intercept3; ), TP_printk("cr_read: %04x cr_write: %04x excp: %08x " "intercepts: %08x %08x %08x", __entry->cr_read, __entry->cr_write, __entry->exceptions, __entry->intercept1, __entry->intercept2, __entry->intercept3) ); /* * Tracepoint for #VMEXIT while nested */ TRACE_EVENT_KVM_EXIT(kvm_nested_vmexit); /* * Tracepoint for #VMEXIT reinjected to the guest */ TRACE_EVENT(kvm_nested_vmexit_inject, TP_PROTO(__u64 exit_code, __u64 exit_info1, __u64 exit_info2, __u32 exit_int_info, __u32 exit_int_info_err, __u32 isa), TP_ARGS(exit_code, exit_info1, exit_info2, exit_int_info, exit_int_info_err, isa), TP_STRUCT__entry( __field( __u32, exit_code ) __field( __u64, exit_info1 ) __field( __u64, exit_info2 ) __field( __u32, exit_int_info ) __field( __u32, exit_int_info_err ) __field( __u32, isa ) ), TP_fast_assign( __entry->exit_code = exit_code; __entry->exit_info1 = exit_info1; __entry->exit_info2 = exit_info2; __entry->exit_int_info = exit_int_info; __entry->exit_int_info_err = exit_int_info_err; __entry->isa = isa; ), TP_printk("reason: %s%s%s ext_inf1: 0x%016llx " "ext_inf2: 0x%016llx ext_int: 0x%08x ext_int_err: 0x%08x", kvm_print_exit_reason(__entry->exit_code, __entry->isa), __entry->exit_info1, __entry->exit_info2, __entry->exit_int_info, __entry->exit_int_info_err) ); /* * Tracepoint for nested #vmexit because of 
interrupt pending */ TRACE_EVENT(kvm_nested_intr_vmexit, TP_PROTO(__u64 rip), TP_ARGS(rip), TP_STRUCT__entry( __field( __u64, rip ) ), TP_fast_assign( __entry->rip = rip ), TP_printk("rip: 0x%016llx", __entry->rip) ); /* * Tracepoint for nested #vmexit because of interrupt pending */ TRACE_EVENT(kvm_invlpga, TP_PROTO(__u64 rip, unsigned int asid, u64 address), TP_ARGS(rip, asid, address), TP_STRUCT__entry( __field( __u64, rip ) __field( unsigned int, asid ) __field( __u64, address ) ), TP_fast_assign( __entry->rip = rip; __entry->asid = asid; __entry->address = address; ), TP_printk("rip: 0x%016llx asid: %u address: 0x%016llx", __entry->rip, __entry->asid, __entry->address) ); /* * Tracepoint for nested #vmexit because of interrupt pending */ TRACE_EVENT(kvm_skinit, TP_PROTO(__u64 rip, __u32 slb), TP_ARGS(rip, slb), TP_STRUCT__entry( __field( __u64, rip ) __field( __u32, slb ) ), TP_fast_assign( __entry->rip = rip; __entry->slb = slb; ), TP_printk("rip: 0x%016llx slb: 0x%08x", __entry->rip, __entry->slb) ); #define KVM_EMUL_INSN_F_CR0_PE (1 << 0) #define KVM_EMUL_INSN_F_EFL_VM (1 << 1) #define KVM_EMUL_INSN_F_CS_D (1 << 2) #define KVM_EMUL_INSN_F_CS_L (1 << 3) #define kvm_trace_symbol_emul_flags \ { 0, "real" }, \ { KVM_EMUL_INSN_F_CR0_PE \ | KVM_EMUL_INSN_F_EFL_VM, "vm16" }, \ { KVM_EMUL_INSN_F_CR0_PE, "prot16" }, \ { KVM_EMUL_INSN_F_CR0_PE \ | KVM_EMUL_INSN_F_CS_D, "prot32" }, \ { KVM_EMUL_INSN_F_CR0_PE \ | KVM_EMUL_INSN_F_CS_L, "prot64" } #define kei_decode_mode(mode) ({ \ u8 flags = 0xff; \ switch (mode) { \ case X86EMUL_MODE_REAL: \ flags = 0; \ break; \ case X86EMUL_MODE_VM86: \ flags = KVM_EMUL_INSN_F_EFL_VM; \ break; \ case X86EMUL_MODE_PROT16: \ flags = KVM_EMUL_INSN_F_CR0_PE; \ break; \ case X86EMUL_MODE_PROT32: \ flags = KVM_EMUL_INSN_F_CR0_PE \ | KVM_EMUL_INSN_F_CS_D; \ break; \ case X86EMUL_MODE_PROT64: \ flags = KVM_EMUL_INSN_F_CR0_PE \ | KVM_EMUL_INSN_F_CS_L; \ break; \ } \ flags; \ }) TRACE_EVENT(kvm_emulate_insn, TP_PROTO(struct kvm_vcpu *vcpu, 
__u8 failed), TP_ARGS(vcpu, failed), TP_STRUCT__entry( __field( __u64, rip ) __field( __u32, csbase ) __field( __u8, len ) __array( __u8, insn, X86_MAX_INSTRUCTION_LENGTH ) __field( __u8, flags ) __field( __u8, failed ) ), TP_fast_assign( __entry->csbase = kvm_x86_call(get_segment_base)(vcpu, VCPU_SREG_CS); __entry->len = vcpu->arch.emulate_ctxt->fetch.ptr - vcpu->arch.emulate_ctxt->fetch.data; __entry->rip = vcpu->arch.emulate_ctxt->_eip - __entry->len; memcpy(__entry->insn, vcpu->arch.emulate_ctxt->fetch.data, X86_MAX_INSTRUCTION_LENGTH); __entry->flags = kei_decode_mode(vcpu->arch.emulate_ctxt->mode); __entry->failed = failed; ), TP_printk("%x:%llx:%s (%s)%s", __entry->csbase, __entry->rip, __print_hex(__entry->insn, __entry->len), __print_symbolic(__entry->flags, kvm_trace_symbol_emul_flags), __entry->failed ? " failed" : "" ) ); #define trace_kvm_emulate_insn_start(vcpu) trace_kvm_emulate_insn(vcpu, 0) #define trace_kvm_emulate_insn_failed(vcpu) trace_kvm_emulate_insn(vcpu, 1) TRACE_EVENT( vcpu_match_mmio, TP_PROTO(gva_t gva, gpa_t gpa, bool write, bool gpa_match), TP_ARGS(gva, gpa, write, gpa_match), TP_STRUCT__entry( __field(gva_t, gva) __field(gpa_t, gpa) __field(bool, write) __field(bool, gpa_match) ), TP_fast_assign( __entry->gva = gva; __entry->gpa = gpa; __entry->write = write; __entry->gpa_match = gpa_match ), TP_printk("gva %#lx gpa %#llx %s %s", __entry->gva, __entry->gpa, __entry->write ? "Write" : "Read", __entry->gpa_match ? 
"GPA" : "GVA") ); TRACE_EVENT(kvm_write_tsc_offset, TP_PROTO(unsigned int vcpu_id, __u64 previous_tsc_offset, __u64 next_tsc_offset), TP_ARGS(vcpu_id, previous_tsc_offset, next_tsc_offset), TP_STRUCT__entry( __field( unsigned int, vcpu_id ) __field( __u64, previous_tsc_offset ) __field( __u64, next_tsc_offset ) ), TP_fast_assign( __entry->vcpu_id = vcpu_id; __entry->previous_tsc_offset = previous_tsc_offset; __entry->next_tsc_offset = next_tsc_offset; ), TP_printk("vcpu=%u prev=%llu next=%llu", __entry->vcpu_id, __entry->previous_tsc_offset, __entry->next_tsc_offset) ); #ifdef CONFIG_X86_64 #define host_clocks \ {VDSO_CLOCKMODE_NONE, "none"}, \ {VDSO_CLOCKMODE_TSC, "tsc"} \ TRACE_EVENT(kvm_update_master_clock, TP_PROTO(bool use_master_clock, unsigned int host_clock, bool offset_matched), TP_ARGS(use_master_clock, host_clock, offset_matched), TP_STRUCT__entry( __field( bool, use_master_clock ) __field( unsigned int, host_clock ) __field( bool, offset_matched ) ), TP_fast_assign( __entry->use_master_clock = use_master_clock; __entry->host_clock = host_clock; __entry->offset_matched = offset_matched; ), TP_printk("masterclock %d hostclock %s offsetmatched %u", __entry->use_master_clock, __print_symbolic(__entry->host_clock, host_clocks), __entry->offset_matched) ); TRACE_EVENT(kvm_track_tsc, TP_PROTO(unsigned int vcpu_id, unsigned int nr_matched, unsigned int online_vcpus, bool use_master_clock, unsigned int host_clock), TP_ARGS(vcpu_id, nr_matched, online_vcpus, use_master_clock, host_clock), TP_STRUCT__entry( __field( unsigned int, vcpu_id ) __field( unsigned int, nr_vcpus_matched_tsc ) __field( unsigned int, online_vcpus ) __field( bool, use_master_clock ) __field( unsigned int, host_clock ) ), TP_fast_assign( __entry->vcpu_id = vcpu_id; __entry->nr_vcpus_matched_tsc = nr_matched; __entry->online_vcpus = online_vcpus; __entry->use_master_clock = use_master_clock; __entry->host_clock = host_clock; ), TP_printk("vcpu_id %u masterclock %u offsetmatched %u nr_online 
%u" " hostclock %s", __entry->vcpu_id, __entry->use_master_clock, __entry->nr_vcpus_matched_tsc, __entry->online_vcpus, __print_symbolic(__entry->host_clock, host_clocks)) ); #endif /* CONFIG_X86_64 */ /* * Tracepoint for PML full VMEXIT. */ TRACE_EVENT(kvm_pml_full, TP_PROTO(unsigned int vcpu_id), TP_ARGS(vcpu_id), TP_STRUCT__entry( __field( unsigned int, vcpu_id ) ), TP_fast_assign( __entry->vcpu_id = vcpu_id; ), TP_printk("vcpu %d: PML full", __entry->vcpu_id) ); TRACE_EVENT(kvm_ple_window_update, TP_PROTO(unsigned int vcpu_id, unsigned int new, unsigned int old), TP_ARGS(vcpu_id, new, old), TP_STRUCT__entry( __field( unsigned int, vcpu_id ) __field( unsigned int, new ) __field( unsigned int, old ) ), TP_fast_assign( __entry->vcpu_id = vcpu_id; __entry->new = new; __entry->old = old; ), TP_printk("vcpu %u old %u new %u (%s)", __entry->vcpu_id, __entry->old, __entry->new, __entry->old < __entry->new ? "growed" : "shrinked") ); TRACE_EVENT(kvm_pvclock_update, TP_PROTO(unsigned int vcpu_id, struct pvclock_vcpu_time_info *pvclock), TP_ARGS(vcpu_id, pvclock), TP_STRUCT__entry( __field( unsigned int, vcpu_id ) __field( __u32, version ) __field( __u64, tsc_timestamp ) __field( __u64, system_time ) __field( __u32, tsc_to_system_mul ) __field( __s8, tsc_shift ) __field( __u8, flags ) ), TP_fast_assign( __entry->vcpu_id = vcpu_id; __entry->version = pvclock->version; __entry->tsc_timestamp = pvclock->tsc_timestamp; __entry->system_time = pvclock->system_time; __entry->tsc_to_system_mul = pvclock->tsc_to_system_mul; __entry->tsc_shift = pvclock->tsc_shift; __entry->flags = pvclock->flags; ), TP_printk("vcpu_id %u, pvclock { version %u, tsc_timestamp 0x%llx, " "system_time 0x%llx, tsc_to_system_mul 0x%x, tsc_shift %d, " "flags 0x%x }", __entry->vcpu_id, __entry->version, __entry->tsc_timestamp, __entry->system_time, __entry->tsc_to_system_mul, __entry->tsc_shift, __entry->flags) ); TRACE_EVENT(kvm_wait_lapic_expire, TP_PROTO(unsigned int vcpu_id, s64 delta), 
TP_ARGS(vcpu_id, delta), TP_STRUCT__entry( __field( unsigned int, vcpu_id ) __field( s64, delta ) ), TP_fast_assign( __entry->vcpu_id = vcpu_id; __entry->delta = delta; ), TP_printk("vcpu %u: delta %lld (%s)", __entry->vcpu_id, __entry->delta, __entry->delta < 0 ? "early" : "late") ); TRACE_EVENT(kvm_smm_transition, TP_PROTO(unsigned int vcpu_id, u64 smbase, bool entering), TP_ARGS(vcpu_id, smbase, entering), TP_STRUCT__entry( __field( unsigned int, vcpu_id ) __field( u64, smbase ) __field( bool, entering ) ), TP_fast_assign( __entry->vcpu_id = vcpu_id; __entry->smbase = smbase; __entry->entering = entering; ), TP_printk("vcpu %u: %s SMM, smbase 0x%llx", __entry->vcpu_id, __entry->entering ? "entering" : "leaving", __entry->smbase) ); /* * Tracepoint for VT-d posted-interrupts and AMD-Vi Guest Virtual APIC. */ TRACE_EVENT(kvm_pi_irte_update, TP_PROTO(unsigned int host_irq, struct kvm_vcpu *vcpu, unsigned int gsi, unsigned int gvec, bool set), TP_ARGS(host_irq, vcpu, gsi, gvec, set), TP_STRUCT__entry( __field( unsigned int, host_irq ) __field( int, vcpu_id ) __field( unsigned int, gsi ) __field( unsigned int, gvec ) __field( bool, set ) ), TP_fast_assign( __entry->host_irq = host_irq; __entry->vcpu_id = vcpu ? vcpu->vcpu_id : -1; __entry->gsi = gsi; __entry->gvec = gvec; __entry->set = set; ), TP_printk("PI is %s for irq %u, vcpu %d, gsi: 0x%x, gvec: 0x%x", __entry->set ? "enabled and being updated" : "disabled", __entry->host_irq, __entry->vcpu_id, __entry->gsi, __entry->gvec) ); /* * Tracepoint for kvm_hv_notify_acked_sint. */ TRACE_EVENT(kvm_hv_notify_acked_sint, TP_PROTO(int vcpu_id, u32 sint), TP_ARGS(vcpu_id, sint), TP_STRUCT__entry( __field(int, vcpu_id) __field(u32, sint) ), TP_fast_assign( __entry->vcpu_id = vcpu_id; __entry->sint = sint; ), TP_printk("vcpu_id %d sint %u", __entry->vcpu_id, __entry->sint) ); /* * Tracepoint for synic_set_irq. 
*/ TRACE_EVENT(kvm_hv_synic_set_irq, TP_PROTO(int vcpu_id, u32 sint, int vector, int ret), TP_ARGS(vcpu_id, sint, vector, ret), TP_STRUCT__entry( __field(int, vcpu_id) __field(u32, sint) __field(int, vector) __field(int, ret) ), TP_fast_assign( __entry->vcpu_id = vcpu_id; __entry->sint = sint; __entry->vector = vector; __entry->ret = ret; ), TP_printk("vcpu_id %d sint %u vector %d ret %d", __entry->vcpu_id, __entry->sint, __entry->vector, __entry->ret) ); /* * Tracepoint for kvm_hv_synic_send_eoi. */ TRACE_EVENT(kvm_hv_synic_send_eoi, TP_PROTO(int vcpu_id, int vector), TP_ARGS(vcpu_id, vector), TP_STRUCT__entry( __field(int, vcpu_id) __field(u32, sint) __field(int, vector) __field(int, ret) ), TP_fast_assign( __entry->vcpu_id = vcpu_id; __entry->vector = vector; ), TP_printk("vcpu_id %d vector %d", __entry->vcpu_id, __entry->vector) ); /* * Tracepoint for synic_set_msr. */ TRACE_EVENT(kvm_hv_synic_set_msr, TP_PROTO(int vcpu_id, u32 msr, u64 data, bool host), TP_ARGS(vcpu_id, msr, data, host), TP_STRUCT__entry( __field(int, vcpu_id) __field(u32, msr) __field(u64, data) __field(bool, host) ), TP_fast_assign( __entry->vcpu_id = vcpu_id; __entry->msr = msr; __entry->data = data; __entry->host = host ), TP_printk("vcpu_id %d msr 0x%x data 0x%llx host %d", __entry->vcpu_id, __entry->msr, __entry->data, __entry->host) ); /* * Tracepoint for stimer_set_config. */ TRACE_EVENT(kvm_hv_stimer_set_config, TP_PROTO(int vcpu_id, int timer_index, u64 config, bool host), TP_ARGS(vcpu_id, timer_index, config, host), TP_STRUCT__entry( __field(int, vcpu_id) __field(int, timer_index) __field(u64, config) __field(bool, host) ), TP_fast_assign( __entry->vcpu_id = vcpu_id; __entry->timer_index = timer_index; __entry->config = config; __entry->host = host; ), TP_printk("vcpu_id %d timer %d config 0x%llx host %d", __entry->vcpu_id, __entry->timer_index, __entry->config, __entry->host) ); /* * Tracepoint for stimer_set_count. 
*/ TRACE_EVENT(kvm_hv_stimer_set_count, TP_PROTO(int vcpu_id, int timer_index, u64 count, bool host), TP_ARGS(vcpu_id, timer_index, count, host), TP_STRUCT__entry( __field(int, vcpu_id) __field(int, timer_index) __field(u64, count) __field(bool, host) ), TP_fast_assign( __entry->vcpu_id = vcpu_id; __entry->timer_index = timer_index; __entry->count = count; __entry->host = host; ), TP_printk("vcpu_id %d timer %d count %llu host %d", __entry->vcpu_id, __entry->timer_index, __entry->count, __entry->host) ); /* * Tracepoint for stimer_start(periodic timer case). */ TRACE_EVENT(kvm_hv_stimer_start_periodic, TP_PROTO(int vcpu_id, int timer_index, u64 time_now, u64 exp_time), TP_ARGS(vcpu_id, timer_index, time_now, exp_time), TP_STRUCT__entry( __field(int, vcpu_id) __field(int, timer_index) __field(u64, time_now) __field(u64, exp_time) ), TP_fast_assign( __entry->vcpu_id = vcpu_id; __entry->timer_index = timer_index; __entry->time_now = time_now; __entry->exp_time = exp_time; ), TP_printk("vcpu_id %d timer %d time_now %llu exp_time %llu", __entry->vcpu_id, __entry->timer_index, __entry->time_now, __entry->exp_time) ); /* * Tracepoint for stimer_start(one-shot timer case). */ TRACE_EVENT(kvm_hv_stimer_start_one_shot, TP_PROTO(int vcpu_id, int timer_index, u64 time_now, u64 count), TP_ARGS(vcpu_id, timer_index, time_now, count), TP_STRUCT__entry( __field(int, vcpu_id) __field(int, timer_index) __field(u64, time_now) __field(u64, count) ), TP_fast_assign( __entry->vcpu_id = vcpu_id; __entry->timer_index = timer_index; __entry->time_now = time_now; __entry->count = count; ), TP_printk("vcpu_id %d timer %d time_now %llu count %llu", __entry->vcpu_id, __entry->timer_index, __entry->time_now, __entry->count) ); /* * Tracepoint for stimer_timer_callback. 
*/ TRACE_EVENT(kvm_hv_stimer_callback, TP_PROTO(int vcpu_id, int timer_index), TP_ARGS(vcpu_id, timer_index), TP_STRUCT__entry( __field(int, vcpu_id) __field(int, timer_index) ), TP_fast_assign( __entry->vcpu_id = vcpu_id; __entry->timer_index = timer_index; ), TP_printk("vcpu_id %d timer %d", __entry->vcpu_id, __entry->timer_index) ); /* * Tracepoint for stimer_expiration. */ TRACE_EVENT(kvm_hv_stimer_expiration, TP_PROTO(int vcpu_id, int timer_index, int direct, int msg_send_result), TP_ARGS(vcpu_id, timer_index, direct, msg_send_result), TP_STRUCT__entry( __field(int, vcpu_id) __field(int, timer_index) __field(int, direct) __field(int, msg_send_result) ), TP_fast_assign( __entry->vcpu_id = vcpu_id; __entry->timer_index = timer_index; __entry->direct = direct; __entry->msg_send_result = msg_send_result; ), TP_printk("vcpu_id %d timer %d direct %d send result %d", __entry->vcpu_id, __entry->timer_index, __entry->direct, __entry->msg_send_result) ); /* * Tracepoint for stimer_cleanup. */ TRACE_EVENT(kvm_hv_stimer_cleanup, TP_PROTO(int vcpu_id, int timer_index), TP_ARGS(vcpu_id, timer_index), TP_STRUCT__entry( __field(int, vcpu_id) __field(int, timer_index) ), TP_fast_assign( __entry->vcpu_id = vcpu_id; __entry->timer_index = timer_index; ), TP_printk("vcpu_id %d timer %d", __entry->vcpu_id, __entry->timer_index) ); #define kvm_print_apicv_inhibit_reasons(inhibits) \ (inhibits), (inhibits) ? " " : "", \ (inhibits) ? __print_flags(inhibits, "|", APICV_INHIBIT_REASONS) : "" TRACE_EVENT(kvm_apicv_inhibit_changed, TP_PROTO(int reason, bool set, unsigned long inhibits), TP_ARGS(reason, set, inhibits), TP_STRUCT__entry( __field(int, reason) __field(bool, set) __field(unsigned long, inhibits) ), TP_fast_assign( __entry->reason = reason; __entry->set = set; __entry->inhibits = inhibits; ), TP_printk("%s reason=%u, inhibits=0x%lx%s%s", __entry->set ? 
"set" : "cleared", __entry->reason, kvm_print_apicv_inhibit_reasons(__entry->inhibits)) ); TRACE_EVENT(kvm_apicv_accept_irq, TP_PROTO(__u32 apicid, __u16 dm, __u16 tm, __u8 vec), TP_ARGS(apicid, dm, tm, vec), TP_STRUCT__entry( __field( __u32, apicid ) __field( __u16, dm ) __field( __u16, tm ) __field( __u8, vec ) ), TP_fast_assign( __entry->apicid = apicid; __entry->dm = dm; __entry->tm = tm; __entry->vec = vec; ), TP_printk("apicid %x vec %u (%s|%s)", __entry->apicid, __entry->vec, __print_symbolic((__entry->dm >> 8 & 0x7), kvm_deliver_mode), __entry->tm ? "level" : "edge") ); /* * Tracepoint for AMD AVIC */ TRACE_EVENT(kvm_avic_incomplete_ipi, TP_PROTO(u32 vcpu, u32 icrh, u32 icrl, u32 id, u32 index), TP_ARGS(vcpu, icrh, icrl, id, index), TP_STRUCT__entry( __field(u32, vcpu) __field(u32, icrh) __field(u32, icrl) __field(u32, id) __field(u32, index) ), TP_fast_assign( __entry->vcpu = vcpu; __entry->icrh = icrh; __entry->icrl = icrl; __entry->id = id; __entry->index = index; ), TP_printk("vcpu=%u, icrh:icrl=%#010x:%08x, id=%u, index=%u", __entry->vcpu, __entry->icrh, __entry->icrl, __entry->id, __entry->index) ); TRACE_EVENT(kvm_avic_unaccelerated_access, TP_PROTO(u32 vcpu, u32 offset, bool ft, bool rw, u32 vec), TP_ARGS(vcpu, offset, ft, rw, vec), TP_STRUCT__entry( __field(u32, vcpu) __field(u32, offset) __field(bool, ft) __field(bool, rw) __field(u32, vec) ), TP_fast_assign( __entry->vcpu = vcpu; __entry->offset = offset; __entry->ft = ft; __entry->rw = rw; __entry->vec = vec; ), TP_printk("vcpu=%u, offset=%#x(%s), %s, %s, vec=%#x", __entry->vcpu, __entry->offset, __print_symbolic(__entry->offset, kvm_trace_symbol_apic), __entry->ft ? "trap" : "fault", __entry->rw ? 
"write" : "read", __entry->vec) ); TRACE_EVENT(kvm_avic_ga_log, TP_PROTO(u32 vmid, u32 vcpuid), TP_ARGS(vmid, vcpuid), TP_STRUCT__entry( __field(u32, vmid) __field(u32, vcpuid) ), TP_fast_assign( __entry->vmid = vmid; __entry->vcpuid = vcpuid; ), TP_printk("vmid=%u, vcpuid=%u", __entry->vmid, __entry->vcpuid) ); TRACE_EVENT(kvm_avic_kick_vcpu_slowpath, TP_PROTO(u32 icrh, u32 icrl, u32 index), TP_ARGS(icrh, icrl, index), TP_STRUCT__entry( __field(u32, icrh) __field(u32, icrl) __field(u32, index) ), TP_fast_assign( __entry->icrh = icrh; __entry->icrl = icrl; __entry->index = index; ), TP_printk("icrh:icrl=%#08x:%08x, index=%u", __entry->icrh, __entry->icrl, __entry->index) ); TRACE_EVENT(kvm_avic_doorbell, TP_PROTO(u32 vcpuid, u32 apicid), TP_ARGS(vcpuid, apicid), TP_STRUCT__entry( __field(u32, vcpuid) __field(u32, apicid) ), TP_fast_assign( __entry->vcpuid = vcpuid; __entry->apicid = apicid; ), TP_printk("vcpuid=%u, apicid=%u", __entry->vcpuid, __entry->apicid) ); TRACE_EVENT(kvm_hv_timer_state, TP_PROTO(unsigned int vcpu_id, unsigned int hv_timer_in_use), TP_ARGS(vcpu_id, hv_timer_in_use), TP_STRUCT__entry( __field(unsigned int, vcpu_id) __field(unsigned int, hv_timer_in_use) ), TP_fast_assign( __entry->vcpu_id = vcpu_id; __entry->hv_timer_in_use = hv_timer_in_use; ), TP_printk("vcpu_id %x hv_timer %x", __entry->vcpu_id, __entry->hv_timer_in_use) ); /* * Tracepoint for kvm_hv_flush_tlb. 
*/ TRACE_EVENT(kvm_hv_flush_tlb, TP_PROTO(u64 processor_mask, u64 address_space, u64 flags, bool guest_mode), TP_ARGS(processor_mask, address_space, flags, guest_mode), TP_STRUCT__entry( __field(u64, processor_mask) __field(u64, address_space) __field(u64, flags) __field(bool, guest_mode) ), TP_fast_assign( __entry->processor_mask = processor_mask; __entry->address_space = address_space; __entry->flags = flags; __entry->guest_mode = guest_mode; ), TP_printk("processor_mask 0x%llx address_space 0x%llx flags 0x%llx %s", __entry->processor_mask, __entry->address_space, __entry->flags, __entry->guest_mode ? "(L2)" : "") ); /* * Tracepoint for kvm_hv_flush_tlb_ex. */ TRACE_EVENT(kvm_hv_flush_tlb_ex, TP_PROTO(u64 valid_bank_mask, u64 format, u64 address_space, u64 flags, bool guest_mode), TP_ARGS(valid_bank_mask, format, address_space, flags, guest_mode), TP_STRUCT__entry( __field(u64, valid_bank_mask) __field(u64, format) __field(u64, address_space) __field(u64, flags) __field(bool, guest_mode) ), TP_fast_assign( __entry->valid_bank_mask = valid_bank_mask; __entry->format = format; __entry->address_space = address_space; __entry->flags = flags; __entry->guest_mode = guest_mode; ), TP_printk("valid_bank_mask 0x%llx format 0x%llx " "address_space 0x%llx flags 0x%llx %s", __entry->valid_bank_mask, __entry->format, __entry->address_space, __entry->flags, __entry->guest_mode ? "(L2)" : "") ); /* * Tracepoints for kvm_hv_send_ipi. 
*/ TRACE_EVENT(kvm_hv_send_ipi, TP_PROTO(u32 vector, u64 processor_mask), TP_ARGS(vector, processor_mask), TP_STRUCT__entry( __field(u32, vector) __field(u64, processor_mask) ), TP_fast_assign( __entry->vector = vector; __entry->processor_mask = processor_mask; ), TP_printk("vector %x processor_mask 0x%llx", __entry->vector, __entry->processor_mask) ); TRACE_EVENT(kvm_hv_send_ipi_ex, TP_PROTO(u32 vector, u64 format, u64 valid_bank_mask), TP_ARGS(vector, format, valid_bank_mask), TP_STRUCT__entry( __field(u32, vector) __field(u64, format) __field(u64, valid_bank_mask) ), TP_fast_assign( __entry->vector = vector; __entry->format = format; __entry->valid_bank_mask = valid_bank_mask; ), TP_printk("vector %x format %llx valid_bank_mask 0x%llx", __entry->vector, __entry->format, __entry->valid_bank_mask) ); TRACE_EVENT(kvm_pv_tlb_flush, TP_PROTO(unsigned int vcpu_id, bool need_flush_tlb), TP_ARGS(vcpu_id, need_flush_tlb), TP_STRUCT__entry( __field( unsigned int, vcpu_id ) __field( bool, need_flush_tlb ) ), TP_fast_assign( __entry->vcpu_id = vcpu_id; __entry->need_flush_tlb = need_flush_tlb; ), TP_printk("vcpu %u need_flush_tlb %s", __entry->vcpu_id, __entry->need_flush_tlb ? "true" : "false") ); /* * Tracepoint for failed nested VMX VM-Enter. */ TRACE_EVENT(kvm_nested_vmenter_failed, TP_PROTO(const char *msg, u32 err), TP_ARGS(msg, err), TP_STRUCT__entry( __string(msg, msg) __field(u32, err) ), TP_fast_assign( __assign_str(msg); __entry->err = err; ), TP_printk("%s%s", __get_str(msg), !__entry->err ? "" : __print_symbolic(__entry->err, VMX_VMENTER_INSTRUCTION_ERRORS)) ); /* * Tracepoint for syndbg_set_msr. 
*/
TRACE_EVENT(kvm_hv_syndbg_set_msr,
	TP_PROTO(int vcpu_id, u32 vp_index, u32 msr, u64 data),
	TP_ARGS(vcpu_id, vp_index, msr, data),

	TP_STRUCT__entry(
		__field(int, vcpu_id)
		__field(u32, vp_index)
		__field(u32, msr)
		__field(u64, data)
	),

	TP_fast_assign(
		__entry->vcpu_id = vcpu_id;
		__entry->vp_index = vp_index;
		__entry->msr = msr;
		__entry->data = data;
	),

	TP_printk("vcpu_id %d vp_index %u msr 0x%x data 0x%llx",
		  __entry->vcpu_id, __entry->vp_index, __entry->msr,
		  __entry->data)
);

/*
 * Tracepoint for syndbg_get_msr.
 */
TRACE_EVENT(kvm_hv_syndbg_get_msr,
	TP_PROTO(int vcpu_id, u32 vp_index, u32 msr, u64 data),
	TP_ARGS(vcpu_id, vp_index, msr, data),

	TP_STRUCT__entry(
		__field(int, vcpu_id)
		__field(u32, vp_index)
		__field(u32, msr)
		__field(u64, data)
	),

	TP_fast_assign(
		__entry->vcpu_id = vcpu_id;
		__entry->vp_index = vp_index;
		__entry->msr = msr;
		__entry->data = data;
	),

	TP_printk("vcpu_id %d vp_index %u msr 0x%x data 0x%llx",
		  __entry->vcpu_id, __entry->vp_index, __entry->msr,
		  __entry->data)
);

/*
 * Tracepoint for the start of VMGEXIT processing
 *
 * The exit code and both exit-info values are read from ghcb->save at the
 * time the event fires.
 */
TRACE_EVENT(kvm_vmgexit_enter,
	TP_PROTO(unsigned int vcpu_id, struct ghcb *ghcb),
	TP_ARGS(vcpu_id, ghcb),

	TP_STRUCT__entry(
		__field(unsigned int, vcpu_id)
		__field(u64, exit_reason)
		__field(u64, info1)
		__field(u64, info2)
	),

	TP_fast_assign(
		__entry->vcpu_id = vcpu_id;
		__entry->exit_reason = ghcb->save.sw_exit_code;
		__entry->info1 = ghcb->save.sw_exit_info_1;
		__entry->info2 = ghcb->save.sw_exit_info_2;
	),

	TP_printk("vcpu %u, exit_reason %llx, exit_info1 %llx, exit_info2 %llx",
		  __entry->vcpu_id, __entry->exit_reason,
		  __entry->info1, __entry->info2)
);

/*
 * Tracepoint for the end of VMGEXIT processing
 *
 * Records the same ghcb->save fields as kvm_vmgexit_enter.
 */
TRACE_EVENT(kvm_vmgexit_exit,
	TP_PROTO(unsigned int vcpu_id, struct ghcb *ghcb),
	TP_ARGS(vcpu_id, ghcb),

	TP_STRUCT__entry(
		__field(unsigned int, vcpu_id)
		__field(u64, exit_reason)
		__field(u64, info1)
		__field(u64, info2)
	),

	TP_fast_assign(
		__entry->vcpu_id = vcpu_id;
		__entry->exit_reason = ghcb->save.sw_exit_code;
		__entry->info1 = ghcb->save.sw_exit_info_1;
		__entry->info2 = ghcb->save.sw_exit_info_2;
	),

	TP_printk("vcpu %u, exit_reason %llx, exit_info1 %llx, exit_info2 %llx",
		  __entry->vcpu_id, __entry->exit_reason,
		  __entry->info1, __entry->info2)
);

/*
 * Tracepoint for the start of VMGEXIT MSR protocol processing
 */
TRACE_EVENT(kvm_vmgexit_msr_protocol_enter,
	TP_PROTO(unsigned int vcpu_id, u64 ghcb_gpa),
	TP_ARGS(vcpu_id, ghcb_gpa),

	TP_STRUCT__entry(
		__field(unsigned int, vcpu_id)
		__field(u64, ghcb_gpa)
	),

	TP_fast_assign(
		__entry->vcpu_id = vcpu_id;
		__entry->ghcb_gpa = ghcb_gpa;
	),

	TP_printk("vcpu %u, ghcb_gpa %016llx",
		  __entry->vcpu_id, __entry->ghcb_gpa)
);

/*
 * Tracepoint for the end of VMGEXIT MSR protocol processing
 */
TRACE_EVENT(kvm_vmgexit_msr_protocol_exit,
	TP_PROTO(unsigned int vcpu_id, u64 ghcb_gpa, int result),
	TP_ARGS(vcpu_id, ghcb_gpa, result),

	TP_STRUCT__entry(
		__field(unsigned int, vcpu_id)
		__field(u64, ghcb_gpa)
		__field(int, result)
	),

	TP_fast_assign(
		__entry->vcpu_id = vcpu_id;
		__entry->ghcb_gpa = ghcb_gpa;
		__entry->result = result;
	),

	TP_printk("vcpu %u, ghcb_gpa %016llx, result %d",
		  __entry->vcpu_id, __entry->ghcb_gpa, __entry->result)
);

/*
 * Tracepoint for #NPFs due to RMP faults.
*/
TRACE_EVENT(kvm_rmp_fault,
	TP_PROTO(struct kvm_vcpu *vcpu, u64 gpa, u64 pfn, u64 error_code,
		 int rmp_level, int psmash_ret),
	TP_ARGS(vcpu, gpa, pfn, error_code, rmp_level, psmash_ret),

	TP_STRUCT__entry(
		__field(unsigned int, vcpu_id)
		__field(u64, gpa)
		__field(u64, pfn)
		__field(u64, error_code)
		__field(int, rmp_level)
		__field(int, psmash_ret)
	),

	TP_fast_assign(
		/* Only the vCPU id is recorded from the vcpu pointer. */
		__entry->vcpu_id = vcpu->vcpu_id;
		__entry->gpa = gpa;
		__entry->pfn = pfn;
		__entry->error_code = error_code;
		__entry->rmp_level = rmp_level;
		__entry->psmash_ret = psmash_ret;
	),

	TP_printk("vcpu %u gpa %016llx pfn 0x%llx error_code 0x%llx rmp_level %d psmash_ret %d",
		  __entry->vcpu_id, __entry->gpa, __entry->pfn,
		  __entry->error_code, __entry->rmp_level, __entry->psmash_ret)
);

#endif /* _TRACE_KVM_H */

/*
 * Set the include path and file name consumed by <trace/define_trace.h>
 * (presumably so that it can re-include this header - confirm against the
 * tracepoint documentation).
 */
#undef TRACE_INCLUDE_PATH
#define TRACE_INCLUDE_PATH ../../arch/x86/kvm
#undef TRACE_INCLUDE_FILE
#define TRACE_INCLUDE_FILE trace

/* This part must be outside protection */
#include <trace/define_trace.h>
34 34 34 8 34 32 40 9 9 9 63 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 
520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 
1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 
1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 /* SPDX-License-Identifier: GPL-2.0-or-later */ /* AF_RXRPC internal definitions * * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved. 
* Written by David Howells (dhowells@redhat.com)
 */

#include <linux/atomic.h>
#include <linux/seqlock.h>
#include <linux/win_minmax.h>
#include <net/net_namespace.h>
#include <net/netns/generic.h>
#include <net/sock.h>
#include <net/af_rxrpc.h>
#include <keys/rxrpc-type.h>
#include "protocol.h"

/* Size in bytes of the block held by struct rxrpc_crypt. */
#define FCRYPT_BSIZE 8

/*
 * A block of FCRYPT_BSIZE bytes that can be accessed either as raw bytes (x)
 * or as two big-endian 32-bit words (n).  The structure is 8-byte aligned.
 */
struct rxrpc_crypt {
	union {
		u8	x[FCRYPT_BSIZE];
		__be32	n[2];
	};
} __attribute__((aligned(8)));

/* Queue an (optionally delayed) work item on rxrpc_workqueue. */
#define rxrpc_queue_work(WS)	queue_work(rxrpc_workqueue, (WS))
#define rxrpc_queue_delayed_work(WS,D)	\
	queue_delayed_work(rxrpc_workqueue, (WS), (D))

/* Forward declarations of types that are only used via pointers below. */
struct key_preparsed_payload;
struct rxrpc_connection;
struct rxrpc_txbuf;
struct rxrpc_txqueue;
struct rxgk_context;

/*
 * Mark applied to socket buffers in skb->mark.  skb->priority is used
 * to pass supplementary information.
 */
enum rxrpc_skb_mark {
	RXRPC_SKB_MARK_PACKET,		/* Received packet */
	RXRPC_SKB_MARK_ERROR,		/* Error notification */
	RXRPC_SKB_MARK_CHALLENGE,	/* Challenge notification */
	RXRPC_SKB_MARK_SERVICE_CONN_SECURED, /* Service connection response has been verified */
	RXRPC_SKB_MARK_REJECT_BUSY,	/* Reject with BUSY */
	RXRPC_SKB_MARK_REJECT_ABORT,	/* Reject with ABORT (code in skb->priority) */
	RXRPC_SKB_MARK_REJECT_CONN_ABORT, /* Reject with connection ABORT (code in skb->priority) */
};

/*
 * sk_state for RxRPC sockets
 */
enum {
	RXRPC_UNBOUND = 0,
	RXRPC_CLIENT_UNBOUND,		/* Unbound socket used as client */
	RXRPC_CLIENT_BOUND,		/* client local address bound */
	RXRPC_SERVER_BOUND,		/* server local address bound */
	RXRPC_SERVER_BOUND2,		/* second server local address bound */
	RXRPC_SERVER_LISTENING,		/* server listening for connections */
	RXRPC_SERVER_LISTEN_DISABLED,	/* server listening disabled */
	RXRPC_CLOSE,			/* socket is being closed */
};

/*
 * Per-network namespace data.
*/
struct rxrpc_net {
	struct proc_dir_entry	*proc_net;	/* Subdir in /proc/net */
	u32			epoch;		/* Local epoch for detecting local-end reset */
	struct list_head	calls;		/* List of calls active in this namespace */
	spinlock_t		call_lock;	/* Lock for ->calls */
	atomic_t		nr_calls;	/* Count of allocated calls */

	atomic_t		nr_conns;	/* NOTE(review): presumably count of allocated conns - confirm */
	struct list_head	bundle_proc_list; /* List of bundles for proc */
	struct list_head	conn_proc_list;	/* List of conns in this namespace for proc */
	struct list_head	service_conns;	/* Service conns in this namespace */
	rwlock_t		conn_lock;	/* Lock for ->conn_proc_list, ->service_conns */
	struct work_struct	service_conn_reaper;
	struct timer_list	service_conn_reap_timer;

	bool			live;

	atomic_t		nr_client_conns;

	struct hlist_head	local_endpoints;
	struct mutex		local_mutex;	/* Lock for ->local_endpoints */

	DECLARE_HASHTABLE	(peer_hash, 10);
	spinlock_t		peer_hash_lock;	/* Lock for ->peer_hash */

#define RXRPC_KEEPALIVE_TIME 20 /* NAT keepalive time in seconds */
	u8			peer_keepalive_cursor;
	time64_t		peer_keepalive_base;
	struct list_head	peer_keepalive[32];
	struct list_head	peer_keepalive_new;
	struct timer_list	peer_keepalive_timer;
	struct work_struct	peer_keepalive_work;

	/*
	 * Counters.  NOTE(review): the stat_ prefix suggests these are
	 * statistics; see the users for what each one counts.
	 */
	atomic_t		stat_tx_data;
	atomic_t		stat_tx_data_retrans;
	atomic_t		stat_tx_data_send;
	atomic_t		stat_tx_data_send_frag;
	atomic_t		stat_tx_data_send_fail;
	atomic_t		stat_tx_data_send_msgsize;
	atomic_t		stat_tx_data_underflow;
	atomic_t		stat_tx_data_cwnd_reset;
	atomic_t		stat_rx_data;
	atomic_t		stat_rx_data_reqack;
	atomic_t		stat_rx_data_jumbo;

	atomic_t		stat_tx_ack_fill;
	atomic_t		stat_tx_ack_send;
	atomic_t		stat_tx_ack_skip;
	atomic_t		stat_tx_acks[256];
	atomic_t		stat_rx_acks[256];
	atomic_t		stat_tx_jumbo[10];
	atomic_t		stat_rx_jumbo[10];

	atomic_t		stat_why_req_ack[9];

	atomic_t		stat_io_loop;
};

/*
 * Service backlog preallocation.
 *
 * This contains circular buffers of preallocated peers, connections and calls
 * for incoming service calls and their head and tail pointers.
This allows calls to be set up in the data_ready handler, thereby avoiding the
 * need to shuffle packets around so much.
 */
struct rxrpc_backlog {
	unsigned short		peer_backlog_head;
	unsigned short		peer_backlog_tail;
	unsigned short		conn_backlog_head;
	unsigned short		conn_backlog_tail;
	unsigned short		call_backlog_head;
	unsigned short		call_backlog_tail;
#define RXRPC_BACKLOG_MAX	32
	struct rxrpc_peer	*peer_backlog[RXRPC_BACKLOG_MAX];
	struct rxrpc_connection	*conn_backlog[RXRPC_BACKLOG_MAX];
	struct rxrpc_call	*call_backlog[RXRPC_BACKLOG_MAX];
};

/*
 * RxRPC socket definition
 */
struct rxrpc_sock {
	/* WARNING: sk has to be the first member */
	struct sock		sk;
	const struct rxrpc_kernel_ops *app_ops;	/* Table of kernel app notification funcs */
	struct rxrpc_local	*local;		/* local endpoint */
	struct rxrpc_backlog	*backlog;	/* Preallocation for services */
	struct sk_buff_head	recvmsg_oobq;	/* OOB messages for recvmsg to pick up */
	struct rb_root		pending_oobq;	/* OOB messages awaiting userspace to respond to */
	u64			oob_id_counter;	/* OOB message ID counter */
	spinlock_t		incoming_lock;	/* Incoming call vs service shutdown lock */
	struct list_head	sock_calls;	/* List of calls owned by this socket */
	struct list_head	to_be_accepted;	/* calls awaiting acceptance */
	struct list_head	recvmsg_q;	/* Calls awaiting recvmsg's attention  */
	spinlock_t		recvmsg_lock;	/* Lock for recvmsg_q */
	struct key		*key;		/* security for this socket */
	struct key		*securities;	/* list of server security descriptors */
	struct rb_root		calls;		/* User ID -> call mapping */
	unsigned long		flags;		/* Bit numbers are the RXRPC_SOCK_* values below */
#define RXRPC_SOCK_CONNECTED	0		/* connect_srx is set */
#define RXRPC_SOCK_MANAGE_RESPONSE 1		/* User wants to manage RESPONSE packets */
	rwlock_t		call_lock;	/* lock for calls */
	u32			min_sec_level;	/* minimum security level */
#define RXRPC_SECURITY_MAX	RXRPC_SECURITY_ENCRYPT
	bool			exclusive;	/* Exclusive connection for a client socket */
	u16			second_service;	/* Additional service bound to the endpoint */
	struct {
		/* Service upgrade information */
		u16		from;		/* Service ID to upgrade (if not 0) */
		u16		to;		/* service ID to upgrade to */
	} service_upgrade;
	sa_family_t		family;		/* Protocol family created with */
	struct sockaddr_rxrpc	srx;		/* Primary Service/local addresses */
	struct sockaddr_rxrpc	connect_srx;	/* Default client address from connect() */
};

/* Convert a struct sock pointer into the rxrpc_sock that embeds it. */
#define rxrpc_sk(__sk) container_of((__sk), struct rxrpc_sock, sk)

/*
 * CPU-byteorder normalised Rx packet header.
 */
struct rxrpc_host_header {
	u32		epoch;		/* client boot timestamp */
	u32		cid;		/* connection and channel ID */
	u32		callNumber;	/* call ID (0 for connection-level packets) */
	u32		seq;		/* sequence number of pkt in call stream */
	u32		serial;		/* serial number of pkt sent to network */
	u8		type;		/* packet type */
	u8		flags;		/* packet flags */
	u8		userStatus;	/* app-layer defined status */
	u8		securityIndex;	/* security protocol ID */
	union {
		u16	_rsvd;		/* reserved */
		u16	cksum;		/* kerberos security checksum */
	};
	u16		serviceId;	/* service ID */
} __packed;

/*
 * RxRPC socket buffer private variables
 * - max 48 bytes (struct sk_buff::cb)
 *
 * The leading union holds per-use data; its members overlay one another.
 */
struct rxrpc_skb_priv {
	union {
		struct rxrpc_connection *poke_conn;	/* Conn referred to (poke packet) */
		struct {
			u16		offset;		/* Offset of data */
			u16		len;		/* Length of data */
			u8		flags;
#define RXRPC_RX_VERIFIED	0x01
		};
		struct {
			rxrpc_seq_t	first_ack;	/* First packet in acks table */
			rxrpc_seq_t	prev_ack;	/* Highest seq seen */
			rxrpc_serial_t	acked_serial;	/* Packet in response to (or 0) */
			u16		nr_acks;	/* Number of acks+nacks */
			u8		reason;		/* Reason for ack */
		} ack;
		struct {
			struct rxrpc_connection *conn;	/* Connection referred to */
			union {
				u32 rxkad_nonce;
			};
		} chall;
		struct {
			rxrpc_serial_t	challenge_serial;
			u32		kvno;
			u32		version;
			u16		len;
			u16		ticket_len;
		} resp;
	};
	struct rxrpc_host_header hdr;	/* RxRPC packet header from this packet */
};

/* Access the rxrpc private data stored in an sk_buff's cb[] area. */
#define rxrpc_skb(__skb) ((struct rxrpc_skb_priv *) &(__skb)->cb)

/*
 * RxRPC security module interface
 */
struct rxrpc_security {
	const char		*name;		/* name of this service */
	u8			security_index;	/* security type provided */
	u32			no_key_abort;	/* Abort code indicating no key */

	/* Initialise a security service */
	int (*init)(void);

	/* Clean up a security service */
	void (*exit)(void);

	/* Parse the information from a server key */
	int (*preparse_server_key)(struct key_preparsed_payload *);

	/* Clean up the preparse buffer after parsing a server key */
	void (*free_preparse_server_key)(struct key_preparsed_payload *);

	/* Destroy the payload of a server key */
	void (*destroy_server_key)(struct key *);

	/* Describe a server key */
	void (*describe_server_key)(const struct key *, struct seq_file *);

	/* initialise a connection's security */
	int (*init_connection_security)(struct rxrpc_connection *,
					struct rxrpc_key_token *);

	/* Work out how much data we can store in a packet, given an estimate
	 * of the amount of data remaining and allocate a data buffer.
	 */
	struct rxrpc_txbuf *(*alloc_txbuf)(struct rxrpc_call *call, size_t remaining, gfp_t gfp);

	/* impose security on a packet */
	int (*secure_packet)(struct rxrpc_call *, struct rxrpc_txbuf *);

	/* verify the security on a received packet */
	int (*verify_packet)(struct rxrpc_call *, struct sk_buff *);

	/* Free crypto request on a call */
	void (*free_call_crypto)(struct rxrpc_call *);

	/* issue a challenge */
	int (*issue_challenge)(struct rxrpc_connection *);

	/* Validate a challenge packet */
	bool (*validate_challenge)(struct rxrpc_connection *conn,
				   struct sk_buff *skb);

	/* Fill out the cmsg for recvmsg() to pass on a challenge to userspace.
	 * The security class gets to add additional information.
	 */
	int (*challenge_to_recvmsg)(struct rxrpc_connection *conn,
				    struct sk_buff *challenge,
				    struct msghdr *msg);

	/* Parse sendmsg() control message and respond to challenge.
	 */
	int (*sendmsg_respond_to_challenge)(struct sk_buff *challenge,
					    struct msghdr *msg);

	/* respond to a challenge */
	int (*respond_to_challenge)(struct rxrpc_connection *conn,
				    struct sk_buff *challenge);

	/* verify a response */
	int (*verify_response)(struct rxrpc_connection *,
			       struct sk_buff *);

	/* clear connection security */
	void (*clear)(struct rxrpc_connection *);

	/* Default ticket -> key decoder */
	int (*default_decode_ticket)(struct rxrpc_connection *conn, struct sk_buff *skb,
				     unsigned int ticket_offset, unsigned int ticket_len,
				     struct key **_key);
};

/*
 * RxRPC local transport endpoint description
 * - owned by a single AF_RXRPC socket
 * - pointed to by transport socket struct sk_user_data
 */
struct rxrpc_local {
	struct rcu_head		rcu;
	atomic_t		active_users;	/* Number of users of the local endpoint */
	refcount_t		ref;		/* Number of references to the structure */
	struct net		*net;		/* The network namespace */
	struct rxrpc_net	*rxnet;		/* Our bits in the network namespace */
	struct hlist_node	link;
	struct socket		*socket;	/* my UDP socket */
	struct task_struct	*io_thread;
	struct completion	io_thread_ready; /* Indication that the I/O thread started */
	struct page_frag_cache	tx_alloc;	/* Tx control packet allocation (I/O thread only) */
	struct rxrpc_sock	*service;	/* Service(s) listening on this endpoint */
#ifdef CONFIG_AF_RXRPC_INJECT_RX_DELAY
	struct sk_buff_head	rx_delay_queue;	/* Delay injection queue */
#endif
	struct sk_buff_head	rx_queue;	/* Received packets */
	struct list_head	conn_attend_q;	/* Conns requiring immediate attention */
	struct list_head	call_attend_q;	/* Calls requiring immediate attention */

	struct rb_root		client_bundles;	/* Client connection bundles by socket params */
	spinlock_t		client_bundles_lock; /* Lock for client_bundles */
	bool			kill_all_client_conns;
	struct list_head	idle_client_conns;
	struct timer_list	client_conn_reap_timer;
	unsigned long		client_conn_flags;
#define RXRPC_CLIENT_CONN_REAP_TIMER	0	/* The client conn reap timer expired */

	spinlock_t		lock;		/* access lock */
	rwlock_t		services_lock;	/* lock for services list */
	int			debug_id;	/* debug ID for printks */
	bool			dead;
	bool			service_closed;	/* Service socket closed */
	struct idr		conn_ids;	/* List of connection IDs */
	struct list_head	new_client_calls; /* Newly created client calls need connection */
	spinlock_t		client_call_lock; /* Lock for ->new_client_calls */
	struct sockaddr_rxrpc	srx;		/* local address */
	union {
		/* Provide a kvec table sufficiently large to manage either a
		 * DATA packet with a maximum set of jumbo subpackets or a PING
		 * ACK padded out to 64K with zeropages for PMTUD.
		 */
		struct kvec		kvec[1 + RXRPC_MAX_NR_JUMBO > 3 + 16 ?
					     1 + RXRPC_MAX_NR_JUMBO : 3 + 16];
		struct bio_vec		bvec[3 + 16];
	};
};

/*
 * RxRPC remote transport endpoint definition
 * - matched by local endpoint, remote port, address and protocol type
 */
struct rxrpc_peer {
	struct rcu_head		rcu;		/* This must be first */
	refcount_t		ref;
	unsigned long		hash_key;
	struct hlist_node	hash_link;
	struct rxrpc_local	*local;
	struct hlist_head	error_targets;	/* targets for net error distribution */
	struct rb_root		service_conns;	/* Service connections */
	struct list_head	keepalive_link;	/* Link in net->peer_keepalive[] */
	unsigned long		app_data;	/* Application data (e.g. afs_server) */
	unsigned int		last_tx_at;	/* Last time packet sent here (time64_t LSW) */
	seqlock_t		service_conn_lock;
	spinlock_t		lock;		/* access lock */
	int			debug_id;	/* debug ID for printks */
	struct sockaddr_rxrpc	srx;		/* remote address */

	/* Path MTU discovery [RFC8899] */
	unsigned int		pmtud_trial;	/* Current MTU probe size */
	unsigned int		pmtud_good;	/* Largest working MTU probe we've tried */
	unsigned int		pmtud_bad;	/* Smallest non-working MTU probe we've tried */
	bool			pmtud_lost;	/* T if MTU probe was lost */
	bool			pmtud_probing;	/* T if we have an active probe outstanding */
	bool			pmtud_pending;	/* T if a call to this peer should send a probe */
	u8			pmtud_jumbo;	/* Max jumbo packets for the MTU */
	bool			ackr_adv_pmtud;	/* T if the peer advertises path-MTU */
	unsigned int		ackr_max_data;	/* Maximum data advertised by peer */
	unsigned int		if_mtu;		/* Local interface MTU (- hdrsize) for this peer */
	unsigned int		max_data;	/* Maximum packet data capacity for this peer */
	unsigned short		hdrsize;	/* header size (IP + UDP + RxRPC) */
	unsigned short		tx_seg_max;	/* Maximum number of transmissible segments */

	/* Calculated RTT cache */
	unsigned int		recent_srtt_us;
	unsigned int		recent_rto_us;

	u8			cong_ssthresh;	/* Congestion slow-start threshold */
};

/*
 * Keys for matching a connection.
 *
 * epoch and cid overlay index_key, so the pair can also be handled as a
 * single 64-bit value.
 */
struct rxrpc_conn_proto {
	union {
		struct {
			u32	epoch;		/* epoch of this connection */
			u32	cid;		/* connection ID */
		};
		u64		index_key;
	};
};

struct rxrpc_conn_parameters {
	struct rxrpc_local	*local;		/* Representation of local endpoint */
	struct rxrpc_peer	*peer;		/* Representation of remote endpoint */
	struct key		*key;		/* Security details */
	bool			exclusive;	/* T if conn is exclusive */
	bool			upgrade;	/* T if service ID can be upgraded */
	u16			service_id;	/* Service ID for this connection */
	u32			security_level;	/* Security level selected */
};

/*
 * Call completion condition (state == RXRPC_CALL_COMPLETE).
*/
enum rxrpc_call_completion {
	RXRPC_CALL_SUCCEEDED,		/* - Normal termination */
	RXRPC_CALL_REMOTELY_ABORTED,	/* - call aborted by peer */
	RXRPC_CALL_LOCALLY_ABORTED,	/* - call aborted locally on error or close */
	RXRPC_CALL_LOCAL_ERROR,		/* - call failed due to local error */
	RXRPC_CALL_NETWORK_ERROR,	/* - call terminated by network error */
	NR__RXRPC_CALL_COMPLETIONS
};

/*
 * Bits in the connection flags.
 *
 * These are bit numbers, not masks; see RXRPC_CONN_FINAL_ACK_MASK below for
 * how a mask is built from them.
 */
enum rxrpc_conn_flag {
	RXRPC_CONN_IN_SERVICE_CONNS,	/* Conn is in peer->service_conns */
	RXRPC_CONN_DONT_REUSE,		/* Don't reuse this connection */
	RXRPC_CONN_PROBING_FOR_UPGRADE,	/* Probing for service upgrade */
	RXRPC_CONN_FINAL_ACK_0,		/* Need final ACK for channel 0 */
	RXRPC_CONN_FINAL_ACK_1,		/* Need final ACK for channel 1 */
	RXRPC_CONN_FINAL_ACK_2,		/* Need final ACK for channel 2 */
	RXRPC_CONN_FINAL_ACK_3,		/* Need final ACK for channel 3 */
};

/* Mask covering the final-ACK flag bits of all four channels. */
#define RXRPC_CONN_FINAL_ACK_MASK ((1UL << RXRPC_CONN_FINAL_ACK_0) |	\
				   (1UL << RXRPC_CONN_FINAL_ACK_1) |	\
				   (1UL << RXRPC_CONN_FINAL_ACK_2) |	\
				   (1UL << RXRPC_CONN_FINAL_ACK_3))

/*
 * Events that can be raised upon a connection.
 */
enum rxrpc_conn_event {
	RXRPC_CONN_EV_CHALLENGE,	/* Send challenge packet */
	RXRPC_CONN_EV_ABORT_CALLS,	/* Abort attached calls */
};

/*
 * The connection protocol state.
 */
enum rxrpc_conn_proto_state {
	RXRPC_CONN_UNUSED,		/* Connection not yet attempted */
	RXRPC_CONN_CLIENT_UNSECURED,	/* Client connection needs security init */
	RXRPC_CONN_CLIENT,		/* Client connection */
	RXRPC_CONN_SERVICE_PREALLOC,	/* Service connection preallocation */
	RXRPC_CONN_SERVICE_UNSECURED,	/* Service unsecured connection */
	RXRPC_CONN_SERVICE_CHALLENGING,	/* Service challenging for security */
	RXRPC_CONN_SERVICE,		/* Service secured connection */
	RXRPC_CONN_ABORTED,		/* Conn aborted */
	RXRPC_CONN__NR_STATES
};

/*
 * RxRPC client connection bundle.
*/
struct rxrpc_bundle {
	struct rxrpc_local	*local;		/* Representation of local endpoint */
	struct rxrpc_peer	*peer;		/* Remote endpoint */
	struct key		*key;		/* Security details */
	struct list_head	proc_link;	/* Link in net->bundle_proc_list */
	const struct rxrpc_security *security;	/* applied security module */
	refcount_t		ref;
	atomic_t		active;		/* Number of active users */
	unsigned int		debug_id;
	u32			security_level;	/* Security level selected */
	u16			service_id;	/* Service ID for this connection */
	bool			try_upgrade;	/* True if the bundle is attempting upgrade */
	bool			exclusive;	/* T if conn is exclusive */
	bool			upgrade;	/* T if service ID can be upgraded */
	unsigned short		alloc_error;	/* Error from last conn allocation */
	struct rb_node		local_node;	/* Node in local->client_bundles */
	struct list_head	waiting_calls;	/* Calls waiting for channels */
	unsigned long		avail_chans;	/* Mask of available channels */
	unsigned int		conn_ids[4];	/* Connection IDs. */
	struct rxrpc_connection	*conns[4];	/* The connections in the bundle (max 4) */
};

/*
 * RxRPC connection definition
 * - matched by { local, peer, epoch, conn_id, direction }
 * - each connection can only handle four simultaneous calls
 */
struct rxrpc_connection {
	struct rxrpc_conn_proto	proto;
	struct rxrpc_local	*local;		/* Representation of local endpoint */
	struct rxrpc_peer	*peer;		/* Remote endpoint */
	struct rxrpc_net	*rxnet;		/* Network namespace to which call belongs */
	struct key		*key;		/* Security details */
	struct list_head	attend_link;	/* Link in local->conn_attend_q */

	refcount_t		ref;
	atomic_t		active;		/* Active count for service conns */
	struct rcu_head		rcu;
	struct list_head	cache_link;

	unsigned char		act_chans;	/* Mask of active channels */
	struct rxrpc_channel {
		unsigned long		final_ack_at;	/* Time at which to issue final ACK */
		struct rxrpc_call	*call;		/* Active call */
		unsigned int		call_debug_id;	/* call->debug_id */
		u32			call_id;	/* ID of current call */
		u32			call_counter;	/* Call ID counter */
		u32			last_call;	/* ID of last call */
		u8			last_type;	/* Type of last packet */
		union {
			u32		last_seq;
			u32		last_abort;
		};
	} channels[RXRPC_MAXCALLS];

	struct timer_list	timer;		/* Conn event timer */
	struct work_struct	processor;	/* connection event processor */
	struct work_struct	destructor;	/* In-process-context destroyer */
	struct rxrpc_bundle	*bundle;	/* Client connection bundle */
	struct rb_node		service_node;	/* Node in peer->service_conns */
	struct list_head	proc_link;	/* link in procfs list */
	struct list_head	link;		/* link in master connection list */
	struct sk_buff_head	rx_queue;	/* received conn-level packets */
	struct page_frag_cache	tx_data_alloc;	/* Tx DATA packet allocation */
	struct mutex		tx_data_alloc_lock;

	struct mutex		security_lock;	/* Lock for security management */
	const struct rxrpc_security *security;	/* applied security module */
	/* Per-security-class state; the rxkad and rxgk members overlay. */
	union {
		struct {
			struct crypto_sync_skcipher *cipher;	/* encryption handle */
			struct rxrpc_crypt csum_iv;	/* packet checksum base */
			u32	nonce;		/* response re-use preventer */
		} rxkad;
		struct {
			struct rxgk_context *keys[4]; /* (Re-)keying buffer */
			u64	start_time;	/* The start time for TK derivation */
			u8	nonce[20];	/* Response re-use preventer */
			u32	enctype;	/* Kerberos 5 encoding type */
			u32	key_number;	/* Current key number */
		} rxgk;
	};
	rwlock_t		security_use_lock; /* Security use/modification lock */
	struct sk_buff		*tx_response;	/* Response packet to be transmitted */

	unsigned long		flags;
	unsigned long		events;
	unsigned long		idle_timestamp;	/* Time at which last became idle */
	spinlock_t		state_lock;	/* state-change lock */
	enum rxrpc_conn_proto_state state;	/* current state of connection */
	enum rxrpc_call_completion completion;	/* Completion condition */
	s32			abort_code;	/* Abort code of connection abort */
	int			debug_id;	/* debug ID for printks */
	rxrpc_serial_t		tx_serial;	/* Outgoing packet serial number counter */
	unsigned int		hi_serial;	/* highest serial number received */
	rxrpc_serial_t		pmtud_probe;	/* Serial of MTU probe (or 0) */
	unsigned int		pmtud_call;	/* ID of call used for probe */
	u32			service_id;	/* Service ID, possibly upgraded */
	u32			security_level;	/* Security level selected */
	u8			security_ix;	/* security type */
	u8			out_clientflag;	/* RXRPC_CLIENT_INITIATED if we are client */
	u8			bundle_shift;	/* Index into bundle->avail_chans */
	bool			exclusive;	/* T if conn is exclusive */
	bool			upgrade;	/* T if service ID can be upgraded */
	u16			orig_service_id; /* Originally requested service ID */
	short			error;		/* Local error code */
};

/*
 * Return true if the packet header has RXRPC_CLIENT_INITIATED set in its
 * flags, i.e. the packet is travelling towards the server.
 */
static inline bool rxrpc_to_server(const struct rxrpc_skb_priv *sp)
{
	return sp->hdr.flags & RXRPC_CLIENT_INITIATED;
}

/* The logical inverse of rxrpc_to_server(). */
static inline bool rxrpc_to_client(const struct rxrpc_skb_priv *sp)
{
	return !rxrpc_to_server(sp);
}

/*
 * Flags in call->flags.
 */
enum rxrpc_call_flag {
	RXRPC_CALL_RELEASED,		/* call has been released - no more message to userspace */
	RXRPC_CALL_HAS_USERID,		/* has a user ID attached */
	RXRPC_CALL_IS_SERVICE,		/* Call is service call */
	RXRPC_CALL_EXPOSED,		/* The call was exposed to the world */
	RXRPC_CALL_RX_LAST,		/* Received the last packet (at rxtx_top) */
	RXRPC_CALL_TX_LAST,		/* Last packet in Tx buffer (at rxtx_top) */
	RXRPC_CALL_TX_ALL_ACKED,	/* Last packet has been hard-acked */
	RXRPC_CALL_TX_NO_MORE,		/* No more data to transmit (MSG_MORE deasserted) */
	RXRPC_CALL_SEND_PING,		/* A ping will need to be sent */
	RXRPC_CALL_RETRANS_TIMEOUT,	/* Retransmission due to timeout occurred */
	RXRPC_CALL_BEGAN_RX_TIMER,	/* We began the expect_rx_by timer */
	RXRPC_CALL_RX_HEARD,		/* The peer responded at least once to this call */
	RXRPC_CALL_DISCONNECTED,	/* The call has been disconnected */
	RXRPC_CALL_KERNEL,		/* The call was made by the kernel */
	RXRPC_CALL_UPGRADE,		/* Service upgrade was requested for the call */
	RXRPC_CALL_EXCLUSIVE,		/* The call uses a once-only connection */
	RXRPC_CALL_RX_IS_IDLE,		/* recvmsg() is idle - send an ACK */
	RXRPC_CALL_RECVMSG_READ_ALL,	/* recvmsg() read all of the received data */
	RXRPC_CALL_CONN_CHALLENGING,	/* The connection is
being challenged */ }; /* * Events that can be raised on a call. */ enum rxrpc_call_event { RXRPC_CALL_EV_ACK_LOST, /* ACK may be lost, send ping */ RXRPC_CALL_EV_INITIAL_PING, /* Send initial ping for a new service call */ }; /* * The states that a call can be in. */ enum rxrpc_call_state { RXRPC_CALL_UNINITIALISED, RXRPC_CALL_CLIENT_AWAIT_CONN, /* - client waiting for connection to become available */ RXRPC_CALL_CLIENT_SEND_REQUEST, /* - client sending request phase */ RXRPC_CALL_CLIENT_AWAIT_REPLY, /* - client awaiting reply */ RXRPC_CALL_CLIENT_RECV_REPLY, /* - client receiving reply phase */ RXRPC_CALL_SERVER_PREALLOC, /* - service preallocation */ RXRPC_CALL_SERVER_RECV_REQUEST, /* - server receiving request */ RXRPC_CALL_SERVER_ACK_REQUEST, /* - server pending ACK of request */ RXRPC_CALL_SERVER_SEND_REPLY, /* - server sending reply */ RXRPC_CALL_SERVER_AWAIT_ACK, /* - server awaiting final ACK */ RXRPC_CALL_COMPLETE, /* - call complete */ NR__RXRPC_CALL_STATES }; /* * Call Tx congestion management modes. */ enum rxrpc_ca_state { RXRPC_CA_SLOW_START, RXRPC_CA_CONGEST_AVOIDANCE, RXRPC_CA_PACKET_LOSS, RXRPC_CA_FAST_RETRANSMIT, NR__RXRPC_CA_STATES } __mode(byte); /* * Current purpose of call RACK timer. According to the RACK-TLP protocol * [RFC8985], the transmission timer (call->rack_timo_at) may only be used for * one of these at once. 
*/ enum rxrpc_rack_timer_mode { RXRPC_CALL_RACKTIMER_OFF, /* Timer not running */ RXRPC_CALL_RACKTIMER_RACK_REORDER, /* RACK reordering timer */ RXRPC_CALL_RACKTIMER_TLP_PTO, /* TLP timeout */ RXRPC_CALL_RACKTIMER_RTO, /* Retransmission timeout */ } __mode(byte); /* * RxRPC call definition * - matched by { connection, call_id } */ struct rxrpc_call { struct rcu_head rcu; struct rxrpc_connection *conn; /* connection carrying call */ struct rxrpc_bundle *bundle; /* Connection bundle to use */ struct rxrpc_peer *peer; /* Peer record for remote address */ struct rxrpc_local *local; /* Representation of local endpoint */ struct rxrpc_sock __rcu *socket; /* socket responsible */ struct rxrpc_net *rxnet; /* Network namespace to which call belongs */ struct key *key; /* Security details */ const struct rxrpc_security *security; /* applied security module */ struct mutex user_mutex; /* User access mutex */ struct sockaddr_rxrpc dest_srx; /* Destination address */ ktime_t delay_ack_at; /* When DELAY ACK needs to happen */ ktime_t rack_timo_at; /* When ACK is figured as lost */ ktime_t ping_at; /* When next to send a ping */ ktime_t keepalive_at; /* When next to send a keepalive ping */ ktime_t expect_rx_by; /* When we expect to get a packet by */ ktime_t expect_req_by; /* When we expect to get a request DATA packet by */ ktime_t expect_term_by; /* When we expect call termination by */ u32 next_rx_timo; /* Timeout for next Rx packet (ms) */ u32 next_req_timo; /* Timeout for next Rx request packet (ms) */ u32 hard_timo; /* Maximum lifetime or 0 (s) */ struct timer_list timer; /* Combined event timer */ struct work_struct destroyer; /* In-process-context destroyer */ rxrpc_notify_rx_t notify_rx; /* kernel service Rx notification function */ struct list_head link; /* link in master call list */ struct list_head wait_link; /* Link in local->new_client_calls */ struct hlist_node error_link; /* link in error distribution list */ struct list_head accept_link; /* Link in rx->acceptq 
*/ struct list_head recvmsg_link; /* Link in rx->recvmsg_q */ struct list_head sock_link; /* Link in rx->sock_calls */ struct rb_node sock_node; /* Node in rx->calls */ struct list_head attend_link; /* Link in local->call_attend_q */ struct rxrpc_txbuf *tx_pending; /* Tx buffer being filled */ wait_queue_head_t waitq; /* Wait queue for channel or Tx */ s64 tx_total_len; /* Total length left to be transmitted (or -1) */ unsigned long user_call_ID; /* user-defined call ID */ unsigned long flags; unsigned long events; spinlock_t notify_lock; /* Kernel notification lock */ unsigned int send_abort_why; /* Why the abort [enum rxrpc_abort_reason] */ s32 send_abort; /* Abort code to be sent */ short send_abort_err; /* Error to be associated with the abort */ rxrpc_seq_t send_abort_seq; /* DATA packet that incurred the abort (or 0) */ s32 abort_code; /* Local/remote abort code */ int error; /* Local error incurred */ enum rxrpc_call_state _state; /* Current state of call (needs barrier) */ enum rxrpc_call_completion completion; /* Call completion condition */ refcount_t ref; u8 security_ix; /* Security type */ enum rxrpc_interruptibility interruptibility; /* At what point call may be interrupted */ u32 call_id; /* call ID on connection */ u32 cid; /* connection ID plus channel index */ u32 security_level; /* Security level selected */ u32 security_enctype; /* Security-specific encoding type (or 0) */ int debug_id; /* debug ID for printks */ unsigned short rx_pkt_offset; /* Current recvmsg packet offset */ unsigned short rx_pkt_len; /* Current recvmsg packet len */ /* Sendmsg data tracking. */ rxrpc_seq_t send_top; /* Highest Tx slot filled by sendmsg. */ struct rxrpc_txqueue *send_queue; /* Queue that sendmsg is writing into */ /* Transmitted data tracking. 
*/ struct rxrpc_txqueue *tx_queue; /* Start of transmission buffers */ struct rxrpc_txqueue *tx_qtail; /* End of transmission buffers */ rxrpc_seq_t tx_qbase; /* First slot in tx_queue */ rxrpc_seq_t tx_bottom; /* First packet in buffer */ rxrpc_seq_t tx_transmitted; /* Highest packet transmitted */ rxrpc_seq_t tx_top; /* Highest Tx slot allocated. */ rxrpc_serial_t tx_last_serial; /* Serial of last DATA transmitted */ u16 tx_backoff; /* Delay to insert due to Tx failure (ms) */ u16 tx_nr_sent; /* Number of packets sent, but unacked */ u16 tx_nr_lost; /* Number of packets marked lost */ u16 tx_nr_resent; /* Number of packets resent, but unacked */ u16 tx_winsize; /* Maximum size of Tx window */ #define RXRPC_TX_MAX_WINDOW 128 u8 tx_jumbo_max; /* Maximum subpkts peer will accept */ ktime_t tx_last_sent; /* Last time a transmission occurred */ /* Received data tracking */ struct sk_buff_head recvmsg_queue; /* Queue of packets ready for recvmsg() */ struct sk_buff_head rx_queue; /* Queue of packets for this call to receive */ struct sk_buff_head rx_oos_queue; /* Queue of out of sequence packets */ rxrpc_seq_t rx_highest_seq; /* Highest sequence number received */ rxrpc_seq_t rx_consumed; /* Highest packet consumed */ rxrpc_serial_t rx_serial; /* Highest serial received for this call */ u8 rx_winsize; /* Size of Rx window */ /* TCP-style slow-start congestion control [RFC5681]. Since the SMSS * is fixed, we keep these numbers in terms of segments (ie. DATA * packets) rather than bytes. */ #define RXRPC_TX_SMSS RXRPC_JUMBO_DATALEN #define RXRPC_MIN_CWND 4 enum rxrpc_ca_state cong_ca_state; /* Congestion control state */ u8 cong_extra; /* Extra to send for congestion management */ u16 cong_cwnd; /* Congestion window size */ u16 cong_ssthresh; /* Slow-start threshold */ u16 cong_dup_acks; /* Count of ACKs showing missing packets */ u16 cong_cumul_acks; /* Cumulative ACK count */ ktime_t cong_tstamp; /* Last time cwnd was changed */ /* RACK-TLP [RFC8985] state. 
*/ ktime_t rack_xmit_ts; /* Latest transmission timestamp */ ktime_t rack_rtt; /* RTT of most recently ACK'd segment */ ktime_t rack_rtt_ts; /* Timestamp of rack_rtt */ ktime_t rack_reo_wnd; /* Reordering window */ unsigned int rack_reo_wnd_mult; /* Multiplier applied to rack_reo_wnd */ int rack_reo_wnd_persist; /* Num loss recoveries before reset reo_wnd */ rxrpc_seq_t rack_fack; /* Highest sequence so far ACK'd */ rxrpc_seq_t rack_end_seq; /* Highest sequence seen */ rxrpc_seq_t rack_dsack_round; /* DSACK opt recv'd in latest roundtrip */ bool rack_dsack_round_none; /* T if dsack_round is "None" */ bool rack_reordering_seen; /* T if detected reordering event */ enum rxrpc_rack_timer_mode rack_timer_mode; /* Current mode of RACK timer */ bool tlp_is_retrans; /* T if unacked TLP retransmission */ rxrpc_serial_t tlp_serial; /* Serial of TLP probe (or 0 if none in progress) */ rxrpc_seq_t tlp_seq; /* Sequence of TLP probe */ unsigned int tlp_rtt_taken; /* Last time RTT taken */ ktime_t tlp_max_ack_delay; /* Sender budget for max delayed ACK interval */ /* Receive-phase ACK management (ACKs we send). */ u8 ackr_reason; /* reason to ACK */ u16 ackr_sack_base; /* Starting slot in SACK table ring */ rxrpc_seq_t ackr_window; /* Base of SACK window */ rxrpc_seq_t ackr_wtop; /* Top of SACK window -- TODO confirm; comment used to duplicate "Base" */ unsigned int ackr_nr_unacked; /* Number of unacked packets */ atomic_t ackr_nr_consumed; /* Number of packets needing hard ACK */ struct { #define RXRPC_SACK_SIZE 256 /* SACK table for soft-acked packets */ u8 ackr_sack_table[RXRPC_SACK_SIZE]; } __aligned(8); /* RTT management */ rxrpc_serial_t rtt_serial[4]; /* Serial number of DATA or PING sent */ ktime_t rtt_sent_at[4]; /* Time packet sent */ unsigned long rtt_avail; /* Mask of available slots in bits 0-3, * Mask of pending samples in 8-11 */ #define RXRPC_CALL_RTT_AVAIL_MASK 0xf #define RXRPC_CALL_RTT_PEND_SHIFT 8 /* Transmission-phase ACK management (ACKs we've received). 
*/ ktime_t acks_latest_ts; /* Timestamp of latest ACK received */ rxrpc_seq_t acks_hard_ack; /* Highest sequence hard acked */ rxrpc_seq_t acks_prev_seq; /* Highest previousPacket received */ rxrpc_seq_t acks_lowest_nak; /* Lowest NACK in the buffer (or ==tx_hard_ack) */ rxrpc_serial_t acks_highest_serial; /* Highest serial number ACK'd */ unsigned short acks_nr_sacks; /* Number of soft acks recorded */ unsigned short acks_nr_snacks; /* Number of soft nacks recorded */ /* Calculated RTT cache */ ktime_t rtt_last_req; /* Time of last RTT request */ unsigned int rtt_count; /* Number of samples we've got */ unsigned int rtt_taken; /* Number of samples taken (wrapping) */ struct minmax min_rtt; /* Estimated minimum RTT */ u32 srtt_us; /* smoothed round trip time << 3 in usecs */ u32 mdev_us; /* medium deviation */ u32 mdev_max_us; /* maximal mdev for the last rtt period */ u32 rttvar_us; /* smoothed mdev_max */ u32 rto_us; /* Retransmission timeout in usec */ u8 backoff; /* Backoff timeout (as shift) */ }; /* * Summary of a new ACK and the changes it made to the Tx buffer packet states. */ struct rxrpc_ack_summary { rxrpc_serial_t ack_serial; /* Serial number of ACK */ rxrpc_serial_t acked_serial; /* Serial number ACK'd */ u16 in_flight; /* Number of unreceived transmissions */ u16 nr_new_hacks; /* Number of rotated new ACKs */ u16 nr_new_sacks; /* Number of new soft ACKs in packet */ u16 nr_new_snacks; /* Number of new soft nacks in packet */ u8 ack_reason; bool new_low_snack:1; /* T if new low soft NACK found */ bool retrans_timeo:1; /* T if reTx due to timeout happened */ bool need_retransmit:1; /* T if we need transmission */ bool rtt_sample_avail:1; /* T if RTT sample available */ bool in_fast_or_rto_recovery:1; bool exiting_fast_or_rto_recovery:1; bool tlp_probe_acked:1; /* T if the TLP probe seq was acked */ u8 /*enum rxrpc_congest_change*/ change; }; /* * sendmsg() cmsg-specified parameters. 
*/ enum rxrpc_command { RXRPC_CMD_SEND_DATA, /* send data message */ RXRPC_CMD_SEND_ABORT, /* request abort generation */ RXRPC_CMD_REJECT_BUSY, /* [server] reject a call as busy */ RXRPC_CMD_CHARGE_ACCEPT, /* [server] charge accept preallocation */ }; struct rxrpc_call_params { s64 tx_total_len; /* Total Tx data length (if send data) */ unsigned long user_call_ID; /* User's call ID */ struct { u32 hard; /* Maximum lifetime (sec) */ u32 idle; /* Max time since last data packet (msec) */ u32 normal; /* Max time since last call packet (msec) */ } timeouts; u8 nr_timeouts; /* Number of timeouts specified */ bool kernel; /* T if kernel is making the call */ enum rxrpc_interruptibility interruptibility; /* How interruptible is the call? */ }; struct rxrpc_send_params { struct rxrpc_call_params call; u32 abort_code; /* Abort code to Tx (if abort) */ enum rxrpc_command command : 8; /* The command to implement */ bool exclusive; /* Shared or exclusive call */ bool upgrade; /* If the connection is upgradeable */ }; /* * Buffer of data to be output as a packet. 
*/ struct rxrpc_txbuf { refcount_t ref; rxrpc_seq_t seq; /* Sequence number of this packet */ rxrpc_serial_t serial; /* Last serial number transmitted with */ unsigned int call_debug_id; unsigned int debug_id; unsigned short len; /* Amount of data in buffer */ unsigned short space; /* Remaining data space */ unsigned short offset; /* Offset of fill point */ unsigned short crypto_header; /* Size of crypto header */ unsigned short sec_header; /* Size of security header */ unsigned short pkt_len; /* Size of packet content */ unsigned short alloc_size; /* Amount of bufferage allocated */ unsigned int flags; #define RXRPC_TXBUF_WIRE_FLAGS 0xff /* The wire protocol flags */ #define RXRPC_TXBUF_RESENT 0x100 /* Set if has been resent */ __be16 cksum; /* Checksum to go in header */ bool jumboable; /* Can be non-terminal jumbo subpacket */ void *data; /* Data with preceding jumbo header */ }; static inline bool rxrpc_sending_to_server(const struct rxrpc_txbuf *txb) { return txb->flags & RXRPC_CLIENT_INITIATED; } static inline bool rxrpc_sending_to_client(const struct rxrpc_txbuf *txb) { return !rxrpc_sending_to_server(txb); } /* * Transmit queue element, including RACK [RFC8985] per-segment metadata. The * transmission timestamp is in usec from the base. */ struct rxrpc_txqueue { /* Start with the members we want to prefetch. */ struct rxrpc_txqueue *next; ktime_t xmit_ts_base; rxrpc_seq_t qbase; u8 nr_reported_acks; /* Number of segments explicitly acked/nacked */ unsigned long segment_acked; /* Bit-per-buf: Set if ACK'd */ unsigned long segment_lost; /* Bit-per-buf: Set if declared lost */ unsigned long segment_retransmitted; /* Bit-per-buf: Set if retransmitted */ unsigned long rtt_samples; /* Bit-per-buf: Set if available for RTT */ unsigned long ever_retransmitted; /* Bit-per-buf: Set if ever retransmitted */ /* The arrays we want to pack into as few cache lines as possible. 
*/ struct { #define RXRPC_NR_TXQUEUE BITS_PER_LONG #define RXRPC_TXQ_MASK (RXRPC_NR_TXQUEUE - 1) struct rxrpc_txbuf *bufs[RXRPC_NR_TXQUEUE]; unsigned int segment_serial[RXRPC_NR_TXQUEUE]; unsigned int segment_xmit_ts[RXRPC_NR_TXQUEUE]; } ____cacheline_aligned; }; /* * Data transmission request. */ struct rxrpc_send_data_req { ktime_t now; /* Current time */ struct rxrpc_txqueue *tq; /* Tx queue segment holding first DATA */ rxrpc_seq_t seq; /* Sequence of first data */ int n; /* Number of DATA packets to glue into jumbo */ bool retrans; /* T if this is a retransmission */ bool did_send; /* T if did actually send */ bool tlp_probe; /* T if this is a TLP probe */ int /* enum rxrpc_txdata_trace */ trace; }; #include <trace/events/rxrpc.h> /* * Allocate the next serial number on a connection. 0 must be skipped. */ static inline rxrpc_serial_t rxrpc_get_next_serial(struct rxrpc_connection *conn) { rxrpc_serial_t serial; serial = conn->tx_serial; if (serial == 0) serial = 1; conn->tx_serial = serial + 1; return serial; } /* * Allocate the next serial n numbers on a connection. 0 must be skipped. 
*/ static inline rxrpc_serial_t rxrpc_get_next_serials(struct rxrpc_connection *conn, unsigned int n) { rxrpc_serial_t serial; serial = conn->tx_serial; if (serial + n <= n) serial = 1; conn->tx_serial = serial + n; return serial; } /* * af_rxrpc.c */ extern atomic_t rxrpc_n_rx_skbs; extern struct workqueue_struct *rxrpc_workqueue; /* * call_accept.c */ int rxrpc_service_prealloc(struct rxrpc_sock *, gfp_t); void rxrpc_discard_prealloc(struct rxrpc_sock *); bool rxrpc_new_incoming_call(struct rxrpc_local *local, struct rxrpc_peer *peer, struct rxrpc_connection *conn, struct sockaddr_rxrpc *peer_srx, struct sk_buff *skb); int rxrpc_user_charge_accept(struct rxrpc_sock *, unsigned long); /* * call_event.c */ void rxrpc_propose_ping(struct rxrpc_call *call, u32 serial, enum rxrpc_propose_ack_trace why); void rxrpc_propose_delay_ACK(struct rxrpc_call *, rxrpc_serial_t, enum rxrpc_propose_ack_trace); void rxrpc_resend_tlp(struct rxrpc_call *call); void rxrpc_transmit_some_data(struct rxrpc_call *call, unsigned int limit, enum rxrpc_txdata_trace trace); bool rxrpc_input_call_event(struct rxrpc_call *call); /* * call_object.c */ extern const char *const rxrpc_call_states[]; extern const char *const rxrpc_call_completions[]; extern struct kmem_cache *rxrpc_call_jar; void rxrpc_poke_call(struct rxrpc_call *call, enum rxrpc_call_poke_trace what); struct rxrpc_call *rxrpc_find_call_by_user_ID(struct rxrpc_sock *, unsigned long); struct rxrpc_call *rxrpc_alloc_call(struct rxrpc_sock *, gfp_t, unsigned int); struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *, struct rxrpc_conn_parameters *, struct rxrpc_call_params *, gfp_t, unsigned int) __releases(&rx->sk.sk_lock) __acquires(&call->user_mutex); void rxrpc_start_call_timer(struct rxrpc_call *call); void rxrpc_incoming_call(struct rxrpc_sock *, struct rxrpc_call *, struct sk_buff *); void rxrpc_release_call(struct rxrpc_sock *, struct rxrpc_call *); void rxrpc_release_calls_on_socket(struct rxrpc_sock *); void 
rxrpc_see_call(struct rxrpc_call *, enum rxrpc_call_trace); struct rxrpc_call *rxrpc_try_get_call(struct rxrpc_call *, enum rxrpc_call_trace); void rxrpc_get_call(struct rxrpc_call *, enum rxrpc_call_trace); void rxrpc_put_call(struct rxrpc_call *, enum rxrpc_call_trace); void rxrpc_cleanup_call(struct rxrpc_call *); void rxrpc_destroy_all_calls(struct rxrpc_net *); static inline bool rxrpc_is_service_call(const struct rxrpc_call *call) { return test_bit(RXRPC_CALL_IS_SERVICE, &call->flags); } static inline bool rxrpc_is_client_call(const struct rxrpc_call *call) { return !rxrpc_is_service_call(call); } /* * call_state.c */ bool rxrpc_set_call_completion(struct rxrpc_call *call, enum rxrpc_call_completion compl, u32 abort_code, int error); bool rxrpc_call_completed(struct rxrpc_call *call); bool rxrpc_abort_call(struct rxrpc_call *call, rxrpc_seq_t seq, u32 abort_code, int error, enum rxrpc_abort_reason why); void rxrpc_prefail_call(struct rxrpc_call *call, enum rxrpc_call_completion compl, int error); static inline void rxrpc_set_call_state(struct rxrpc_call *call, enum rxrpc_call_state state) { /* Order write of completion info before write of ->state. */ smp_store_release(&call->_state, state); wake_up(&call->waitq); } static inline enum rxrpc_call_state __rxrpc_call_state(const struct rxrpc_call *call) { return call->_state; /* Only inside I/O thread */ } static inline bool __rxrpc_call_is_complete(const struct rxrpc_call *call) { return __rxrpc_call_state(call) == RXRPC_CALL_COMPLETE; } static inline enum rxrpc_call_state rxrpc_call_state(const struct rxrpc_call *call) { /* Order read ->state before read of completion info. 
*/ return smp_load_acquire(&call->_state); } static inline bool rxrpc_call_is_complete(const struct rxrpc_call *call) { return rxrpc_call_state(call) == RXRPC_CALL_COMPLETE; } static inline bool rxrpc_call_has_failed(const struct rxrpc_call *call) { return rxrpc_call_is_complete(call) && call->completion != RXRPC_CALL_SUCCEEDED; } /* * conn_client.c */ extern unsigned int rxrpc_reap_client_connections; extern unsigned long rxrpc_conn_idle_client_expiry; extern unsigned long rxrpc_conn_idle_client_fast_expiry; void rxrpc_purge_client_connections(struct rxrpc_local *local); struct rxrpc_bundle *rxrpc_get_bundle(struct rxrpc_bundle *, enum rxrpc_bundle_trace); void rxrpc_put_bundle(struct rxrpc_bundle *, enum rxrpc_bundle_trace); int rxrpc_look_up_bundle(struct rxrpc_call *call, gfp_t gfp); void rxrpc_connect_client_calls(struct rxrpc_local *local); void rxrpc_expose_client_call(struct rxrpc_call *); void rxrpc_disconnect_client_call(struct rxrpc_bundle *, struct rxrpc_call *); void rxrpc_deactivate_bundle(struct rxrpc_bundle *bundle); void rxrpc_discard_expired_client_conns(struct rxrpc_local *local); void rxrpc_clean_up_local_conns(struct rxrpc_local *); /* * conn_event.c */ void rxrpc_conn_retransmit_call(struct rxrpc_connection *conn, struct sk_buff *skb, unsigned int channel); int rxrpc_abort_conn(struct rxrpc_connection *conn, struct sk_buff *skb, s32 abort_code, int err, enum rxrpc_abort_reason why); void rxrpc_process_connection(struct work_struct *); void rxrpc_process_delayed_final_acks(struct rxrpc_connection *, bool); bool rxrpc_input_conn_packet(struct rxrpc_connection *conn, struct sk_buff *skb); void rxrpc_input_conn_event(struct rxrpc_connection *conn, struct sk_buff *skb); static inline bool rxrpc_is_conn_aborted(const struct rxrpc_connection *conn) { /* Order reading the abort info after the state check. 
*/ return smp_load_acquire(&conn->state) == RXRPC_CONN_ABORTED; } /* * conn_object.c */ extern unsigned int rxrpc_connection_expiry; extern unsigned int rxrpc_closed_conn_expiry; void rxrpc_poke_conn(struct rxrpc_connection *conn, enum rxrpc_conn_trace why); struct rxrpc_connection *rxrpc_alloc_connection(struct rxrpc_net *, gfp_t); struct rxrpc_connection *rxrpc_find_client_connection_rcu(struct rxrpc_local *, struct sockaddr_rxrpc *, struct sk_buff *); void __rxrpc_disconnect_call(struct rxrpc_connection *, struct rxrpc_call *); void rxrpc_disconnect_call(struct rxrpc_call *); void rxrpc_kill_client_conn(struct rxrpc_connection *); void rxrpc_queue_conn(struct rxrpc_connection *, enum rxrpc_conn_trace); void rxrpc_see_connection(struct rxrpc_connection *, enum rxrpc_conn_trace); struct rxrpc_connection *rxrpc_get_connection(struct rxrpc_connection *, enum rxrpc_conn_trace); struct rxrpc_connection *rxrpc_get_connection_maybe(struct rxrpc_connection *, enum rxrpc_conn_trace); void rxrpc_put_connection(struct rxrpc_connection *, enum rxrpc_conn_trace); void rxrpc_service_connection_reaper(struct work_struct *); void rxrpc_destroy_all_connections(struct rxrpc_net *); static inline bool rxrpc_conn_is_client(const struct rxrpc_connection *conn) { return conn->out_clientflag; } static inline bool rxrpc_conn_is_service(const struct rxrpc_connection *conn) { return !rxrpc_conn_is_client(conn); } static inline void rxrpc_reduce_conn_timer(struct rxrpc_connection *conn, unsigned long expire_at) { timer_reduce(&conn->timer, expire_at); } /* * conn_service.c */ struct rxrpc_connection *rxrpc_find_service_conn_rcu(struct rxrpc_peer *, struct sk_buff *); struct rxrpc_connection *rxrpc_prealloc_service_connection(struct rxrpc_net *, gfp_t); void rxrpc_new_incoming_connection(struct rxrpc_sock *, struct rxrpc_connection *, const struct rxrpc_security *, struct sk_buff *); void rxrpc_unpublish_service_conn(struct rxrpc_connection *); /* * input.c */ void 
rxrpc_congestion_degrade(struct rxrpc_call *); void rxrpc_input_call_packet(struct rxrpc_call *, struct sk_buff *); void rxrpc_implicit_end_call(struct rxrpc_call *, struct sk_buff *); /* * input_rack.c */ void rxrpc_input_rack_one(struct rxrpc_call *call, struct rxrpc_ack_summary *summary, struct rxrpc_txqueue *tq, unsigned int ix); void rxrpc_input_rack(struct rxrpc_call *call, struct rxrpc_ack_summary *summary, struct rxrpc_txqueue *tq, unsigned long new_acks); void rxrpc_rack_detect_loss_and_arm_timer(struct rxrpc_call *call, struct rxrpc_ack_summary *summary); ktime_t rxrpc_tlp_calc_pto(struct rxrpc_call *call, ktime_t now); void rxrpc_tlp_send_probe(struct rxrpc_call *call); void rxrpc_tlp_process_ack(struct rxrpc_call *call, struct rxrpc_ack_summary *summary); void rxrpc_rack_timer_expired(struct rxrpc_call *call, ktime_t overran_by); /* Initialise TLP state [RFC8958 7.1]. */ static inline void rxrpc_tlp_init(struct rxrpc_call *call) { call->tlp_serial = 0; call->tlp_seq = call->acks_hard_ack; call->tlp_is_retrans = false; } /* * io_thread.c */ int rxrpc_encap_rcv(struct sock *, struct sk_buff *); void rxrpc_error_report(struct sock *); bool rxrpc_direct_abort(struct sk_buff *skb, enum rxrpc_abort_reason why, s32 abort_code, int err); bool rxrpc_direct_conn_abort(struct sk_buff *skb, enum rxrpc_abort_reason why, s32 abort_code, int err); int rxrpc_io_thread(void *data); void rxrpc_post_response(struct rxrpc_connection *conn, struct sk_buff *skb); static inline void rxrpc_wake_up_io_thread(struct rxrpc_local *local) { if (!local->io_thread) return; wake_up_process(READ_ONCE(local->io_thread)); } static inline bool rxrpc_protocol_error(struct sk_buff *skb, enum rxrpc_abort_reason why) { return rxrpc_direct_abort(skb, why, RX_PROTOCOL_ERROR, -EPROTO); } /* * insecure.c */ extern const struct rxrpc_security rxrpc_no_security; /* * key.c */ extern struct key_type key_type_rxrpc; int rxrpc_request_key(struct rxrpc_sock *, sockptr_t , int); int 
rxrpc_get_server_data_key(struct rxrpc_connection *, const void *, time64_t, u32); /* * local_event.c */ void rxrpc_gen_version_string(void); void rxrpc_send_version_request(struct rxrpc_local *local, struct rxrpc_host_header *hdr, struct sk_buff *skb); /* * local_object.c */ void rxrpc_local_dont_fragment(const struct rxrpc_local *local, bool set); struct rxrpc_local *rxrpc_lookup_local(struct net *, const struct sockaddr_rxrpc *); struct rxrpc_local *rxrpc_get_local(struct rxrpc_local *, enum rxrpc_local_trace); struct rxrpc_local *rxrpc_get_local_maybe(struct rxrpc_local *, enum rxrpc_local_trace); void rxrpc_put_local(struct rxrpc_local *, enum rxrpc_local_trace); struct rxrpc_local *rxrpc_use_local(struct rxrpc_local *, enum rxrpc_local_trace); void rxrpc_unuse_local(struct rxrpc_local *, enum rxrpc_local_trace); void rxrpc_destroy_local(struct rxrpc_local *local); void rxrpc_destroy_all_locals(struct rxrpc_net *); static inline bool __rxrpc_use_local(struct rxrpc_local *local, enum rxrpc_local_trace why) { int r, u; r = refcount_read(&local->ref); u = atomic_fetch_add_unless(&local->active_users, 1, 0); trace_rxrpc_local(local->debug_id, why, r, u); return u != 0; } static inline void rxrpc_see_local(struct rxrpc_local *local, enum rxrpc_local_trace why) { int r, u; r = refcount_read(&local->ref); u = atomic_read(&local->active_users); trace_rxrpc_local(local->debug_id, why, r, u); } /* * misc.c */ extern unsigned int rxrpc_max_backlog __read_mostly; extern unsigned long rxrpc_soft_ack_delay; extern unsigned long rxrpc_idle_ack_delay; extern unsigned int rxrpc_rx_window_size; extern unsigned int rxrpc_rx_mtu; extern unsigned int rxrpc_rx_jumbo_max; #ifdef CONFIG_AF_RXRPC_INJECT_RX_DELAY extern unsigned long rxrpc_inject_rx_delay; #endif /* * net_ns.c */ extern unsigned int rxrpc_net_id; extern struct pernet_operations rxrpc_net_ops; static inline struct rxrpc_net *rxrpc_net(struct net *net) { return net_generic(net, rxrpc_net_id); } /* * out_of_band.c */ void 
rxrpc_notify_socket_oob(struct rxrpc_call *call, struct sk_buff *skb); void rxrpc_add_pending_oob(struct rxrpc_sock *rx, struct sk_buff *skb); int rxrpc_sendmsg_oob(struct rxrpc_sock *rx, struct msghdr *msg, size_t len); /* * output.c */ ssize_t do_udp_sendmsg(struct socket *socket, struct msghdr *msg, size_t len); void rxrpc_send_ACK(struct rxrpc_call *call, u8 ack_reason, rxrpc_serial_t serial, enum rxrpc_propose_ack_trace why); void rxrpc_send_probe_for_pmtud(struct rxrpc_call *call); int rxrpc_send_abort_packet(struct rxrpc_call *); void rxrpc_send_data_packet(struct rxrpc_call *call, struct rxrpc_send_data_req *req); void rxrpc_send_conn_abort(struct rxrpc_connection *conn); void rxrpc_reject_packet(struct rxrpc_local *local, struct sk_buff *skb); void rxrpc_send_keepalive(struct rxrpc_peer *); void rxrpc_send_response(struct rxrpc_connection *conn, struct sk_buff *skb); /* * peer_event.c */ void rxrpc_input_error(struct rxrpc_local *, struct sk_buff *); void rxrpc_peer_keepalive_worker(struct work_struct *); void rxrpc_input_probe_for_pmtud(struct rxrpc_connection *conn, rxrpc_serial_t acked_serial, bool sendmsg_fail); /* Update the last transmission time on a peer for keepalive purposes. */ static inline void rxrpc_peer_mark_tx(struct rxrpc_peer *peer) { /* To avoid tearing on 32-bit systems, we only keep the LSW. 
*/ WRITE_ONCE(peer->last_tx_at, ktime_get_seconds()); } /* * peer_object.c */ struct rxrpc_peer *rxrpc_lookup_peer_rcu(struct rxrpc_local *, const struct sockaddr_rxrpc *); struct rxrpc_peer *rxrpc_lookup_peer(struct rxrpc_local *local, struct sockaddr_rxrpc *srx, gfp_t gfp); void rxrpc_assess_MTU_size(struct rxrpc_local *local, struct rxrpc_peer *peer); struct rxrpc_peer *rxrpc_alloc_peer(struct rxrpc_local *, gfp_t, enum rxrpc_peer_trace); void rxrpc_new_incoming_peer(struct rxrpc_local *local, struct rxrpc_peer *peer); void rxrpc_destroy_all_peers(struct rxrpc_net *); struct rxrpc_peer *rxrpc_get_peer(struct rxrpc_peer *, enum rxrpc_peer_trace); struct rxrpc_peer *rxrpc_get_peer_maybe(struct rxrpc_peer *, enum rxrpc_peer_trace); void rxrpc_put_peer(struct rxrpc_peer *, enum rxrpc_peer_trace); /* * proc.c */ extern const struct seq_operations rxrpc_call_seq_ops; extern const struct seq_operations rxrpc_connection_seq_ops; extern const struct seq_operations rxrpc_bundle_seq_ops; extern const struct seq_operations rxrpc_peer_seq_ops; extern const struct seq_operations rxrpc_local_seq_ops; /* * recvmsg.c */ void rxrpc_notify_socket(struct rxrpc_call *); int rxrpc_recvmsg(struct socket *, struct msghdr *, size_t, int); /* * Abort a call due to a protocol error. 
*/ static inline int rxrpc_abort_eproto(struct rxrpc_call *call, struct sk_buff *skb, s32 abort_code, enum rxrpc_abort_reason why) { struct rxrpc_skb_priv *sp = rxrpc_skb(skb); rxrpc_abort_call(call, sp->hdr.seq, abort_code, -EPROTO, why); return -EPROTO; } /* * rtt.c */ void rxrpc_call_add_rtt(struct rxrpc_call *call, enum rxrpc_rtt_rx_trace why, int rtt_slot, rxrpc_serial_t send_serial, rxrpc_serial_t resp_serial, ktime_t send_time, ktime_t resp_time); ktime_t rxrpc_get_rto_backoff(struct rxrpc_call *call, bool retrans); void rxrpc_call_init_rtt(struct rxrpc_call *call); /* * rxgk.c */ extern const struct rxrpc_security rxgk_yfs; /* * rxkad.c */ #ifdef CONFIG_RXKAD extern const struct rxrpc_security rxkad; #endif /* * security.c */ int __init rxrpc_init_security(void); const struct rxrpc_security *rxrpc_security_lookup(u8); void rxrpc_exit_security(void); int rxrpc_init_client_call_security(struct rxrpc_call *); int rxrpc_init_client_conn_security(struct rxrpc_connection *); const struct rxrpc_security *rxrpc_get_incoming_security(struct rxrpc_sock *, struct sk_buff *); struct key *rxrpc_look_up_server_security(struct rxrpc_connection *, struct sk_buff *, u32, u32); /* * sendmsg.c */ bool rxrpc_propose_abort(struct rxrpc_call *call, s32 abort_code, int error, enum rxrpc_abort_reason why); int rxrpc_do_sendmsg(struct rxrpc_sock *, struct msghdr *, size_t); /* * server_key.c */ extern struct key_type key_type_rxrpc_s; int rxrpc_server_keyring(struct rxrpc_sock *, sockptr_t, int); /* * skbuff.c */ void rxrpc_kernel_data_consumed(struct rxrpc_call *, struct sk_buff *); void rxrpc_new_skb(struct sk_buff *, enum rxrpc_skb_trace); void rxrpc_see_skb(struct sk_buff *, enum rxrpc_skb_trace); void rxrpc_eaten_skb(struct sk_buff *, enum rxrpc_skb_trace); void rxrpc_get_skb(struct sk_buff *, enum rxrpc_skb_trace); void rxrpc_free_skb(struct sk_buff *, enum rxrpc_skb_trace); void rxrpc_purge_queue(struct sk_buff_head *); /* * stats.c */ int rxrpc_stats_show(struct seq_file 
*seq, void *v); int rxrpc_stats_clear(struct file *file, char *buf, size_t size); #define rxrpc_inc_stat(rxnet, s) atomic_inc(&(rxnet)->s) #define rxrpc_dec_stat(rxnet, s) atomic_dec(&(rxnet)->s) /* * sysctl.c */ #ifdef CONFIG_SYSCTL extern int __init rxrpc_sysctl_init(void); extern void rxrpc_sysctl_exit(void); #else static inline int __init rxrpc_sysctl_init(void) { return 0; } static inline void rxrpc_sysctl_exit(void) {} #endif /* * txbuf.c */ extern atomic_t rxrpc_nr_txbuf; struct rxrpc_txbuf *rxrpc_alloc_data_txbuf(struct rxrpc_call *call, size_t data_size, size_t data_align, gfp_t gfp); void rxrpc_see_txbuf(struct rxrpc_txbuf *txb, enum rxrpc_txbuf_trace what); void rxrpc_put_txbuf(struct rxrpc_txbuf *txb, enum rxrpc_txbuf_trace what); /* * utils.c */ int rxrpc_extract_addr_from_skb(struct sockaddr_rxrpc *, struct sk_buff *); static inline bool before(u32 seq1, u32 seq2) { return (s32)(seq1 - seq2) < 0; } static inline bool before_eq(u32 seq1, u32 seq2) { return (s32)(seq1 - seq2) <= 0; } static inline bool after(u32 seq1, u32 seq2) { return (s32)(seq1 - seq2) > 0; } static inline bool after_eq(u32 seq1, u32 seq2) { return (s32)(seq1 - seq2) >= 0; } static inline u32 earliest(u32 seq1, u32 seq2) { return before(seq1, seq2) ? seq1 : seq2; } static inline u32 latest(u32 seq1, u32 seq2) { return after(seq1, seq2) ? seq1 : seq2; } static inline bool rxrpc_seq_in_txq(const struct rxrpc_txqueue *tq, rxrpc_seq_t seq) { return (seq & (RXRPC_NR_TXQUEUE - 1)) == tq->qbase; } static inline void rxrpc_queue_rx_call_packet(struct rxrpc_call *call, struct sk_buff *skb) { rxrpc_get_skb(skb, rxrpc_skb_get_call_rx); __skb_queue_tail(&call->rx_queue, skb); rxrpc_poke_call(call, rxrpc_call_poke_rx_packet); } /* * Calculate how much space there is for transmitting more DATA packets. 
*/ static inline unsigned int rxrpc_tx_window_space(const struct rxrpc_call *call) { int winsize = umin(call->tx_winsize, call->cong_cwnd + call->cong_extra); int transmitted = call->tx_top - call->tx_bottom; return max(winsize - transmitted, 0); } static inline unsigned int rxrpc_left_out(const struct rxrpc_call *call) { return call->acks_nr_sacks + call->tx_nr_lost; } /* * Calculate the number of transmitted DATA packets assumed to be in flight * [approx RFC6675]. */ static inline unsigned int rxrpc_tx_in_flight(const struct rxrpc_call *call) { return call->tx_nr_sent - rxrpc_left_out(call) + call->tx_nr_resent; } /* * debug tracing */ extern unsigned int rxrpc_debug; #define dbgprintk(FMT,...) \ printk("[%-6.6s] "FMT"\n", current->comm ,##__VA_ARGS__) #define kenter(FMT,...) dbgprintk("==> %s("FMT")",__func__ ,##__VA_ARGS__) #define kleave(FMT,...) dbgprintk("<== %s()"FMT"",__func__ ,##__VA_ARGS__) #define kdebug(FMT,...) dbgprintk(" "FMT ,##__VA_ARGS__) #if defined(__KDEBUG) #define _enter(FMT,...) kenter(FMT,##__VA_ARGS__) #define _leave(FMT,...) kleave(FMT,##__VA_ARGS__) #define _debug(FMT,...) kdebug(FMT,##__VA_ARGS__) #elif defined(CONFIG_AF_RXRPC_DEBUG) #define RXRPC_DEBUG_KENTER 0x01 #define RXRPC_DEBUG_KLEAVE 0x02 #define RXRPC_DEBUG_KDEBUG 0x04 #define _enter(FMT,...) \ do { \ if (unlikely(rxrpc_debug & RXRPC_DEBUG_KENTER)) \ kenter(FMT,##__VA_ARGS__); \ } while (0) #define _leave(FMT,...) \ do { \ if (unlikely(rxrpc_debug & RXRPC_DEBUG_KLEAVE)) \ kleave(FMT,##__VA_ARGS__); \ } while (0) #define _debug(FMT,...) \ do { \ if (unlikely(rxrpc_debug & RXRPC_DEBUG_KDEBUG)) \ kdebug(FMT,##__VA_ARGS__); \ } while (0) #else #define _enter(FMT,...) no_printk("==> %s("FMT")",__func__ ,##__VA_ARGS__) #define _leave(FMT,...) no_printk("<== %s()"FMT"",__func__ ,##__VA_ARGS__) #define _debug(FMT,...) 
no_printk(" "FMT ,##__VA_ARGS__) #endif /* * debug assertion checking */ #if 1 // defined(__KDEBUGALL) #define ASSERT(X) \ do { \ if (unlikely(!(X))) { \ pr_err("Assertion failed\n"); \ BUG(); \ } \ } while (0) #define ASSERTCMP(X, OP, Y) \ do { \ __typeof__(X) _x = (X); \ __typeof__(Y) _y = (__typeof__(X))(Y); \ if (unlikely(!(_x OP _y))) { \ pr_err("Assertion failed - %lu(0x%lx) %s %lu(0x%lx) is false\n", \ (unsigned long)_x, (unsigned long)_x, #OP, \ (unsigned long)_y, (unsigned long)_y); \ BUG(); \ } \ } while (0) #define ASSERTIF(C, X) \ do { \ if (unlikely((C) && !(X))) { \ pr_err("Assertion failed\n"); \ BUG(); \ } \ } while (0) #define ASSERTIFCMP(C, X, OP, Y) \ do { \ __typeof__(X) _x = (X); \ __typeof__(Y) _y = (__typeof__(X))(Y); \ if (unlikely((C) && !(_x OP _y))) { \ pr_err("Assertion failed - %lu(0x%lx) %s %lu(0x%lx) is false\n", \ (unsigned long)_x, (unsigned long)_x, #OP, \ (unsigned long)_y, (unsigned long)_y); \ BUG(); \ } \ } while (0) #else #define ASSERT(X) \ do { \ } while (0) #define ASSERTCMP(X, OP, Y) \ do { \ } while (0) #define ASSERTIF(C, X) \ do { \ } while (0) #define ASSERTIFCMP(C, X, OP, Y) \ do { \ } while (0) #endif /* __KDEBUGALL */
4 1 1 4 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 // SPDX-License-Identifier: GPL-2.0 #include <linux/kernel.h> #include <linux/errno.h> #include <linux/fs.h> #include <linux/file.h> #include <linux/mm.h> #include <linux/slab.h> #include <linux/syscalls.h> #include <linux/io_uring.h> #include <uapi/linux/io_uring.h> #include "../fs/internal.h" #include "io_uring.h" #include "truncate.h" struct io_ftrunc { struct file *file; loff_t len; }; int io_ftruncate_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) { struct io_ftrunc *ft = io_kiocb_to_cmd(req, struct io_ftrunc); if (sqe->rw_flags || sqe->addr || sqe->len || sqe->buf_index || sqe->splice_fd_in || sqe->addr3) return -EINVAL; ft->len = READ_ONCE(sqe->off); req->flags |= REQ_F_FORCE_ASYNC; return 0; } int io_ftruncate(struct io_kiocb *req, unsigned int issue_flags) { struct io_ftrunc *ft = io_kiocb_to_cmd(req, struct io_ftrunc); int ret; WARN_ON_ONCE(issue_flags & IO_URING_F_NONBLOCK); ret = do_ftruncate(req->file, ft->len, 0); io_req_set_res(req, ret, 0); return IOU_COMPLETE; }
3 3 3 3 3 3 3 3 1 1 1 3 3 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 
521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 
1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 
1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 
1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 // SPDX-License-Identifier: GPL-2.0-or-later /* * SPCA501 chip based cameras initialization data * * V4L2 by Jean-Francois Moine <http://moinejf.free.fr> */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #define MODULE_NAME "spca501" #include "gspca.h" MODULE_AUTHOR("Michel Xhaard <mxhaard@users.sourceforge.net>"); MODULE_DESCRIPTION("GSPCA/SPCA501 USB Camera Driver"); MODULE_LICENSE("GPL"); /* specific webcam descriptor */ struct sd { struct gspca_dev gspca_dev; /* !! 
must be the first item */ unsigned short contrast; __u8 brightness; __u8 colors; __u8 blue_balance; __u8 red_balance; char subtype; #define Arowana300KCMOSCamera 0 #define IntelCreateAndShare 1 #define KodakDVC325 2 #define MystFromOriUnknownCamera 3 #define SmileIntlCamera 4 #define ThreeComHomeConnectLite 5 #define ViewQuestM318B 6 }; static const struct v4l2_pix_format vga_mode[] = { {160, 120, V4L2_PIX_FMT_SPCA501, V4L2_FIELD_NONE, .bytesperline = 160, .sizeimage = 160 * 120 * 3 / 2, .colorspace = V4L2_COLORSPACE_SRGB, .priv = 2}, {320, 240, V4L2_PIX_FMT_SPCA501, V4L2_FIELD_NONE, .bytesperline = 320, .sizeimage = 320 * 240 * 3 / 2, .colorspace = V4L2_COLORSPACE_SRGB, .priv = 1}, {640, 480, V4L2_PIX_FMT_SPCA501, V4L2_FIELD_NONE, .bytesperline = 640, .sizeimage = 640 * 480 * 3 / 2, .colorspace = V4L2_COLORSPACE_SRGB, .priv = 0}, }; #define SPCA50X_REG_USB 0x2 /* spca505 501 */ /* * Data to initialize a SPCA501. From a capture file provided by Bill Roehl * With SPCA501 chip description */ #define CCDSP_SET /* set CCDSP parameters */ #define TG_SET /* set time generator set */ #undef DSPWIN_SET /* set DSP windows parameters */ #undef ALTER_GAMA /* Set alternate set to YUV transform coeffs. 
*/ #define SPCA501_SNAPBIT 0x80 #define SPCA501_SNAPCTRL 0x10 /* Frame packet header offsets for the spca501 */ #define SPCA501_OFFSET_GPIO 1 #define SPCA501_OFFSET_TYPE 2 #define SPCA501_OFFSET_TURN3A 3 #define SPCA501_OFFSET_FRAMSEQ 4 #define SPCA501_OFFSET_COMPRESS 5 #define SPCA501_OFFSET_QUANT 6 #define SPCA501_OFFSET_QUANT2 7 #define SPCA501_OFFSET_DATA 8 #define SPCA501_PROP_COMP_ENABLE(d) ((d) & 1) #define SPCA501_PROP_SNAP(d) ((d) & 0x40) #define SPCA501_PROP_SNAP_CTRL(d) ((d) & 0x10) #define SPCA501_PROP_COMP_THRESH(d) (((d) & 0x0e) >> 1) #define SPCA501_PROP_COMP_QUANT(d) (((d) & 0x70) >> 4) /* SPCA501 CCDSP control */ #define SPCA501_REG_CCDSP 0x01 /* SPCA501 control/status registers */ #define SPCA501_REG_CTLRL 0x02 /* registers for color correction and YUV transformation */ #define SPCA501_A11 0x08 #define SPCA501_A12 0x09 #define SPCA501_A13 0x0A #define SPCA501_A21 0x0B #define SPCA501_A22 0x0C #define SPCA501_A23 0x0D #define SPCA501_A31 0x0E #define SPCA501_A32 0x0F #define SPCA501_A33 0x10 /* Data for video camera initialization before capturing */ static const __u16 spca501_open_data[][3] = { /* bmRequest,value,index */ {0x2, 0x50, 0x00}, /* C/S enable soft reset */ {0x2, 0x40, 0x00}, /* C/S disable soft reset */ {0x2, 0x02, 0x05}, /* C/S general purpose I/O data */ {0x2, 0x03, 0x05}, /* C/S general purpose I/O data */ #ifdef CCDSP_SET {0x1, 0x38, 0x01}, /* CCDSP options */ {0x1, 0x05, 0x02}, /* CCDSP Optical black level for user settings */ {0x1, 0xC0, 0x03}, /* CCDSP Optical black settings */ {0x1, 0x67, 0x07}, {0x1, 0x63, 0x3f}, /* CCDSP CCD gamma enable */ {0x1, 0x03, 0x56}, /* Add gamma correction */ {0x1, 0xFF, 0x15}, /* CCDSP High luminance for white balance */ {0x1, 0x01, 0x16}, /* CCDSP Low luminance for white balance */ /* Color correction and RGB-to-YUV transformation coefficients changing */ #ifdef ALTER_GAMA {0x0, 0x00, 0x08}, /* A11 */ {0x0, 0x00, 0x09}, /* A12 */ {0x0, 0x90, 0x0A}, /* A13 */ {0x0, 0x12, 0x0B}, /* A21 */ {0x0, 
0x00, 0x0C}, /* A22 */ {0x0, 0x00, 0x0D}, /* A23 */ {0x0, 0x00, 0x0E}, /* A31 */ {0x0, 0x02, 0x0F}, /* A32 */ {0x0, 0x00, 0x10}, /* A33 */ #else {0x1, 0x2a, 0x08}, /* A11 0x31 */ {0x1, 0xf8, 0x09}, /* A12 f8 */ {0x1, 0xf8, 0x0A}, /* A13 f8 */ {0x1, 0xf8, 0x0B}, /* A21 f8 */ {0x1, 0x14, 0x0C}, /* A22 0x14 */ {0x1, 0xf8, 0x0D}, /* A23 f8 */ {0x1, 0xf8, 0x0E}, /* A31 f8 */ {0x1, 0xf8, 0x0F}, /* A32 f8 */ {0x1, 0x20, 0x10}, /* A33 0x20 */ #endif {0x1, 0x00, 0x11}, /* R offset */ {0x1, 0x00, 0x12}, /* G offset */ {0x1, 0x00, 0x13}, /* B offset */ {0x1, 0x00, 0x14}, /* GB offset */ #endif #ifdef TG_SET /* Time generator manipulations */ {0x0, 0xfc, 0x0}, /* Set up high bits of shutter speed */ {0x0, 0x01, 0x1}, /* Set up low bits of shutter speed */ {0x0, 0xe4, 0x04}, /* DCLK*2 clock phase adjustment */ {0x0, 0x08, 0x05}, /* ADCK phase adjustment, inv. ext. VB */ {0x0, 0x03, 0x06}, /* FR phase adjustment */ {0x0, 0x01, 0x07}, /* FCDS phase adjustment */ {0x0, 0x39, 0x08}, /* FS phase adjustment */ {0x0, 0x88, 0x0a}, /* FH1 phase and delay adjustment */ {0x0, 0x03, 0x0f}, /* pixel identification */ {0x0, 0x00, 0x11}, /* clock source selection (default) */ /*VERY strange manipulations with * select DMCLP or OBPX to be ADCLP output (0x0C) * OPB always toggle or not (0x0D) but they allow * us to set up brightness */ {0x0, 0x01, 0x0c}, {0x0, 0xe0, 0x0d}, /* Done */ #endif #ifdef DSPWIN_SET {0x1, 0xa0, 0x01}, /* Setting image processing parameters */ {0x1, 0x1c, 0x17}, /* Changing Windows positions X1 */ {0x1, 0xe2, 0x19}, /* X2 */ {0x1, 0x1c, 0x1b}, /* X3 */ {0x1, 0xe2, 0x1d}, /* X4 */ {0x1, 0x5f, 0x1f}, /* X5 */ {0x1, 0x32, 0x20}, /* Y5 */ {0x1, 0x01, 0x10}, /* Changing A33 */ #endif {0x2, 0x204a, 0x07},/* Setting video compression & resolution 160x120 */ {0x2, 0x94, 0x06}, /* Setting video no compression */ {} }; /* The SPCAxxx docs from Sunplus document these values in tables, one table per register number. 
In the data below, dmRequest is the register number, index is the Addr, and value is a combination of Bit values. Bit Value (hex) 0 01 1 02 2 04 3 08 4 10 5 20 6 40 7 80 */ /* Data for chip initialization (set default values) */ static const __u16 spca501_init_data[][3] = { /* Set all the values to powerup defaults */ /* bmRequest,value,index */ {0x0, 0xAA, 0x00}, {0x0, 0x02, 0x01}, {0x0, 0x01, 0x02}, {0x0, 0x02, 0x03}, {0x0, 0xCE, 0x04}, {0x0, 0x00, 0x05}, {0x0, 0x00, 0x06}, {0x0, 0x00, 0x07}, {0x0, 0x00, 0x08}, {0x0, 0x00, 0x09}, {0x0, 0x90, 0x0A}, {0x0, 0x12, 0x0B}, {0x0, 0x00, 0x0C}, {0x0, 0x00, 0x0D}, {0x0, 0x00, 0x0E}, {0x0, 0x02, 0x0F}, {0x0, 0x00, 0x10}, {0x0, 0x00, 0x11}, {0x0, 0x00, 0x12}, {0x0, 0x00, 0x13}, {0x0, 0x00, 0x14}, {0x0, 0x00, 0x15}, {0x0, 0x00, 0x16}, {0x0, 0x00, 0x17}, {0x0, 0x00, 0x18}, {0x0, 0x00, 0x19}, {0x0, 0x00, 0x1A}, {0x0, 0x00, 0x1B}, {0x0, 0x00, 0x1C}, {0x0, 0x00, 0x1D}, {0x0, 0x00, 0x1E}, {0x0, 0x00, 0x1F}, {0x0, 0x00, 0x20}, {0x0, 0x00, 0x21}, {0x0, 0x00, 0x22}, {0x0, 0x00, 0x23}, {0x0, 0x00, 0x24}, {0x0, 0x00, 0x25}, {0x0, 0x00, 0x26}, {0x0, 0x00, 0x27}, {0x0, 0x00, 0x28}, {0x0, 0x00, 0x29}, {0x0, 0x00, 0x2A}, {0x0, 0x00, 0x2B}, {0x0, 0x00, 0x2C}, {0x0, 0x00, 0x2D}, {0x0, 0x00, 0x2E}, {0x0, 0x00, 0x2F}, {0x0, 0x00, 0x30}, {0x0, 0x00, 0x31}, {0x0, 0x00, 0x32}, {0x0, 0x00, 0x33}, {0x0, 0x00, 0x34}, {0x0, 0x00, 0x35}, {0x0, 0x00, 0x36}, {0x0, 0x00, 0x37}, {0x0, 0x00, 0x38}, {0x0, 0x00, 0x39}, {0x0, 0x00, 0x3A}, {0x0, 0x00, 0x3B}, {0x0, 0x00, 0x3C}, {0x0, 0x00, 0x3D}, {0x0, 0x00, 0x3E}, {0x0, 0x00, 0x3F}, {0x0, 0x00, 0x40}, {0x0, 0x00, 0x41}, {0x0, 0x00, 0x42}, {0x0, 0x00, 0x43}, {0x0, 0x00, 0x44}, {0x0, 0x00, 0x45}, {0x0, 0x00, 0x46}, {0x0, 0x00, 0x47}, {0x0, 0x00, 0x48}, {0x0, 0x00, 0x49}, {0x0, 0x00, 0x4A}, {0x0, 0x00, 0x4B}, {0x0, 0x00, 0x4C}, {0x0, 0x00, 0x4D}, {0x0, 0x00, 0x4E}, {0x0, 0x00, 0x4F}, {0x0, 0x00, 0x50}, {0x0, 0x00, 0x51}, {0x0, 0x00, 0x52}, {0x0, 0x00, 0x53}, {0x0, 0x00, 0x54}, {0x0, 0x00, 0x55}, {0x0, 0x00, 
0x56}, {0x0, 0x00, 0x57}, {0x0, 0x00, 0x58}, {0x0, 0x00, 0x59}, {0x0, 0x00, 0x5A}, {0x0, 0x00, 0x5B}, {0x0, 0x00, 0x5C}, {0x0, 0x00, 0x5D}, {0x0, 0x00, 0x5E}, {0x0, 0x00, 0x5F}, {0x0, 0x00, 0x60}, {0x0, 0x00, 0x61}, {0x0, 0x00, 0x62}, {0x0, 0x00, 0x63}, {0x0, 0x00, 0x64}, {0x0, 0x00, 0x65}, {0x0, 0x00, 0x66}, {0x0, 0x00, 0x67}, {0x0, 0x00, 0x68}, {0x0, 0x00, 0x69}, {0x0, 0x00, 0x6A}, {0x0, 0x00, 0x6B}, {0x0, 0x00, 0x6C}, {0x0, 0x00, 0x6D}, {0x0, 0x00, 0x6E}, {0x0, 0x00, 0x6F}, {0x0, 0x00, 0x70}, {0x0, 0x00, 0x71}, {0x0, 0x00, 0x72}, {0x0, 0x00, 0x73}, {0x0, 0x00, 0x74}, {0x0, 0x00, 0x75}, {0x0, 0x00, 0x76}, {0x0, 0x00, 0x77}, {0x0, 0x00, 0x78}, {0x0, 0x00, 0x79}, {0x0, 0x00, 0x7A}, {0x0, 0x00, 0x7B}, {0x0, 0x00, 0x7C}, {0x0, 0x00, 0x7D}, {0x0, 0x00, 0x7E}, {0x0, 0x00, 0x7F}, {0x0, 0x00, 0x80}, {0x0, 0x00, 0x81}, {0x0, 0x00, 0x82}, {0x0, 0x00, 0x83}, {0x0, 0x00, 0x84}, {0x0, 0x00, 0x85}, {0x0, 0x00, 0x86}, {0x0, 0x00, 0x87}, {0x0, 0x00, 0x88}, {0x0, 0x00, 0x89}, {0x0, 0x00, 0x8A}, {0x0, 0x00, 0x8B}, {0x0, 0x00, 0x8C}, {0x0, 0x00, 0x8D}, {0x0, 0x00, 0x8E}, {0x0, 0x00, 0x8F}, {0x0, 0x00, 0x90}, {0x0, 0x00, 0x91}, {0x0, 0x00, 0x92}, {0x0, 0x00, 0x93}, {0x0, 0x00, 0x94}, {0x0, 0x00, 0x95}, {0x0, 0x00, 0x96}, {0x0, 0x00, 0x97}, {0x0, 0x00, 0x98}, {0x0, 0x00, 0x99}, {0x0, 0x00, 0x9A}, {0x0, 0x00, 0x9B}, {0x0, 0x00, 0x9C}, {0x0, 0x00, 0x9D}, {0x0, 0x00, 0x9E}, {0x0, 0x00, 0x9F}, {0x0, 0x00, 0xA0}, {0x0, 0x00, 0xA1}, {0x0, 0x00, 0xA2}, {0x0, 0x00, 0xA3}, {0x0, 0x00, 0xA4}, {0x0, 0x00, 0xA5}, {0x0, 0x00, 0xA6}, {0x0, 0x00, 0xA7}, {0x0, 0x00, 0xA8}, {0x0, 0x00, 0xA9}, {0x0, 0x00, 0xAA}, {0x0, 0x00, 0xAB}, {0x0, 0x00, 0xAC}, {0x0, 0x00, 0xAD}, {0x0, 0x00, 0xAE}, {0x0, 0x00, 0xAF}, {0x0, 0x00, 0xB0}, {0x0, 0x00, 0xB1}, {0x0, 0x00, 0xB2}, {0x0, 0x00, 0xB3}, {0x0, 0x00, 0xB4}, {0x0, 0x00, 0xB5}, {0x0, 0x00, 0xB6}, {0x0, 0x00, 0xB7}, {0x0, 0x00, 0xB8}, {0x0, 0x00, 0xB9}, {0x0, 0x00, 0xBA}, {0x0, 0x00, 0xBB}, {0x0, 0x00, 0xBC}, {0x0, 0x00, 0xBD}, {0x0, 0x00, 0xBE}, {0x0, 0x00, 
0xBF}, {0x0, 0x00, 0xC0}, {0x0, 0x00, 0xC1}, {0x0, 0x00, 0xC2}, {0x0, 0x00, 0xC3}, {0x0, 0x00, 0xC4}, {0x0, 0x00, 0xC5}, {0x0, 0x00, 0xC6}, {0x0, 0x00, 0xC7}, {0x0, 0x00, 0xC8}, {0x0, 0x00, 0xC9}, {0x0, 0x00, 0xCA}, {0x0, 0x00, 0xCB}, {0x0, 0x00, 0xCC}, {0x1, 0xF4, 0x00}, {0x1, 0x38, 0x01}, {0x1, 0x40, 0x02}, {0x1, 0x0A, 0x03}, {0x1, 0x40, 0x04}, {0x1, 0x40, 0x05}, {0x1, 0x40, 0x06}, {0x1, 0x67, 0x07}, {0x1, 0x31, 0x08}, {0x1, 0x00, 0x09}, {0x1, 0x00, 0x0A}, {0x1, 0x00, 0x0B}, {0x1, 0x14, 0x0C}, {0x1, 0x00, 0x0D}, {0x1, 0x00, 0x0E}, {0x1, 0x00, 0x0F}, {0x1, 0x1E, 0x10}, {0x1, 0x00, 0x11}, {0x1, 0x00, 0x12}, {0x1, 0x00, 0x13}, {0x1, 0x00, 0x14}, {0x1, 0xFF, 0x15}, {0x1, 0x01, 0x16}, {0x1, 0x32, 0x17}, {0x1, 0x23, 0x18}, {0x1, 0xCE, 0x19}, {0x1, 0x23, 0x1A}, {0x1, 0x32, 0x1B}, {0x1, 0x8D, 0x1C}, {0x1, 0xCE, 0x1D}, {0x1, 0x8D, 0x1E}, {0x1, 0x00, 0x1F}, {0x1, 0x00, 0x20}, {0x1, 0xFF, 0x3E}, {0x1, 0x02, 0x3F}, {0x1, 0x00, 0x40}, {0x1, 0x00, 0x41}, {0x1, 0x00, 0x42}, {0x1, 0x00, 0x43}, {0x1, 0x00, 0x44}, {0x1, 0x00, 0x45}, {0x1, 0x00, 0x46}, {0x1, 0x00, 0x47}, {0x1, 0x00, 0x48}, {0x1, 0x00, 0x49}, {0x1, 0x00, 0x4A}, {0x1, 0x00, 0x4B}, {0x1, 0x00, 0x4C}, {0x1, 0x00, 0x4D}, {0x1, 0x00, 0x4E}, {0x1, 0x00, 0x4F}, {0x1, 0x00, 0x50}, {0x1, 0x00, 0x51}, {0x1, 0x00, 0x52}, {0x1, 0x00, 0x53}, {0x1, 0x00, 0x54}, {0x1, 0x00, 0x55}, {0x1, 0x00, 0x56}, {0x1, 0x00, 0x57}, {0x1, 0x00, 0x58}, {0x1, 0x00, 0x59}, {0x1, 0x00, 0x5A}, {0x2, 0x03, 0x00}, {0x2, 0x00, 0x01}, {0x2, 0x00, 0x05}, {0x2, 0x00, 0x06}, {0x2, 0x00, 0x07}, {0x2, 0x00, 0x10}, {0x2, 0x00, 0x11}, /* Strange - looks like the 501 driver doesn't do anything * at insert time except read the EEPROM */ {} }; /* Data for video camera init before capture. * Capture and decoding by Colin Peart. * This is for the 3com HomeConnect Lite which is spca501a based. 
*/ static const __u16 spca501_3com_open_data[][3] = { /* bmRequest,value,index */ {0x2, 0x0050, 0x0000}, /* C/S Enable TG soft reset, timing mode=010 */ {0x2, 0x0043, 0x0000}, /* C/S Disable TG soft reset, timing mode=010 */ {0x2, 0x0002, 0x0005}, /* C/S GPIO */ {0x2, 0x0003, 0x0005}, /* C/S GPIO */ #ifdef CCDSP_SET {0x1, 0x0020, 0x0001}, /* CCDSP Options */ {0x1, 0x0020, 0x0002}, /* CCDSP Black Level */ {0x1, 0x006e, 0x0007}, /* CCDSP Gamma options */ {0x1, 0x0090, 0x0015}, /* CCDSP Luminance Low */ {0x1, 0x00ff, 0x0016}, /* CCDSP Luminance High */ {0x1, 0x0003, 0x003F}, /* CCDSP Gamma correction toggle */ #ifdef ALTER_GAMMA {0x1, 0x0010, 0x0008}, /* CCDSP YUV A11 */ {0x1, 0x0000, 0x0009}, /* CCDSP YUV A12 */ {0x1, 0x0000, 0x000a}, /* CCDSP YUV A13 */ {0x1, 0x0000, 0x000b}, /* CCDSP YUV A21 */ {0x1, 0x0010, 0x000c}, /* CCDSP YUV A22 */ {0x1, 0x0000, 0x000d}, /* CCDSP YUV A23 */ {0x1, 0x0000, 0x000e}, /* CCDSP YUV A31 */ {0x1, 0x0000, 0x000f}, /* CCDSP YUV A32 */ {0x1, 0x0010, 0x0010}, /* CCDSP YUV A33 */ {0x1, 0x0000, 0x0011}, /* CCDSP R Offset */ {0x1, 0x0000, 0x0012}, /* CCDSP G Offset */ {0x1, 0x0001, 0x0013}, /* CCDSP B Offset */ {0x1, 0x0001, 0x0014}, /* CCDSP BG Offset */ {0x1, 0x003f, 0x00C1}, /* CCDSP Gamma Correction Enable */ #endif #endif #ifdef TG_SET {0x0, 0x00fc, 0x0000}, /* TG Shutter Speed High Bits */ {0x0, 0x0000, 0x0001}, /* TG Shutter Speed Low Bits */ {0x0, 0x00e4, 0x0004}, /* TG DCLK*2 Adjust */ {0x0, 0x0008, 0x0005}, /* TG ADCK Adjust */ {0x0, 0x0003, 0x0006}, /* TG FR Phase Adjust */ {0x0, 0x0001, 0x0007}, /* TG FCDS Phase Adjust */ {0x0, 0x0039, 0x0008}, /* TG FS Phase Adjust */ {0x0, 0x0088, 0x000a}, /* TG MH1 */ {0x0, 0x0003, 0x000f}, /* TG Pixel ID */ /* Like below, unexplained toglleing */ {0x0, 0x0080, 0x000c}, {0x0, 0x0000, 0x000d}, {0x0, 0x0080, 0x000c}, {0x0, 0x0004, 0x000d}, {0x0, 0x0000, 0x000c}, {0x0, 0x0000, 0x000d}, {0x0, 0x0040, 0x000c}, {0x0, 0x0017, 0x000d}, {0x0, 0x00c0, 0x000c}, {0x0, 0x0000, 0x000d}, {0x0, 0x0080, 
0x000c}, {0x0, 0x0006, 0x000d}, {0x0, 0x0080, 0x000c}, {0x0, 0x0004, 0x000d}, {0x0, 0x0002, 0x0003}, #endif #ifdef DSPWIN_SET {0x1, 0x001c, 0x0017}, /* CCDSP W1 Start X */ {0x1, 0x00e2, 0x0019}, /* CCDSP W2 Start X */ {0x1, 0x001c, 0x001b}, /* CCDSP W3 Start X */ {0x1, 0x00e2, 0x001d}, /* CCDSP W4 Start X */ {0x1, 0x00aa, 0x001f}, /* CCDSP W5 Start X */ {0x1, 0x0070, 0x0020}, /* CCDSP W5 Start Y */ #endif {0x0, 0x0001, 0x0010}, /* TG Start Clock */ /* {0x2, 0x006a, 0x0001}, * C/S Enable ISOSYNCH Packet Engine */ {0x2, 0x0068, 0x0001}, /* C/S Disable ISOSYNCH Packet Engine */ {0x2, 0x0000, 0x0005}, {0x2, 0x0043, 0x0000}, /* C/S Set Timing Mode, Disable TG soft reset */ {0x2, 0x0043, 0x0000}, /* C/S Set Timing Mode, Disable TG soft reset */ {0x2, 0x0002, 0x0005}, /* C/S GPIO */ {0x2, 0x0003, 0x0005}, /* C/S GPIO */ {0x2, 0x006a, 0x0001}, /* C/S Enable ISOSYNCH Packet Engine */ {} }; /* * Data used to initialize a SPCA501C with HV7131B sensor. * From a capture file taken with USBSnoop v 1.5 * I have a "SPCA501C pc camera chipset" manual by sunplus, but some * of the value meanings are obscure or simply "reserved". * to do list: * 1) Understand what every value means * 2) Understand why some values seem to appear more than once * 3) Write a small comment for each line of the following arrays. 
*/ static const __u16 spca501c_arowana_open_data[][3] = { /* bmRequest,value,index */ {0x02, 0x0007, 0x0005}, {0x02, 0xa048, 0x0000}, {0x05, 0x0022, 0x0004}, {0x01, 0x0006, 0x0011}, {0x01, 0x00ff, 0x0012}, {0x01, 0x0014, 0x0013}, {0x01, 0x0000, 0x0014}, {0x01, 0x0042, 0x0051}, {0x01, 0x0040, 0x0052}, {0x01, 0x0051, 0x0053}, {0x01, 0x0040, 0x0054}, {0x01, 0x0000, 0x0055}, {0x00, 0x0025, 0x0000}, {0x00, 0x0026, 0x0000}, {0x00, 0x0001, 0x0000}, {0x00, 0x0027, 0x0000}, {0x00, 0x008a, 0x0000}, {} }; static const __u16 spca501c_arowana_init_data[][3] = { /* bmRequest,value,index */ {0x02, 0x0007, 0x0005}, {0x02, 0xa048, 0x0000}, {0x05, 0x0022, 0x0004}, {0x01, 0x0006, 0x0011}, {0x01, 0x00ff, 0x0012}, {0x01, 0x0014, 0x0013}, {0x01, 0x0000, 0x0014}, {0x01, 0x0042, 0x0051}, {0x01, 0x0040, 0x0052}, {0x01, 0x0051, 0x0053}, {0x01, 0x0040, 0x0054}, {0x01, 0x0000, 0x0055}, {0x00, 0x0025, 0x0000}, {0x00, 0x0026, 0x0000}, {0x00, 0x0001, 0x0000}, {0x00, 0x0027, 0x0000}, {0x00, 0x008a, 0x0000}, {0x02, 0x0000, 0x0005}, {0x02, 0x0007, 0x0005}, {0x02, 0x2000, 0x0000}, {0x05, 0x0022, 0x0004}, {0x05, 0x0015, 0x0001}, {0x05, 0x00ea, 0x0000}, {0x05, 0x0021, 0x0001}, {0x05, 0x00d2, 0x0000}, {0x05, 0x0023, 0x0001}, {0x05, 0x0003, 0x0000}, {0x05, 0x0030, 0x0001}, {0x05, 0x002b, 0x0000}, {0x05, 0x0031, 0x0001}, {0x05, 0x0023, 0x0000}, {0x05, 0x0032, 0x0001}, {0x05, 0x0023, 0x0000}, {0x05, 0x0033, 0x0001}, {0x05, 0x0023, 0x0000}, {0x05, 0x0034, 0x0001}, {0x05, 0x0002, 0x0000}, {0x05, 0x0050, 0x0001}, {0x05, 0x0000, 0x0000}, {0x05, 0x0051, 0x0001}, {0x05, 0x0000, 0x0000}, {0x05, 0x0052, 0x0001}, {0x05, 0x0000, 0x0000}, {0x05, 0x0054, 0x0001}, {0x05, 0x0001, 0x0000}, {0x00, 0x0000, 0x0001}, {0x00, 0x0000, 0x0002}, {0x00, 0x000c, 0x0003}, {0x00, 0x0000, 0x0004}, {0x00, 0x0090, 0x0005}, {0x00, 0x0000, 0x0006}, {0x00, 0x0040, 0x0007}, {0x00, 0x00c0, 0x0008}, {0x00, 0x004a, 0x0009}, {0x00, 0x0000, 0x000a}, {0x00, 0x0000, 0x000b}, {0x00, 0x0001, 0x000c}, {0x00, 0x0001, 0x000d}, {0x00, 0x0000, 0x000e}, 
{0x00, 0x0002, 0x000f}, {0x00, 0x0001, 0x0010}, {0x00, 0x0000, 0x0011}, {0x00, 0x0000, 0x0012}, {0x00, 0x0002, 0x0020}, {0x00, 0x0080, 0x0021}, {0x00, 0x0001, 0x0022}, {0x00, 0x00e0, 0x0023}, {0x00, 0x0000, 0x0024}, {0x00, 0x00d5, 0x0025}, {0x00, 0x0000, 0x0026}, {0x00, 0x000b, 0x0027}, {0x00, 0x0000, 0x0046}, {0x00, 0x0000, 0x0047}, {0x00, 0x0000, 0x0048}, {0x00, 0x0000, 0x0049}, {0x00, 0x0008, 0x004a}, {0xff, 0x0000, 0x00d0}, {0xff, 0x00d8, 0x00d1}, {0xff, 0x0000, 0x00d4}, {0xff, 0x0000, 0x00d5}, {0x01, 0x00a6, 0x0000}, {0x01, 0x0028, 0x0001}, {0x01, 0x0000, 0x0002}, {0x01, 0x000a, 0x0003}, {0x01, 0x0040, 0x0004}, {0x01, 0x0066, 0x0007}, {0x01, 0x0011, 0x0008}, {0x01, 0x0032, 0x0009}, {0x01, 0x00fd, 0x000a}, {0x01, 0x0038, 0x000b}, {0x01, 0x00d1, 0x000c}, {0x01, 0x00f7, 0x000d}, {0x01, 0x00ed, 0x000e}, {0x01, 0x00d8, 0x000f}, {0x01, 0x0038, 0x0010}, {0x01, 0x00ff, 0x0015}, {0x01, 0x0001, 0x0016}, {0x01, 0x0032, 0x0017}, {0x01, 0x0023, 0x0018}, {0x01, 0x00ce, 0x0019}, {0x01, 0x0023, 0x001a}, {0x01, 0x0032, 0x001b}, {0x01, 0x008d, 0x001c}, {0x01, 0x00ce, 0x001d}, {0x01, 0x008d, 0x001e}, {0x01, 0x0000, 0x001f}, {0x01, 0x0000, 0x0020}, {0x01, 0x00ff, 0x003e}, {0x01, 0x0003, 0x003f}, {0x01, 0x0000, 0x0040}, {0x01, 0x0035, 0x0041}, {0x01, 0x0053, 0x0042}, {0x01, 0x0069, 0x0043}, {0x01, 0x007c, 0x0044}, {0x01, 0x008c, 0x0045}, {0x01, 0x009a, 0x0046}, {0x01, 0x00a8, 0x0047}, {0x01, 0x00b4, 0x0048}, {0x01, 0x00bf, 0x0049}, {0x01, 0x00ca, 0x004a}, {0x01, 0x00d4, 0x004b}, {0x01, 0x00dd, 0x004c}, {0x01, 0x00e7, 0x004d}, {0x01, 0x00ef, 0x004e}, {0x01, 0x00f8, 0x004f}, {0x01, 0x00ff, 0x0050}, {0x01, 0x0001, 0x0056}, {0x01, 0x0060, 0x0057}, {0x01, 0x0040, 0x0058}, {0x01, 0x0011, 0x0059}, {0x01, 0x0001, 0x005a}, {0x02, 0x0007, 0x0005}, {0x02, 0xa048, 0x0000}, {0x02, 0x0007, 0x0005}, {0x02, 0x0015, 0x0006}, {0x02, 0x100a, 0x0007}, {0x02, 0xa048, 0x0000}, {0x02, 0xc002, 0x0001}, {0x02, 0x000f, 0x0005}, {0x02, 0xa048, 0x0000}, {0x05, 0x0022, 0x0004}, {0x05, 0x0025, 0x0001}, {0x05, 
0x0000, 0x0000}, {0x05, 0x0026, 0x0001}, {0x05, 0x0001, 0x0000}, {0x05, 0x0027, 0x0001}, {0x05, 0x0000, 0x0000}, {0x05, 0x0001, 0x0001}, {0x05, 0x0000, 0x0000}, {0x05, 0x0021, 0x0001}, {0x05, 0x00d2, 0x0000}, {0x05, 0x0020, 0x0001}, {0x05, 0x0000, 0x0000}, {0x00, 0x0090, 0x0005}, {0x01, 0x00a6, 0x0000}, {0x02, 0x0007, 0x0005}, {0x02, 0x2000, 0x0000}, {0x05, 0x0022, 0x0004}, {0x05, 0x0015, 0x0001}, {0x05, 0x00ea, 0x0000}, {0x05, 0x0021, 0x0001}, {0x05, 0x00d2, 0x0000}, {0x05, 0x0023, 0x0001}, {0x05, 0x0003, 0x0000}, {0x05, 0x0030, 0x0001}, {0x05, 0x002b, 0x0000}, {0x05, 0x0031, 0x0001}, {0x05, 0x0023, 0x0000}, {0x05, 0x0032, 0x0001}, {0x05, 0x0023, 0x0000}, {0x05, 0x0033, 0x0001}, {0x05, 0x0023, 0x0000}, {0x05, 0x0034, 0x0001}, {0x05, 0x0002, 0x0000}, {0x05, 0x0050, 0x0001}, {0x05, 0x0000, 0x0000}, {0x05, 0x0051, 0x0001}, {0x05, 0x0000, 0x0000}, {0x05, 0x0052, 0x0001}, {0x05, 0x0000, 0x0000}, {0x05, 0x0054, 0x0001}, {0x05, 0x0001, 0x0000}, {0x00, 0x0000, 0x0001}, {0x00, 0x0000, 0x0002}, {0x00, 0x000c, 0x0003}, {0x00, 0x0000, 0x0004}, {0x00, 0x0090, 0x0005}, {0x00, 0x0000, 0x0006}, {0x00, 0x0040, 0x0007}, {0x00, 0x00c0, 0x0008}, {0x00, 0x004a, 0x0009}, {0x00, 0x0000, 0x000a}, {0x00, 0x0000, 0x000b}, {0x00, 0x0001, 0x000c}, {0x00, 0x0001, 0x000d}, {0x00, 0x0000, 0x000e}, {0x00, 0x0002, 0x000f}, {0x00, 0x0001, 0x0010}, {0x00, 0x0000, 0x0011}, {0x00, 0x0000, 0x0012}, {0x00, 0x0002, 0x0020}, {0x00, 0x0080, 0x0021}, {0x00, 0x0001, 0x0022}, {0x00, 0x00e0, 0x0023}, {0x00, 0x0000, 0x0024}, {0x00, 0x00d5, 0x0025}, {0x00, 0x0000, 0x0026}, {0x00, 0x000b, 0x0027}, {0x00, 0x0000, 0x0046}, {0x00, 0x0000, 0x0047}, {0x00, 0x0000, 0x0048}, {0x00, 0x0000, 0x0049}, {0x00, 0x0008, 0x004a}, {0xff, 0x0000, 0x00d0}, {0xff, 0x00d8, 0x00d1}, {0xff, 0x0000, 0x00d4}, {0xff, 0x0000, 0x00d5}, {0x01, 0x00a6, 0x0000}, {0x01, 0x0028, 0x0001}, {0x01, 0x0000, 0x0002}, {0x01, 0x000a, 0x0003}, {0x01, 0x0040, 0x0004}, {0x01, 0x0066, 0x0007}, {0x01, 0x0011, 0x0008}, {0x01, 0x0032, 0x0009}, {0x01, 0x00fd, 
0x000a}, {0x01, 0x0038, 0x000b}, {0x01, 0x00d1, 0x000c}, {0x01, 0x00f7, 0x000d}, {0x01, 0x00ed, 0x000e}, {0x01, 0x00d8, 0x000f}, {0x01, 0x0038, 0x0010}, {0x01, 0x00ff, 0x0015}, {0x01, 0x0001, 0x0016}, {0x01, 0x0032, 0x0017}, {0x01, 0x0023, 0x0018}, {0x01, 0x00ce, 0x0019}, {0x01, 0x0023, 0x001a}, {0x01, 0x0032, 0x001b}, {0x01, 0x008d, 0x001c}, {0x01, 0x00ce, 0x001d}, {0x01, 0x008d, 0x001e}, {0x01, 0x0000, 0x001f}, {0x01, 0x0000, 0x0020}, {0x01, 0x00ff, 0x003e}, {0x01, 0x0003, 0x003f}, {0x01, 0x0000, 0x0040}, {0x01, 0x0035, 0x0041}, {0x01, 0x0053, 0x0042}, {0x01, 0x0069, 0x0043}, {0x01, 0x007c, 0x0044}, {0x01, 0x008c, 0x0045}, {0x01, 0x009a, 0x0046}, {0x01, 0x00a8, 0x0047}, {0x01, 0x00b4, 0x0048}, {0x01, 0x00bf, 0x0049}, {0x01, 0x00ca, 0x004a}, {0x01, 0x00d4, 0x004b}, {0x01, 0x00dd, 0x004c}, {0x01, 0x00e7, 0x004d}, {0x01, 0x00ef, 0x004e}, {0x01, 0x00f8, 0x004f}, {0x01, 0x00ff, 0x0050}, {0x01, 0x0001, 0x0056}, {0x01, 0x0060, 0x0057}, {0x01, 0x0040, 0x0058}, {0x01, 0x0011, 0x0059}, {0x01, 0x0001, 0x005a}, {0x02, 0x0007, 0x0005}, {0x02, 0xa048, 0x0000}, {0x02, 0x0007, 0x0005}, {0x02, 0x0015, 0x0006}, {0x02, 0x100a, 0x0007}, {0x02, 0xa048, 0x0000}, {0x02, 0xc002, 0x0001}, {0x02, 0x000f, 0x0005}, {0x02, 0xa048, 0x0000}, {0x05, 0x0022, 0x0004}, {0x05, 0x0025, 0x0001}, {0x05, 0x0000, 0x0000}, {0x05, 0x0026, 0x0001}, {0x05, 0x0001, 0x0000}, {0x05, 0x0027, 0x0001}, {0x05, 0x0000, 0x0000}, {0x05, 0x0001, 0x0001}, {0x05, 0x0000, 0x0000}, {0x05, 0x0021, 0x0001}, {0x05, 0x00d2, 0x0000}, {0x05, 0x0020, 0x0001}, {0x05, 0x0000, 0x0000}, {0x00, 0x0090, 0x0005}, {0x01, 0x00a6, 0x0000}, {0x01, 0x0003, 0x003f}, {0x01, 0x0001, 0x0056}, {0x01, 0x0011, 0x0008}, {0x01, 0x0032, 0x0009}, {0x01, 0xfffd, 0x000a}, {0x01, 0x0023, 0x000b}, {0x01, 0xffea, 0x000c}, {0x01, 0xfff4, 0x000d}, {0x01, 0xfffc, 0x000e}, {0x01, 0xffe3, 0x000f}, {0x01, 0x001f, 0x0010}, {0x01, 0x00a8, 0x0001}, {0x01, 0x0067, 0x0007}, {0x01, 0x0032, 0x0017}, {0x01, 0x0023, 0x0018}, {0x01, 0x00ce, 0x0019}, {0x01, 0x0023, 
0x001a}, {0x01, 0x0032, 0x001b}, {0x01, 0x008d, 0x001c}, {0x01, 0x00ce, 0x001d}, {0x01, 0x008d, 0x001e}, {0x01, 0x00c8, 0x0015}, {0x01, 0x0032, 0x0016}, {0x01, 0x0000, 0x0011}, {0x01, 0x0000, 0x0012}, {0x01, 0x0000, 0x0013}, {0x01, 0x000a, 0x0003}, {0x02, 0xc002, 0x0001}, {0x02, 0x0007, 0x0005}, {0x02, 0xc000, 0x0001}, {0x02, 0x0000, 0x0005}, {0x02, 0x0007, 0x0005}, {0x02, 0x2000, 0x0000}, {0x05, 0x0022, 0x0004}, {0x05, 0x0015, 0x0001}, {0x05, 0x00ea, 0x0000}, {0x05, 0x0021, 0x0001}, {0x05, 0x00d2, 0x0000}, {0x05, 0x0023, 0x0001}, {0x05, 0x0003, 0x0000}, {0x05, 0x0030, 0x0001}, {0x05, 0x002b, 0x0000}, {0x05, 0x0031, 0x0001}, {0x05, 0x0023, 0x0000}, {0x05, 0x0032, 0x0001}, {0x05, 0x0023, 0x0000}, {0x05, 0x0033, 0x0001}, {0x05, 0x0023, 0x0000}, {0x05, 0x0034, 0x0001}, {0x05, 0x0002, 0x0000}, {0x05, 0x0050, 0x0001}, {0x05, 0x0000, 0x0000}, {0x05, 0x0051, 0x0001}, {0x05, 0x0000, 0x0000}, {0x05, 0x0052, 0x0001}, {0x05, 0x0000, 0x0000}, {0x05, 0x0054, 0x0001}, {0x05, 0x0001, 0x0000}, {0x00, 0x0000, 0x0001}, {0x00, 0x0000, 0x0002}, {0x00, 0x000c, 0x0003}, {0x00, 0x0000, 0x0004}, {0x00, 0x0090, 0x0005}, {0x00, 0x0000, 0x0006}, {0x00, 0x0040, 0x0007}, {0x00, 0x00c0, 0x0008}, {0x00, 0x004a, 0x0009}, {0x00, 0x0000, 0x000a}, {0x00, 0x0000, 0x000b}, {0x00, 0x0001, 0x000c}, {0x00, 0x0001, 0x000d}, {0x00, 0x0000, 0x000e}, {0x00, 0x0002, 0x000f}, {0x00, 0x0001, 0x0010}, {0x00, 0x0000, 0x0011}, {0x00, 0x0000, 0x0012}, {0x00, 0x0002, 0x0020}, {0x00, 0x0080, 0x0021}, {0x00, 0x0001, 0x0022}, {0x00, 0x00e0, 0x0023}, {0x00, 0x0000, 0x0024}, {0x00, 0x00d5, 0x0025}, {0x00, 0x0000, 0x0026}, {0x00, 0x000b, 0x0027}, {0x00, 0x0000, 0x0046}, {0x00, 0x0000, 0x0047}, {0x00, 0x0000, 0x0048}, {0x00, 0x0000, 0x0049}, {0x00, 0x0008, 0x004a}, {0xff, 0x0000, 0x00d0}, {0xff, 0x00d8, 0x00d1}, {0xff, 0x0000, 0x00d4}, {0xff, 0x0000, 0x00d5}, {0x01, 0x00a6, 0x0000}, {0x01, 0x0028, 0x0001}, {0x01, 0x0000, 0x0002}, {0x01, 0x000a, 0x0003}, {0x01, 0x0040, 0x0004}, {0x01, 0x0066, 0x0007}, {0x01, 0x0011, 
0x0008}, {0x01, 0x0032, 0x0009}, {0x01, 0x00fd, 0x000a}, {0x01, 0x0038, 0x000b}, {0x01, 0x00d1, 0x000c}, {0x01, 0x00f7, 0x000d}, {0x01, 0x00ed, 0x000e}, {0x01, 0x00d8, 0x000f}, {0x01, 0x0038, 0x0010}, {0x01, 0x00ff, 0x0015}, {0x01, 0x0001, 0x0016}, {0x01, 0x0032, 0x0017}, {0x01, 0x0023, 0x0018}, {0x01, 0x00ce, 0x0019}, {0x01, 0x0023, 0x001a}, {0x01, 0x0032, 0x001b}, {0x01, 0x008d, 0x001c}, {0x01, 0x00ce, 0x001d}, {0x01, 0x008d, 0x001e}, {0x01, 0x0000, 0x001f}, {0x01, 0x0000, 0x0020}, {0x01, 0x00ff, 0x003e}, {0x01, 0x0003, 0x003f}, {0x01, 0x0000, 0x0040}, {0x01, 0x0035, 0x0041}, {0x01, 0x0053, 0x0042}, {0x01, 0x0069, 0x0043}, {0x01, 0x007c, 0x0044}, {0x01, 0x008c, 0x0045}, {0x01, 0x009a, 0x0046}, {0x01, 0x00a8, 0x0047}, {0x01, 0x00b4, 0x0048}, {0x01, 0x00bf, 0x0049}, {0x01, 0x00ca, 0x004a}, {0x01, 0x00d4, 0x004b}, {0x01, 0x00dd, 0x004c}, {0x01, 0x00e7, 0x004d}, {0x01, 0x00ef, 0x004e}, {0x01, 0x00f8, 0x004f}, {0x01, 0x00ff, 0x0050}, {0x01, 0x0001, 0x0056}, {0x01, 0x0060, 0x0057}, {0x01, 0x0040, 0x0058}, {0x01, 0x0011, 0x0059}, {0x01, 0x0001, 0x005a}, {0x02, 0x0007, 0x0005}, {0x02, 0xa048, 0x0000}, {0x02, 0x0007, 0x0005}, {0x02, 0x0015, 0x0006}, {0x02, 0x100a, 0x0007}, {0x02, 0xa048, 0x0000}, {0x02, 0xc002, 0x0001}, {0x02, 0x000f, 0x0005}, {0x02, 0xa048, 0x0000}, {0x05, 0x0022, 0x0004}, {0x05, 0x0025, 0x0001}, {0x05, 0x0000, 0x0000}, {0x05, 0x0026, 0x0001}, {0x05, 0x0001, 0x0000}, {0x05, 0x0027, 0x0001}, {0x05, 0x0000, 0x0000}, {0x05, 0x0001, 0x0001}, {0x05, 0x0000, 0x0000}, {0x05, 0x0021, 0x0001}, {0x05, 0x00d2, 0x0000}, {0x05, 0x0020, 0x0001}, {0x05, 0x0000, 0x0000}, {0x00, 0x0090, 0x0005}, {0x01, 0x00a6, 0x0000}, {0x02, 0x0007, 0x0005}, {0x02, 0x2000, 0x0000}, {0x05, 0x0022, 0x0004}, {0x05, 0x0015, 0x0001}, {0x05, 0x00ea, 0x0000}, {0x05, 0x0021, 0x0001}, {0x05, 0x00d2, 0x0000}, {0x05, 0x0023, 0x0001}, {0x05, 0x0003, 0x0000}, {0x05, 0x0030, 0x0001}, {0x05, 0x002b, 0x0000}, {0x05, 0x0031, 0x0001}, {0x05, 0x0023, 0x0000}, {0x05, 0x0032, 0x0001}, {0x05, 0x0023, 
0x0000}, {0x05, 0x0033, 0x0001}, {0x05, 0x0023, 0x0000}, {0x05, 0x0034, 0x0001}, {0x05, 0x0002, 0x0000}, {0x05, 0x0050, 0x0001}, {0x05, 0x0000, 0x0000}, {0x05, 0x0051, 0x0001}, {0x05, 0x0000, 0x0000}, {0x05, 0x0052, 0x0001}, {0x05, 0x0000, 0x0000}, {0x05, 0x0054, 0x0001}, {0x05, 0x0001, 0x0000}, {0x00, 0x0000, 0x0001}, {0x00, 0x0000, 0x0002}, {0x00, 0x000c, 0x0003}, {0x00, 0x0000, 0x0004}, {0x00, 0x0090, 0x0005}, {0x00, 0x0000, 0x0006}, {0x00, 0x0040, 0x0007}, {0x00, 0x00c0, 0x0008}, {0x00, 0x004a, 0x0009}, {0x00, 0x0000, 0x000a}, {0x00, 0x0000, 0x000b}, {0x00, 0x0001, 0x000c}, {0x00, 0x0001, 0x000d}, {0x00, 0x0000, 0x000e}, {0x00, 0x0002, 0x000f}, {0x00, 0x0001, 0x0010}, {0x00, 0x0000, 0x0011}, {0x00, 0x0000, 0x0012}, {0x00, 0x0002, 0x0020}, {0x00, 0x0080, 0x0021}, {0x00, 0x0001, 0x0022}, {0x00, 0x00e0, 0x0023}, {0x00, 0x0000, 0x0024}, {0x00, 0x00d5, 0x0025}, {0x00, 0x0000, 0x0026}, {0x00, 0x000b, 0x0027}, {0x00, 0x0000, 0x0046}, {0x00, 0x0000, 0x0047}, {0x00, 0x0000, 0x0048}, {0x00, 0x0000, 0x0049}, {0x00, 0x0008, 0x004a}, {0xff, 0x0000, 0x00d0}, {0xff, 0x00d8, 0x00d1}, {0xff, 0x0000, 0x00d4}, {0xff, 0x0000, 0x00d5}, {0x01, 0x00a6, 0x0000}, {0x01, 0x0028, 0x0001}, {0x01, 0x0000, 0x0002}, {0x01, 0x000a, 0x0003}, {0x01, 0x0040, 0x0004}, {0x01, 0x0066, 0x0007}, {0x01, 0x0011, 0x0008}, {0x01, 0x0032, 0x0009}, {0x01, 0x00fd, 0x000a}, {0x01, 0x0038, 0x000b}, {0x01, 0x00d1, 0x000c}, {0x01, 0x00f7, 0x000d}, {0x01, 0x00ed, 0x000e}, {0x01, 0x00d8, 0x000f}, {0x01, 0x0038, 0x0010}, {0x01, 0x00ff, 0x0015}, {0x01, 0x0001, 0x0016}, {0x01, 0x0032, 0x0017}, {0x01, 0x0023, 0x0018}, {0x01, 0x00ce, 0x0019}, {0x01, 0x0023, 0x001a}, {0x01, 0x0032, 0x001b}, {0x01, 0x008d, 0x001c}, {0x01, 0x00ce, 0x001d}, {0x01, 0x008d, 0x001e}, {0x01, 0x0000, 0x001f}, {0x01, 0x0000, 0x0020}, {0x01, 0x00ff, 0x003e}, {0x01, 0x0003, 0x003f}, {0x01, 0x0000, 0x0040}, {0x01, 0x0035, 0x0041}, {0x01, 0x0053, 0x0042}, {0x01, 0x0069, 0x0043}, {0x01, 0x007c, 0x0044}, {0x01, 0x008c, 0x0045}, {0x01, 0x009a, 
0x0046}, {0x01, 0x00a8, 0x0047}, {0x01, 0x00b4, 0x0048}, {0x01, 0x00bf, 0x0049}, {0x01, 0x00ca, 0x004a}, {0x01, 0x00d4, 0x004b}, {0x01, 0x00dd, 0x004c}, {0x01, 0x00e7, 0x004d}, {0x01, 0x00ef, 0x004e}, {0x01, 0x00f8, 0x004f}, {0x01, 0x00ff, 0x0050}, {0x01, 0x0001, 0x0056}, {0x01, 0x0060, 0x0057}, {0x01, 0x0040, 0x0058}, {0x01, 0x0011, 0x0059}, {0x01, 0x0001, 0x005a}, {0x02, 0x0007, 0x0005}, {0x02, 0xa048, 0x0000}, {0x02, 0x0007, 0x0005}, {0x02, 0x0015, 0x0006}, {0x02, 0x100a, 0x0007}, {0x02, 0xa048, 0x0000}, {0x02, 0xc002, 0x0001}, {0x02, 0x000f, 0x0005}, {0x02, 0xa048, 0x0000}, {0x05, 0x0022, 0x0004}, {0x05, 0x0025, 0x0001}, {0x05, 0x0000, 0x0000}, {0x05, 0x0026, 0x0001}, {0x05, 0x0001, 0x0000}, {0x05, 0x0027, 0x0001}, {0x05, 0x0000, 0x0000}, {0x05, 0x0001, 0x0001}, {0x05, 0x0000, 0x0000}, {0x05, 0x0021, 0x0001}, {0x05, 0x00d2, 0x0000}, {0x05, 0x0020, 0x0001}, {0x05, 0x0000, 0x0000}, {0x00, 0x0090, 0x0005}, {0x01, 0x00a6, 0x0000}, {0x05, 0x0026, 0x0001}, {0x05, 0x0001, 0x0000}, {0x05, 0x0027, 0x0001}, {0x05, 0x000f, 0x0000}, {0x01, 0x0003, 0x003f}, {0x01, 0x0001, 0x0056}, {0x01, 0x0011, 0x0008}, {0x01, 0x0032, 0x0009}, {0x01, 0xfffd, 0x000a}, {0x01, 0x0023, 0x000b}, {0x01, 0xffea, 0x000c}, {0x01, 0xfff4, 0x000d}, {0x01, 0xfffc, 0x000e}, {0x01, 0xffe3, 0x000f}, {0x01, 0x001f, 0x0010}, {0x01, 0x00a8, 0x0001}, {0x01, 0x0067, 0x0007}, {0x01, 0x0042, 0x0051}, {0x01, 0x0051, 0x0053}, {0x01, 0x000a, 0x0003}, {0x02, 0xc002, 0x0001}, {0x02, 0x0007, 0x0005}, {0x02, 0xc000, 0x0001}, {0x02, 0x0000, 0x0005}, {0x02, 0x0007, 0x0005}, {0x02, 0x2000, 0x0000}, {0x05, 0x0022, 0x0004}, {0x05, 0x0015, 0x0001}, {0x05, 0x00ea, 0x0000}, {0x05, 0x0021, 0x0001}, {0x05, 0x00d2, 0x0000}, {0x05, 0x0023, 0x0001}, {0x05, 0x0003, 0x0000}, {0x05, 0x0030, 0x0001}, {0x05, 0x002b, 0x0000}, {0x05, 0x0031, 0x0001}, {0x05, 0x0023, 0x0000}, {0x05, 0x0032, 0x0001}, {0x05, 0x0023, 0x0000}, {0x05, 0x0033, 0x0001}, {0x05, 0x0023, 0x0000}, {0x05, 0x0034, 0x0001}, {0x05, 0x0002, 0x0000}, {0x05, 0x0050, 
0x0001}, {0x05, 0x0000, 0x0000}, {0x05, 0x0051, 0x0001}, {0x05, 0x0000, 0x0000}, {0x05, 0x0052, 0x0001}, {0x05, 0x0000, 0x0000}, {0x05, 0x0054, 0x0001}, {0x05, 0x0001, 0x0000}, {0x00, 0x0000, 0x0001}, {0x00, 0x0000, 0x0002}, {0x00, 0x000c, 0x0003}, {0x00, 0x0000, 0x0004}, {0x00, 0x0090, 0x0005}, {0x00, 0x0000, 0x0006}, {0x00, 0x0040, 0x0007}, {0x00, 0x00c0, 0x0008}, {0x00, 0x004a, 0x0009}, {0x00, 0x0000, 0x000a}, {0x00, 0x0000, 0x000b}, {0x00, 0x0001, 0x000c}, {0x00, 0x0001, 0x000d}, {0x00, 0x0000, 0x000e}, {0x00, 0x0002, 0x000f}, {0x00, 0x0001, 0x0010}, {0x00, 0x0000, 0x0011}, {0x00, 0x0000, 0x0012}, {0x00, 0x0002, 0x0020}, {0x00, 0x0080, 0x0021}, {0x00, 0x0001, 0x0022}, {0x00, 0x00e0, 0x0023}, {0x00, 0x0000, 0x0024}, {0x00, 0x00d5, 0x0025}, {0x00, 0x0000, 0x0026}, {0x00, 0x000b, 0x0027}, {0x00, 0x0000, 0x0046}, {0x00, 0x0000, 0x0047}, {0x00, 0x0000, 0x0048}, {0x00, 0x0000, 0x0049}, {0x00, 0x0008, 0x004a}, {0xff, 0x0000, 0x00d0}, {0xff, 0x00d8, 0x00d1}, {0xff, 0x0000, 0x00d4}, {0xff, 0x0000, 0x00d5}, {0x01, 0x00a6, 0x0000}, {0x01, 0x0028, 0x0001}, {0x01, 0x0000, 0x0002}, {0x01, 0x000a, 0x0003}, {0x01, 0x0040, 0x0004}, {0x01, 0x0066, 0x0007}, {0x01, 0x0011, 0x0008}, {0x01, 0x0032, 0x0009}, {0x01, 0x00fd, 0x000a}, {0x01, 0x0038, 0x000b}, {0x01, 0x00d1, 0x000c}, {0x01, 0x00f7, 0x000d}, {0x01, 0x00ed, 0x000e}, {0x01, 0x00d8, 0x000f}, {0x01, 0x0038, 0x0010}, {0x01, 0x00ff, 0x0015}, {0x01, 0x0001, 0x0016}, {0x01, 0x0032, 0x0017}, {0x01, 0x0023, 0x0018}, {0x01, 0x00ce, 0x0019}, {0x01, 0x0023, 0x001a}, {0x01, 0x0032, 0x001b}, {0x01, 0x008d, 0x001c}, {0x01, 0x00ce, 0x001d}, {0x01, 0x008d, 0x001e}, {0x01, 0x0000, 0x001f}, {0x01, 0x0000, 0x0020}, {0x01, 0x00ff, 0x003e}, {0x01, 0x0003, 0x003f}, {0x01, 0x0000, 0x0040}, {0x01, 0x0035, 0x0041}, {0x01, 0x0053, 0x0042}, {0x01, 0x0069, 0x0043}, {0x01, 0x007c, 0x0044}, {0x01, 0x008c, 0x0045}, {0x01, 0x009a, 0x0046}, {0x01, 0x00a8, 0x0047}, {0x01, 0x00b4, 0x0048}, {0x01, 0x00bf, 0x0049}, {0x01, 0x00ca, 0x004a}, {0x01, 0x00d4, 
0x004b}, {0x01, 0x00dd, 0x004c}, {0x01, 0x00e7, 0x004d}, {0x01, 0x00ef, 0x004e}, {0x01, 0x00f8, 0x004f}, {0x01, 0x00ff, 0x0050}, {0x01, 0x0001, 0x0056}, {0x01, 0x0060, 0x0057}, {0x01, 0x0040, 0x0058}, {0x01, 0x0011, 0x0059}, {0x01, 0x0001, 0x005a}, {0x02, 0x0007, 0x0005}, {0x02, 0xa048, 0x0000}, {0x02, 0x0007, 0x0005}, {0x02, 0x0015, 0x0006}, {0x02, 0x100a, 0x0007}, {0x02, 0xa048, 0x0000}, {0x02, 0xc002, 0x0001}, {0x02, 0x000f, 0x0005}, {0x02, 0xa048, 0x0000}, {0x05, 0x0022, 0x0004}, {0x05, 0x0025, 0x0001}, {0x05, 0x0000, 0x0000}, {0x05, 0x0026, 0x0001}, {0x05, 0x0001, 0x0000}, {0x05, 0x0027, 0x0001}, {0x05, 0x0000, 0x0000}, {0x05, 0x0001, 0x0001}, {0x05, 0x0000, 0x0000}, {0x05, 0x0021, 0x0001}, {0x05, 0x00d2, 0x0000}, {0x05, 0x0020, 0x0001}, {0x05, 0x0000, 0x0000}, {0x00, 0x0090, 0x0005}, {0x01, 0x00a6, 0x0000}, {0x02, 0x0007, 0x0005}, {0x02, 0x2000, 0x0000}, {0x05, 0x0022, 0x0004}, {0x05, 0x0015, 0x0001}, {0x05, 0x00ea, 0x0000}, {0x05, 0x0021, 0x0001}, {0x05, 0x00d2, 0x0000}, {0x05, 0x0023, 0x0001}, {0x05, 0x0003, 0x0000}, {0x05, 0x0030, 0x0001}, {0x05, 0x002b, 0x0000}, {0x05, 0x0031, 0x0001}, {0x05, 0x0023, 0x0000}, {0x05, 0x0032, 0x0001}, {0x05, 0x0023, 0x0000}, {0x05, 0x0033, 0x0001}, {0x05, 0x0023, 0x0000}, {0x05, 0x0034, 0x0001}, {0x05, 0x0002, 0x0000}, {0x05, 0x0050, 0x0001}, {0x05, 0x0000, 0x0000}, {0x05, 0x0051, 0x0001}, {0x05, 0x0000, 0x0000}, {0x05, 0x0052, 0x0001}, {0x05, 0x0000, 0x0000}, {0x05, 0x0054, 0x0001}, {0x05, 0x0001, 0x0000}, {0x00, 0x0000, 0x0001}, {0x00, 0x0000, 0x0002}, {0x00, 0x000c, 0x0003}, {0x00, 0x0000, 0x0004}, {0x00, 0x0090, 0x0005}, {0x00, 0x0000, 0x0006}, {0x00, 0x0040, 0x0007}, {0x00, 0x00c0, 0x0008}, {0x00, 0x004a, 0x0009}, {0x00, 0x0000, 0x000a}, {0x00, 0x0000, 0x000b}, {0x00, 0x0001, 0x000c}, {0x00, 0x0001, 0x000d}, {0x00, 0x0000, 0x000e}, {0x00, 0x0002, 0x000f}, {0x00, 0x0001, 0x0010}, {0x00, 0x0000, 0x0011}, {0x00, 0x0000, 0x0012}, {0x00, 0x0002, 0x0020}, {0x00, 0x0080, 0x0021}, {0x00, 0x0001, 0x0022}, {0x00, 0x00e0, 
0x0023}, {0x00, 0x0000, 0x0024}, {0x00, 0x00d5, 0x0025}, {0x00, 0x0000, 0x0026}, {0x00, 0x000b, 0x0027}, {0x00, 0x0000, 0x0046}, {0x00, 0x0000, 0x0047}, {0x00, 0x0000, 0x0048}, {0x00, 0x0000, 0x0049}, {0x00, 0x0008, 0x004a}, {0xff, 0x0000, 0x00d0}, {0xff, 0x00d8, 0x00d1}, {0xff, 0x0000, 0x00d4}, {0xff, 0x0000, 0x00d5}, {0x01, 0x00a6, 0x0000}, {0x01, 0x0028, 0x0001}, {0x01, 0x0000, 0x0002}, {0x01, 0x000a, 0x0003}, {0x01, 0x0040, 0x0004}, {0x01, 0x0066, 0x0007}, {0x01, 0x0011, 0x0008}, {0x01, 0x0032, 0x0009}, {0x01, 0x00fd, 0x000a}, {0x01, 0x0038, 0x000b}, {0x01, 0x00d1, 0x000c}, {0x01, 0x00f7, 0x000d}, {0x01, 0x00ed, 0x000e}, {0x01, 0x00d8, 0x000f}, {0x01, 0x0038, 0x0010}, {0x01, 0x00ff, 0x0015}, {0x01, 0x0001, 0x0016}, {0x01, 0x0032, 0x0017}, {0x01, 0x0023, 0x0018}, {0x01, 0x00ce, 0x0019}, {0x01, 0x0023, 0x001a}, {0x01, 0x0032, 0x001b}, {0x01, 0x008d, 0x001c}, {0x01, 0x00ce, 0x001d}, {0x01, 0x008d, 0x001e}, {0x01, 0x0000, 0x001f}, {0x01, 0x0000, 0x0020}, {0x01, 0x00ff, 0x003e}, {0x01, 0x0003, 0x003f}, {0x01, 0x0000, 0x0040}, {0x01, 0x0035, 0x0041}, {0x01, 0x0053, 0x0042}, {0x01, 0x0069, 0x0043}, {0x01, 0x007c, 0x0044}, {0x01, 0x008c, 0x0045}, {0x01, 0x009a, 0x0046}, {0x01, 0x00a8, 0x0047}, {0x01, 0x00b4, 0x0048}, {0x01, 0x00bf, 0x0049}, {0x01, 0x00ca, 0x004a}, {0x01, 0x00d4, 0x004b}, {0x01, 0x00dd, 0x004c}, {0x01, 0x00e7, 0x004d}, {0x01, 0x00ef, 0x004e}, {0x01, 0x00f8, 0x004f}, {0x01, 0x00ff, 0x0050}, {0x01, 0x0001, 0x0056}, {0x01, 0x0060, 0x0057}, {0x01, 0x0040, 0x0058}, {0x01, 0x0011, 0x0059}, {0x01, 0x0001, 0x005a}, {0x02, 0x0007, 0x0005}, {0x02, 0xa048, 0x0000}, {0x02, 0x0007, 0x0005}, {0x02, 0x0015, 0x0006}, {0x02, 0x100a, 0x0007}, {0x02, 0xa048, 0x0000}, {0x02, 0xc002, 0x0001}, {0x02, 0x000f, 0x0005}, {0x02, 0xa048, 0x0000}, {0x05, 0x0022, 0x0004}, {0x05, 0x0025, 0x0001}, {0x05, 0x0000, 0x0000}, {0x05, 0x0026, 0x0001}, {0x05, 0x0001, 0x0000}, {0x05, 0x0027, 0x0001}, {0x05, 0x0000, 0x0000}, {0x05, 0x0001, 0x0001}, {0x05, 0x0000, 0x0000}, {0x05, 0x0021, 
0x0001}, {0x05, 0x00d2, 0x0000}, {0x05, 0x0020, 0x0001}, {0x05, 0x0000, 0x0000}, {0x00, 0x0090, 0x0005}, {0x01, 0x00a6, 0x0000}, {0x05, 0x0026, 0x0001}, {0x05, 0x0001, 0x0000}, {0x05, 0x0027, 0x0001}, {0x05, 0x001e, 0x0000}, {0x01, 0x0003, 0x003f}, {0x01, 0x0001, 0x0056}, {0x01, 0x0011, 0x0008}, {0x01, 0x0032, 0x0009}, {0x01, 0xfffd, 0x000a}, {0x01, 0x0023, 0x000b}, {0x01, 0xffea, 0x000c}, {0x01, 0xfff4, 0x000d}, {0x01, 0xfffc, 0x000e}, {0x01, 0xffe3, 0x000f}, {0x01, 0x001f, 0x0010}, {0x01, 0x00a8, 0x0001}, {0x01, 0x0067, 0x0007}, {0x01, 0x0042, 0x0051}, {0x01, 0x0051, 0x0053}, {0x01, 0x000a, 0x0003}, {0x02, 0xc002, 0x0001}, {0x02, 0x0007, 0x0005}, {0x01, 0x0042, 0x0051}, {0x01, 0x0051, 0x0053}, {0x05, 0x0026, 0x0001}, {0x05, 0x0001, 0x0000}, {0x05, 0x0027, 0x0001}, {0x05, 0x002d, 0x0000}, {0x01, 0x0003, 0x003f}, {0x01, 0x0001, 0x0056}, {0x02, 0xc000, 0x0001}, {0x02, 0x0000, 0x0005}, {} }; /* Unknown camera from Ori Usbid 0x0000:0x0000 */ /* Based on snoops from Ori Cohen */ static const __u16 spca501c_mysterious_open_data[][3] = { {0x02, 0x000f, 0x0005}, {0x02, 0xa048, 0x0000}, {0x05, 0x0022, 0x0004}, /* DSP Registers */ {0x01, 0x0016, 0x0011}, /* RGB offset */ {0x01, 0x0000, 0x0012}, {0x01, 0x0006, 0x0013}, {0x01, 0x0078, 0x0051}, {0x01, 0x0040, 0x0052}, {0x01, 0x0046, 0x0053}, {0x01, 0x0040, 0x0054}, {0x00, 0x0025, 0x0000}, /* {0x00, 0x0000, 0x0000 }, */ /* Part 2 */ /* TG Registers */ {0x00, 0x0026, 0x0000}, {0x00, 0x0001, 0x0000}, {0x00, 0x0027, 0x0000}, {0x00, 0x008a, 0x0000}, {0x02, 0x0007, 0x0005}, {0x02, 0x2000, 0x0000}, {0x05, 0x0022, 0x0004}, {0x05, 0x0015, 0x0001}, {0x05, 0x00ea, 0x0000}, {0x05, 0x0021, 0x0001}, {0x05, 0x00d2, 0x0000}, {0x05, 0x0023, 0x0001}, {0x05, 0x0003, 0x0000}, {0x05, 0x0030, 0x0001}, {0x05, 0x002b, 0x0000}, {0x05, 0x0031, 0x0001}, {0x05, 0x0023, 0x0000}, {0x05, 0x0032, 0x0001}, {0x05, 0x0023, 0x0000}, {0x05, 0x0033, 0x0001}, {0x05, 0x0023, 0x0000}, {0x05, 0x0034, 0x0001}, {0x05, 0x0002, 0x0000}, {0x05, 0x0050, 0x0001}, {0x05, 
0x0000, 0x0000}, {0x05, 0x0051, 0x0001}, {0x05, 0x0000, 0x0000}, {0x05, 0x0052, 0x0001}, {0x05, 0x0000, 0x0000}, {0x05, 0x0054, 0x0001}, {0x05, 0x0001, 0x0000}, {} }; /* Based on snoops from Ori Cohen */ static const __u16 spca501c_mysterious_init_data[][3] = { /* Part 3 */ /* TG registers */ /* {0x00, 0x0000, 0x0000}, */ {0x00, 0x0000, 0x0001}, {0x00, 0x0000, 0x0002}, {0x00, 0x0006, 0x0003}, {0x00, 0x0000, 0x0004}, {0x00, 0x0090, 0x0005}, {0x00, 0x0000, 0x0006}, {0x00, 0x0040, 0x0007}, {0x00, 0x00c0, 0x0008}, {0x00, 0x004a, 0x0009}, {0x00, 0x0000, 0x000a}, {0x00, 0x0000, 0x000b}, {0x00, 0x0001, 0x000c}, {0x00, 0x0001, 0x000d}, {0x00, 0x0000, 0x000e}, {0x00, 0x0002, 0x000f}, {0x00, 0x0001, 0x0010}, {0x00, 0x0000, 0x0011}, {0x00, 0x0001, 0x0012}, {0x00, 0x0002, 0x0020}, {0x00, 0x0080, 0x0021}, /* 640 */ {0x00, 0x0001, 0x0022}, {0x00, 0x00e0, 0x0023}, /* 480 */ {0x00, 0x0000, 0x0024}, /* Offset H hight */ {0x00, 0x00d3, 0x0025}, /* low */ {0x00, 0x0000, 0x0026}, /* Offset V */ {0x00, 0x000d, 0x0027}, /* low */ {0x00, 0x0000, 0x0046}, {0x00, 0x0000, 0x0047}, {0x00, 0x0000, 0x0048}, {0x00, 0x0000, 0x0049}, {0x00, 0x0008, 0x004a}, /* DSP Registers */ {0x01, 0x00a6, 0x0000}, {0x01, 0x0028, 0x0001}, {0x01, 0x0000, 0x0002}, {0x01, 0x000a, 0x0003}, /* Level Calc bit7 ->1 Auto */ {0x01, 0x0040, 0x0004}, {0x01, 0x0066, 0x0007}, {0x01, 0x000f, 0x0008}, /* A11 Color correction coeff */ {0x01, 0x002d, 0x0009}, /* A12 */ {0x01, 0x0005, 0x000a}, /* A13 */ {0x01, 0x0023, 0x000b}, /* A21 */ {0x01, 0x00e0, 0x000c}, /* A22 */ {0x01, 0x00fd, 0x000d}, /* A23 */ {0x01, 0x00f4, 0x000e}, /* A31 */ {0x01, 0x00e4, 0x000f}, /* A32 */ {0x01, 0x0028, 0x0010}, /* A33 */ {0x01, 0x00ff, 0x0015}, /* Reserved */ {0x01, 0x0001, 0x0016}, /* Reserved */ {0x01, 0x0032, 0x0017}, /* Win1 Start begin */ {0x01, 0x0023, 0x0018}, {0x01, 0x00ce, 0x0019}, {0x01, 0x0023, 0x001a}, {0x01, 0x0032, 0x001b}, {0x01, 0x008d, 0x001c}, {0x01, 0x00ce, 0x001d}, {0x01, 0x008d, 0x001e}, {0x01, 0x0000, 0x001f}, {0x01, 0x0000, 
0x0020}, /* Win1 Start end */ {0x01, 0x00ff, 0x003e}, /* Reserved begin */ {0x01, 0x0002, 0x003f}, {0x01, 0x0000, 0x0040}, {0x01, 0x0035, 0x0041}, {0x01, 0x0053, 0x0042}, {0x01, 0x0069, 0x0043}, {0x01, 0x007c, 0x0044}, {0x01, 0x008c, 0x0045}, {0x01, 0x009a, 0x0046}, {0x01, 0x00a8, 0x0047}, {0x01, 0x00b4, 0x0048}, {0x01, 0x00bf, 0x0049}, {0x01, 0x00ca, 0x004a}, {0x01, 0x00d4, 0x004b}, {0x01, 0x00dd, 0x004c}, {0x01, 0x00e7, 0x004d}, {0x01, 0x00ef, 0x004e}, {0x01, 0x00f8, 0x004f}, {0x01, 0x00ff, 0x0050}, {0x01, 0x0003, 0x0056}, /* Reserved end */ {0x01, 0x0060, 0x0057}, /* Edge Gain */ {0x01, 0x0040, 0x0058}, {0x01, 0x0011, 0x0059}, /* Edge Bandwidth */ {0x01, 0x0001, 0x005a}, {0x02, 0x0007, 0x0005}, {0x02, 0xa048, 0x0000}, {0x02, 0x0007, 0x0005}, {0x02, 0x0015, 0x0006}, {0x02, 0x200a, 0x0007}, {0x02, 0xa048, 0x0000}, {0x02, 0xc000, 0x0001}, {0x02, 0x000f, 0x0005}, {0x02, 0xa048, 0x0000}, {0x05, 0x0022, 0x0004}, {0x05, 0x0025, 0x0001}, {0x05, 0x0000, 0x0000}, /* Part 4 */ {0x05, 0x0026, 0x0001}, {0x05, 0x0001, 0x0000}, {0x05, 0x0027, 0x0001}, {0x05, 0x0000, 0x0000}, {0x05, 0x0001, 0x0001}, {0x05, 0x0000, 0x0000}, {0x05, 0x0021, 0x0001}, {0x05, 0x00d2, 0x0000}, {0x05, 0x0020, 0x0001}, {0x05, 0x0000, 0x0000}, {0x00, 0x0090, 0x0005}, {0x01, 0x00a6, 0x0000}, {0x02, 0x0000, 0x0005}, {0x05, 0x0026, 0x0001}, {0x05, 0x0001, 0x0000}, {0x05, 0x0027, 0x0001}, {0x05, 0x004e, 0x0000}, /* Part 5 */ {0x01, 0x0003, 0x003f}, {0x01, 0x0001, 0x0056}, {0x01, 0x000f, 0x0008}, {0x01, 0x002d, 0x0009}, {0x01, 0x0005, 0x000a}, {0x01, 0x0023, 0x000b}, {0x01, 0xffe0, 0x000c}, {0x01, 0xfffd, 0x000d}, {0x01, 0xfff4, 0x000e}, {0x01, 0xffe4, 0x000f}, {0x01, 0x0028, 0x0010}, {0x01, 0x00a8, 0x0001}, {0x01, 0x0066, 0x0007}, {0x01, 0x0032, 0x0017}, {0x01, 0x0023, 0x0018}, {0x01, 0x00ce, 0x0019}, {0x01, 0x0023, 0x001a}, {0x01, 0x0032, 0x001b}, {0x01, 0x008d, 0x001c}, {0x01, 0x00ce, 0x001d}, {0x01, 0x008d, 0x001e}, {0x01, 0x00c8, 0x0015}, /* c8 Poids fort Luma */ {0x01, 0x0032, 0x0016}, /* 32 */ {0x01, 
0x0016, 0x0011}, /* R 00 */ {0x01, 0x0016, 0x0012}, /* G 00 */ {0x01, 0x0016, 0x0013}, /* B 00 */ {0x01, 0x000a, 0x0003}, {0x02, 0xc002, 0x0001}, {0x02, 0x0007, 0x0005}, {} }; static int reg_write(struct gspca_dev *gspca_dev, __u16 req, __u16 index, __u16 value) { int ret; struct usb_device *dev = gspca_dev->dev; ret = usb_control_msg(dev, usb_sndctrlpipe(dev, 0), req, USB_TYPE_VENDOR | USB_RECIP_DEVICE, value, index, NULL, 0, 500); gspca_dbg(gspca_dev, D_USBO, "reg write: 0x%02x 0x%02x 0x%02x\n", req, index, value); if (ret < 0) pr_err("reg write: error %d\n", ret); return ret; } static int write_vector(struct gspca_dev *gspca_dev, const __u16 data[][3]) { int ret, i = 0; while (data[i][0] != 0 || data[i][1] != 0 || data[i][2] != 0) { ret = reg_write(gspca_dev, data[i][0], data[i][2], data[i][1]); if (ret < 0) { gspca_err(gspca_dev, "Reg write failed for 0x%02x,0x%02x,0x%02x\n", data[i][0], data[i][1], data[i][2]); return ret; } i++; } return 0; } static void setbrightness(struct gspca_dev *gspca_dev, s32 val) { reg_write(gspca_dev, SPCA501_REG_CCDSP, 0x12, val); } static void setcontrast(struct gspca_dev *gspca_dev, s32 val) { reg_write(gspca_dev, 0x00, 0x00, (val >> 8) & 0xff); reg_write(gspca_dev, 0x00, 0x01, val & 0xff); } static void setcolors(struct gspca_dev *gspca_dev, s32 val) { reg_write(gspca_dev, SPCA501_REG_CCDSP, 0x0c, val); } static void setblue_balance(struct gspca_dev *gspca_dev, s32 val) { reg_write(gspca_dev, SPCA501_REG_CCDSP, 0x11, val); } static void setred_balance(struct gspca_dev *gspca_dev, s32 val) { reg_write(gspca_dev, SPCA501_REG_CCDSP, 0x13, val); } /* this function is called at probe time */ static int sd_config(struct gspca_dev *gspca_dev, const struct usb_device_id *id) { struct sd *sd = (struct sd *) gspca_dev; struct cam *cam; cam = &gspca_dev->cam; cam->cam_mode = vga_mode; cam->nmodes = ARRAY_SIZE(vga_mode); sd->subtype = id->driver_info; return 0; } /* this function is called at probe and resume time */ static int 
sd_init(struct gspca_dev *gspca_dev) { struct sd *sd = (struct sd *) gspca_dev; switch (sd->subtype) { case Arowana300KCMOSCamera: case SmileIntlCamera: /* Arowana 300k CMOS Camera data */ if (write_vector(gspca_dev, spca501c_arowana_init_data)) goto error; break; case MystFromOriUnknownCamera: /* Unknown Ori CMOS Camera data */ if (write_vector(gspca_dev, spca501c_mysterious_open_data)) goto error; break; default: /* generic spca501 init data */ if (write_vector(gspca_dev, spca501_init_data)) goto error; break; } gspca_dbg(gspca_dev, D_STREAM, "Initializing SPCA501 finished\n"); return 0; error: return -EINVAL; } static int sd_start(struct gspca_dev *gspca_dev) { struct sd *sd = (struct sd *) gspca_dev; int mode; switch (sd->subtype) { case ThreeComHomeConnectLite: /* Special handling for 3com data */ write_vector(gspca_dev, spca501_3com_open_data); break; case Arowana300KCMOSCamera: case SmileIntlCamera: /* Arowana 300k CMOS Camera data */ write_vector(gspca_dev, spca501c_arowana_open_data); break; case MystFromOriUnknownCamera: /* Unknown CMOS Camera data */ write_vector(gspca_dev, spca501c_mysterious_init_data); break; default: /* Generic 501 open data */ write_vector(gspca_dev, spca501_open_data); } /* memorize the wanted pixel format */ mode = gspca_dev->cam.cam_mode[(int) gspca_dev->curr_mode].priv; /* Enable ISO packet machine CTRL reg=2, * index=1 bitmask=0x2 (bit ordinal 1) */ reg_write(gspca_dev, SPCA50X_REG_USB, 0x6, 0x94); switch (mode) { case 0: /* 640x480 */ reg_write(gspca_dev, SPCA50X_REG_USB, 0x07, 0x004a); break; case 1: /* 320x240 */ reg_write(gspca_dev, SPCA50X_REG_USB, 0x07, 0x104a); break; default: /* case 2: * 160x120 */ reg_write(gspca_dev, SPCA50X_REG_USB, 0x07, 0x204a); break; } reg_write(gspca_dev, SPCA501_REG_CTLRL, 0x01, 0x02); return 0; } static void sd_stopN(struct gspca_dev *gspca_dev) { /* Disable ISO packet * machine CTRL reg=2, index=1 bitmask=0x0 (bit ordinal 1) */ reg_write(gspca_dev, SPCA501_REG_CTLRL, 0x01, 0x00); } /* called 
on streamoff with alt 0 and on disconnect */ static void sd_stop0(struct gspca_dev *gspca_dev) { if (!gspca_dev->present) return; reg_write(gspca_dev, SPCA501_REG_CTLRL, 0x05, 0x00); } static void sd_pkt_scan(struct gspca_dev *gspca_dev, u8 *data, /* isoc packet */ int len) /* iso packet length */ { switch (data[0]) { case 0: /* start of frame */ gspca_frame_add(gspca_dev, LAST_PACKET, NULL, 0); data += SPCA501_OFFSET_DATA; len -= SPCA501_OFFSET_DATA; gspca_frame_add(gspca_dev, FIRST_PACKET, data, len); return; case 0xff: /* drop */ /* gspca_dev->last_packet_type = DISCARD_PACKET; */ return; } data++; len--; gspca_frame_add(gspca_dev, INTER_PACKET, data, len); } static int sd_s_ctrl(struct v4l2_ctrl *ctrl) { struct gspca_dev *gspca_dev = container_of(ctrl->handler, struct gspca_dev, ctrl_handler); gspca_dev->usb_err = 0; if (!gspca_dev->streaming) return 0; switch (ctrl->id) { case V4L2_CID_BRIGHTNESS: setbrightness(gspca_dev, ctrl->val); break; case V4L2_CID_CONTRAST: setcontrast(gspca_dev, ctrl->val); break; case V4L2_CID_SATURATION: setcolors(gspca_dev, ctrl->val); break; case V4L2_CID_BLUE_BALANCE: setblue_balance(gspca_dev, ctrl->val); break; case V4L2_CID_RED_BALANCE: setred_balance(gspca_dev, ctrl->val); break; } return gspca_dev->usb_err; } static const struct v4l2_ctrl_ops sd_ctrl_ops = { .s_ctrl = sd_s_ctrl, }; static int sd_init_controls(struct gspca_dev *gspca_dev) { struct v4l2_ctrl_handler *hdl = &gspca_dev->ctrl_handler; gspca_dev->vdev.ctrl_handler = hdl; v4l2_ctrl_handler_init(hdl, 5); v4l2_ctrl_new_std(hdl, &sd_ctrl_ops, V4L2_CID_BRIGHTNESS, 0, 127, 1, 0); v4l2_ctrl_new_std(hdl, &sd_ctrl_ops, V4L2_CID_CONTRAST, 0, 64725, 1, 64725); v4l2_ctrl_new_std(hdl, &sd_ctrl_ops, V4L2_CID_SATURATION, 0, 63, 1, 20); v4l2_ctrl_new_std(hdl, &sd_ctrl_ops, V4L2_CID_BLUE_BALANCE, 0, 127, 1, 0); v4l2_ctrl_new_std(hdl, &sd_ctrl_ops, V4L2_CID_RED_BALANCE, 0, 127, 1, 0); if (hdl->error) { pr_err("Could not initialize controls\n"); return hdl->error; } return 0; } /* 
sub-driver description */ static const struct sd_desc sd_desc = { .name = MODULE_NAME, .config = sd_config, .init = sd_init, .init_controls = sd_init_controls, .start = sd_start, .stopN = sd_stopN, .stop0 = sd_stop0, .pkt_scan = sd_pkt_scan, }; /* -- module initialisation -- */ static const struct usb_device_id device_table[] = { {USB_DEVICE(0x040a, 0x0002), .driver_info = KodakDVC325}, {USB_DEVICE(0x0497, 0xc001), .driver_info = SmileIntlCamera}, {USB_DEVICE(0x0506, 0x00df), .driver_info = ThreeComHomeConnectLite}, {USB_DEVICE(0x0733, 0x0401), .driver_info = IntelCreateAndShare}, {USB_DEVICE(0x0733, 0x0402), .driver_info = ViewQuestM318B}, {USB_DEVICE(0x1776, 0x501c), .driver_info = Arowana300KCMOSCamera}, {USB_DEVICE(0x0000, 0x0000), .driver_info = MystFromOriUnknownCamera}, {} }; MODULE_DEVICE_TABLE(usb, device_table); /* -- device connect -- */ static int sd_probe(struct usb_interface *intf, const struct usb_device_id *id) { return gspca_dev_probe(intf, id, &sd_desc, sizeof(struct sd), THIS_MODULE); } static struct usb_driver sd_driver = { .name = MODULE_NAME, .id_table = device_table, .probe = sd_probe, .disconnect = gspca_disconnect, #ifdef CONFIG_PM .suspend = gspca_suspend, .resume = gspca_resume, .reset_resume = gspca_resume, #endif }; module_usb_driver(sd_driver);
3 4 4 3 3 3 3 2 2 2 2 2 3 1 2 2 3 3 3 3 3 3 4 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 
516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 // SPDX-License-Identifier: GPL-2.0-or-later /* * dlmfs.c * * Code which implements the kernel side of a minimal userspace * interface to our DLM. This file handles the virtual file system * used for communication with userspace. Credit should go to ramfs, * which was a template for the fs side of this module. * * Copyright (C) 2003, 2004 Oracle. All rights reserved. */ /* Simple VFS hooks based on: */ /* * Resizable simple ram filesystem for Linux. * * Copyright (C) 2000 Linus Torvalds. * 2000 Transmeta Corp. */ #include <linux/module.h> #include <linux/fs.h> #include <linux/fs_context.h> #include <linux/pagemap.h> #include <linux/types.h> #include <linux/slab.h> #include <linux/highmem.h> #include <linux/init.h> #include <linux/string.h> #include <linux/backing-dev.h> #include <linux/poll.h> #include <linux/uaccess.h> #include "../stackglue.h" #include "userdlm.h" #define MLOG_MASK_PREFIX ML_DLMFS #include "../cluster/masklog.h" static const struct super_operations dlmfs_ops; static const struct file_operations dlmfs_file_operations; static const struct inode_operations dlmfs_dir_inode_operations; static const struct inode_operations dlmfs_root_inode_operations; static const struct inode_operations dlmfs_file_inode_operations; static struct kmem_cache *dlmfs_inode_cache; struct workqueue_struct *user_dlm_worker; /* * These are the ABI capabilities of dlmfs. * * Over time, dlmfs has added some features that were not part of the * initial ABI. 
Unfortunately, some of these features are not detectable * via standard usage. For example, Linux's default poll always returns * EPOLLIN, so there is no way for a caller of poll(2) to know when dlmfs * added poll support. Instead, we provide this list of new capabilities. * * Capabilities is a read-only attribute. We do it as a module parameter * so we can discover it whether dlmfs is built in, loaded, or even not * loaded. * * The ABI features are local to this machine's dlmfs mount. This is * distinct from the locking protocol, which is concerned with inter-node * interaction. * * Capabilities: * - bast : EPOLLIN against the file descriptor of a held lock * signifies a bast fired on the lock. */ #define DLMFS_CAPABILITIES "bast stackglue" static int param_set_dlmfs_capabilities(const char *val, const struct kernel_param *kp) { printk(KERN_ERR "%s: readonly parameter\n", kp->name); return -EINVAL; } static int param_get_dlmfs_capabilities(char *buffer, const struct kernel_param *kp) { return sysfs_emit(buffer, DLMFS_CAPABILITIES); } module_param_call(capabilities, param_set_dlmfs_capabilities, param_get_dlmfs_capabilities, NULL, 0444); MODULE_PARM_DESC(capabilities, DLMFS_CAPABILITIES); /* * decodes a set of open flags into a valid lock level and a set of flags. 
* returns < 0 if we have invalid flags * flags which mean something to us: * O_RDONLY -> PRMODE level * O_WRONLY -> EXMODE level * * O_NONBLOCK -> NOQUEUE */ static int dlmfs_decode_open_flags(int open_flags, int *level, int *flags) { if (open_flags & (O_WRONLY|O_RDWR)) *level = DLM_LOCK_EX; else *level = DLM_LOCK_PR; *flags = 0; if (open_flags & O_NONBLOCK) *flags |= DLM_LKF_NOQUEUE; return 0; } static int dlmfs_file_open(struct inode *inode, struct file *file) { int status, level, flags; struct dlmfs_filp_private *fp = NULL; struct dlmfs_inode_private *ip; if (S_ISDIR(inode->i_mode)) BUG(); mlog(0, "open called on inode %llu, flags 0x%x\n", inode->i_ino, file->f_flags); status = dlmfs_decode_open_flags(file->f_flags, &level, &flags); if (status < 0) goto bail; /* We don't want to honor O_APPEND at read/write time as it * doesn't make sense for LVB writes. */ file->f_flags &= ~O_APPEND; fp = kmalloc_obj(*fp, GFP_NOFS); if (!fp) { status = -ENOMEM; goto bail; } fp->fp_lock_level = level; ip = DLMFS_I(inode); status = user_dlm_cluster_lock(&ip->ip_lockres, level, flags); if (status < 0) { /* this is a strange error to return here but I want * to be able userspace to be able to distinguish a * valid lock request from one that simply couldn't be * granted. */ if (flags & DLM_LKF_NOQUEUE && status == -EAGAIN) status = -ETXTBSY; kfree(fp); goto bail; } file->private_data = fp; bail: return status; } static int dlmfs_file_release(struct inode *inode, struct file *file) { int level; struct dlmfs_inode_private *ip = DLMFS_I(inode); struct dlmfs_filp_private *fp = file->private_data; if (S_ISDIR(inode->i_mode)) BUG(); mlog(0, "close called on inode %llu\n", inode->i_ino); if (fp) { level = fp->fp_lock_level; if (level != DLM_LOCK_IV) user_dlm_cluster_unlock(&ip->ip_lockres, level); kfree(fp); file->private_data = NULL; } return 0; } /* * We do ->setattr() just to override size changes. Our size is the size * of the LVB and nothing else. 
*/
/*
 * Clears ATTR_SIZE from the request before validating and copying the
 * remaining attributes, so the inode size is never changed here.
 * NOTE(review): both helpers are called with &nop_mnt_idmap rather than the
 * @idmap argument -- confirm that is intended.
 */
static int dlmfs_file_setattr(struct mnt_idmap *idmap,
			      struct dentry *dentry, struct iattr *attr)
{
	int error;
	struct inode *inode = d_inode(dentry);

	attr->ia_valid &= ~ATTR_SIZE;
	error = setattr_prepare(&nop_mnt_idmap, dentry, attr);
	if (error)
		return error;

	setattr_copy(&nop_mnt_idmap, inode, attr);
	mark_inode_dirty(inode);
	return 0;
}

/*
 * ->poll(): waits on the lock resource's l_event queue and reports
 * EPOLLIN | EPOLLRDNORM while USER_LOCK_BLOCKED is set in l_flags
 * (sampled under l_lock); otherwise reports no events.
 */
static __poll_t dlmfs_file_poll(struct file *file, poll_table *wait)
{
	__poll_t event = 0;
	struct inode *inode = file_inode(file);
	struct dlmfs_inode_private *ip = DLMFS_I(inode);

	poll_wait(file, &ip->ip_lockres.l_event, wait);

	spin_lock(&ip->ip_lockres.l_lock);
	if (ip->ip_lockres.l_flags & USER_LOCK_BLOCKED)
		event = EPOLLIN | EPOLLRDNORM;
	spin_unlock(&ip->ip_lockres.l_lock);

	return event;
}

/*
 * ->read(): fetches the LVB into a local DLM_LVB_LEN buffer and copies the
 * requested window to userspace.  Returns 0 when user_dlm_read_lvb()
 * returns zero, otherwise the result of simple_read_from_buffer().
 */
static ssize_t dlmfs_file_read(struct file *file,
			       char __user *buf,
			       size_t count,
			       loff_t *ppos)
{
	char lvb[DLM_LVB_LEN];

	if (!user_dlm_read_lvb(file_inode(file), lvb))
		return 0;

	return simple_read_from_buffer(buf, count, ppos, lvb, sizeof(lvb));
}

/*
 * ->write(): copies up to DLM_LVB_LEN - *ppos bytes from userspace and
 * hands them to user_dlm_write_lvb().
 *
 * Returns -ENOSPC when *ppos is at or past DLM_LVB_LEN, otherwise the
 * number of bytes actually copied (which is 0 when nothing could be copied
 * from @buf); *ppos is advanced by that amount.
 * NOTE(review): the data is staged at the start of lvb_buf and passed with
 * only a length, independent of *ppos -- confirm how user_dlm_write_lvb()
 * is expected to treat a non-zero file position.
 */
static ssize_t dlmfs_file_write(struct file *filp,
				const char __user *buf,
				size_t count,
				loff_t *ppos)
{
	char lvb_buf[DLM_LVB_LEN];
	int bytes_left;
	struct inode *inode = file_inode(filp);

	mlog(0, "inode %llu, count = %zu, *ppos = %llu\n",
		inode->i_ino, count, *ppos);

	if (*ppos >= DLM_LVB_LEN)
		return -ENOSPC;

	/* don't write past the lvb */
	if (count > DLM_LVB_LEN - *ppos)
		count = DLM_LVB_LEN - *ppos;

	if (!count)
		return 0;

	/* copy_from_user() returns the number of bytes it could NOT copy */
	bytes_left = copy_from_user(lvb_buf, buf, count);
	count -= bytes_left;
	if (count)
		user_dlm_write_lvb(inode, lvb_buf, count);

	*ppos = *ppos + count;
	mlog(0, "wrote %zu bytes\n", count);
	return count;
}

/*
 * Constructor passed to kmem_cache_create() for the inode cache: clears the
 * dlmfs-private pointers and runs inode_init_once() on the embedded inode.
 */
static void dlmfs_init_once(void *foo)
{
	struct dlmfs_inode_private *ip =
		(struct dlmfs_inode_private *) foo;

	ip->ip_conn = NULL;
	ip->ip_parent = NULL;

	inode_init_once(&ip->ip_vfs_inode);
}

/*
 * ->alloc_inode(): allocates a dlmfs_inode_private from dlmfs_inode_cache
 * and returns its embedded VFS inode, or NULL on allocation failure.
 */
static struct inode *dlmfs_alloc_inode(struct super_block *sb)
{
	struct dlmfs_inode_private *ip;

	ip = alloc_inode_sb(sb, dlmfs_inode_cache, GFP_NOFS);
	if (!ip)
		return NULL;

	return &ip->ip_vfs_inode;
}

/* ->free_inode(): returns the containing private structure to the cache. */
static void dlmfs_free_inode(struct inode *inode)
{
	kmem_cache_free(dlmfs_inode_cache, DLMFS_I(inode));
}

/*
 * ->evict_inode().  For regular files: destroy the user lock unless
 * USER_LOCK_IN_TEARDOWN is already set, then drop the parent reference held
 * in ip_parent.  For anything else (directories): unregister the DLM
 * connection if one is attached.  Both paths finish by clearing ip_parent
 * and ip_conn.
 */
static void dlmfs_evict_inode(struct inode *inode)
{
	int status;
	struct dlmfs_inode_private *ip;
	struct user_lock_res *lockres;
	int teardown;

	clear_inode(inode);

	mlog(0, "inode %llu\n", inode->i_ino);

	ip = DLMFS_I(inode);
	lockres = &ip->ip_lockres;

	if (S_ISREG(inode->i_mode)) {
		spin_lock(&lockres->l_lock);
		teardown = !!(lockres->l_flags & USER_LOCK_IN_TEARDOWN);
		spin_unlock(&lockres->l_lock);
		if (!teardown) {
			status = user_dlm_destroy_lock(lockres);
			if (status < 0)
				mlog_errno(status);
		}
		iput(ip->ip_parent);
		goto clear_fields;
	}

	mlog(0, "we're a directory, ip->ip_conn = 0x%p\n", ip->ip_conn);
	/* we must be a directory. If required, let's unregister the
	 * dlm context now. */
	if (ip->ip_conn)
		user_dlm_unregister(ip->ip_conn);
clear_fields:
	ip->ip_parent = NULL;
	ip->ip_conn = NULL;
}

/*
 * Builds the root directory inode (mode S_IFDIR | 0755) wired to
 * simple_dir_operations and dlmfs_root_inode_operations.  Returns NULL if
 * new_inode() fails.
 */
static struct inode *dlmfs_get_root_inode(struct super_block *sb)
{
	struct inode *inode = new_inode(sb);
	umode_t mode = S_IFDIR | 0755;

	if (inode) {
		inode->i_ino = get_next_ino();
		inode_init_owner(&nop_mnt_idmap, inode, NULL, mode);
		simple_inode_init_ts(inode);
		inc_nlink(inode);

		inode->i_fop = &simple_dir_operations;
		inode->i_op = &dlmfs_root_inode_operations;
	}

	return inode;
}

/*
 * Allocates and initialises a new inode under @parent.  The new inode
 * starts with the parent's ip_conn.  Regular files get the lock-file
 * operations, a size of DLM_LVB_LEN, an initialised lock resource and a
 * grabbed reference on @parent; directories get the directory operations
 * and an extra link count.  Any other file type hits BUG().
 * Returns NULL if new_inode() fails.
 */
static struct inode *dlmfs_get_inode(struct inode *parent,
				     struct dentry *dentry,
				     umode_t mode)
{
	struct super_block *sb = parent->i_sb;
	struct inode * inode = new_inode(sb);
	struct dlmfs_inode_private *ip;

	if (!inode)
		return NULL;

	inode->i_ino = get_next_ino();
	inode_init_owner(&nop_mnt_idmap, inode, parent, mode);
	simple_inode_init_ts(inode);

	ip = DLMFS_I(inode);
	ip->ip_conn = DLMFS_I(parent)->ip_conn;

	switch (mode & S_IFMT) {
	default:
		/* for now we don't support anything other than
		 * directories and regular files. */
		BUG();
		break;
	case S_IFREG:
		inode->i_op = &dlmfs_file_inode_operations;
		inode->i_fop = &dlmfs_file_operations;

		i_size_write(inode,  DLM_LVB_LEN);

		user_dlm_lock_res_init(&ip->ip_lockres, dentry);

		/* released at clear_inode time, this ensures that we
		 * get to drop the dlm reference on each lock *before*
		 * we call the unregister code for releasing parent
		 * directories. */
		ip->ip_parent = igrab(parent);
		BUG_ON(!ip->ip_parent);
		break;
	case S_IFDIR:
		inode->i_op = &dlmfs_dir_inode_operations;
		inode->i_fop = &simple_dir_operations;

		/* directory inodes start off with i_nlink ==
		 * 2 (for "." entry) */
		inc_nlink(inode);
		break;
	}
	return inode;
}

/*
 * File creation. Allocate an inode, and we're done..
 */
/* SMP-safe */
/*
 * ->mkdir() on the root: the directory name is used as the DLM domain.
 * Rejects names of GROUP_NAME_MAX or more characters with -EINVAL,
 * allocates the directory inode, registers the domain and stores the
 * resulting connection in the new inode.  On any failure the inode
 * reference is dropped.  Returns ERR_PTR(status), i.e. NULL on success.
 */
static struct dentry *dlmfs_mkdir(struct mnt_idmap * idmap,
				  struct inode * dir,
				  struct dentry * dentry,
				  umode_t mode)
{
	int status;
	struct inode *inode = NULL;
	const struct qstr *domain = &dentry->d_name;
	struct dlmfs_inode_private *ip;
	struct ocfs2_cluster_connection *conn;

	mlog(0, "mkdir %.*s\n", domain->len, domain->name);

	/* verify that we have a proper domain */
	if (domain->len >= GROUP_NAME_MAX) {
		status = -EINVAL;
		mlog(ML_ERROR, "invalid domain name for directory.\n");
		goto bail;
	}

	inode = dlmfs_get_inode(dir, dentry, mode | S_IFDIR);
	if (!inode) {
		status = -ENOMEM;
		mlog_errno(status);
		goto bail;
	}

	ip = DLMFS_I(inode);

	conn = user_dlm_register(domain);
	if (IS_ERR(conn)) {
		status = PTR_ERR(conn);
		mlog(ML_ERROR, "Error %d could not register domain \"%.*s\"\n",
		     status, domain->len, domain->name);
		goto bail;
	}
	ip->ip_conn = conn;

	inc_nlink(dir);
	d_make_persistent(dentry, inode);

	status = 0;
bail:
	/* inode is still NULL on the early -EINVAL path */
	if (status < 0)
		iput(inode);
	return ERR_PTR(status);
}

/*
 * ->create() inside a domain directory: the file name is the lock name.
 * Rejects names of USER_DLM_LOCK_ID_MAX_LEN or more characters, or names
 * starting with '$', with -EINVAL; returns -ENOMEM if no inode can be
 * allocated; otherwise attaches the new inode to @dentry and returns 0.
 * @excl is not used.
 */
static int dlmfs_create(struct mnt_idmap *idmap,
			struct inode *dir,
			struct dentry *dentry,
			umode_t mode,
			bool excl)
{
	int status = 0;
	struct inode *inode;
	const struct qstr *name = &dentry->d_name;

	mlog(0, "create %.*s\n", name->len, name->name);

	/* verify name is valid and doesn't contain any dlm reserved
	 * characters */
	if (name->len >= USER_DLM_LOCK_ID_MAX_LEN ||
	    name->name[0] == '$') {
		status = -EINVAL;
		mlog(ML_ERROR, "invalid lock name, %.*s\n", name->len,
		     name->name);
		goto bail;
	}

	inode = dlmfs_get_inode(dir, dentry, mode | S_IFREG);
	if (!inode) {
		status = -ENOMEM;
		mlog_errno(status);
		goto bail;
	}

	d_make_persistent(dentry, inode);
bail:
	return status;
}

/*
 * ->unlink(): destroys the lock resource first and only removes the
 * directory entry (simple_unlink()) if that succeeded; otherwise the
 * destroy error is logged and returned.
 */
static int dlmfs_unlink(struct inode *dir,
			struct dentry *dentry)
{
	int status;
	struct inode *inode = d_inode(dentry);

	mlog(0, "unlink inode %llu\n", inode->i_ino);

	/* if there are no current holders, or none that are waiting
	 * to acquire a lock, this basically destroys our lockres. */
	status = user_dlm_destroy_lock(&DLMFS_I(inode)->ip_lockres);
	if (status < 0) {
		mlog(ML_ERROR, "unlink %pd, error %d from destroy\n",
		     dentry, status);
		goto bail;
	}
	status = simple_unlink(dir, dentry);
bail:
	return status;
}

/*
 * Fills in the superblock and creates the root dentry.  Returns -ENOMEM if
 * d_make_root() yields no root (which includes the case where the root
 * inode could not be allocated), otherwise 0.  @fc is not used.
 */
static int dlmfs_fill_super(struct super_block *sb, struct fs_context *fc)
{
	sb->s_maxbytes = MAX_LFS_FILESIZE;
	sb->s_blocksize = PAGE_SIZE;
	sb->s_blocksize_bits = PAGE_SHIFT;
	sb->s_magic = DLMFS_MAGIC;
	sb->s_op = &dlmfs_ops;
	sb->s_root = d_make_root(dlmfs_get_root_inode(sb));
	if (!sb->s_root)
		return -ENOMEM;
	return 0;
}

/* File operations installed on lock files by dlmfs_get_inode(). */
static const struct file_operations dlmfs_file_operations = {
	.open		= dlmfs_file_open,
	.release	= dlmfs_file_release,
	.poll		= dlmfs_file_poll,
	.read		= dlmfs_file_read,
	.write		= dlmfs_file_write,
	.llseek		= default_llseek,
};

/* Inode operations installed on domain directories by dlmfs_get_inode(). */
static const struct inode_operations dlmfs_dir_inode_operations = {
	.create		= dlmfs_create,
	.lookup		= simple_lookup,
	.unlink		= dlmfs_unlink,
};

/* this way we can restrict mkdir to only the toplevel of the fs.
*/ static const struct inode_operations dlmfs_root_inode_operations = { .lookup = simple_lookup, .mkdir = dlmfs_mkdir, .rmdir = simple_rmdir, }; static const struct super_operations dlmfs_ops = { .statfs = simple_statfs, .alloc_inode = dlmfs_alloc_inode, .free_inode = dlmfs_free_inode, .evict_inode = dlmfs_evict_inode, .drop_inode = inode_just_drop, }; static const struct inode_operations dlmfs_file_inode_operations = { .getattr = simple_getattr, .setattr = dlmfs_file_setattr, }; static int dlmfs_get_tree(struct fs_context *fc) { return get_tree_nodev(fc, dlmfs_fill_super); } static const struct fs_context_operations dlmfs_context_ops = { .get_tree = dlmfs_get_tree, }; static int dlmfs_init_fs_context(struct fs_context *fc) { fc->ops = &dlmfs_context_ops; return 0; } static struct file_system_type dlmfs_fs_type = { .owner = THIS_MODULE, .name = "ocfs2_dlmfs", .kill_sb = kill_anon_super, .init_fs_context = dlmfs_init_fs_context, }; MODULE_ALIAS_FS("ocfs2_dlmfs"); static int __init init_dlmfs_fs(void) { int status; int cleanup_inode = 0, cleanup_worker = 0; dlmfs_inode_cache = kmem_cache_create("dlmfs_inode_cache", sizeof(struct dlmfs_inode_private), 0, (SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT| SLAB_ACCOUNT), dlmfs_init_once); if (!dlmfs_inode_cache) { status = -ENOMEM; goto bail; } cleanup_inode = 1; user_dlm_worker = alloc_workqueue("user_dlm", WQ_MEM_RECLAIM | WQ_PERCPU, 0); if (!user_dlm_worker) { status = -ENOMEM; goto bail; } cleanup_worker = 1; user_dlm_set_locking_protocol(); status = register_filesystem(&dlmfs_fs_type); bail: if (status) { if (cleanup_inode) kmem_cache_destroy(dlmfs_inode_cache); if (cleanup_worker) destroy_workqueue(user_dlm_worker); } else printk("OCFS2 User DLM kernel interface loaded\n"); return status; } static void __exit exit_dlmfs_fs(void) { unregister_filesystem(&dlmfs_fs_type); destroy_workqueue(user_dlm_worker); /* * Make sure all delayed rcu free inodes are flushed before we * destroy cache. 
*/ rcu_barrier(); kmem_cache_destroy(dlmfs_inode_cache); } MODULE_AUTHOR("Oracle"); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("OCFS2 DLM-Filesystem"); module_init(init_dlmfs_fs) module_exit(exit_dlmfs_fs)
12 12 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 // SPDX-License-Identifier: GPL-2.0-or-later /* * History * 03-01-2007 Added forwarding for x.25 Andrew Hendry */ #define pr_fmt(fmt) "X25: " fmt #include <linux/if_arp.h> #include <linux/init.h> #include <linux/slab.h> #include <net/x25.h> LIST_HEAD(x25_forward_list); DEFINE_RWLOCK(x25_forward_list_lock); int x25_forward_call(struct x25_address *dest_addr, struct x25_neigh *from, struct sk_buff *skb, int lci) { struct x25_route *rt; struct x25_neigh *neigh_new = NULL; struct x25_forward *x25_frwd, *new_frwd; struct sk_buff *skbn; short same_lci = 0; int rc = 0; if ((rt = x25_get_route(dest_addr)) == NULL) goto out_no_route; if ((neigh_new = x25_get_neigh(rt->dev)) == NULL) { /* This shouldn't happen, if it occurs somehow * do something sensible */ goto out_put_route; } /* Avoid a loop. This is the normal exit path for a * system with only one x.25 iface and default route */ if (rt->dev == from->dev) { goto out_put_nb; } /* Remote end sending a call request on an already * established LCI? It shouldn't happen, just in case.. 
*/ read_lock_bh(&x25_forward_list_lock); list_for_each_entry(x25_frwd, &x25_forward_list, node) { if (x25_frwd->lci == lci) { pr_warn("call request for lci which is already registered!, transmitting but not registering new pair\n"); same_lci = 1; } } read_unlock_bh(&x25_forward_list_lock); /* Save the forwarding details for future traffic */ if (!same_lci){ if ((new_frwd = kmalloc_obj(struct x25_forward, GFP_ATOMIC)) == NULL){ rc = -ENOMEM; goto out_put_nb; } new_frwd->lci = lci; new_frwd->dev1 = rt->dev; new_frwd->dev2 = from->dev; write_lock_bh(&x25_forward_list_lock); list_add(&new_frwd->node, &x25_forward_list); write_unlock_bh(&x25_forward_list_lock); } /* Forward the call request */ if ( (skbn = skb_clone(skb, GFP_ATOMIC)) == NULL){ goto out_put_nb; } x25_transmit_link(skbn, neigh_new); rc = 1; out_put_nb: x25_neigh_put(neigh_new); out_put_route: x25_route_put(rt); out_no_route: return rc; } int x25_forward_data(int lci, struct x25_neigh *from, struct sk_buff *skb) { struct x25_forward *frwd; struct net_device *peer = NULL; struct x25_neigh *nb; struct sk_buff *skbn; int rc = 0; read_lock_bh(&x25_forward_list_lock); list_for_each_entry(frwd, &x25_forward_list, node) { if (frwd->lci == lci) { /* The call is established, either side can send */ if (from->dev == frwd->dev1) { peer = frwd->dev2; } else { peer = frwd->dev1; } break; } } read_unlock_bh(&x25_forward_list_lock); if ( (nb = x25_get_neigh(peer)) == NULL) goto out; if ( (skbn = pskb_copy(skb, GFP_ATOMIC)) == NULL){ goto output; } x25_transmit_link(skbn, nb); rc = 1; output: x25_neigh_put(nb); out: return rc; } void x25_clear_forward_by_lci(unsigned int lci) { struct x25_forward *fwd, *tmp; write_lock_bh(&x25_forward_list_lock); list_for_each_entry_safe(fwd, tmp, &x25_forward_list, node) { if (fwd->lci == lci) { list_del(&fwd->node); kfree(fwd); } } write_unlock_bh(&x25_forward_list_lock); } void x25_clear_forward_by_dev(struct net_device *dev) { struct x25_forward *fwd, *tmp; 
write_lock_bh(&x25_forward_list_lock); list_for_each_entry_safe(fwd, tmp, &x25_forward_list, node) { if ((fwd->dev1 == dev) || (fwd->dev2 == dev)){ list_del(&fwd->node); kfree(fwd); } } write_unlock_bh(&x25_forward_list_lock); }
10 10 5 5 10 10 10 5 8 8 8 3 8 9 4 4 2 2 4 2 9 9 9 9 9 9 5 5 1 4 9 1 1 9 9 9 9 10 10 1 9 9 5 4 1 3 1 2 1 1 1 1 1 1 1 9 11 1 10 10 1 9 9 11 10 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 
492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 
992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 
1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 
1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 
2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282 2283 2284 2285 2286 2287 2288 2289 2290 2291 2292 2293 2294 2295 2296 2297 2298 2299 2300 2301 2302 2303 2304 2305 2306 2307 2308 2309 2310 2311 2312 2313 2314 2315 2316 2317 2318 2319 2320 2321 2322 2323 2324 2325 2326 2327 2328 2329 2330 2331 2332 2333 2334 2335 2336 2337 2338 2339 // SPDX-License-Identifier: GPL-2.0-or-later /*************************************************************************** * * Copyright (C) 2007-2010 SMSC * *****************************************************************************/ #include <linux/module.h> #include <linux/kmod.h> #include <linux/netdevice.h> #include <linux/etherdevice.h> #include <linux/ethtool.h> #include <linux/mii.h> #include <linux/usb.h> #include <linux/bitrev.h> #include <linux/crc16.h> #include <linux/crc32.h> #include <linux/usb/usbnet.h> #include <linux/slab.h> #include <linux/of_net.h> #include "smsc75xx.h" #define SMSC_CHIPNAME "smsc75xx" #define HS_USB_PKT_SIZE (512) #define FS_USB_PKT_SIZE (64) #define DEFAULT_HS_BURST_CAP_SIZE (16 * 1024 + 5 * HS_USB_PKT_SIZE) #define DEFAULT_FS_BURST_CAP_SIZE (6 * 1024 + 33 * FS_USB_PKT_SIZE) #define DEFAULT_BULK_IN_DELAY (0x00002000) #define MAX_SINGLE_PACKET_SIZE (9000) #define LAN75XX_EEPROM_MAGIC (0x7500) #define EEPROM_MAC_OFFSET (0x01) #define DEFAULT_TX_CSUM_ENABLE (true) #define DEFAULT_RX_CSUM_ENABLE (true) #define SMSC75XX_INTERNAL_PHY_ID (1) #define SMSC75XX_TX_OVERHEAD (8) #define MAX_RX_FIFO_SIZE (20 * 1024) #define MAX_TX_FIFO_SIZE (12 * 1024) #define USB_VENDOR_ID_SMSC (0x0424) #define USB_PRODUCT_ID_LAN7500 
(0x7500)
#define USB_PRODUCT_ID_LAN7505		(0x7505)
#define RXW_PADDING			2
/* Union of the WAKE_* option bits listed below. */
#define SUPPORTED_WAKE			(WAKE_PHY | WAKE_UCAST | WAKE_BCAST | \
					 WAKE_MCAST | WAKE_ARP | WAKE_MAGIC)

/* One bit per suspend mode; SUSPEND_ALLMODES is the union of all four. */
#define SUSPEND_SUSPEND0		(0x01)
#define SUSPEND_SUSPEND1		(0x02)
#define SUSPEND_SUSPEND2		(0x04)
#define SUSPEND_SUSPEND3		(0x08)
#define SUSPEND_ALLMODES		(SUSPEND_SUSPEND0 | SUSPEND_SUSPEND1 | \
					 SUSPEND_SUSPEND2 | SUSPEND_SUSPEND3)

/* Driver-private state. */
struct smsc75xx_priv {
	struct usbnet *dev;
	u32 rfe_ctl;
	u32 wolopts;
	u32 multicast_hash_table[DP_SEL_VHF_HASH_LEN];
	struct mutex dataport_mutex;
	spinlock_t rfe_ctl_lock;
	struct work_struct set_multicast;
	u8 suspend_flags;
};

static bool turbo_mode = true;
module_param(turbo_mode, bool, 0644);
MODULE_PARM_DESC(turbo_mode, "Enable multiple frames per Rx transaction");

/* Forward declarations; the definitions appear later in the file. */
static int smsc75xx_link_ok_nopm(struct usbnet *dev);
static int smsc75xx_phy_gig_workaround(struct usbnet *dev);

/*
 * Reads the 32-bit register @index into *@data with a vendor control
 * request.  A non-zero @in_pm selects usbnet_read_cmd_nopm() instead of
 * usbnet_read_cmd().
 *
 * Returns the transfer result (at least 4) on success.  A short transfer is
 * reported as -ENODATA, a failed one as the negative error from the USB
 * call; in both cases a warning is logged and *@data is left untouched.
 */
static int __must_check __smsc75xx_read_reg(struct usbnet *dev, u32 index,
					    u32 *data, int in_pm)
{
	u32 buf;
	int ret;
	int (*fn)(struct usbnet *, u8, u8, u16, u16, void *, u16);

	BUG_ON(!dev);

	if (!in_pm)
		fn = usbnet_read_cmd;
	else
		fn = usbnet_read_cmd_nopm;

	ret = fn(dev, USB_VENDOR_REQUEST_READ_REGISTER, USB_DIR_IN
		 | USB_TYPE_VENDOR | USB_RECIP_DEVICE,
		 0, index, &buf, 4);
	if (unlikely(ret < 4)) {
		ret = ret < 0 ? ret : -ENODATA;

		netdev_warn(dev->net, "Failed to read reg index 0x%08x: %d\n",
			    index, ret);
		return ret;
	}

	/* the register value arrives little-endian */
	le32_to_cpus(&buf);
	*data = buf;

	return ret;
}

/*
 * Writes @data to the 32-bit register @index with a vendor control request,
 * converting it to little-endian first.  A non-zero @in_pm selects
 * usbnet_write_cmd_nopm() instead of usbnet_write_cmd().
 *
 * Returns whatever the USB call returned; negative values are also logged.
 */
static int __must_check __smsc75xx_write_reg(struct usbnet *dev, u32 index,
					     u32 data, int in_pm)
{
	u32 buf;
	int ret;
	int (*fn)(struct usbnet *, u8, u8, u16, u16, const void *, u16);

	BUG_ON(!dev);

	if (!in_pm)
		fn = usbnet_write_cmd;
	else
		fn = usbnet_write_cmd_nopm;

	buf = data;
	cpu_to_le32s(&buf);

	ret = fn(dev, USB_VENDOR_REQUEST_WRITE_REGISTER, USB_DIR_OUT
		 | USB_TYPE_VENDOR | USB_RECIP_DEVICE,
		 0, index, &buf, 4);
	if (unlikely(ret < 0))
		netdev_warn(dev->net, "Failed to write reg index 0x%08x: %d\n",
			    index, ret);

	return ret;
}

/* Register read with in_pm = 1. */
static int __must_check smsc75xx_read_reg_nopm(struct usbnet *dev, u32 index,
					       u32 *data)
{
	return __smsc75xx_read_reg(dev, index, data, 1);
}

/* Register write with in_pm = 1. */
static int __must_check smsc75xx_write_reg_nopm(struct usbnet *dev, u32 index,
						u32 data)
{
	return __smsc75xx_write_reg(dev, index, data, 1);
}

/* Register read with in_pm = 0. */
static int __must_check smsc75xx_read_reg(struct usbnet *dev, u32 index,
					  u32 *data)
{
	return __smsc75xx_read_reg(dev, index, data, 0);
}

/* Register write with in_pm = 0. */
static int __must_check smsc75xx_write_reg(struct usbnet *dev, u32 index,
					   u32 data)
{
	return __smsc75xx_write_reg(dev, index, data, 0);
}

/* Loop until the read is completed with timeout
 * called with phy_mutex held
 *
 * Polls MII_ACCESS back-to-back until MII_ACCESS_BUSY clears.  Returns 0
 * once it is clear, the register-read error if a read fails, or -EIO when
 * more than HZ jiffies have passed since the first poll.
 */
static __must_check int __smsc75xx_phy_wait_not_busy(struct usbnet *dev,
						     int in_pm)
{
	unsigned long start_time = jiffies;
	u32 val;
	int ret;

	do {
		ret = __smsc75xx_read_reg(dev, MII_ACCESS, &val, in_pm);
		if (ret < 0) {
			netdev_warn(dev->net, "Error reading MII_ACCESS\n");
			return ret;
		}

		if (!(val & MII_ACCESS_BUSY))
			return 0;
	} while (!time_after(jiffies, start_time + HZ));

	return -EIO;
}

/*
 * Reads PHY register @idx of PHY @phy_id through the MII_ACCESS / MII_DATA
 * register pair while holding dev->phy_mutex.
 *
 * Returns the low 16 bits of MII_DATA on success, or the negative error of
 * whichever step failed.
 */
static int __smsc75xx_mdio_read(struct net_device *netdev, int phy_id, int idx,
				int in_pm)
{
	struct usbnet *dev = netdev_priv(netdev);
	u32 val, addr;
	int ret;

	mutex_lock(&dev->phy_mutex);

	/* confirm MII not busy */
	ret = __smsc75xx_phy_wait_not_busy(dev, in_pm);
	if (ret < 0) {
		netdev_warn(dev->net, "MII is busy in smsc75xx_mdio_read\n");
		goto done;
	}

	/* set the address, index & direction (read from PHY) */
	phy_id &= dev->mii.phy_id_mask;
	idx &= dev->mii.reg_num_mask;
	addr = ((phy_id << MII_ACCESS_PHY_ADDR_SHIFT) & MII_ACCESS_PHY_ADDR)
		| ((idx << MII_ACCESS_REG_ADDR_SHIFT) & MII_ACCESS_REG_ADDR)
		| MII_ACCESS_READ | MII_ACCESS_BUSY;
	ret = __smsc75xx_write_reg(dev, MII_ACCESS, addr, in_pm);
	if (ret < 0) {
		netdev_warn(dev->net, "Error writing MII_ACCESS\n");
		goto done;
	}

	/* wait for the access started above to finish */
	ret = __smsc75xx_phy_wait_not_busy(dev, in_pm);
	if (ret < 0) {
		netdev_warn(dev->net, "Timed out reading MII reg %02X\n", idx);
		goto done;
	}

	ret = __smsc75xx_read_reg(dev, MII_DATA, &val, in_pm);
	if (ret < 0) {
		netdev_warn(dev->net, "Error reading MII_DATA\n");
		goto done;
	}

	ret = (u16)(val & 0xFFFF);

done:
	mutex_unlock(&dev->phy_mutex);
	return ret;
}

/*
 * Writes @regval to PHY register @idx of PHY @phy_id through the MII_DATA /
 * MII_ACCESS register pair while holding dev->phy_mutex.  The function
 * returns nothing: failures are only logged.
 */
static void __smsc75xx_mdio_write(struct net_device *netdev, int phy_id,
				  int idx, int regval, int in_pm)
{
	struct usbnet *dev = netdev_priv(netdev);
	u32 val, addr;
	int ret;

	mutex_lock(&dev->phy_mutex);

	/* confirm MII not busy */
	ret = __smsc75xx_phy_wait_not_busy(dev, in_pm);
	if (ret < 0) {
		netdev_warn(dev->net, "MII is busy in smsc75xx_mdio_write\n");
		goto done;
	}

	/* the data register is loaded before the access is started */
	val = regval;
	ret = __smsc75xx_write_reg(dev, MII_DATA, val, in_pm);
	if (ret < 0) {
		netdev_warn(dev->net, "Error writing MII_DATA\n");
		goto done;
	}

	/* set the address, index & direction (write to PHY) */
	phy_id &= dev->mii.phy_id_mask;
	idx &= dev->mii.reg_num_mask;
	addr = ((phy_id << MII_ACCESS_PHY_ADDR_SHIFT) & MII_ACCESS_PHY_ADDR)
		| ((idx << MII_ACCESS_REG_ADDR_SHIFT) & MII_ACCESS_REG_ADDR)
		| MII_ACCESS_WRITE | MII_ACCESS_BUSY;
	ret = __smsc75xx_write_reg(dev, MII_ACCESS, addr, in_pm);
	if (ret < 0) {
		netdev_warn(dev->net, "Error writing MII_ACCESS\n");
		goto done;
	}

	ret = __smsc75xx_phy_wait_not_busy(dev, in_pm);
	if (ret < 0) {
		netdev_warn(dev->net, "Timed out writing MII reg %02X\n", idx);
		goto done;
	}

done:
	mutex_unlock(&dev->phy_mutex);
}

static int
smsc75xx_mdio_read_nopm(struct net_device *netdev, int phy_id, int idx) { return __smsc75xx_mdio_read(netdev, phy_id, idx, 1); } static void smsc75xx_mdio_write_nopm(struct net_device *netdev, int phy_id, int idx, int regval) { __smsc75xx_mdio_write(netdev, phy_id, idx, regval, 1); } static int smsc75xx_mdio_read(struct net_device *netdev, int phy_id, int idx) { return __smsc75xx_mdio_read(netdev, phy_id, idx, 0); } static void smsc75xx_mdio_write(struct net_device *netdev, int phy_id, int idx, int regval) { __smsc75xx_mdio_write(netdev, phy_id, idx, regval, 0); } static int smsc75xx_wait_eeprom(struct usbnet *dev) { unsigned long start_time = jiffies; u32 val; int ret; do { ret = smsc75xx_read_reg(dev, E2P_CMD, &val); if (ret < 0) { netdev_warn(dev->net, "Error reading E2P_CMD\n"); return ret; } if (!(val & E2P_CMD_BUSY) || (val & E2P_CMD_TIMEOUT)) break; udelay(40); } while (!time_after(jiffies, start_time + HZ)); if (val & (E2P_CMD_TIMEOUT | E2P_CMD_BUSY)) { netdev_warn(dev->net, "EEPROM read operation timeout\n"); return -EIO; } return 0; } static int smsc75xx_eeprom_confirm_not_busy(struct usbnet *dev) { unsigned long start_time = jiffies; u32 val; int ret; do { ret = smsc75xx_read_reg(dev, E2P_CMD, &val); if (ret < 0) { netdev_warn(dev->net, "Error reading E2P_CMD\n"); return ret; } if (!(val & E2P_CMD_BUSY)) return 0; udelay(40); } while (!time_after(jiffies, start_time + HZ)); netdev_warn(dev->net, "EEPROM is busy\n"); return -EIO; } static int smsc75xx_read_eeprom(struct usbnet *dev, u32 offset, u32 length, u8 *data) { u32 val; int i, ret; BUG_ON(!dev); BUG_ON(!data); ret = smsc75xx_eeprom_confirm_not_busy(dev); if (ret) return ret; for (i = 0; i < length; i++) { val = E2P_CMD_BUSY | E2P_CMD_READ | (offset & E2P_CMD_ADDR); ret = smsc75xx_write_reg(dev, E2P_CMD, val); if (ret < 0) { netdev_warn(dev->net, "Error writing E2P_CMD\n"); return ret; } ret = smsc75xx_wait_eeprom(dev); if (ret < 0) return ret; ret = smsc75xx_read_reg(dev, E2P_DATA, &val); if (ret < 
0) { netdev_warn(dev->net, "Error reading E2P_DATA\n"); return ret; } data[i] = val & 0xFF; offset++; } return 0; } static int smsc75xx_write_eeprom(struct usbnet *dev, u32 offset, u32 length, u8 *data) { u32 val; int i, ret; BUG_ON(!dev); BUG_ON(!data); ret = smsc75xx_eeprom_confirm_not_busy(dev); if (ret) return ret; /* Issue write/erase enable command */ val = E2P_CMD_BUSY | E2P_CMD_EWEN; ret = smsc75xx_write_reg(dev, E2P_CMD, val); if (ret < 0) { netdev_warn(dev->net, "Error writing E2P_CMD\n"); return ret; } ret = smsc75xx_wait_eeprom(dev); if (ret < 0) return ret; for (i = 0; i < length; i++) { /* Fill data register */ val = data[i]; ret = smsc75xx_write_reg(dev, E2P_DATA, val); if (ret < 0) { netdev_warn(dev->net, "Error writing E2P_DATA\n"); return ret; } /* Send "write" command */ val = E2P_CMD_BUSY | E2P_CMD_WRITE | (offset & E2P_CMD_ADDR); ret = smsc75xx_write_reg(dev, E2P_CMD, val); if (ret < 0) { netdev_warn(dev->net, "Error writing E2P_CMD\n"); return ret; } ret = smsc75xx_wait_eeprom(dev); if (ret < 0) return ret; offset++; } return 0; } static int smsc75xx_dataport_wait_not_busy(struct usbnet *dev) { int i, ret; for (i = 0; i < 100; i++) { u32 dp_sel; ret = smsc75xx_read_reg(dev, DP_SEL, &dp_sel); if (ret < 0) { netdev_warn(dev->net, "Error reading DP_SEL\n"); return ret; } if (dp_sel & DP_SEL_DPRDY) return 0; udelay(40); } netdev_warn(dev->net, "smsc75xx_dataport_wait_not_busy timed out\n"); return -EIO; } static int smsc75xx_dataport_write(struct usbnet *dev, u32 ram_select, u32 addr, u32 length, u32 *buf) { struct smsc75xx_priv *pdata = (struct smsc75xx_priv *)(dev->data[0]); u32 dp_sel; int i, ret; mutex_lock(&pdata->dataport_mutex); ret = smsc75xx_dataport_wait_not_busy(dev); if (ret < 0) { netdev_warn(dev->net, "smsc75xx_dataport_write busy on entry\n"); goto done; } ret = smsc75xx_read_reg(dev, DP_SEL, &dp_sel); if (ret < 0) { netdev_warn(dev->net, "Error reading DP_SEL\n"); goto done; } dp_sel &= ~DP_SEL_RSEL; dp_sel |= ram_select; ret = 
smsc75xx_write_reg(dev, DP_SEL, dp_sel); if (ret < 0) { netdev_warn(dev->net, "Error writing DP_SEL\n"); goto done; } for (i = 0; i < length; i++) { ret = smsc75xx_write_reg(dev, DP_ADDR, addr + i); if (ret < 0) { netdev_warn(dev->net, "Error writing DP_ADDR\n"); goto done; } ret = smsc75xx_write_reg(dev, DP_DATA, buf[i]); if (ret < 0) { netdev_warn(dev->net, "Error writing DP_DATA\n"); goto done; } ret = smsc75xx_write_reg(dev, DP_CMD, DP_CMD_WRITE); if (ret < 0) { netdev_warn(dev->net, "Error writing DP_CMD\n"); goto done; } ret = smsc75xx_dataport_wait_not_busy(dev); if (ret < 0) { netdev_warn(dev->net, "smsc75xx_dataport_write timeout\n"); goto done; } } done: mutex_unlock(&pdata->dataport_mutex); return ret; } /* returns hash bit number for given MAC address */ static u32 smsc75xx_hash(char addr[ETH_ALEN]) { return (ether_crc(ETH_ALEN, addr) >> 23) & 0x1ff; } static void smsc75xx_deferred_multicast_write(struct work_struct *param) { struct smsc75xx_priv *pdata = container_of(param, struct smsc75xx_priv, set_multicast); struct usbnet *dev = pdata->dev; int ret; netif_dbg(dev, drv, dev->net, "deferred multicast write 0x%08x\n", pdata->rfe_ctl); smsc75xx_dataport_write(dev, DP_SEL_VHF, DP_SEL_VHF_VLAN_LEN, DP_SEL_VHF_HASH_LEN, pdata->multicast_hash_table); ret = smsc75xx_write_reg(dev, RFE_CTL, pdata->rfe_ctl); if (ret < 0) netdev_warn(dev->net, "Error writing RFE_CRL\n"); } static void smsc75xx_set_multicast(struct net_device *netdev) { struct usbnet *dev = netdev_priv(netdev); struct smsc75xx_priv *pdata = (struct smsc75xx_priv *)(dev->data[0]); unsigned long flags; int i; spin_lock_irqsave(&pdata->rfe_ctl_lock, flags); pdata->rfe_ctl &= ~(RFE_CTL_AU | RFE_CTL_AM | RFE_CTL_DPF | RFE_CTL_MHF); pdata->rfe_ctl |= RFE_CTL_AB; for (i = 0; i < DP_SEL_VHF_HASH_LEN; i++) pdata->multicast_hash_table[i] = 0; if (dev->net->flags & IFF_PROMISC) { netif_dbg(dev, drv, dev->net, "promiscuous mode enabled\n"); pdata->rfe_ctl |= RFE_CTL_AM | RFE_CTL_AU; } else if 
(dev->net->flags & IFF_ALLMULTI) { netif_dbg(dev, drv, dev->net, "receive all multicast enabled\n"); pdata->rfe_ctl |= RFE_CTL_AM | RFE_CTL_DPF; } else if (!netdev_mc_empty(dev->net)) { struct netdev_hw_addr *ha; netif_dbg(dev, drv, dev->net, "receive multicast hash filter\n"); pdata->rfe_ctl |= RFE_CTL_MHF | RFE_CTL_DPF; netdev_for_each_mc_addr(ha, netdev) { u32 bitnum = smsc75xx_hash(ha->addr); pdata->multicast_hash_table[bitnum / 32] |= (1 << (bitnum % 32)); } } else { netif_dbg(dev, drv, dev->net, "receive own packets only\n"); pdata->rfe_ctl |= RFE_CTL_DPF; } spin_unlock_irqrestore(&pdata->rfe_ctl_lock, flags); /* defer register writes to a sleepable context */ schedule_work(&pdata->set_multicast); } static int smsc75xx_update_flowcontrol(struct usbnet *dev, u8 duplex, u16 lcladv, u16 rmtadv) { u32 flow = 0, fct_flow = 0; int ret; if (duplex == DUPLEX_FULL) { u8 cap = mii_resolve_flowctrl_fdx(lcladv, rmtadv); if (cap & FLOW_CTRL_TX) { flow = (FLOW_TX_FCEN | 0xFFFF); /* set fct_flow thresholds to 20% and 80% */ fct_flow = (8 << 8) | 32; } if (cap & FLOW_CTRL_RX) flow |= FLOW_RX_FCEN; netif_dbg(dev, link, dev->net, "rx pause %s, tx pause %s\n", (cap & FLOW_CTRL_RX ? "enabled" : "disabled"), (cap & FLOW_CTRL_TX ? 
"enabled" : "disabled")); } else { netif_dbg(dev, link, dev->net, "half duplex\n"); } ret = smsc75xx_write_reg(dev, FLOW, flow); if (ret < 0) { netdev_warn(dev->net, "Error writing FLOW\n"); return ret; } ret = smsc75xx_write_reg(dev, FCT_FLOW, fct_flow); if (ret < 0) { netdev_warn(dev->net, "Error writing FCT_FLOW\n"); return ret; } return 0; } static int smsc75xx_link_reset(struct usbnet *dev) { struct mii_if_info *mii = &dev->mii; struct ethtool_cmd ecmd = { .cmd = ETHTOOL_GSET }; u16 lcladv, rmtadv; int ret; /* write to clear phy interrupt status */ smsc75xx_mdio_write(dev->net, mii->phy_id, PHY_INT_SRC, PHY_INT_SRC_CLEAR_ALL); ret = smsc75xx_write_reg(dev, INT_STS, INT_STS_CLEAR_ALL); if (ret < 0) { netdev_warn(dev->net, "Error writing INT_STS\n"); return ret; } mii_check_media(mii, 1, 1); mii_ethtool_gset(&dev->mii, &ecmd); lcladv = smsc75xx_mdio_read(dev->net, mii->phy_id, MII_ADVERTISE); rmtadv = smsc75xx_mdio_read(dev->net, mii->phy_id, MII_LPA); netif_dbg(dev, link, dev->net, "speed: %u duplex: %d lcladv: %04x rmtadv: %04x\n", ethtool_cmd_speed(&ecmd), ecmd.duplex, lcladv, rmtadv); return smsc75xx_update_flowcontrol(dev, ecmd.duplex, lcladv, rmtadv); } static void smsc75xx_status(struct usbnet *dev, struct urb *urb) { u32 intdata; if (urb->actual_length != 4) { netdev_warn(dev->net, "unexpected urb length %d\n", urb->actual_length); return; } intdata = get_unaligned_le32(urb->transfer_buffer); netif_dbg(dev, link, dev->net, "intdata: 0x%08X\n", intdata); if (intdata & INT_ENP_PHY_INT) usbnet_defer_kevent(dev, EVENT_LINK_RESET); else netdev_warn(dev->net, "unexpected interrupt, intdata=0x%08X\n", intdata); } static int smsc75xx_ethtool_get_eeprom_len(struct net_device *net) { return MAX_EEPROM_SIZE; } static int smsc75xx_ethtool_get_eeprom(struct net_device *netdev, struct ethtool_eeprom *ee, u8 *data) { struct usbnet *dev = netdev_priv(netdev); ee->magic = LAN75XX_EEPROM_MAGIC; return smsc75xx_read_eeprom(dev, ee->offset, ee->len, data); } static int 
smsc75xx_ethtool_set_eeprom(struct net_device *netdev, struct ethtool_eeprom *ee, u8 *data) { struct usbnet *dev = netdev_priv(netdev); if (ee->magic != LAN75XX_EEPROM_MAGIC) { netdev_warn(dev->net, "EEPROM: magic value mismatch: 0x%x\n", ee->magic); return -EINVAL; } return smsc75xx_write_eeprom(dev, ee->offset, ee->len, data); } static void smsc75xx_ethtool_get_wol(struct net_device *net, struct ethtool_wolinfo *wolinfo) { struct usbnet *dev = netdev_priv(net); struct smsc75xx_priv *pdata = (struct smsc75xx_priv *)(dev->data[0]); wolinfo->supported = SUPPORTED_WAKE; wolinfo->wolopts = pdata->wolopts; } static int smsc75xx_ethtool_set_wol(struct net_device *net, struct ethtool_wolinfo *wolinfo) { struct usbnet *dev = netdev_priv(net); struct smsc75xx_priv *pdata = (struct smsc75xx_priv *)(dev->data[0]); int ret; if (wolinfo->wolopts & ~SUPPORTED_WAKE) return -EINVAL; pdata->wolopts = wolinfo->wolopts & SUPPORTED_WAKE; ret = device_set_wakeup_enable(&dev->udev->dev, pdata->wolopts); if (ret < 0) netdev_warn(dev->net, "device_set_wakeup_enable error %d\n", ret); return ret; } static const struct ethtool_ops smsc75xx_ethtool_ops = { .get_link = usbnet_get_link, .nway_reset = usbnet_nway_reset, .get_drvinfo = usbnet_get_drvinfo, .get_msglevel = usbnet_get_msglevel, .set_msglevel = usbnet_set_msglevel, .get_eeprom_len = smsc75xx_ethtool_get_eeprom_len, .get_eeprom = smsc75xx_ethtool_get_eeprom, .set_eeprom = smsc75xx_ethtool_set_eeprom, .get_wol = smsc75xx_ethtool_get_wol, .set_wol = smsc75xx_ethtool_set_wol, .get_link_ksettings = usbnet_get_link_ksettings_mii, .set_link_ksettings = usbnet_set_link_ksettings_mii, }; static int smsc75xx_ioctl(struct net_device *netdev, struct ifreq *rq, int cmd) { if (!netif_running(netdev)) return -EINVAL; return usbnet_mii_ioctl(netdev, rq, cmd); } static void smsc75xx_init_mac_address(struct usbnet *dev) { u8 addr[ETH_ALEN]; /* maybe the boot loader passed the MAC address in devicetree */ if 
(!platform_get_ethdev_address(&dev->udev->dev, dev->net)) { if (is_valid_ether_addr(dev->net->dev_addr)) { /* device tree values are valid so use them */ netif_dbg(dev, ifup, dev->net, "MAC address read from the device tree\n"); return; } } /* try reading mac address from EEPROM */ if (smsc75xx_read_eeprom(dev, EEPROM_MAC_OFFSET, ETH_ALEN, addr) == 0) { eth_hw_addr_set(dev->net, addr); if (is_valid_ether_addr(dev->net->dev_addr)) { /* eeprom values are valid so use them */ netif_dbg(dev, ifup, dev->net, "MAC address read from EEPROM\n"); return; } } /* no useful static MAC address found. generate a random one */ eth_hw_addr_random(dev->net); netif_dbg(dev, ifup, dev->net, "MAC address set to eth_random_addr\n"); } static int smsc75xx_set_mac_address(struct usbnet *dev) { u32 addr_lo = dev->net->dev_addr[0] | dev->net->dev_addr[1] << 8 | dev->net->dev_addr[2] << 16 | dev->net->dev_addr[3] << 24; u32 addr_hi = dev->net->dev_addr[4] | dev->net->dev_addr[5] << 8; int ret = smsc75xx_write_reg(dev, RX_ADDRH, addr_hi); if (ret < 0) { netdev_warn(dev->net, "Failed to write RX_ADDRH: %d\n", ret); return ret; } ret = smsc75xx_write_reg(dev, RX_ADDRL, addr_lo); if (ret < 0) { netdev_warn(dev->net, "Failed to write RX_ADDRL: %d\n", ret); return ret; } addr_hi |= ADDR_FILTX_FB_VALID; ret = smsc75xx_write_reg(dev, ADDR_FILTX, addr_hi); if (ret < 0) { netdev_warn(dev->net, "Failed to write ADDR_FILTX: %d\n", ret); return ret; } ret = smsc75xx_write_reg(dev, ADDR_FILTX + 4, addr_lo); if (ret < 0) netdev_warn(dev->net, "Failed to write ADDR_FILTX+4: %d\n", ret); return ret; } static int smsc75xx_phy_initialize(struct usbnet *dev) { int bmcr, ret, timeout = 0; /* Initialize MII structure */ dev->mii.dev = dev->net; dev->mii.mdio_read = smsc75xx_mdio_read; dev->mii.mdio_write = smsc75xx_mdio_write; dev->mii.phy_id_mask = 0x1f; dev->mii.reg_num_mask = 0x1f; dev->mii.supports_gmii = 1; dev->mii.phy_id = SMSC75XX_INTERNAL_PHY_ID; /* reset phy and wait for reset to complete */ 
smsc75xx_mdio_write(dev->net, dev->mii.phy_id, MII_BMCR, BMCR_RESET); do { msleep(10); bmcr = smsc75xx_mdio_read(dev->net, dev->mii.phy_id, MII_BMCR); if (bmcr < 0) { netdev_warn(dev->net, "Error reading MII_BMCR\n"); return bmcr; } timeout++; } while ((bmcr & BMCR_RESET) && (timeout < 100)); if (timeout >= 100) { netdev_warn(dev->net, "timeout on PHY Reset\n"); return -EIO; } /* phy workaround for gig link */ smsc75xx_phy_gig_workaround(dev); smsc75xx_mdio_write(dev->net, dev->mii.phy_id, MII_ADVERTISE, ADVERTISE_ALL | ADVERTISE_CSMA | ADVERTISE_PAUSE_CAP | ADVERTISE_PAUSE_ASYM); smsc75xx_mdio_write(dev->net, dev->mii.phy_id, MII_CTRL1000, ADVERTISE_1000FULL); /* read and write to clear phy interrupt status */ ret = smsc75xx_mdio_read(dev->net, dev->mii.phy_id, PHY_INT_SRC); if (ret < 0) { netdev_warn(dev->net, "Error reading PHY_INT_SRC\n"); return ret; } smsc75xx_mdio_write(dev->net, dev->mii.phy_id, PHY_INT_SRC, 0xffff); smsc75xx_mdio_write(dev->net, dev->mii.phy_id, PHY_INT_MASK, PHY_INT_MASK_DEFAULT); mii_nway_restart(&dev->mii); netif_dbg(dev, ifup, dev->net, "phy initialised successfully\n"); return 0; } static int smsc75xx_set_rx_max_frame_length(struct usbnet *dev, int size) { int ret = 0; u32 buf; bool rxenabled; ret = smsc75xx_read_reg(dev, MAC_RX, &buf); if (ret < 0) { netdev_warn(dev->net, "Failed to read MAC_RX: %d\n", ret); return ret; } rxenabled = ((buf & MAC_RX_RXEN) != 0); if (rxenabled) { buf &= ~MAC_RX_RXEN; ret = smsc75xx_write_reg(dev, MAC_RX, buf); if (ret < 0) { netdev_warn(dev->net, "Failed to write MAC_RX: %d\n", ret); return ret; } } /* add 4 to size for FCS */ buf &= ~MAC_RX_MAX_SIZE; buf |= (((size + 4) << MAC_RX_MAX_SIZE_SHIFT) & MAC_RX_MAX_SIZE); ret = smsc75xx_write_reg(dev, MAC_RX, buf); if (ret < 0) { netdev_warn(dev->net, "Failed to write MAC_RX: %d\n", ret); return ret; } if (rxenabled) { buf |= MAC_RX_RXEN; ret = smsc75xx_write_reg(dev, MAC_RX, buf); if (ret < 0) { netdev_warn(dev->net, "Failed to write MAC_RX: %d\n", ret); 
return ret; } } return 0; } static int smsc75xx_change_mtu(struct net_device *netdev, int new_mtu) { struct usbnet *dev = netdev_priv(netdev); int ret; ret = smsc75xx_set_rx_max_frame_length(dev, new_mtu + ETH_HLEN); if (ret < 0) { netdev_warn(dev->net, "Failed to set mac rx frame length\n"); return ret; } return usbnet_change_mtu(netdev, new_mtu); } /* Enable or disable Rx checksum offload engine */ static int smsc75xx_set_features(struct net_device *netdev, netdev_features_t features) { struct usbnet *dev = netdev_priv(netdev); struct smsc75xx_priv *pdata = (struct smsc75xx_priv *)(dev->data[0]); unsigned long flags; int ret; spin_lock_irqsave(&pdata->rfe_ctl_lock, flags); if (features & NETIF_F_RXCSUM) pdata->rfe_ctl |= RFE_CTL_TCPUDP_CKM | RFE_CTL_IP_CKM; else pdata->rfe_ctl &= ~(RFE_CTL_TCPUDP_CKM | RFE_CTL_IP_CKM); spin_unlock_irqrestore(&pdata->rfe_ctl_lock, flags); /* it's racing here! */ ret = smsc75xx_write_reg(dev, RFE_CTL, pdata->rfe_ctl); if (ret < 0) { netdev_warn(dev->net, "Error writing RFE_CTL\n"); return ret; } return 0; } static int smsc75xx_wait_ready(struct usbnet *dev, int in_pm) { int timeout = 0; do { u32 buf; int ret; ret = __smsc75xx_read_reg(dev, PMT_CTL, &buf, in_pm); if (ret < 0) { netdev_warn(dev->net, "Failed to read PMT_CTL: %d\n", ret); return ret; } if (buf & PMT_CTL_DEV_RDY) return 0; msleep(10); timeout++; } while (timeout < 100); netdev_warn(dev->net, "timeout waiting for device ready\n"); return -EIO; } static int smsc75xx_phy_gig_workaround(struct usbnet *dev) { struct mii_if_info *mii = &dev->mii; int ret = 0, timeout = 0; u32 buf, link_up = 0; /* Set the phy in Gig loopback */ smsc75xx_mdio_write(dev->net, mii->phy_id, MII_BMCR, 0x4040); /* Wait for the link up */ do { link_up = smsc75xx_link_ok_nopm(dev); usleep_range(10000, 20000); timeout++; } while ((!link_up) && (timeout < 1000)); if (timeout >= 1000) { netdev_warn(dev->net, "Timeout waiting for PHY link up\n"); return -EIO; } /* phy reset */ ret = 
smsc75xx_read_reg(dev, PMT_CTL, &buf); if (ret < 0) { netdev_warn(dev->net, "Failed to read PMT_CTL: %d\n", ret); return ret; } buf |= PMT_CTL_PHY_RST; ret = smsc75xx_write_reg(dev, PMT_CTL, buf); if (ret < 0) { netdev_warn(dev->net, "Failed to write PMT_CTL: %d\n", ret); return ret; } timeout = 0; do { usleep_range(10000, 20000); ret = smsc75xx_read_reg(dev, PMT_CTL, &buf); if (ret < 0) { netdev_warn(dev->net, "Failed to read PMT_CTL: %d\n", ret); return ret; } timeout++; } while ((buf & PMT_CTL_PHY_RST) && (timeout < 100)); if (timeout >= 100) { netdev_warn(dev->net, "timeout waiting for PHY Reset\n"); return -EIO; } return 0; } static int smsc75xx_reset(struct usbnet *dev) { struct smsc75xx_priv *pdata = (struct smsc75xx_priv *)(dev->data[0]); u32 buf; int ret = 0, timeout; netif_dbg(dev, ifup, dev->net, "entering smsc75xx_reset\n"); ret = smsc75xx_wait_ready(dev, 0); if (ret < 0) { netdev_warn(dev->net, "device not ready in smsc75xx_reset\n"); return ret; } ret = smsc75xx_read_reg(dev, HW_CFG, &buf); if (ret < 0) { netdev_warn(dev->net, "Failed to read HW_CFG: %d\n", ret); return ret; } buf |= HW_CFG_LRST; ret = smsc75xx_write_reg(dev, HW_CFG, buf); if (ret < 0) { netdev_warn(dev->net, "Failed to write HW_CFG: %d\n", ret); return ret; } timeout = 0; do { msleep(10); ret = smsc75xx_read_reg(dev, HW_CFG, &buf); if (ret < 0) { netdev_warn(dev->net, "Failed to read HW_CFG: %d\n", ret); return ret; } timeout++; } while ((buf & HW_CFG_LRST) && (timeout < 100)); if (timeout >= 100) { netdev_warn(dev->net, "timeout on completion of Lite Reset\n"); return -EIO; } netif_dbg(dev, ifup, dev->net, "Lite reset complete, resetting PHY\n"); ret = smsc75xx_read_reg(dev, PMT_CTL, &buf); if (ret < 0) { netdev_warn(dev->net, "Failed to read PMT_CTL: %d\n", ret); return ret; } buf |= PMT_CTL_PHY_RST; ret = smsc75xx_write_reg(dev, PMT_CTL, buf); if (ret < 0) { netdev_warn(dev->net, "Failed to write PMT_CTL: %d\n", ret); return ret; } timeout = 0; do { msleep(10); ret = 
smsc75xx_read_reg(dev, PMT_CTL, &buf); if (ret < 0) { netdev_warn(dev->net, "Failed to read PMT_CTL: %d\n", ret); return ret; } timeout++; } while ((buf & PMT_CTL_PHY_RST) && (timeout < 100)); if (timeout >= 100) { netdev_warn(dev->net, "timeout waiting for PHY Reset\n"); return -EIO; } netif_dbg(dev, ifup, dev->net, "PHY reset complete\n"); ret = smsc75xx_set_mac_address(dev); if (ret < 0) { netdev_warn(dev->net, "Failed to set mac address\n"); return ret; } netif_dbg(dev, ifup, dev->net, "MAC Address: %pM\n", dev->net->dev_addr); ret = smsc75xx_read_reg(dev, HW_CFG, &buf); if (ret < 0) { netdev_warn(dev->net, "Failed to read HW_CFG: %d\n", ret); return ret; } netif_dbg(dev, ifup, dev->net, "Read Value from HW_CFG : 0x%08x\n", buf); buf |= HW_CFG_BIR; ret = smsc75xx_write_reg(dev, HW_CFG, buf); if (ret < 0) { netdev_warn(dev->net, "Failed to write HW_CFG: %d\n", ret); return ret; } ret = smsc75xx_read_reg(dev, HW_CFG, &buf); if (ret < 0) { netdev_warn(dev->net, "Failed to read HW_CFG: %d\n", ret); return ret; } netif_dbg(dev, ifup, dev->net, "Read Value from HW_CFG after writing HW_CFG_BIR: 0x%08x\n", buf); if (!turbo_mode) { buf = 0; dev->rx_urb_size = MAX_SINGLE_PACKET_SIZE; } else if (dev->udev->speed == USB_SPEED_HIGH) { buf = DEFAULT_HS_BURST_CAP_SIZE / HS_USB_PKT_SIZE; dev->rx_urb_size = DEFAULT_HS_BURST_CAP_SIZE; } else { buf = DEFAULT_FS_BURST_CAP_SIZE / FS_USB_PKT_SIZE; dev->rx_urb_size = DEFAULT_FS_BURST_CAP_SIZE; } netif_dbg(dev, ifup, dev->net, "rx_urb_size=%ld\n", (ulong)dev->rx_urb_size); ret = smsc75xx_write_reg(dev, BURST_CAP, buf); if (ret < 0) { netdev_warn(dev->net, "Failed to write BURST_CAP: %d\n", ret); return ret; } ret = smsc75xx_read_reg(dev, BURST_CAP, &buf); if (ret < 0) { netdev_warn(dev->net, "Failed to read BURST_CAP: %d\n", ret); return ret; } netif_dbg(dev, ifup, dev->net, "Read Value from BURST_CAP after writing: 0x%08x\n", buf); ret = smsc75xx_write_reg(dev, BULK_IN_DLY, DEFAULT_BULK_IN_DELAY); if (ret < 0) { netdev_warn(dev->net, 
"Failed to write BULK_IN_DLY: %d\n", ret); return ret; } ret = smsc75xx_read_reg(dev, BULK_IN_DLY, &buf); if (ret < 0) { netdev_warn(dev->net, "Failed to read BULK_IN_DLY: %d\n", ret); return ret; } netif_dbg(dev, ifup, dev->net, "Read Value from BULK_IN_DLY after writing: 0x%08x\n", buf); if (turbo_mode) { ret = smsc75xx_read_reg(dev, HW_CFG, &buf); if (ret < 0) { netdev_warn(dev->net, "Failed to read HW_CFG: %d\n", ret); return ret; } netif_dbg(dev, ifup, dev->net, "HW_CFG: 0x%08x\n", buf); buf |= (HW_CFG_MEF | HW_CFG_BCE); ret = smsc75xx_write_reg(dev, HW_CFG, buf); if (ret < 0) { netdev_warn(dev->net, "Failed to write HW_CFG: %d\n", ret); return ret; } ret = smsc75xx_read_reg(dev, HW_CFG, &buf); if (ret < 0) { netdev_warn(dev->net, "Failed to read HW_CFG: %d\n", ret); return ret; } netif_dbg(dev, ifup, dev->net, "HW_CFG: 0x%08x\n", buf); } /* set FIFO sizes */ buf = (MAX_RX_FIFO_SIZE - 512) / 512; ret = smsc75xx_write_reg(dev, FCT_RX_FIFO_END, buf); if (ret < 0) { netdev_warn(dev->net, "Failed to write FCT_RX_FIFO_END: %d\n", ret); return ret; } netif_dbg(dev, ifup, dev->net, "FCT_RX_FIFO_END set to 0x%08x\n", buf); buf = (MAX_TX_FIFO_SIZE - 512) / 512; ret = smsc75xx_write_reg(dev, FCT_TX_FIFO_END, buf); if (ret < 0) { netdev_warn(dev->net, "Failed to write FCT_TX_FIFO_END: %d\n", ret); return ret; } netif_dbg(dev, ifup, dev->net, "FCT_TX_FIFO_END set to 0x%08x\n", buf); ret = smsc75xx_write_reg(dev, INT_STS, INT_STS_CLEAR_ALL); if (ret < 0) { netdev_warn(dev->net, "Failed to write INT_STS: %d\n", ret); return ret; } ret = smsc75xx_read_reg(dev, ID_REV, &buf); if (ret < 0) { netdev_warn(dev->net, "Failed to read ID_REV: %d\n", ret); return ret; } netif_dbg(dev, ifup, dev->net, "ID_REV = 0x%08x\n", buf); ret = smsc75xx_read_reg(dev, E2P_CMD, &buf); if (ret < 0) { netdev_warn(dev->net, "Failed to read E2P_CMD: %d\n", ret); return ret; } /* only set default GPIO/LED settings if no EEPROM is detected */ if (!(buf & E2P_CMD_LOADED)) { ret = smsc75xx_read_reg(dev, 
LED_GPIO_CFG, &buf); if (ret < 0) { netdev_warn(dev->net, "Failed to read LED_GPIO_CFG: %d\n", ret); return ret; } buf &= ~(LED_GPIO_CFG_LED2_FUN_SEL | LED_GPIO_CFG_LED10_FUN_SEL); buf |= LED_GPIO_CFG_LEDGPIO_EN | LED_GPIO_CFG_LED2_FUN_SEL; ret = smsc75xx_write_reg(dev, LED_GPIO_CFG, buf); if (ret < 0) { netdev_warn(dev->net, "Failed to write LED_GPIO_CFG: %d\n", ret); return ret; } } ret = smsc75xx_write_reg(dev, FLOW, 0); if (ret < 0) { netdev_warn(dev->net, "Failed to write FLOW: %d\n", ret); return ret; } ret = smsc75xx_write_reg(dev, FCT_FLOW, 0); if (ret < 0) { netdev_warn(dev->net, "Failed to write FCT_FLOW: %d\n", ret); return ret; } /* Don't need rfe_ctl_lock during initialisation */ ret = smsc75xx_read_reg(dev, RFE_CTL, &pdata->rfe_ctl); if (ret < 0) { netdev_warn(dev->net, "Failed to read RFE_CTL: %d\n", ret); return ret; } pdata->rfe_ctl |= RFE_CTL_AB | RFE_CTL_DPF; ret = smsc75xx_write_reg(dev, RFE_CTL, pdata->rfe_ctl); if (ret < 0) { netdev_warn(dev->net, "Failed to write RFE_CTL: %d\n", ret); return ret; } ret = smsc75xx_read_reg(dev, RFE_CTL, &pdata->rfe_ctl); if (ret < 0) { netdev_warn(dev->net, "Failed to read RFE_CTL: %d\n", ret); return ret; } netif_dbg(dev, ifup, dev->net, "RFE_CTL set to 0x%08x\n", pdata->rfe_ctl); /* Enable or disable checksum offload engines */ smsc75xx_set_features(dev->net, dev->net->features); smsc75xx_set_multicast(dev->net); ret = smsc75xx_phy_initialize(dev); if (ret < 0) { netdev_warn(dev->net, "Failed to initialize PHY: %d\n", ret); return ret; } ret = smsc75xx_read_reg(dev, INT_EP_CTL, &buf); if (ret < 0) { netdev_warn(dev->net, "Failed to read INT_EP_CTL: %d\n", ret); return ret; } /* enable PHY interrupts */ buf |= INT_ENP_PHY_INT; ret = smsc75xx_write_reg(dev, INT_EP_CTL, buf); if (ret < 0) { netdev_warn(dev->net, "Failed to write INT_EP_CTL: %d\n", ret); return ret; } /* allow mac to detect speed and duplex from phy */ ret = smsc75xx_read_reg(dev, MAC_CR, &buf); if (ret < 0) { netdev_warn(dev->net, "Failed to 
read MAC_CR: %d\n", ret); return ret; } buf |= (MAC_CR_ADD | MAC_CR_ASD); ret = smsc75xx_write_reg(dev, MAC_CR, buf); if (ret < 0) { netdev_warn(dev->net, "Failed to write MAC_CR: %d\n", ret); return ret; } ret = smsc75xx_read_reg(dev, MAC_TX, &buf); if (ret < 0) { netdev_warn(dev->net, "Failed to read MAC_TX: %d\n", ret); return ret; } buf |= MAC_TX_TXEN; ret = smsc75xx_write_reg(dev, MAC_TX, buf); if (ret < 0) { netdev_warn(dev->net, "Failed to write MAC_TX: %d\n", ret); return ret; } netif_dbg(dev, ifup, dev->net, "MAC_TX set to 0x%08x\n", buf); ret = smsc75xx_read_reg(dev, FCT_TX_CTL, &buf); if (ret < 0) { netdev_warn(dev->net, "Failed to read FCT_TX_CTL: %d\n", ret); return ret; } buf |= FCT_TX_CTL_EN; ret = smsc75xx_write_reg(dev, FCT_TX_CTL, buf); if (ret < 0) { netdev_warn(dev->net, "Failed to write FCT_TX_CTL: %d\n", ret); return ret; } netif_dbg(dev, ifup, dev->net, "FCT_TX_CTL set to 0x%08x\n", buf); ret = smsc75xx_set_rx_max_frame_length(dev, dev->net->mtu + ETH_HLEN); if (ret < 0) { netdev_warn(dev->net, "Failed to set max rx frame length\n"); return ret; } ret = smsc75xx_read_reg(dev, MAC_RX, &buf); if (ret < 0) { netdev_warn(dev->net, "Failed to read MAC_RX: %d\n", ret); return ret; } buf |= MAC_RX_RXEN; ret = smsc75xx_write_reg(dev, MAC_RX, buf); if (ret < 0) { netdev_warn(dev->net, "Failed to write MAC_RX: %d\n", ret); return ret; } netif_dbg(dev, ifup, dev->net, "MAC_RX set to 0x%08x\n", buf); ret = smsc75xx_read_reg(dev, FCT_RX_CTL, &buf); if (ret < 0) { netdev_warn(dev->net, "Failed to read FCT_RX_CTL: %d\n", ret); return ret; } buf |= FCT_RX_CTL_EN; ret = smsc75xx_write_reg(dev, FCT_RX_CTL, buf); if (ret < 0) { netdev_warn(dev->net, "Failed to write FCT_RX_CTL: %d\n", ret); return ret; } netif_dbg(dev, ifup, dev->net, "FCT_RX_CTL set to 0x%08x\n", buf); netif_dbg(dev, ifup, dev->net, "smsc75xx_reset, return 0\n"); return 0; } static const struct net_device_ops smsc75xx_netdev_ops = { .ndo_open = usbnet_open, .ndo_stop = usbnet_stop, 
.ndo_start_xmit = usbnet_start_xmit, .ndo_tx_timeout = usbnet_tx_timeout, .ndo_get_stats64 = dev_get_tstats64, .ndo_change_mtu = smsc75xx_change_mtu, .ndo_set_mac_address = eth_mac_addr, .ndo_validate_addr = eth_validate_addr, .ndo_eth_ioctl = smsc75xx_ioctl, .ndo_set_rx_mode = smsc75xx_set_multicast, .ndo_set_features = smsc75xx_set_features, }; static int smsc75xx_bind(struct usbnet *dev, struct usb_interface *intf) { struct smsc75xx_priv *pdata = NULL; int ret; ret = usbnet_get_endpoints(dev, intf); if (ret < 0) { netdev_warn(dev->net, "usbnet_get_endpoints failed: %d\n", ret); return ret; } dev->data[0] = (unsigned long) kzalloc_obj(struct smsc75xx_priv); pdata = (struct smsc75xx_priv *)(dev->data[0]); if (!pdata) return -ENOMEM; pdata->dev = dev; spin_lock_init(&pdata->rfe_ctl_lock); mutex_init(&pdata->dataport_mutex); INIT_WORK(&pdata->set_multicast, smsc75xx_deferred_multicast_write); if (DEFAULT_TX_CSUM_ENABLE) dev->net->features |= NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM; if (DEFAULT_RX_CSUM_ENABLE) dev->net->features |= NETIF_F_RXCSUM; dev->net->hw_features = NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | NETIF_F_RXCSUM; ret = smsc75xx_wait_ready(dev, 0); if (ret < 0) { netdev_warn(dev->net, "device not ready in smsc75xx_bind\n"); goto free_pdata; } smsc75xx_init_mac_address(dev); /* Init all registers */ ret = smsc75xx_reset(dev); if (ret < 0) { netdev_warn(dev->net, "smsc75xx_reset error %d\n", ret); goto cancel_work; } dev->net->netdev_ops = &smsc75xx_netdev_ops; dev->net->ethtool_ops = &smsc75xx_ethtool_ops; dev->net->flags |= IFF_MULTICAST; dev->net->hard_header_len += SMSC75XX_TX_OVERHEAD; dev->hard_mtu = dev->net->mtu + dev->net->hard_header_len; dev->net->max_mtu = MAX_SINGLE_PACKET_SIZE; return 0; cancel_work: cancel_work_sync(&pdata->set_multicast); free_pdata: kfree(pdata); dev->data[0] = 0; return ret; } static void smsc75xx_unbind(struct usbnet *dev, struct usb_interface *intf) { struct smsc75xx_priv *pdata = (struct smsc75xx_priv *)(dev->data[0]); if 
(pdata) { cancel_work_sync(&pdata->set_multicast); netif_dbg(dev, ifdown, dev->net, "free pdata\n"); kfree(pdata); dev->data[0] = 0; } } static u16 smsc_crc(const u8 *buffer, size_t len) { return bitrev16(crc16(0xFFFF, buffer, len)); } static int smsc75xx_write_wuff(struct usbnet *dev, int filter, u32 wuf_cfg, u32 wuf_mask1) { int cfg_base = WUF_CFGX + filter * 4; int mask_base = WUF_MASKX + filter * 16; int ret; ret = smsc75xx_write_reg(dev, cfg_base, wuf_cfg); if (ret < 0) { netdev_warn(dev->net, "Error writing WUF_CFGX\n"); return ret; } ret = smsc75xx_write_reg(dev, mask_base, wuf_mask1); if (ret < 0) { netdev_warn(dev->net, "Error writing WUF_MASKX\n"); return ret; } ret = smsc75xx_write_reg(dev, mask_base + 4, 0); if (ret < 0) { netdev_warn(dev->net, "Error writing WUF_MASKX\n"); return ret; } ret = smsc75xx_write_reg(dev, mask_base + 8, 0); if (ret < 0) { netdev_warn(dev->net, "Error writing WUF_MASKX\n"); return ret; } ret = smsc75xx_write_reg(dev, mask_base + 12, 0); if (ret < 0) { netdev_warn(dev->net, "Error writing WUF_MASKX\n"); return ret; } return 0; } static int smsc75xx_enter_suspend0(struct usbnet *dev) { struct smsc75xx_priv *pdata = (struct smsc75xx_priv *)(dev->data[0]); u32 val; int ret; ret = smsc75xx_read_reg_nopm(dev, PMT_CTL, &val); if (ret < 0) { netdev_warn(dev->net, "Error reading PMT_CTL\n"); return ret; } val &= (~(PMT_CTL_SUS_MODE | PMT_CTL_PHY_RST)); val |= PMT_CTL_SUS_MODE_0 | PMT_CTL_WOL_EN | PMT_CTL_WUPS; ret = smsc75xx_write_reg_nopm(dev, PMT_CTL, val); if (ret < 0) { netdev_warn(dev->net, "Error writing PMT_CTL\n"); return ret; } pdata->suspend_flags |= SUSPEND_SUSPEND0; return 0; } static int smsc75xx_enter_suspend1(struct usbnet *dev) { struct smsc75xx_priv *pdata = (struct smsc75xx_priv *)(dev->data[0]); u32 val; int ret; ret = smsc75xx_read_reg_nopm(dev, PMT_CTL, &val); if (ret < 0) { netdev_warn(dev->net, "Error reading PMT_CTL\n"); return ret; } val &= ~(PMT_CTL_SUS_MODE | PMT_CTL_WUPS | PMT_CTL_PHY_RST); val |= 
PMT_CTL_SUS_MODE_1; ret = smsc75xx_write_reg_nopm(dev, PMT_CTL, val); if (ret < 0) { netdev_warn(dev->net, "Error writing PMT_CTL\n"); return ret; } /* clear wol status, enable energy detection */ val &= ~PMT_CTL_WUPS; val |= (PMT_CTL_WUPS_ED | PMT_CTL_ED_EN); ret = smsc75xx_write_reg_nopm(dev, PMT_CTL, val); if (ret < 0) { netdev_warn(dev->net, "Error writing PMT_CTL\n"); return ret; } pdata->suspend_flags |= SUSPEND_SUSPEND1; return 0; } static int smsc75xx_enter_suspend2(struct usbnet *dev) { struct smsc75xx_priv *pdata = (struct smsc75xx_priv *)(dev->data[0]); u32 val; int ret; ret = smsc75xx_read_reg_nopm(dev, PMT_CTL, &val); if (ret < 0) { netdev_warn(dev->net, "Error reading PMT_CTL\n"); return ret; } val &= ~(PMT_CTL_SUS_MODE | PMT_CTL_WUPS | PMT_CTL_PHY_RST); val |= PMT_CTL_SUS_MODE_2; ret = smsc75xx_write_reg_nopm(dev, PMT_CTL, val); if (ret < 0) { netdev_warn(dev->net, "Error writing PMT_CTL\n"); return ret; } pdata->suspend_flags |= SUSPEND_SUSPEND2; return 0; } static int smsc75xx_enter_suspend3(struct usbnet *dev) { struct smsc75xx_priv *pdata = (struct smsc75xx_priv *)(dev->data[0]); u32 val; int ret; ret = smsc75xx_read_reg_nopm(dev, FCT_RX_CTL, &val); if (ret < 0) { netdev_warn(dev->net, "Error reading FCT_RX_CTL\n"); return ret; } if (val & FCT_RX_CTL_RXUSED) { netdev_dbg(dev->net, "rx fifo not empty in autosuspend\n"); return -EBUSY; } ret = smsc75xx_read_reg_nopm(dev, PMT_CTL, &val); if (ret < 0) { netdev_warn(dev->net, "Error reading PMT_CTL\n"); return ret; } val &= ~(PMT_CTL_SUS_MODE | PMT_CTL_WUPS | PMT_CTL_PHY_RST); val |= PMT_CTL_SUS_MODE_3 | PMT_CTL_RES_CLR_WKP_EN; ret = smsc75xx_write_reg_nopm(dev, PMT_CTL, val); if (ret < 0) { netdev_warn(dev->net, "Error writing PMT_CTL\n"); return ret; } /* clear wol status */ val &= ~PMT_CTL_WUPS; val |= PMT_CTL_WUPS_WOL; ret = smsc75xx_write_reg_nopm(dev, PMT_CTL, val); if (ret < 0) { netdev_warn(dev->net, "Error writing PMT_CTL\n"); return ret; } pdata->suspend_flags |= SUSPEND_SUSPEND3; return 0; 
} static int smsc75xx_enable_phy_wakeup_interrupts(struct usbnet *dev, u16 mask) { struct mii_if_info *mii = &dev->mii; int ret; netdev_dbg(dev->net, "enabling PHY wakeup interrupts\n"); /* read to clear */ ret = smsc75xx_mdio_read_nopm(dev->net, mii->phy_id, PHY_INT_SRC); if (ret < 0) { netdev_warn(dev->net, "Error reading PHY_INT_SRC\n"); return ret; } /* enable interrupt source */ ret = smsc75xx_mdio_read_nopm(dev->net, mii->phy_id, PHY_INT_MASK); if (ret < 0) { netdev_warn(dev->net, "Error reading PHY_INT_MASK\n"); return ret; } ret |= mask; smsc75xx_mdio_write_nopm(dev->net, mii->phy_id, PHY_INT_MASK, ret); return 0; } static int smsc75xx_link_ok_nopm(struct usbnet *dev) { struct mii_if_info *mii = &dev->mii; int ret; /* first, a dummy read, needed to latch some MII phys */ ret = smsc75xx_mdio_read_nopm(dev->net, mii->phy_id, MII_BMSR); if (ret < 0) { netdev_warn(dev->net, "Error reading MII_BMSR\n"); return ret; } ret = smsc75xx_mdio_read_nopm(dev->net, mii->phy_id, MII_BMSR); if (ret < 0) { netdev_warn(dev->net, "Error reading MII_BMSR\n"); return ret; } return !!(ret & BMSR_LSTATUS); } static int smsc75xx_autosuspend(struct usbnet *dev, u32 link_up) { int ret; if (!netif_running(dev->net)) { /* interface is ifconfig down so fully power down hw */ netdev_dbg(dev->net, "autosuspend entering SUSPEND2\n"); return smsc75xx_enter_suspend2(dev); } if (!link_up) { /* link is down so enter EDPD mode */ netdev_dbg(dev->net, "autosuspend entering SUSPEND1\n"); /* enable PHY wakeup events for if cable is attached */ ret = smsc75xx_enable_phy_wakeup_interrupts(dev, PHY_INT_MASK_ANEG_COMP); if (ret < 0) { netdev_warn(dev->net, "error enabling PHY wakeup ints\n"); return ret; } netdev_info(dev->net, "entering SUSPEND1 mode\n"); return smsc75xx_enter_suspend1(dev); } /* enable PHY wakeup events so we remote wakeup if cable is pulled */ ret = smsc75xx_enable_phy_wakeup_interrupts(dev, PHY_INT_MASK_LINK_DOWN); if (ret < 0) { netdev_warn(dev->net, "error enabling PHY wakeup 
ints\n"); return ret; } netdev_dbg(dev->net, "autosuspend entering SUSPEND3\n"); return smsc75xx_enter_suspend3(dev); } static int smsc75xx_suspend(struct usb_interface *intf, pm_message_t message) { struct usbnet *dev = usb_get_intfdata(intf); struct smsc75xx_priv *pdata = (struct smsc75xx_priv *)(dev->data[0]); u32 val, link_up; int ret; ret = usbnet_suspend(intf, message); if (ret < 0) { netdev_warn(dev->net, "usbnet_suspend error\n"); return ret; } if (pdata->suspend_flags) { netdev_warn(dev->net, "error during last resume\n"); pdata->suspend_flags = 0; } /* determine if link is up using only _nopm functions */ link_up = smsc75xx_link_ok_nopm(dev); if (message.event == PM_EVENT_AUTO_SUSPEND) { ret = smsc75xx_autosuspend(dev, link_up); goto done; } /* if we get this far we're not autosuspending */ /* if no wol options set, or if link is down and we're not waking on * PHY activity, enter lowest power SUSPEND2 mode */ if (!(pdata->wolopts & SUPPORTED_WAKE) || !(link_up || (pdata->wolopts & WAKE_PHY))) { netdev_info(dev->net, "entering SUSPEND2 mode\n"); /* disable energy detect (link up) & wake up events */ ret = smsc75xx_read_reg_nopm(dev, WUCSR, &val); if (ret < 0) { netdev_warn(dev->net, "Error reading WUCSR\n"); goto done; } val &= ~(WUCSR_MPEN | WUCSR_WUEN); ret = smsc75xx_write_reg_nopm(dev, WUCSR, val); if (ret < 0) { netdev_warn(dev->net, "Error writing WUCSR\n"); goto done; } ret = smsc75xx_read_reg_nopm(dev, PMT_CTL, &val); if (ret < 0) { netdev_warn(dev->net, "Error reading PMT_CTL\n"); goto done; } val &= ~(PMT_CTL_ED_EN | PMT_CTL_WOL_EN); ret = smsc75xx_write_reg_nopm(dev, PMT_CTL, val); if (ret < 0) { netdev_warn(dev->net, "Error writing PMT_CTL\n"); goto done; } ret = smsc75xx_enter_suspend2(dev); goto done; } if (pdata->wolopts & WAKE_PHY) { ret = smsc75xx_enable_phy_wakeup_interrupts(dev, (PHY_INT_MASK_ANEG_COMP | PHY_INT_MASK_LINK_DOWN)); if (ret < 0) { netdev_warn(dev->net, "error enabling PHY wakeup ints\n"); goto done; } /* if link is down 
then configure EDPD and enter SUSPEND1, * otherwise enter SUSPEND0 below */ if (!link_up) { struct mii_if_info *mii = &dev->mii; netdev_info(dev->net, "entering SUSPEND1 mode\n"); /* enable energy detect power-down mode */ ret = smsc75xx_mdio_read_nopm(dev->net, mii->phy_id, PHY_MODE_CTRL_STS); if (ret < 0) { netdev_warn(dev->net, "Error reading PHY_MODE_CTRL_STS\n"); goto done; } ret |= MODE_CTRL_STS_EDPWRDOWN; smsc75xx_mdio_write_nopm(dev->net, mii->phy_id, PHY_MODE_CTRL_STS, ret); /* enter SUSPEND1 mode */ ret = smsc75xx_enter_suspend1(dev); goto done; } } if (pdata->wolopts & (WAKE_MCAST | WAKE_ARP)) { int i, filter = 0; /* disable all filters */ for (i = 0; i < WUF_NUM; i++) { ret = smsc75xx_write_reg_nopm(dev, WUF_CFGX + i * 4, 0); if (ret < 0) { netdev_warn(dev->net, "Error writing WUF_CFGX\n"); goto done; } } if (pdata->wolopts & WAKE_MCAST) { const u8 mcast[] = {0x01, 0x00, 0x5E}; netdev_info(dev->net, "enabling multicast detection\n"); val = WUF_CFGX_EN | WUF_CFGX_ATYPE_MULTICAST | smsc_crc(mcast, 3); ret = smsc75xx_write_wuff(dev, filter++, val, 0x0007); if (ret < 0) { netdev_warn(dev->net, "Error writing wakeup filter\n"); goto done; } } if (pdata->wolopts & WAKE_ARP) { const u8 arp[] = {0x08, 0x06}; netdev_info(dev->net, "enabling ARP detection\n"); val = WUF_CFGX_EN | WUF_CFGX_ATYPE_ALL | (0x0C << 16) | smsc_crc(arp, 2); ret = smsc75xx_write_wuff(dev, filter++, val, 0x0003); if (ret < 0) { netdev_warn(dev->net, "Error writing wakeup filter\n"); goto done; } } /* clear any pending pattern match packet status */ ret = smsc75xx_read_reg_nopm(dev, WUCSR, &val); if (ret < 0) { netdev_warn(dev->net, "Error reading WUCSR\n"); goto done; } val |= WUCSR_WUFR; ret = smsc75xx_write_reg_nopm(dev, WUCSR, val); if (ret < 0) { netdev_warn(dev->net, "Error writing WUCSR\n"); goto done; } netdev_info(dev->net, "enabling packet match detection\n"); ret = smsc75xx_read_reg_nopm(dev, WUCSR, &val); if (ret < 0) { netdev_warn(dev->net, "Error reading WUCSR\n"); goto done; 
} val |= WUCSR_WUEN; ret = smsc75xx_write_reg_nopm(dev, WUCSR, val); if (ret < 0) { netdev_warn(dev->net, "Error writing WUCSR\n"); goto done; } } else { netdev_info(dev->net, "disabling packet match detection\n"); ret = smsc75xx_read_reg_nopm(dev, WUCSR, &val); if (ret < 0) { netdev_warn(dev->net, "Error reading WUCSR\n"); goto done; } val &= ~WUCSR_WUEN; ret = smsc75xx_write_reg_nopm(dev, WUCSR, val); if (ret < 0) { netdev_warn(dev->net, "Error writing WUCSR\n"); goto done; } } /* disable magic, bcast & unicast wakeup sources */ ret = smsc75xx_read_reg_nopm(dev, WUCSR, &val); if (ret < 0) { netdev_warn(dev->net, "Error reading WUCSR\n"); goto done; } val &= ~(WUCSR_MPEN | WUCSR_BCST_EN | WUCSR_PFDA_EN); ret = smsc75xx_write_reg_nopm(dev, WUCSR, val); if (ret < 0) { netdev_warn(dev->net, "Error writing WUCSR\n"); goto done; } if (pdata->wolopts & WAKE_PHY) { netdev_info(dev->net, "enabling PHY wakeup\n"); ret = smsc75xx_read_reg_nopm(dev, PMT_CTL, &val); if (ret < 0) { netdev_warn(dev->net, "Error reading PMT_CTL\n"); goto done; } /* clear wol status, enable energy detection */ val &= ~PMT_CTL_WUPS; val |= (PMT_CTL_WUPS_ED | PMT_CTL_ED_EN); ret = smsc75xx_write_reg_nopm(dev, PMT_CTL, val); if (ret < 0) { netdev_warn(dev->net, "Error writing PMT_CTL\n"); goto done; } } if (pdata->wolopts & WAKE_MAGIC) { netdev_info(dev->net, "enabling magic packet wakeup\n"); ret = smsc75xx_read_reg_nopm(dev, WUCSR, &val); if (ret < 0) { netdev_warn(dev->net, "Error reading WUCSR\n"); goto done; } /* clear any pending magic packet status */ val |= WUCSR_MPR | WUCSR_MPEN; ret = smsc75xx_write_reg_nopm(dev, WUCSR, val); if (ret < 0) { netdev_warn(dev->net, "Error writing WUCSR\n"); goto done; } } if (pdata->wolopts & WAKE_BCAST) { netdev_info(dev->net, "enabling broadcast detection\n"); ret = smsc75xx_read_reg_nopm(dev, WUCSR, &val); if (ret < 0) { netdev_warn(dev->net, "Error reading WUCSR\n"); goto done; } val |= WUCSR_BCAST_FR | WUCSR_BCST_EN; ret = smsc75xx_write_reg_nopm(dev, 
WUCSR, val); if (ret < 0) { netdev_warn(dev->net, "Error writing WUCSR\n"); goto done; } } if (pdata->wolopts & WAKE_UCAST) { netdev_info(dev->net, "enabling unicast detection\n"); ret = smsc75xx_read_reg_nopm(dev, WUCSR, &val); if (ret < 0) { netdev_warn(dev->net, "Error reading WUCSR\n"); goto done; } val |= WUCSR_WUFR | WUCSR_PFDA_EN; ret = smsc75xx_write_reg_nopm(dev, WUCSR, val); if (ret < 0) { netdev_warn(dev->net, "Error writing WUCSR\n"); goto done; } } /* enable receiver to enable frame reception */ ret = smsc75xx_read_reg_nopm(dev, MAC_RX, &val); if (ret < 0) { netdev_warn(dev->net, "Failed to read MAC_RX: %d\n", ret); goto done; } val |= MAC_RX_RXEN; ret = smsc75xx_write_reg_nopm(dev, MAC_RX, val); if (ret < 0) { netdev_warn(dev->net, "Failed to write MAC_RX: %d\n", ret); goto done; } /* some wol options are enabled, so enter SUSPEND0 */ netdev_info(dev->net, "entering SUSPEND0 mode\n"); ret = smsc75xx_enter_suspend0(dev); done: /* * TODO: resume() might need to handle the suspend failure * in system sleep */ if (ret && PMSG_IS_AUTO(message)) usbnet_resume(intf); return ret; } static int smsc75xx_resume(struct usb_interface *intf) { struct usbnet *dev = usb_get_intfdata(intf); struct smsc75xx_priv *pdata = (struct smsc75xx_priv *)(dev->data[0]); u8 suspend_flags = pdata->suspend_flags; int ret; u32 val; netdev_dbg(dev->net, "resume suspend_flags=0x%02x\n", suspend_flags); /* do this first to ensure it's cleared even in error case */ pdata->suspend_flags = 0; if (suspend_flags & SUSPEND_ALLMODES) { /* Disable wakeup sources */ ret = smsc75xx_read_reg_nopm(dev, WUCSR, &val); if (ret < 0) { netdev_warn(dev->net, "Error reading WUCSR\n"); return ret; } val &= ~(WUCSR_WUEN | WUCSR_MPEN | WUCSR_PFDA_EN | WUCSR_BCST_EN); ret = smsc75xx_write_reg_nopm(dev, WUCSR, val); if (ret < 0) { netdev_warn(dev->net, "Error writing WUCSR\n"); return ret; } /* clear wake-up status */ ret = smsc75xx_read_reg_nopm(dev, PMT_CTL, &val); if (ret < 0) { netdev_warn(dev->net, 
"Error reading PMT_CTL\n"); return ret; } val &= ~PMT_CTL_WOL_EN; val |= PMT_CTL_WUPS; ret = smsc75xx_write_reg_nopm(dev, PMT_CTL, val); if (ret < 0) { netdev_warn(dev->net, "Error writing PMT_CTL\n"); return ret; } } if (suspend_flags & SUSPEND_SUSPEND2) { netdev_info(dev->net, "resuming from SUSPEND2\n"); ret = smsc75xx_read_reg_nopm(dev, PMT_CTL, &val); if (ret < 0) { netdev_warn(dev->net, "Error reading PMT_CTL\n"); return ret; } val |= PMT_CTL_PHY_PWRUP; ret = smsc75xx_write_reg_nopm(dev, PMT_CTL, val); if (ret < 0) { netdev_warn(dev->net, "Error writing PMT_CTL\n"); return ret; } } ret = smsc75xx_wait_ready(dev, 1); if (ret < 0) { netdev_warn(dev->net, "device not ready in smsc75xx_resume\n"); return ret; } return usbnet_resume(intf); } static void smsc75xx_rx_csum_offload(struct usbnet *dev, struct sk_buff *skb, u32 rx_cmd_a, u32 rx_cmd_b) { if (!(dev->net->features & NETIF_F_RXCSUM) || unlikely(rx_cmd_a & RX_CMD_A_LCSM)) { skb->ip_summed = CHECKSUM_NONE; } else { skb->csum = ntohs((u16)(rx_cmd_b >> RX_CMD_B_CSUM_SHIFT)); skb->ip_summed = CHECKSUM_COMPLETE; } } static int smsc75xx_rx_fixup(struct usbnet *dev, struct sk_buff *skb) { /* This check is no longer done by usbnet */ if (skb->len < dev->net->hard_header_len) return 0; while (skb->len > 0) { u32 rx_cmd_a, rx_cmd_b, align_count, size; struct sk_buff *ax_skb; unsigned char *packet; rx_cmd_a = get_unaligned_le32(skb->data); skb_pull(skb, 4); rx_cmd_b = get_unaligned_le32(skb->data); skb_pull(skb, 4 + RXW_PADDING); packet = skb->data; /* get the packet length */ size = (rx_cmd_a & RX_CMD_A_LEN) - RXW_PADDING; align_count = (4 - ((size + RXW_PADDING) % 4)) % 4; if (unlikely(size > skb->len)) { netif_dbg(dev, rx_err, dev->net, "size err rx_cmd_a=0x%08x\n", rx_cmd_a); return 0; } if (unlikely(rx_cmd_a & RX_CMD_A_RED)) { netif_dbg(dev, rx_err, dev->net, "Error rx_cmd_a=0x%08x\n", rx_cmd_a); dev->net->stats.rx_errors++; dev->net->stats.rx_dropped++; if (rx_cmd_a & RX_CMD_A_FCS) 
dev->net->stats.rx_crc_errors++; else if (rx_cmd_a & (RX_CMD_A_LONG | RX_CMD_A_RUNT)) dev->net->stats.rx_frame_errors++; } else { /* MAX_SINGLE_PACKET_SIZE + 4(CRC) + 2(COE) + 4(Vlan) */ if (unlikely(size > (MAX_SINGLE_PACKET_SIZE + ETH_HLEN + 12))) { netif_dbg(dev, rx_err, dev->net, "size err rx_cmd_a=0x%08x\n", rx_cmd_a); return 0; } /* last frame in this batch */ if (skb->len == size) { smsc75xx_rx_csum_offload(dev, skb, rx_cmd_a, rx_cmd_b); skb_trim(skb, skb->len - 4); /* remove fcs */ return 1; } /* Use "size - 4" to remove fcs */ ax_skb = netdev_alloc_skb_ip_align(dev->net, size - 4); if (unlikely(!ax_skb)) { netdev_warn(dev->net, "Error allocating skb\n"); return 0; } skb_put(ax_skb, size - 4); memcpy(ax_skb->data, packet, size - 4); smsc75xx_rx_csum_offload(dev, ax_skb, rx_cmd_a, rx_cmd_b); usbnet_skb_return(dev, ax_skb); } skb_pull(skb, size); /* padding bytes before the next frame starts */ if (skb->len) skb_pull(skb, align_count); } return 1; } static struct sk_buff *smsc75xx_tx_fixup(struct usbnet *dev, struct sk_buff *skb, gfp_t flags) { u32 tx_cmd_a, tx_cmd_b; void *ptr; if (skb_cow_head(skb, SMSC75XX_TX_OVERHEAD)) { dev_kfree_skb_any(skb); return NULL; } tx_cmd_a = (u32)(skb->len & TX_CMD_A_LEN) | TX_CMD_A_FCS; if (skb->ip_summed == CHECKSUM_PARTIAL) tx_cmd_a |= TX_CMD_A_IPE | TX_CMD_A_TPE; if (skb_is_gso(skb)) { u16 mss = max(skb_shinfo(skb)->gso_size, TX_MSS_MIN); tx_cmd_b = (mss << TX_CMD_B_MSS_SHIFT) & TX_CMD_B_MSS; tx_cmd_a |= TX_CMD_A_LSO; } else { tx_cmd_b = 0; } ptr = skb_push(skb, 8); put_unaligned_le32(tx_cmd_a, ptr); put_unaligned_le32(tx_cmd_b, ptr + 4); return skb; } static int smsc75xx_manage_power(struct usbnet *dev, int on) { dev->intf->needs_remote_wakeup = on; return 0; } static const struct driver_info smsc75xx_info = { .description = "smsc75xx USB 2.0 Gigabit Ethernet", .bind = smsc75xx_bind, .unbind = smsc75xx_unbind, .link_reset = smsc75xx_link_reset, .reset = smsc75xx_reset, .rx_fixup = smsc75xx_rx_fixup, .tx_fixup = 
smsc75xx_tx_fixup, .status = smsc75xx_status, .manage_power = smsc75xx_manage_power, .flags = FLAG_ETHER | FLAG_SEND_ZLP | FLAG_LINK_INTR, }; static const struct usb_device_id products[] = { { /* SMSC7500 USB Gigabit Ethernet Device */ USB_DEVICE(USB_VENDOR_ID_SMSC, USB_PRODUCT_ID_LAN7500), .driver_info = (unsigned long) &smsc75xx_info, }, { /* SMSC7500 USB Gigabit Ethernet Device */ USB_DEVICE(USB_VENDOR_ID_SMSC, USB_PRODUCT_ID_LAN7505), .driver_info = (unsigned long) &smsc75xx_info, }, { }, /* END */ }; MODULE_DEVICE_TABLE(usb, products); static struct usb_driver smsc75xx_driver = { .name = SMSC_CHIPNAME, .id_table = products, .probe = usbnet_probe, .suspend = smsc75xx_suspend, .resume = smsc75xx_resume, .reset_resume = smsc75xx_resume, .disconnect = usbnet_disconnect, .disable_hub_initiated_lpm = 1, .supports_autosuspend = 1, }; module_usb_driver(smsc75xx_driver); MODULE_AUTHOR("Nancy Lin"); MODULE_AUTHOR("Steve Glendinning <steve.glendinning@shawell.net>"); MODULE_DESCRIPTION("SMSC75XX USB 2.0 Gigabit Ethernet Devices"); MODULE_LICENSE("GPL");
17 17 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 
526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 // SPDX-License-Identifier: GPL-2.0-only /* * This file contains vfs inode ops for the 9P2000.L protocol. 
* * Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com> * Copyright (C) 2002 by Ron Minnich <rminnich@lanl.gov> */ #include <linux/module.h> #include <linux/errno.h> #include <linux/fs.h> #include <linux/file.h> #include <linux/pagemap.h> #include <linux/stat.h> #include <linux/string.h> #include <linux/namei.h> #include <linux/sched.h> #include <linux/slab.h> #include <linux/xattr.h> #include <linux/posix_acl.h> #include <net/9p/9p.h> #include <net/9p/client.h> #include "v9fs.h" #include "v9fs_vfs.h" #include "fid.h" #include "cache.h" #include "xattr.h" #include "acl.h" static int v9fs_vfs_mknod_dotl(struct mnt_idmap *idmap, struct inode *dir, struct dentry *dentry, umode_t omode, dev_t rdev); /** * v9fs_get_fsgid_for_create - Helper function to get the gid for a new object * @dir_inode: The directory inode * * Helper function to get the gid for creating a * new file system object. This checks the S_ISGID to determine the owning * group of the new file system object. */ static kgid_t v9fs_get_fsgid_for_create(struct inode *dir_inode) { BUG_ON(dir_inode == NULL); if (dir_inode->i_mode & S_ISGID) { /* set_gid bit is set.*/ return dir_inode->i_gid; } return current_fsgid(); } static int v9fs_test_inode_dotl(struct inode *inode, void *data) { struct v9fs_inode *v9inode = V9FS_I(inode); struct p9_stat_dotl *st = (struct p9_stat_dotl *)data; /* don't match inode of different type */ if (inode_wrong_type(inode, st->st_mode)) return 0; if (inode->i_generation != st->st_gen) return 0; /* compare qid details */ if (memcmp(&v9inode->qid.version, &st->qid.version, sizeof(v9inode->qid.version))) return 0; if (v9inode->qid.type != st->qid.type) return 0; if (v9inode->qid.path != st->qid.path) return 0; return 1; } /* Always get a new inode */ static int v9fs_test_new_inode_dotl(struct inode *inode, void *data) { return 0; } static int v9fs_set_inode_dotl(struct inode *inode, void *data) { struct v9fs_inode *v9inode = V9FS_I(inode); struct p9_stat_dotl *st = (struct 
p9_stat_dotl *)data; memcpy(&v9inode->qid, &st->qid, sizeof(st->qid)); inode->i_generation = st->st_gen; return 0; } static struct inode *v9fs_qid_iget_dotl(struct super_block *sb, struct p9_qid *qid, struct p9_fid *fid, struct p9_stat_dotl *st, int new) { int retval; struct inode *inode; struct v9fs_session_info *v9ses = sb->s_fs_info; int (*test)(struct inode *inode, void *data); if (new) test = v9fs_test_new_inode_dotl; else test = v9fs_test_inode_dotl; inode = iget5_locked(sb, QID2INO(qid), test, v9fs_set_inode_dotl, st); if (!inode) return ERR_PTR(-ENOMEM); if (!(inode_state_read_once(inode) & I_NEW)) return inode; /* * initialize the inode with the stat info * FIXME!! we may need support for stale inodes * later. */ inode->i_ino = QID2INO(qid); retval = v9fs_init_inode(v9ses, inode, st->st_mode, new_decode_dev(st->st_rdev)); if (retval) goto error; v9fs_stat2inode_dotl(st, inode, 0); v9fs_set_netfs_context(inode); v9fs_cache_inode_get_cookie(inode); retval = v9fs_get_acl(inode, fid); if (retval) goto error; unlock_new_inode(inode); return inode; error: iget_failed(inode); return ERR_PTR(retval); } struct inode * v9fs_inode_from_fid_dotl(struct v9fs_session_info *v9ses, struct p9_fid *fid, struct super_block *sb, int new) { struct p9_stat_dotl *st; struct inode *inode = NULL; st = p9_client_getattr_dotl(fid, P9_STATS_BASIC | P9_STATS_GEN); if (IS_ERR(st)) return ERR_CAST(st); inode = v9fs_qid_iget_dotl(sb, &st->qid, fid, st, new); kfree(st); return inode; } struct dotl_openflag_map { int open_flag; int dotl_flag; }; static int v9fs_mapped_dotl_flags(int flags) { int i; int rflags = 0; struct dotl_openflag_map dotl_oflag_map[] = { { O_CREAT, P9_DOTL_CREATE }, { O_EXCL, P9_DOTL_EXCL }, { O_NOCTTY, P9_DOTL_NOCTTY }, { O_APPEND, P9_DOTL_APPEND }, { O_NONBLOCK, P9_DOTL_NONBLOCK }, { O_DSYNC, P9_DOTL_DSYNC }, { FASYNC, P9_DOTL_FASYNC }, { O_DIRECT, P9_DOTL_DIRECT }, { O_LARGEFILE, P9_DOTL_LARGEFILE }, { O_DIRECTORY, P9_DOTL_DIRECTORY }, { O_NOFOLLOW, 
P9_DOTL_NOFOLLOW }, { O_NOATIME, P9_DOTL_NOATIME }, { O_CLOEXEC, P9_DOTL_CLOEXEC }, { O_SYNC, P9_DOTL_SYNC}, }; for (i = 0; i < ARRAY_SIZE(dotl_oflag_map); i++) { if (flags & dotl_oflag_map[i].open_flag) rflags |= dotl_oflag_map[i].dotl_flag; } return rflags; } /** * v9fs_open_to_dotl_flags- convert Linux specific open flags to * plan 9 open flag. * @flags: flags to convert */ int v9fs_open_to_dotl_flags(int flags) { int rflags = 0; /* * We have same bits for P9_DOTL_READONLY, P9_DOTL_WRONLY * and P9_DOTL_NOACCESS */ rflags |= flags & O_ACCMODE; rflags |= v9fs_mapped_dotl_flags(flags); return rflags; } /** * v9fs_vfs_create_dotl - VFS hook to create files for 9P2000.L protocol. * @idmap: The user namespace of the mount * @dir: directory inode that is being created * @dentry: dentry that is being deleted * @omode: create permissions * @excl: True if the file must not yet exist * */ static int v9fs_vfs_create_dotl(struct mnt_idmap *idmap, struct inode *dir, struct dentry *dentry, umode_t omode, bool excl) { return v9fs_vfs_mknod_dotl(idmap, dir, dentry, omode, 0); } static int v9fs_vfs_atomic_open_dotl(struct inode *dir, struct dentry *dentry, struct file *file, unsigned int flags, umode_t omode) { int err = 0; kgid_t gid; umode_t mode; int p9_omode = v9fs_open_to_dotl_flags(flags); const unsigned char *name = NULL; struct p9_qid qid; struct inode *inode; struct p9_fid *fid = NULL; struct p9_fid *dfid = NULL, *ofid = NULL; struct v9fs_session_info *v9ses; struct posix_acl *pacl = NULL, *dacl = NULL; if (d_in_lookup(dentry)) { struct dentry *res = v9fs_vfs_lookup(dir, dentry, 0); if (res || d_really_is_positive(dentry)) return finish_no_open(file, res); } /* Only creates */ if (!(flags & O_CREAT)) return finish_no_open(file, NULL); v9ses = v9fs_inode2v9ses(dir); name = dentry->d_name.name; p9_debug(P9_DEBUG_VFS, "name:%s flags:0x%x mode:0x%x\n", name, flags, omode); dfid = v9fs_parent_fid(dentry); if (IS_ERR(dfid)) { err = PTR_ERR(dfid); p9_debug(P9_DEBUG_VFS, "fid 
lookup failed %d\n", err); goto out; } /* clone a fid to use for creation */ ofid = clone_fid(dfid); if (IS_ERR(ofid)) { err = PTR_ERR(ofid); p9_debug(P9_DEBUG_VFS, "p9_client_walk failed %d\n", err); goto out; } gid = v9fs_get_fsgid_for_create(dir); mode = omode; /* Update mode based on ACL value */ err = v9fs_acl_mode(dir, &mode, &dacl, &pacl); if (err) { p9_debug(P9_DEBUG_VFS, "Failed to get acl values in create %d\n", err); goto out; } if ((v9ses->cache & CACHE_WRITEBACK) && (p9_omode & P9_OWRITE)) { p9_omode = (p9_omode & ~(P9_OWRITE | P9_DOTL_APPEND)) | P9_ORDWR; p9_debug(P9_DEBUG_CACHE, "write-only file with writeback enabled, creating w/ O_RDWR\n"); } err = p9_client_create_dotl(ofid, name, p9_omode, mode, gid, &qid); if (err < 0) { p9_debug(P9_DEBUG_VFS, "p9_client_open_dotl failed in create %d\n", err); goto out; } v9fs_invalidate_inode_attr(dir); /* instantiate inode and assign the unopened fid to the dentry */ fid = p9_client_walk(dfid, 1, &name, 1); if (IS_ERR(fid)) { err = PTR_ERR(fid); p9_debug(P9_DEBUG_VFS, "p9_client_walk failed %d\n", err); goto out; } inode = v9fs_get_new_inode_from_fid(v9ses, fid, dir->i_sb); if (IS_ERR(inode)) { err = PTR_ERR(inode); p9_debug(P9_DEBUG_VFS, "inode creation failed %d\n", err); goto out; } /* Now set the ACL based on the default value */ v9fs_set_create_acl(inode, fid, dacl, pacl); v9fs_fid_add(dentry, &fid); d_instantiate(dentry, inode); /* Since we are opening a file, assign the open fid to the file */ err = finish_open(file, dentry, generic_file_open); if (err) goto out; file->private_data = ofid; #ifdef CONFIG_9P_FSCACHE if (v9ses->cache & CACHE_FSCACHE) { struct v9fs_inode *v9inode = V9FS_I(inode); fscache_use_cookie(v9fs_inode_cookie(v9inode), file->f_mode & FMODE_WRITE); } #endif v9fs_fid_add_modes(ofid, v9ses->flags, v9ses->cache, flags); v9fs_open_fid_add(inode, &ofid); file->f_mode |= FMODE_CREATED; out: p9_fid_put(dfid); p9_fid_put(ofid); p9_fid_put(fid); v9fs_put_acl(dacl, pacl); return err; } /** * 
v9fs_vfs_mkdir_dotl - VFS mkdir hook to create a directory * @idmap: The idmap of the mount * @dir: inode that is being unlinked * @dentry: dentry that is being unlinked * @omode: mode for new directory * */ static struct dentry *v9fs_vfs_mkdir_dotl(struct mnt_idmap *idmap, struct inode *dir, struct dentry *dentry, umode_t omode) { int err; struct v9fs_session_info *v9ses; struct p9_fid *fid = NULL, *dfid = NULL; kgid_t gid; const unsigned char *name; umode_t mode; struct inode *inode; struct p9_qid qid; struct posix_acl *dacl = NULL, *pacl = NULL; p9_debug(P9_DEBUG_VFS, "name %pd\n", dentry); v9ses = v9fs_inode2v9ses(dir); omode |= S_IFDIR; if (dir->i_mode & S_ISGID) omode |= S_ISGID; dfid = v9fs_parent_fid(dentry); if (IS_ERR(dfid)) { err = PTR_ERR(dfid); p9_debug(P9_DEBUG_VFS, "fid lookup failed %d\n", err); goto error; } gid = v9fs_get_fsgid_for_create(dir); mode = omode; /* Update mode based on ACL value */ err = v9fs_acl_mode(dir, &mode, &dacl, &pacl); if (err) { p9_debug(P9_DEBUG_VFS, "Failed to get acl values in mkdir %d\n", err); goto error; } name = dentry->d_name.name; err = p9_client_mkdir_dotl(dfid, name, mode, gid, &qid); if (err < 0) goto error; fid = p9_client_walk(dfid, 1, &name, 1); if (IS_ERR(fid)) { err = PTR_ERR(fid); p9_debug(P9_DEBUG_VFS, "p9_client_walk failed %d\n", err); goto error; } /* instantiate inode and assign the unopened fid to the dentry */ inode = v9fs_get_new_inode_from_fid(v9ses, fid, dir->i_sb); if (IS_ERR(inode)) { err = PTR_ERR(inode); p9_debug(P9_DEBUG_VFS, "inode creation failed %d\n", err); goto error; } v9fs_set_create_acl(inode, fid, dacl, pacl); v9fs_fid_add(dentry, &fid); d_instantiate(dentry, inode); err = 0; inc_nlink(dir); v9fs_invalidate_inode_attr(dir); error: p9_fid_put(fid); v9fs_put_acl(dacl, pacl); p9_fid_put(dfid); return ERR_PTR(err); } static int v9fs_vfs_getattr_dotl(struct mnt_idmap *idmap, const struct path *path, struct kstat *stat, u32 request_mask, unsigned int flags) { struct dentry *dentry = 
path->dentry; struct v9fs_session_info *v9ses; struct p9_fid *fid; struct inode *inode = d_inode(dentry); struct p9_stat_dotl *st; p9_debug(P9_DEBUG_VFS, "dentry: %p\n", dentry); v9ses = v9fs_dentry2v9ses(dentry); if (v9ses->cache & (CACHE_META|CACHE_LOOSE)) { generic_fillattr(&nop_mnt_idmap, request_mask, inode, stat); return 0; } else if (v9ses->cache) { if (S_ISREG(inode->i_mode)) { int retval = filemap_fdatawrite(inode->i_mapping); if (retval) p9_debug(P9_DEBUG_ERROR, "flushing writeback during getattr returned %d\n", retval); } } fid = v9fs_fid_lookup(dentry); if (IS_ERR(fid)) return PTR_ERR(fid); /* Ask for all the fields in stat structure. Server will return * whatever it supports */ st = p9_client_getattr_dotl(fid, P9_STATS_ALL); p9_fid_put(fid); if (IS_ERR(st)) return PTR_ERR(st); v9fs_stat2inode_dotl(st, d_inode(dentry), 0); generic_fillattr(&nop_mnt_idmap, request_mask, d_inode(dentry), stat); /* Change block size to what the server returned */ stat->blksize = st->st_blksize; kfree(st); return 0; } /* * Attribute flags. 
*/ #define P9_ATTR_MODE (1 << 0) #define P9_ATTR_UID (1 << 1) #define P9_ATTR_GID (1 << 2) #define P9_ATTR_SIZE (1 << 3) #define P9_ATTR_ATIME (1 << 4) #define P9_ATTR_MTIME (1 << 5) #define P9_ATTR_CTIME (1 << 6) #define P9_ATTR_ATIME_SET (1 << 7) #define P9_ATTR_MTIME_SET (1 << 8) struct dotl_iattr_map { int iattr_valid; int p9_iattr_valid; }; static int v9fs_mapped_iattr_valid(int iattr_valid) { int i; int p9_iattr_valid = 0; struct dotl_iattr_map dotl_iattr_map[] = { { ATTR_MODE, P9_ATTR_MODE }, { ATTR_UID, P9_ATTR_UID }, { ATTR_GID, P9_ATTR_GID }, { ATTR_SIZE, P9_ATTR_SIZE }, { ATTR_ATIME, P9_ATTR_ATIME }, { ATTR_MTIME, P9_ATTR_MTIME }, { ATTR_CTIME, P9_ATTR_CTIME }, { ATTR_ATIME_SET, P9_ATTR_ATIME_SET }, { ATTR_MTIME_SET, P9_ATTR_MTIME_SET }, }; for (i = 0; i < ARRAY_SIZE(dotl_iattr_map); i++) { if (iattr_valid & dotl_iattr_map[i].iattr_valid) p9_iattr_valid |= dotl_iattr_map[i].p9_iattr_valid; } return p9_iattr_valid; } /** * v9fs_vfs_setattr_dotl - set file metadata * @idmap: idmap of the mount * @dentry: file whose metadata to set * @iattr: metadata assignment structure * */ int v9fs_vfs_setattr_dotl(struct mnt_idmap *idmap, struct dentry *dentry, struct iattr *iattr) { int retval, use_dentry = 0; struct inode *inode = d_inode(dentry); struct v9fs_session_info __maybe_unused *v9ses; struct p9_fid *fid = NULL; struct p9_iattr_dotl p9attr = { .uid = INVALID_UID, .gid = INVALID_GID, }; p9_debug(P9_DEBUG_VFS, "\n"); retval = setattr_prepare(&nop_mnt_idmap, dentry, iattr); if (retval) return retval; v9ses = v9fs_dentry2v9ses(dentry); p9attr.valid = v9fs_mapped_iattr_valid(iattr->ia_valid); if (iattr->ia_valid & ATTR_MODE) p9attr.mode = iattr->ia_mode; if (iattr->ia_valid & ATTR_UID) p9attr.uid = iattr->ia_uid; if (iattr->ia_valid & ATTR_GID) p9attr.gid = iattr->ia_gid; if (iattr->ia_valid & ATTR_SIZE) p9attr.size = iattr->ia_size; if (iattr->ia_valid & ATTR_ATIME_SET) { p9attr.atime_sec = iattr->ia_atime.tv_sec; p9attr.atime_nsec = iattr->ia_atime.tv_nsec; } if 
(iattr->ia_valid & ATTR_MTIME_SET) { p9attr.mtime_sec = iattr->ia_mtime.tv_sec; p9attr.mtime_nsec = iattr->ia_mtime.tv_nsec; } if (iattr->ia_valid & ATTR_FILE) { fid = iattr->ia_file->private_data; WARN_ON(!fid); } if (!fid) { fid = v9fs_fid_lookup(dentry); use_dentry = 1; } if (IS_ERR(fid)) return PTR_ERR(fid); /* Write all dirty data */ if (S_ISREG(inode->i_mode)) { retval = filemap_fdatawrite(inode->i_mapping); if (retval < 0) p9_debug(P9_DEBUG_ERROR, "Flushing file prior to setattr failed: %d\n", retval); } retval = p9_client_setattr(fid, &p9attr); if (retval < 0) { if (use_dentry) p9_fid_put(fid); return retval; } if ((iattr->ia_valid & ATTR_SIZE) && iattr->ia_size != i_size_read(inode)) { truncate_setsize(inode, iattr->ia_size); netfs_resize_file(netfs_inode(inode), iattr->ia_size, true); #ifdef CONFIG_9P_FSCACHE if (v9ses->cache & CACHE_FSCACHE) fscache_resize_cookie(v9fs_inode_cookie(V9FS_I(inode)), iattr->ia_size); #endif } v9fs_invalidate_inode_attr(inode); setattr_copy(&nop_mnt_idmap, inode, iattr); mark_inode_dirty(inode); if (iattr->ia_valid & ATTR_MODE) { /* We also want to update ACL when we update mode bits */ retval = v9fs_acl_chmod(inode, fid); if (retval < 0) { if (use_dentry) p9_fid_put(fid); return retval; } } if (use_dentry) p9_fid_put(fid); return 0; } /** * v9fs_stat2inode_dotl - populate an inode structure with stat info * @stat: stat structure * @inode: inode to populate * @flags: ctrl flags (e.g. 
V9FS_STAT2INODE_KEEP_ISIZE) * */ void v9fs_stat2inode_dotl(struct p9_stat_dotl *stat, struct inode *inode, unsigned int flags) { umode_t mode; struct v9fs_inode *v9inode = V9FS_I(inode); if ((stat->st_result_mask & P9_STATS_BASIC) == P9_STATS_BASIC) { inode_set_atime(inode, stat->st_atime_sec, stat->st_atime_nsec); inode_set_mtime(inode, stat->st_mtime_sec, stat->st_mtime_nsec); inode_set_ctime(inode, stat->st_ctime_sec, stat->st_ctime_nsec); inode->i_uid = stat->st_uid; inode->i_gid = stat->st_gid; set_nlink(inode, stat->st_nlink); mode = stat->st_mode & S_IALLUGO; mode |= inode->i_mode & ~S_IALLUGO; inode->i_mode = mode; v9inode->netfs.remote_i_size = stat->st_size; if (!(flags & V9FS_STAT2INODE_KEEP_ISIZE)) v9fs_i_size_write(inode, stat->st_size); inode->i_blocks = stat->st_blocks; } else { if (stat->st_result_mask & P9_STATS_ATIME) { inode_set_atime(inode, stat->st_atime_sec, stat->st_atime_nsec); } if (stat->st_result_mask & P9_STATS_MTIME) { inode_set_mtime(inode, stat->st_mtime_sec, stat->st_mtime_nsec); } if (stat->st_result_mask & P9_STATS_CTIME) { inode_set_ctime(inode, stat->st_ctime_sec, stat->st_ctime_nsec); } if (stat->st_result_mask & P9_STATS_UID) inode->i_uid = stat->st_uid; if (stat->st_result_mask & P9_STATS_GID) inode->i_gid = stat->st_gid; if (stat->st_result_mask & P9_STATS_NLINK) set_nlink(inode, stat->st_nlink); if (stat->st_result_mask & P9_STATS_MODE) { mode = stat->st_mode & S_IALLUGO; mode |= inode->i_mode & ~S_IALLUGO; inode->i_mode = mode; } if (!(flags & V9FS_STAT2INODE_KEEP_ISIZE) && stat->st_result_mask & P9_STATS_SIZE) { v9inode->netfs.remote_i_size = stat->st_size; v9fs_i_size_write(inode, stat->st_size); } if (stat->st_result_mask & P9_STATS_BLOCKS) inode->i_blocks = stat->st_blocks; } if (stat->st_result_mask & P9_STATS_GEN) inode->i_generation = stat->st_gen; /* Currently we don't support P9_STATS_BTIME and P9_STATS_DATA_VERSION * because the inode structure does not have fields for them. 
*/ v9inode->cache_validity &= ~V9FS_INO_INVALID_ATTR; } static int v9fs_vfs_symlink_dotl(struct mnt_idmap *idmap, struct inode *dir, struct dentry *dentry, const char *symname) { int err; kgid_t gid; const unsigned char *name; struct p9_qid qid; struct p9_fid *dfid; struct p9_fid *fid = NULL; name = dentry->d_name.name; p9_debug(P9_DEBUG_VFS, "%llu,%s,%s\n", dir->i_ino, name, symname); dfid = v9fs_parent_fid(dentry); if (IS_ERR(dfid)) { err = PTR_ERR(dfid); p9_debug(P9_DEBUG_VFS, "fid lookup failed %d\n", err); return err; } gid = v9fs_get_fsgid_for_create(dir); /* Server doesn't alter fid on TSYMLINK. Hence no need to clone it. */ err = p9_client_symlink(dfid, name, symname, gid, &qid); if (err < 0) { p9_debug(P9_DEBUG_VFS, "p9_client_symlink failed %d\n", err); goto error; } v9fs_invalidate_inode_attr(dir); error: p9_fid_put(fid); p9_fid_put(dfid); return err; } /** * v9fs_vfs_link_dotl - create a hardlink for dotl * @old_dentry: dentry for file to link to * @dir: inode destination for new link * @dentry: dentry for link * */ static int v9fs_vfs_link_dotl(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry) { int err; struct p9_fid *dfid, *oldfid; struct v9fs_session_info *v9ses; p9_debug(P9_DEBUG_VFS, "dir ino: %llu, old_name: %pd, new_name: %pd\n", dir->i_ino, old_dentry, dentry); v9ses = v9fs_inode2v9ses(dir); dfid = v9fs_parent_fid(dentry); if (IS_ERR(dfid)) return PTR_ERR(dfid); oldfid = v9fs_fid_lookup(old_dentry); if (IS_ERR(oldfid)) { p9_fid_put(dfid); return PTR_ERR(oldfid); } err = p9_client_link(dfid, oldfid, dentry->d_name.name); p9_fid_put(dfid); p9_fid_put(oldfid); if (err < 0) { p9_debug(P9_DEBUG_VFS, "p9_client_link failed %d\n", err); return err; } v9fs_invalidate_inode_attr(dir); if (v9ses->cache & (CACHE_META|CACHE_LOOSE)) { /* Get the latest stat info from server. 
*/ struct p9_fid *fid; fid = v9fs_fid_lookup(old_dentry); if (IS_ERR(fid)) return PTR_ERR(fid); v9fs_refresh_inode_dotl(fid, d_inode(old_dentry)); p9_fid_put(fid); } ihold(d_inode(old_dentry)); d_instantiate(dentry, d_inode(old_dentry)); return err; } /** * v9fs_vfs_mknod_dotl - create a special file * @idmap: The idmap of the mount * @dir: inode destination for new link * @dentry: dentry for file * @omode: mode for creation * @rdev: device associated with special file * */ static int v9fs_vfs_mknod_dotl(struct mnt_idmap *idmap, struct inode *dir, struct dentry *dentry, umode_t omode, dev_t rdev) { int err; kgid_t gid; const unsigned char *name; umode_t mode; struct v9fs_session_info *v9ses; struct p9_fid *fid = NULL, *dfid = NULL; struct inode *inode; struct p9_qid qid; struct posix_acl *dacl = NULL, *pacl = NULL; p9_debug(P9_DEBUG_VFS, " %llu,%pd mode: %x MAJOR: %u MINOR: %u\n", dir->i_ino, dentry, omode, MAJOR(rdev), MINOR(rdev)); v9ses = v9fs_inode2v9ses(dir); dfid = v9fs_parent_fid(dentry); if (IS_ERR(dfid)) { err = PTR_ERR(dfid); p9_debug(P9_DEBUG_VFS, "fid lookup failed %d\n", err); goto error; } gid = v9fs_get_fsgid_for_create(dir); mode = omode; /* Update mode based on ACL value */ err = v9fs_acl_mode(dir, &mode, &dacl, &pacl); if (err) { p9_debug(P9_DEBUG_VFS, "Failed to get acl values in mknod %d\n", err); goto error; } name = dentry->d_name.name; err = p9_client_mknod_dotl(dfid, name, mode, rdev, gid, &qid); if (err < 0) goto error; v9fs_invalidate_inode_attr(dir); fid = p9_client_walk(dfid, 1, &name, 1); if (IS_ERR(fid)) { err = PTR_ERR(fid); p9_debug(P9_DEBUG_VFS, "p9_client_walk failed %d\n", err); goto error; } inode = v9fs_get_new_inode_from_fid(v9ses, fid, dir->i_sb); if (IS_ERR(inode)) { err = PTR_ERR(inode); p9_debug(P9_DEBUG_VFS, "inode creation failed %d\n", err); goto error; } v9fs_set_create_acl(inode, fid, dacl, pacl); v9fs_fid_add(dentry, &fid); d_instantiate(dentry, inode); err = 0; error: p9_fid_put(fid); v9fs_put_acl(dacl, pacl); 
p9_fid_put(dfid); return err; } /** * v9fs_vfs_get_link_dotl - follow a symlink path * @dentry: dentry for symlink * @inode: inode for symlink * @done: destructor for return value */ static const char * v9fs_vfs_get_link_dotl(struct dentry *dentry, struct inode *inode, struct delayed_call *done) { struct p9_fid *fid; char *target; int retval; if (!dentry) return ERR_PTR(-ECHILD); p9_debug(P9_DEBUG_VFS, "%pd\n", dentry); fid = v9fs_fid_lookup(dentry); if (IS_ERR(fid)) return ERR_CAST(fid); retval = p9_client_readlink(fid, &target); p9_fid_put(fid); if (retval) return ERR_PTR(retval); set_delayed_call(done, kfree_link, target); return target; } int v9fs_refresh_inode_dotl(struct p9_fid *fid, struct inode *inode) { struct p9_stat_dotl *st; struct v9fs_session_info *v9ses; unsigned int flags; v9ses = v9fs_inode2v9ses(inode); st = p9_client_getattr_dotl(fid, P9_STATS_ALL); if (IS_ERR(st)) return PTR_ERR(st); /* * Don't update inode if the file type is different */ if (inode_wrong_type(inode, st->st_mode)) goto out; /* * We don't want to refresh inode->i_size, * because we may have cached data */ flags = (v9ses->cache & CACHE_LOOSE) ? 
V9FS_STAT2INODE_KEEP_ISIZE : 0; v9fs_stat2inode_dotl(st, inode, flags); out: kfree(st); return 0; } const struct inode_operations v9fs_dir_inode_operations_dotl = { .create = v9fs_vfs_create_dotl, .atomic_open = v9fs_vfs_atomic_open_dotl, .lookup = v9fs_vfs_lookup, .link = v9fs_vfs_link_dotl, .symlink = v9fs_vfs_symlink_dotl, .unlink = v9fs_vfs_unlink, .mkdir = v9fs_vfs_mkdir_dotl, .rmdir = v9fs_vfs_rmdir, .mknod = v9fs_vfs_mknod_dotl, .rename = v9fs_vfs_rename, .getattr = v9fs_vfs_getattr_dotl, .setattr = v9fs_vfs_setattr_dotl, .listxattr = v9fs_listxattr, .get_inode_acl = v9fs_iop_get_inode_acl, .get_acl = v9fs_iop_get_acl, .set_acl = v9fs_iop_set_acl, }; const struct inode_operations v9fs_file_inode_operations_dotl = { .getattr = v9fs_vfs_getattr_dotl, .setattr = v9fs_vfs_setattr_dotl, .listxattr = v9fs_listxattr, .get_inode_acl = v9fs_iop_get_inode_acl, .get_acl = v9fs_iop_get_acl, .set_acl = v9fs_iop_set_acl, }; const struct inode_operations v9fs_symlink_inode_operations_dotl = { .get_link = v9fs_vfs_get_link_dotl, .getattr = v9fs_vfs_getattr_dotl, .setattr = v9fs_vfs_setattr_dotl, .listxattr = v9fs_listxattr, };
1222 17 1223 1228 4 1 4 134 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 // SPDX-License-Identifier: GPL-2.0-only /* * Landlock - Credential hooks * * Copyright © 2017-2020 Mickaël Salaün <mic@digikod.net> * Copyright © 2018-2020 ANSSI * Copyright © 2024-2025 Microsoft Corporation */ #include <linux/binfmts.h> #include <linux/cred.h> #include <linux/lsm_hooks.h> #include "common.h" #include "cred.h" #include "ruleset.h" #include "setup.h" static void hook_cred_transfer(struct cred *const new, const struct cred *const old) { const struct landlock_cred_security *const old_llcred = landlock_cred(old); landlock_get_ruleset(old_llcred->domain); *landlock_cred(new) = *old_llcred; } static int hook_cred_prepare(struct cred *const new, const struct cred *const old, const gfp_t gfp) { hook_cred_transfer(new, old); return 0; } static void hook_cred_free(struct cred *const cred) { struct landlock_ruleset *const dom = landlock_cred(cred)->domain; if (dom) landlock_put_ruleset_deferred(dom); } #ifdef CONFIG_AUDIT static int hook_bprm_creds_for_exec(struct linux_binprm *const bprm) { /* Resets for each execution. */ landlock_cred(bprm->cred)->domain_exec = 0; return 0; } #endif /* CONFIG_AUDIT */ static struct security_hook_list landlock_hooks[] __ro_after_init = { LSM_HOOK_INIT(cred_prepare, hook_cred_prepare), LSM_HOOK_INIT(cred_transfer, hook_cred_transfer), LSM_HOOK_INIT(cred_free, hook_cred_free), #ifdef CONFIG_AUDIT LSM_HOOK_INIT(bprm_creds_for_exec, hook_bprm_creds_for_exec), #endif /* CONFIG_AUDIT */ }; __init void landlock_add_cred_hooks(void) { security_add_hooks(landlock_hooks, ARRAY_SIZE(landlock_hooks), &landlock_lsmid); }
169 6510 6406 16 4014 16 3762 20 1176 38 38 503 5843 29 30 17 4358 7545 59 503 9787 7164 8960 8515 5812 5842 5410 656 236 166 164 3 15 168 168 168 164 17 168 168 165 1091 1088 341 3228 41 41 41 986 164 164 164 33 376 1790 1684 3012 4320 4321 2979 225 364 389 388 208 388 171 365 1 362 176 193 74 386 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 
453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _LINUX_FS_NOTIFY_H #define _LINUX_FS_NOTIFY_H /* * include/linux/fsnotify.h - generic hooks for filesystem notification, to * reduce in-source duplication from both dnotify and inotify. * * We don't compile any of this away in some complicated menagerie of ifdefs. * Instead, we rely on the code inside to optimize away as needed. * * (C) Copyright 2005 Robert Love */ #include <linux/fsnotify_backend.h> #include <linux/audit.h> #include <linux/slab.h> #include <linux/bug.h> /* Are there any inode/mount/sb objects watched with priority prio or above? */ static inline bool fsnotify_sb_has_priority_watchers(struct super_block *sb, int prio) { struct fsnotify_sb_info *sbinfo = fsnotify_sb_info(sb); /* Were any marks ever added to any object on this sb? */ if (!sbinfo) return false; return atomic_long_read(&sbinfo->watched_objects[prio]); } /* Are there any inode/mount/sb objects that are being watched at all? */ static inline bool fsnotify_sb_has_watchers(struct super_block *sb) { return fsnotify_sb_has_priority_watchers(sb, 0); } /* * Notify this @dir inode about a change in a child directory entry. * The directory entry may have turned positive or negative or its inode may * have changed (i.e. renamed over). * * Unlike fsnotify_parent(), the event will be reported regardless of the * FS_EVENT_ON_CHILD mask on the parent inode and will not be reported if only * the child is interested and not the parent. 
/*
 * Notify this dentry's parent about a child's events.
 *
 * Nothing is reported when the superblock has no watchers. The event goes
 * through __fsnotify_parent() unless the dentry is a directory without
 * DCACHE_FSNOTIFY_PARENT_WATCHED or is a root dentry; in those cases it is
 * reported on the child inode only.
 */
static inline int fsnotify_parent(struct dentry *dentry, __u32 mask,
				  const void *data, int data_type)
{
	struct inode *inode = d_inode(dentry);
	int child_only;

	if (!fsnotify_sb_has_watchers(inode->i_sb))
		return 0;

	if (S_ISDIR(inode->i_mode)) {
		mask |= FS_ISDIR;
		/* sb/mount marks are not interested in name of directory */
		child_only = !(dentry->d_flags & DCACHE_FSNOTIFY_PARENT_WATCHED);
	} else {
		child_only = 0;
	}

	/* disconnected dentry cannot notify parent */
	if (child_only || IS_ROOT(dentry))
		return fsnotify(mask, data, data_type, NULL, NULL, inode, 0);

	return __fsnotify_parent(dentry, mask, data, data_type);
}
We also don't want to generate events for * FMODE_PATH fds (involves open & close events) as they are just * handle creation / destruction events and not "real" file events. */ if (FMODE_FSNOTIFY_NONE(file->f_mode)) return 0; return fsnotify_path(&file->f_path, mask); } #ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS int fsnotify_open_perm_and_set_mode(struct file *file); /* * fsnotify_file_area_perm - permission hook before access to file range */ static inline int fsnotify_file_area_perm(struct file *file, int perm_mask, const loff_t *ppos, size_t count) { /* * filesystem may be modified in the context of permission events * (e.g. by HSM filling a file on access), so sb freeze protection * must not be held. */ lockdep_assert_once(file_write_not_started(file)); if (!(perm_mask & (MAY_READ | MAY_WRITE | MAY_ACCESS))) return 0; /* * read()/write() and other types of access generate pre-content events. */ if (unlikely(FMODE_FSNOTIFY_HSM(file->f_mode))) { int ret = fsnotify_pre_content(&file->f_path, ppos, count); if (ret) return ret; } if (!(perm_mask & MAY_READ) || likely(!FMODE_FSNOTIFY_ACCESS_PERM(file->f_mode))) return 0; /* * read() also generates the legacy FS_ACCESS_PERM event, so content * scanners can inspect the content filled by pre-content event. */ return fsnotify_path(&file->f_path, FS_ACCESS_PERM); } /* * fsnotify_mmap_perm - permission hook before mmap of file range */ static inline int fsnotify_mmap_perm(struct file *file, int prot, const loff_t off, size_t len) { /* * mmap() generates only pre-content events. 
*/ if (!file || likely(!FMODE_FSNOTIFY_HSM(file->f_mode))) return 0; return fsnotify_pre_content(&file->f_path, &off, len); } /* * fsnotify_truncate_perm - permission hook before file truncate */ static inline int fsnotify_truncate_perm(const struct path *path, loff_t length) { struct inode *inode = d_inode(path->dentry); if (!(inode->i_sb->s_iflags & SB_I_ALLOW_HSM) || !fsnotify_sb_has_priority_watchers(inode->i_sb, FSNOTIFY_PRIO_PRE_CONTENT)) return 0; return fsnotify_pre_content(path, &length, 0); } /* * fsnotify_file_perm - permission hook before file access (unknown range) */ static inline int fsnotify_file_perm(struct file *file, int perm_mask) { return fsnotify_file_area_perm(file, perm_mask, NULL, 0); } #else static inline int fsnotify_open_perm_and_set_mode(struct file *file) { return 0; } static inline int fsnotify_file_area_perm(struct file *file, int perm_mask, const loff_t *ppos, size_t count) { return 0; } static inline int fsnotify_mmap_perm(struct file *file, int prot, const loff_t off, size_t len) { return 0; } static inline int fsnotify_truncate_perm(const struct path *path, loff_t length) { return 0; } static inline int fsnotify_file_perm(struct file *file, int perm_mask) { return 0; } #endif /* * fsnotify_link_count - inode's link count changed */ static inline void fsnotify_link_count(struct inode *inode) { fsnotify_inode(inode, FS_ATTRIB); } /* * fsnotify_move - file old_name at old_dir was moved to new_name at new_dir */ static inline void fsnotify_move(struct inode *old_dir, struct inode *new_dir, const struct qstr *old_name, int isdir, struct inode *target, struct dentry *moved) { struct inode *source = moved->d_inode; u32 fs_cookie = fsnotify_get_cookie(); __u32 old_dir_mask = FS_MOVED_FROM; __u32 new_dir_mask = FS_MOVED_TO; __u32 rename_mask = FS_RENAME; const struct qstr *new_name = &moved->d_name; if (isdir) { old_dir_mask |= FS_ISDIR; new_dir_mask |= FS_ISDIR; rename_mask |= FS_ISDIR; } /* Event with information about both old and new 
parent+name */ fsnotify_name(rename_mask, moved, FSNOTIFY_EVENT_DENTRY, old_dir, old_name, 0); fsnotify_name(old_dir_mask, source, FSNOTIFY_EVENT_INODE, old_dir, old_name, fs_cookie); fsnotify_name(new_dir_mask, source, FSNOTIFY_EVENT_INODE, new_dir, new_name, fs_cookie); if (target) fsnotify_link_count(target); fsnotify_inode(source, FS_MOVE_SELF); audit_inode_child(new_dir, moved, AUDIT_TYPE_CHILD_CREATE); } /* * fsnotify_inode_delete - and inode is being evicted from cache, clean up is needed */ static inline void fsnotify_inode_delete(struct inode *inode) { __fsnotify_inode_delete(inode); } /* * fsnotify_vfsmount_delete - a vfsmount is being destroyed, clean up is needed */ static inline void fsnotify_vfsmount_delete(struct vfsmount *mnt) { __fsnotify_vfsmount_delete(mnt); } static inline void fsnotify_mntns_delete(struct mnt_namespace *mntns) { __fsnotify_mntns_delete(mntns); } /* * fsnotify_inoderemove - an inode is going away */ static inline void fsnotify_inoderemove(struct inode *inode) { fsnotify_inode(inode, FS_DELETE_SELF); __fsnotify_inode_delete(inode); } /* * fsnotify_create - 'name' was linked in * * Caller must make sure that dentry->d_name is stable. * Note: some filesystems (e.g. kernfs) leave @dentry negative and instantiate * ->d_inode later */ static inline void fsnotify_create(struct inode *dir, struct dentry *dentry) { audit_inode_child(dir, dentry, AUDIT_TYPE_CHILD_CREATE); fsnotify_dirent(dir, dentry, FS_CREATE); } /* * fsnotify_link - new hardlink in 'inode' directory * * Caller must make sure that new_dentry->d_name is stable. 
* Note: We have to pass also the linked inode ptr as some filesystems leave * new_dentry->d_inode NULL and instantiate inode pointer later */ static inline void fsnotify_link(struct inode *dir, struct inode *inode, struct dentry *new_dentry) { fsnotify_link_count(inode); audit_inode_child(dir, new_dentry, AUDIT_TYPE_CHILD_CREATE); fsnotify_name(FS_CREATE, inode, FSNOTIFY_EVENT_INODE, dir, &new_dentry->d_name, 0); } /* * fsnotify_delete - @dentry was unlinked and unhashed * * Caller must make sure that dentry->d_name is stable. * * Note: unlike fsnotify_unlink(), we have to pass also the unlinked inode * as this may be called after d_delete() and old_dentry may be negative. */ static inline void fsnotify_delete(struct inode *dir, struct inode *inode, struct dentry *dentry) { __u32 mask = FS_DELETE; if (S_ISDIR(inode->i_mode)) mask |= FS_ISDIR; fsnotify_name(mask, inode, FSNOTIFY_EVENT_INODE, dir, &dentry->d_name, 0); } /** * d_delete_notify - delete a dentry and call fsnotify_delete() * @dentry: The dentry to delete * * This helper is used to guaranty that the unlinked inode cannot be found * by lookup of this name after fsnotify_delete() event has been delivered. */ static inline void d_delete_notify(struct inode *dir, struct dentry *dentry) { struct inode *inode = d_inode(dentry); ihold(inode); d_delete(dentry); fsnotify_delete(dir, inode, dentry); iput(inode); } /* * fsnotify_unlink - 'name' was unlinked * * Caller must make sure that dentry->d_name is stable. */ static inline void fsnotify_unlink(struct inode *dir, struct dentry *dentry) { if (WARN_ON_ONCE(d_is_negative(dentry))) return; fsnotify_delete(dir, d_inode(dentry), dentry); } /* * fsnotify_mkdir - directory 'name' was created * * Caller must make sure that dentry->d_name is stable. * Note: some filesystems (e.g. 
kernfs) leave @dentry negative and instantiate * ->d_inode later */ static inline void fsnotify_mkdir(struct inode *dir, struct dentry *dentry) { audit_inode_child(dir, dentry, AUDIT_TYPE_CHILD_CREATE); fsnotify_dirent(dir, dentry, FS_CREATE | FS_ISDIR); } /* * fsnotify_rmdir - directory 'name' was removed * * Caller must make sure that dentry->d_name is stable. */ static inline void fsnotify_rmdir(struct inode *dir, struct dentry *dentry) { if (WARN_ON_ONCE(d_is_negative(dentry))) return; fsnotify_delete(dir, d_inode(dentry), dentry); } /* * fsnotify_access - file was read */ static inline void fsnotify_access(struct file *file) { fsnotify_file(file, FS_ACCESS); } /* * fsnotify_modify - file was modified */ static inline void fsnotify_modify(struct file *file) { fsnotify_file(file, FS_MODIFY); } /* * fsnotify_open - file was opened */ static inline void fsnotify_open(struct file *file) { __u32 mask = FS_OPEN; if (file->f_flags & __FMODE_EXEC) mask |= FS_OPEN_EXEC; fsnotify_file(file, mask); } /* * fsnotify_close - file was closed */ static inline void fsnotify_close(struct file *file) { __u32 mask = (file->f_mode & FMODE_WRITE) ? FS_CLOSE_WRITE : FS_CLOSE_NOWRITE; fsnotify_file(file, mask); } /* * fsnotify_xattr - extended attributes were changed */ static inline void fsnotify_xattr(struct dentry *dentry) { fsnotify_dentry(dentry, FS_ATTRIB); } /* * fsnotify_change - notify_change event. file was modified and/or metadata * was changed. 
*/ static inline void fsnotify_change(struct dentry *dentry, unsigned int ia_valid) { __u32 mask = 0; if (ia_valid & ATTR_UID) mask |= FS_ATTRIB; if (ia_valid & ATTR_GID) mask |= FS_ATTRIB; if (ia_valid & ATTR_SIZE) mask |= FS_MODIFY; /* both times implies a utime(s) call */ if ((ia_valid & (ATTR_ATIME | ATTR_MTIME)) == (ATTR_ATIME | ATTR_MTIME)) mask |= FS_ATTRIB; else if (ia_valid & ATTR_ATIME) mask |= FS_ACCESS; else if (ia_valid & ATTR_MTIME) mask |= FS_MODIFY; if (ia_valid & ATTR_MODE) mask |= FS_ATTRIB; if (mask) fsnotify_dentry(dentry, mask); } static inline void fsnotify_mnt_attach(struct mnt_namespace *ns, struct vfsmount *mnt) { fsnotify_mnt(FS_MNT_ATTACH, ns, mnt); } static inline void fsnotify_mnt_detach(struct mnt_namespace *ns, struct vfsmount *mnt) { fsnotify_mnt(FS_MNT_DETACH, ns, mnt); } static inline void fsnotify_mnt_move(struct mnt_namespace *ns, struct vfsmount *mnt) { fsnotify_mnt(FS_MNT_MOVE, ns, mnt); } #endif /* _LINUX_FS_NOTIFY_H */
25 25 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef __KVM_X86_VMX_PMU_INTEL_H #define __KVM_X86_VMX_PMU_INTEL_H #include <linux/kvm_host.h> #include "cpuid.h" static inline u64 vcpu_get_perf_capabilities(struct kvm_vcpu *vcpu) { if (!guest_cpu_cap_has(vcpu, X86_FEATURE_PDCM)) return 0; return vcpu->arch.perf_capabilities; } static inline bool fw_writes_is_enabled(struct kvm_vcpu *vcpu) { return (vcpu_get_perf_capabilities(vcpu) & PERF_CAP_FW_WRITES) != 0; } bool intel_pmu_lbr_is_enabled(struct kvm_vcpu *vcpu); int intel_pmu_create_guest_lbr_event(struct kvm_vcpu *vcpu); struct lbr_desc { /* Basic info about guest LBR records. */ struct x86_pmu_lbr records; /* * Emulate LBR feature via passthrough LBR registers when the * per-vcpu guest LBR event is scheduled on the current pcpu. * * The records may be inaccurate if the host reclaims the LBR. */ struct perf_event *event; /* True if LBRs are marked as not intercepted in the MSR bitmap */ bool msr_passthrough; }; extern struct x86_pmu_lbr vmx_lbr_caps; #endif /* __KVM_X86_VMX_PMU_INTEL_H */
5 5 5 5 5 5 5 5 5 5 5 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 
522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (C) 2012 Red Hat * * based in parts on udlfb.c: * Copyright (C) 2009 Roberto De Ioris <roberto@unbit.it> * Copyright (C) 2009 Jaya Kumar <jayakumar.lkml@gmail.com> * Copyright (C) 2009 Bernie Thompson <bernie@plugable.com> */ #include <linux/bitfield.h> #include <drm/drm_atomic.h> #include <drm/drm_atomic_helper.h> #include <drm/drm_crtc_helper.h> #include <drm/drm_damage_helper.h> #include <drm/drm_drv.h> #include <drm/drm_edid.h> #include <drm/drm_fourcc.h> #include <drm/drm_gem_atomic_helper.h> #include <drm/drm_gem_framebuffer_helper.h> #include <drm/drm_gem_shmem_helper.h> #include <drm/drm_modeset_helper_vtables.h> #include <drm/drm_probe_helper.h> #include <drm/drm_vblank.h> #include "udl_drv.h" #include "udl_edid.h" #include "udl_proto.h" /* * All DisplayLink bulk operations start with 0xaf (UDL_MSG_BULK), followed by * a specific command code. All operations are written to a command buffer, which * the driver sends to the device. */ static char *udl_set_register(char *buf, u8 reg, u8 val) { *buf++ = UDL_MSG_BULK; *buf++ = UDL_CMD_WRITEREG; *buf++ = reg; *buf++ = val; return buf; } static char *udl_vidreg_lock(char *buf) { return udl_set_register(buf, UDL_REG_VIDREG, UDL_VIDREG_LOCK); } static char *udl_vidreg_unlock(char *buf) { return udl_set_register(buf, UDL_REG_VIDREG, UDL_VIDREG_UNLOCK); } static char *udl_set_blank_mode(char *buf, u8 mode) { return udl_set_register(buf, UDL_REG_BLANKMODE, mode); } static char *udl_set_color_depth(char *buf, u8 selection) { return udl_set_register(buf, UDL_REG_COLORDEPTH, selection); } static char *udl_set_base16bpp(char *buf, u32 base) { /* the base pointer is 24 bits wide, 0x20 is hi byte. 
*/ u8 reg20 = FIELD_GET(UDL_BASE_ADDR2_MASK, base); u8 reg21 = FIELD_GET(UDL_BASE_ADDR1_MASK, base); u8 reg22 = FIELD_GET(UDL_BASE_ADDR0_MASK, base); buf = udl_set_register(buf, UDL_REG_BASE16BPP_ADDR2, reg20); buf = udl_set_register(buf, UDL_REG_BASE16BPP_ADDR1, reg21); buf = udl_set_register(buf, UDL_REG_BASE16BPP_ADDR0, reg22); return buf; } /* * DisplayLink HW has separate 16bpp and 8bpp framebuffers. * In 24bpp modes, the low 323 RGB bits go in the 8bpp framebuffer */ static char *udl_set_base8bpp(char *buf, u32 base) { /* the base pointer is 24 bits wide, 0x26 is hi byte. */ u8 reg26 = FIELD_GET(UDL_BASE_ADDR2_MASK, base); u8 reg27 = FIELD_GET(UDL_BASE_ADDR1_MASK, base); u8 reg28 = FIELD_GET(UDL_BASE_ADDR0_MASK, base); buf = udl_set_register(buf, UDL_REG_BASE8BPP_ADDR2, reg26); buf = udl_set_register(buf, UDL_REG_BASE8BPP_ADDR1, reg27); buf = udl_set_register(buf, UDL_REG_BASE8BPP_ADDR0, reg28); return buf; } static char *udl_set_register_16(char *wrptr, u8 reg, u16 value) { wrptr = udl_set_register(wrptr, reg, value >> 8); return udl_set_register(wrptr, reg+1, value); } /* * This is kind of weird because the controller takes some * register values in a different byte order than other registers. */ static char *udl_set_register_16be(char *wrptr, u8 reg, u16 value) { wrptr = udl_set_register(wrptr, reg, value); return udl_set_register(wrptr, reg+1, value >> 8); } /* * LFSR is linear feedback shift register. The reason we have this is * because the display controller needs to minimize the clock depth of * various counters used in the display path. So this code reverses the * provided value into the lfsr16 value by counting backwards to get * the value that needs to be set in the hardware comparator to get the * same actual count. This makes sense once you read above a couple of * times and think about it from a hardware perspective. 
*/ static u16 udl_lfsr16(u16 actual_count) { u32 lv = 0xFFFF; /* This is the lfsr value that the hw starts with */ while (actual_count--) { lv = ((lv << 1) | (((lv >> 15) ^ (lv >> 4) ^ (lv >> 2) ^ (lv >> 1)) & 1)) & 0xFFFF; } return (u16) lv; } /* * This does LFSR conversion on the value that is to be written. * See LFSR explanation above for more detail. */ static char *udl_set_register_lfsr16(char *wrptr, u8 reg, u16 value) { return udl_set_register_16(wrptr, reg, udl_lfsr16(value)); } /* * Takes a DRM display mode and converts it into the DisplayLink * equivalent register commands. */ static char *udl_set_display_mode(char *buf, struct drm_display_mode *mode) { u16 reg01 = mode->crtc_htotal - mode->crtc_hsync_start; u16 reg03 = reg01 + mode->crtc_hdisplay; u16 reg05 = mode->crtc_vtotal - mode->crtc_vsync_start; u16 reg07 = reg05 + mode->crtc_vdisplay; u16 reg09 = mode->crtc_htotal - 1; u16 reg0b = 1; /* libdlo hardcodes hsync start to 1 */ u16 reg0d = mode->crtc_hsync_end - mode->crtc_hsync_start + 1; u16 reg0f = mode->hdisplay; u16 reg11 = mode->crtc_vtotal; u16 reg13 = 0; /* libdlo hardcodes vsync start to 0 */ u16 reg15 = mode->crtc_vsync_end - mode->crtc_vsync_start; u16 reg17 = mode->crtc_vdisplay; u16 reg1b = mode->clock / 5; buf = udl_set_register_lfsr16(buf, UDL_REG_XDISPLAYSTART, reg01); buf = udl_set_register_lfsr16(buf, UDL_REG_XDISPLAYEND, reg03); buf = udl_set_register_lfsr16(buf, UDL_REG_YDISPLAYSTART, reg05); buf = udl_set_register_lfsr16(buf, UDL_REG_YDISPLAYEND, reg07); buf = udl_set_register_lfsr16(buf, UDL_REG_XENDCOUNT, reg09); buf = udl_set_register_lfsr16(buf, UDL_REG_HSYNCSTART, reg0b); buf = udl_set_register_lfsr16(buf, UDL_REG_HSYNCEND, reg0d); buf = udl_set_register_16(buf, UDL_REG_HPIXELS, reg0f); buf = udl_set_register_lfsr16(buf, UDL_REG_YENDCOUNT, reg11); buf = udl_set_register_lfsr16(buf, UDL_REG_VSYNCSTART, reg13); buf = udl_set_register_lfsr16(buf, UDL_REG_VSYNCEND, reg15); buf = udl_set_register_16(buf, UDL_REG_VPIXELS, reg17); 
buf = udl_set_register_16be(buf, UDL_REG_PIXELCLOCK5KHZ, reg1b); return buf; } static char *udl_dummy_render(char *wrptr) { *wrptr++ = UDL_MSG_BULK; *wrptr++ = UDL_CMD_WRITECOPY16; *wrptr++ = 0x00; /* from addr */ *wrptr++ = 0x00; *wrptr++ = 0x00; *wrptr++ = 0x01; /* one pixel */ *wrptr++ = 0x00; /* to address */ *wrptr++ = 0x00; *wrptr++ = 0x00; return wrptr; } static long udl_log_cpp(unsigned int cpp) { if (WARN_ON(!is_power_of_2(cpp))) return -EINVAL; return __ffs(cpp); } static int udl_handle_damage(struct drm_framebuffer *fb, const struct iosys_map *map, const struct drm_rect *clip) { struct drm_device *dev = fb->dev; struct udl_device *udl = to_udl(dev); void *vaddr = map->vaddr; /* TODO: Use mapping abstraction properly */ int i, ret; char *cmd; struct urb *urb; int log_bpp; ret = udl_log_cpp(fb->format->cpp[0]); if (ret < 0) return ret; log_bpp = ret; urb = udl_get_urb(udl); if (!urb) return -ENOMEM; cmd = urb->transfer_buffer; for (i = clip->y1; i < clip->y2; i++) { const int line_offset = fb->pitches[0] * i; const int byte_offset = line_offset + (clip->x1 << log_bpp); const int dev_byte_offset = (fb->width * i + clip->x1) << log_bpp; const int byte_width = drm_rect_width(clip) << log_bpp; ret = udl_render_hline(udl, log_bpp, &urb, (char *)vaddr, &cmd, byte_offset, dev_byte_offset, byte_width); if (ret) return ret; } if (cmd > (char *)urb->transfer_buffer) { /* Send partial buffer remaining before exiting */ int len; if (cmd < (char *)urb->transfer_buffer + urb->transfer_buffer_length) *cmd++ = UDL_MSG_BULK; len = cmd - (char *)urb->transfer_buffer; ret = udl_submit_urb(udl, urb, len); } else { udl_urb_completion(urb); } return 0; } /* * Primary plane */ static const uint32_t udl_primary_plane_formats[] = { DRM_FORMAT_RGB565, DRM_FORMAT_XRGB8888, }; static const uint64_t udl_primary_plane_fmtmods[] = { DRM_FORMAT_MOD_LINEAR, DRM_FORMAT_MOD_INVALID }; static int udl_primary_plane_helper_atomic_check(struct drm_plane *plane, struct drm_atomic_state *state) { 
struct drm_plane_state *new_plane_state = drm_atomic_get_new_plane_state(state, plane); struct drm_crtc *new_crtc = new_plane_state->crtc; struct drm_crtc_state *new_crtc_state = NULL; if (new_crtc) new_crtc_state = drm_atomic_get_new_crtc_state(state, new_crtc); return drm_atomic_helper_check_plane_state(new_plane_state, new_crtc_state, DRM_PLANE_NO_SCALING, DRM_PLANE_NO_SCALING, false, false); } static void udl_primary_plane_helper_atomic_update(struct drm_plane *plane, struct drm_atomic_state *state) { struct drm_device *dev = plane->dev; struct drm_plane_state *plane_state = drm_atomic_get_new_plane_state(state, plane); struct drm_shadow_plane_state *shadow_plane_state = to_drm_shadow_plane_state(plane_state); struct drm_framebuffer *fb = plane_state->fb; struct drm_plane_state *old_plane_state = drm_atomic_get_old_plane_state(state, plane); struct drm_atomic_helper_damage_iter iter; struct drm_rect damage; int ret, idx; if (!fb) return; /* no framebuffer; plane is disabled */ ret = drm_gem_fb_begin_cpu_access(fb, DMA_FROM_DEVICE); if (ret) return; if (!drm_dev_enter(dev, &idx)) goto out_drm_gem_fb_end_cpu_access; drm_atomic_helper_damage_iter_init(&iter, old_plane_state, plane_state); drm_atomic_for_each_plane_damage(&iter, &damage) { udl_handle_damage(fb, &shadow_plane_state->data[0], &damage); } drm_dev_exit(idx); out_drm_gem_fb_end_cpu_access: drm_gem_fb_end_cpu_access(fb, DMA_FROM_DEVICE); } static const struct drm_plane_helper_funcs udl_primary_plane_helper_funcs = { DRM_GEM_SHADOW_PLANE_HELPER_FUNCS, .atomic_check = udl_primary_plane_helper_atomic_check, .atomic_update = udl_primary_plane_helper_atomic_update, }; static const struct drm_plane_funcs udl_primary_plane_funcs = { .update_plane = drm_atomic_helper_update_plane, .disable_plane = drm_atomic_helper_disable_plane, .destroy = drm_plane_cleanup, DRM_GEM_SHADOW_PLANE_FUNCS, }; /* * CRTC */ static void udl_crtc_helper_atomic_enable(struct drm_crtc *crtc, struct drm_atomic_state *state) { struct 
drm_device *dev = crtc->dev; struct udl_device *udl = to_udl(dev); struct drm_crtc_state *crtc_state = drm_atomic_get_new_crtc_state(state, crtc); struct drm_display_mode *mode = &crtc_state->mode; struct urb *urb; char *buf; int idx; if (!drm_dev_enter(dev, &idx)) return; urb = udl_get_urb(udl); if (!urb) goto out; buf = (char *)urb->transfer_buffer; buf = udl_vidreg_lock(buf); buf = udl_set_color_depth(buf, UDL_COLORDEPTH_16BPP); /* set base for 16bpp segment to 0 */ buf = udl_set_base16bpp(buf, 0); /* set base for 8bpp segment to end of fb */ buf = udl_set_base8bpp(buf, 2 * mode->vdisplay * mode->hdisplay); buf = udl_set_display_mode(buf, mode); buf = udl_set_blank_mode(buf, UDL_BLANKMODE_ON); buf = udl_vidreg_unlock(buf); buf = udl_dummy_render(buf); udl_submit_urb(udl, urb, buf - (char *)urb->transfer_buffer); out: drm_dev_exit(idx); } static void udl_crtc_helper_atomic_disable(struct drm_crtc *crtc, struct drm_atomic_state *state) { struct drm_device *dev = crtc->dev; struct udl_device *udl = to_udl(dev); struct urb *urb; char *buf; int idx; if (!drm_dev_enter(dev, &idx)) return; urb = udl_get_urb(udl); if (!urb) goto out; buf = (char *)urb->transfer_buffer; buf = udl_vidreg_lock(buf); buf = udl_set_blank_mode(buf, UDL_BLANKMODE_POWERDOWN); buf = udl_vidreg_unlock(buf); buf = udl_dummy_render(buf); udl_submit_urb(udl, urb, buf - (char *)urb->transfer_buffer); out: drm_dev_exit(idx); } static const struct drm_crtc_helper_funcs udl_crtc_helper_funcs = { .atomic_check = drm_crtc_helper_atomic_check, .atomic_enable = udl_crtc_helper_atomic_enable, .atomic_disable = udl_crtc_helper_atomic_disable, }; static const struct drm_crtc_funcs udl_crtc_funcs = { .reset = drm_atomic_helper_crtc_reset, .destroy = drm_crtc_cleanup, .set_config = drm_atomic_helper_set_config, .page_flip = drm_atomic_helper_page_flip, .atomic_duplicate_state = drm_atomic_helper_crtc_duplicate_state, .atomic_destroy_state = drm_atomic_helper_crtc_destroy_state, }; /* * Encoder */ static const 
struct drm_encoder_funcs udl_encoder_funcs = { .destroy = drm_encoder_cleanup, }; /* * Connector */ static int udl_connector_helper_get_modes(struct drm_connector *connector) { const struct drm_edid *drm_edid; int count; drm_edid = udl_edid_read(connector); drm_edid_connector_update(connector, drm_edid); count = drm_edid_connector_add_modes(connector); drm_edid_free(drm_edid); return count; } static int udl_connector_helper_detect_ctx(struct drm_connector *connector, struct drm_modeset_acquire_ctx *ctx, bool force) { struct udl_device *udl = to_udl(connector->dev); if (udl_probe_edid(udl)) return connector_status_connected; return connector_status_disconnected; } static const struct drm_connector_helper_funcs udl_connector_helper_funcs = { .get_modes = udl_connector_helper_get_modes, .detect_ctx = udl_connector_helper_detect_ctx, }; static const struct drm_connector_funcs udl_connector_funcs = { .reset = drm_atomic_helper_connector_reset, .fill_modes = drm_helper_probe_single_connector_modes, .destroy = drm_connector_cleanup, .atomic_duplicate_state = drm_atomic_helper_connector_duplicate_state, .atomic_destroy_state = drm_atomic_helper_connector_destroy_state, }; /* * Modesetting */ static enum drm_mode_status udl_mode_config_mode_valid(struct drm_device *dev, const struct drm_display_mode *mode) { struct udl_device *udl = to_udl(dev); if (udl->sku_pixel_limit) { if (mode->vdisplay * mode->hdisplay > udl->sku_pixel_limit) return MODE_MEM; } return MODE_OK; } static const struct drm_mode_config_funcs udl_mode_config_funcs = { .fb_create = drm_gem_fb_create_with_dirty, .mode_valid = udl_mode_config_mode_valid, .atomic_check = drm_atomic_helper_check, .atomic_commit = drm_atomic_helper_commit, }; int udl_modeset_init(struct udl_device *udl) { struct drm_device *dev = &udl->drm; struct drm_plane *primary_plane; struct drm_crtc *crtc; struct drm_encoder *encoder; struct drm_connector *connector; int ret; ret = drmm_mode_config_init(dev); if (ret) return ret; 
dev->mode_config.min_width = 640; dev->mode_config.min_height = 480; dev->mode_config.max_width = 2048; dev->mode_config.max_height = 2048; dev->mode_config.preferred_depth = 16; dev->mode_config.funcs = &udl_mode_config_funcs; primary_plane = &udl->primary_plane; ret = drm_universal_plane_init(dev, primary_plane, 0, &udl_primary_plane_funcs, udl_primary_plane_formats, ARRAY_SIZE(udl_primary_plane_formats), udl_primary_plane_fmtmods, DRM_PLANE_TYPE_PRIMARY, NULL); if (ret) return ret; drm_plane_helper_add(primary_plane, &udl_primary_plane_helper_funcs); drm_plane_enable_fb_damage_clips(primary_plane); crtc = &udl->crtc; ret = drm_crtc_init_with_planes(dev, crtc, primary_plane, NULL, &udl_crtc_funcs, NULL); if (ret) return ret; drm_crtc_helper_add(crtc, &udl_crtc_helper_funcs); encoder = &udl->encoder; ret = drm_encoder_init(dev, encoder, &udl_encoder_funcs, DRM_MODE_ENCODER_DAC, NULL); if (ret) return ret; encoder->possible_crtcs = drm_crtc_mask(crtc); connector = &udl->connector; ret = drm_connector_init(dev, connector, &udl_connector_funcs, DRM_MODE_CONNECTOR_VGA); if (ret) return ret; drm_connector_helper_add(connector, &udl_connector_helper_funcs); connector->polled = DRM_CONNECTOR_POLL_CONNECT | DRM_CONNECTOR_POLL_DISCONNECT; ret = drm_connector_attach_encoder(connector, encoder); if (ret) return ret; drm_mode_config_reset(dev); drmm_kms_helper_poll_init(dev); return 0; }
112 2 23 23 23 23 27 14 27 1 26 24 14 12 12 1 23 23 23 10 22 7 7 6 5 6 23 23 23 23 23 23 23 23 23 23 23 26 26 23 23 23 23 23 23 23 23 85 85 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 
493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2006 Patrick McHardy <kaber@trash.net> * Copyright © CC Computer Consultants GmbH, 2007 - 2008 * * This is a replacement of the old ipt_recent module, which carried the * following copyright notice: * * Author: Stephen Frost <sfrost@snowman.net> * Copyright 2002-2003, Stephen Frost, 2.5.x port by laforge@netfilter.org */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/init.h> #include <linux/ip.h> #include <linux/ipv6.h> #include <linux/module.h> #include <linux/moduleparam.h> #include <linux/proc_fs.h> #include <linux/seq_file.h> #include <linux/string.h> #include <linux/ctype.h> #include <linux/list.h> #include <linux/random.h> #include <linux/jhash.h> #include <linux/bitops.h> #include <linux/skbuff.h> #include <linux/inet.h> #include <linux/slab.h> #include <linux/vmalloc.h> #include <net/net_namespace.h> #include 
<net/netns/generic.h> #include <linux/netfilter/x_tables.h> #include <linux/netfilter/xt_recent.h> MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>"); MODULE_AUTHOR("Jan Engelhardt <jengelh@medozas.de>"); MODULE_DESCRIPTION("Xtables: \"recently-seen\" host matching"); MODULE_LICENSE("GPL"); MODULE_ALIAS("ipt_recent"); MODULE_ALIAS("ip6t_recent"); static unsigned int ip_list_tot __read_mostly = 100; static unsigned int ip_list_hash_size __read_mostly; static unsigned int ip_list_perms __read_mostly = 0644; static unsigned int ip_list_uid __read_mostly; static unsigned int ip_list_gid __read_mostly; module_param(ip_list_tot, uint, 0400); module_param(ip_list_hash_size, uint, 0400); module_param(ip_list_perms, uint, 0400); module_param(ip_list_uid, uint, 0644); module_param(ip_list_gid, uint, 0644); MODULE_PARM_DESC(ip_list_tot, "number of IPs to remember per list"); MODULE_PARM_DESC(ip_list_hash_size, "size of hash table used to look up IPs"); MODULE_PARM_DESC(ip_list_perms, "permissions on /proc/net/xt_recent/* files"); MODULE_PARM_DESC(ip_list_uid, "default owner of /proc/net/xt_recent/* files"); MODULE_PARM_DESC(ip_list_gid, "default owning group of /proc/net/xt_recent/* files"); /* retained for backwards compatibility */ static unsigned int ip_pkt_list_tot __read_mostly; module_param(ip_pkt_list_tot, uint, 0400); MODULE_PARM_DESC(ip_pkt_list_tot, "number of packets per IP address to remember (max. 
65535)"); #define XT_RECENT_MAX_NSTAMPS 65536 struct recent_entry { struct list_head list; struct list_head lru_list; union nf_inet_addr addr; u_int16_t family; u_int8_t ttl; u_int16_t index; u_int16_t nstamps; unsigned long stamps[]; }; struct recent_table { struct list_head list; char name[XT_RECENT_NAME_LEN]; union nf_inet_addr mask; unsigned int refcnt; unsigned int entries; u_int16_t nstamps_max_mask; struct list_head lru_list; struct list_head iphash[]; }; struct recent_net { struct list_head tables; #ifdef CONFIG_PROC_FS struct proc_dir_entry *xt_recent; #endif }; static unsigned int recent_net_id __read_mostly; static inline struct recent_net *recent_pernet(struct net *net) { return net_generic(net, recent_net_id); } static DEFINE_SPINLOCK(recent_lock); static DEFINE_MUTEX(recent_mutex); #ifdef CONFIG_PROC_FS static const struct proc_ops recent_mt_proc_ops; #endif static u_int32_t hash_rnd __read_mostly; static inline unsigned int recent_entry_hash4(const union nf_inet_addr *addr) { return jhash_1word((__force u32)addr->ip, hash_rnd) & (ip_list_hash_size - 1); } static inline unsigned int recent_entry_hash6(const union nf_inet_addr *addr) { return jhash2((u32 *)addr->ip6, ARRAY_SIZE(addr->ip6), hash_rnd) & (ip_list_hash_size - 1); } static struct recent_entry * recent_entry_lookup(const struct recent_table *table, const union nf_inet_addr *addrp, u_int16_t family, u_int8_t ttl) { struct recent_entry *e; unsigned int h; if (family == NFPROTO_IPV4) h = recent_entry_hash4(addrp); else h = recent_entry_hash6(addrp); list_for_each_entry(e, &table->iphash[h], list) if (e->family == family && memcmp(&e->addr, addrp, sizeof(e->addr)) == 0 && (ttl == e->ttl || ttl == 0 || e->ttl == 0)) return e; return NULL; } static void recent_entry_remove(struct recent_table *t, struct recent_entry *e) { list_del(&e->list); list_del(&e->lru_list); kfree(e); t->entries--; } /* * Drop entries with timestamps older then 'time'. 
*/ static void recent_entry_reap(struct recent_table *t, unsigned long time, struct recent_entry *working, bool update) { struct recent_entry *e; /* * The head of the LRU list is always the oldest entry. */ e = list_entry(t->lru_list.next, struct recent_entry, lru_list); /* * Do not reap the entry which are going to be updated. */ if (e == working && update) return; /* * The last time stamp is the most recent. */ if (time_after(time, e->stamps[e->index-1])) recent_entry_remove(t, e); } static struct recent_entry * recent_entry_init(struct recent_table *t, const union nf_inet_addr *addr, u_int16_t family, u_int8_t ttl) { struct recent_entry *e; unsigned int nstamps_max = t->nstamps_max_mask; if (t->entries >= ip_list_tot) { e = list_entry(t->lru_list.next, struct recent_entry, lru_list); recent_entry_remove(t, e); } nstamps_max += 1; e = kmalloc_flex(*e, stamps, nstamps_max, GFP_ATOMIC); if (e == NULL) return NULL; memcpy(&e->addr, addr, sizeof(e->addr)); e->ttl = ttl; e->stamps[0] = jiffies; e->nstamps = 1; e->index = 1; e->family = family; if (family == NFPROTO_IPV4) list_add_tail(&e->list, &t->iphash[recent_entry_hash4(addr)]); else list_add_tail(&e->list, &t->iphash[recent_entry_hash6(addr)]); list_add_tail(&e->lru_list, &t->lru_list); t->entries++; return e; } static void recent_entry_update(struct recent_table *t, struct recent_entry *e) { e->index &= t->nstamps_max_mask; e->stamps[e->index++] = jiffies; if (e->index > e->nstamps) e->nstamps = e->index; list_move_tail(&e->lru_list, &t->lru_list); } static struct recent_table *recent_table_lookup(struct recent_net *recent_net, const char *name) { struct recent_table *t; list_for_each_entry(t, &recent_net->tables, list) if (!strcmp(t->name, name)) return t; return NULL; } static void recent_table_flush(struct recent_table *t) { struct recent_entry *e, *next; unsigned int i; for (i = 0; i < ip_list_hash_size; i++) list_for_each_entry_safe(e, next, &t->iphash[i], list) recent_entry_remove(t, e); } static bool 
recent_mt(const struct sk_buff *skb, struct xt_action_param *par) { struct net *net = xt_net(par); struct recent_net *recent_net = recent_pernet(net); const struct xt_recent_mtinfo_v1 *info = par->matchinfo; struct recent_table *t; struct recent_entry *e; union nf_inet_addr addr = {}, addr_mask; u_int8_t ttl; bool ret = info->invert; if (xt_family(par) == NFPROTO_IPV4) { const struct iphdr *iph = ip_hdr(skb); if (info->side == XT_RECENT_DEST) addr.ip = iph->daddr; else addr.ip = iph->saddr; ttl = iph->ttl; } else { const struct ipv6hdr *iph = ipv6_hdr(skb); if (info->side == XT_RECENT_DEST) memcpy(&addr.in6, &iph->daddr, sizeof(addr.in6)); else memcpy(&addr.in6, &iph->saddr, sizeof(addr.in6)); ttl = iph->hop_limit; } /* use TTL as seen before forwarding */ if (xt_out(par) != NULL && (!skb->sk || !net_eq(net, sock_net(skb->sk)))) ttl++; spin_lock_bh(&recent_lock); t = recent_table_lookup(recent_net, info->name); nf_inet_addr_mask(&addr, &addr_mask, &t->mask); e = recent_entry_lookup(t, &addr_mask, xt_family(par), (info->check_set & XT_RECENT_TTL) ? 
ttl : 0); if (e == NULL) { if (!(info->check_set & XT_RECENT_SET)) goto out; e = recent_entry_init(t, &addr_mask, xt_family(par), ttl); if (e == NULL) par->hotdrop = true; ret = !ret; goto out; } if (info->check_set & XT_RECENT_SET) ret = !ret; else if (info->check_set & XT_RECENT_REMOVE) { recent_entry_remove(t, e); ret = !ret; } else if (info->check_set & (XT_RECENT_CHECK | XT_RECENT_UPDATE)) { unsigned long time = jiffies - info->seconds * HZ; unsigned int i, hits = 0; for (i = 0; i < e->nstamps; i++) { if (info->seconds && time_after(time, e->stamps[i])) continue; if (!info->hit_count || ++hits >= info->hit_count) { ret = !ret; break; } } /* info->seconds must be non-zero */ if (info->check_set & XT_RECENT_REAP) recent_entry_reap(t, time, e, info->check_set & XT_RECENT_UPDATE && ret); } if (info->check_set & XT_RECENT_SET || (info->check_set & XT_RECENT_UPDATE && ret)) { recent_entry_update(t, e); e->ttl = ttl; } out: spin_unlock_bh(&recent_lock); return ret; } static void recent_table_free(void *addr) { kvfree(addr); } static int recent_mt_check(const struct xt_mtchk_param *par, const struct xt_recent_mtinfo_v1 *info) { struct recent_net *recent_net = recent_pernet(par->net); struct recent_table *t; #ifdef CONFIG_PROC_FS struct proc_dir_entry *pde; kuid_t uid; kgid_t gid; #endif unsigned int nstamp_mask; unsigned int i; int ret = -EINVAL; net_get_random_once(&hash_rnd, sizeof(hash_rnd)); if (info->check_set & ~XT_RECENT_VALID_FLAGS) { pr_info_ratelimited("Unsupported userspace flags (%08x)\n", info->check_set); return -EINVAL; } if (hweight8(info->check_set & (XT_RECENT_SET | XT_RECENT_REMOVE | XT_RECENT_CHECK | XT_RECENT_UPDATE)) != 1) return -EINVAL; if ((info->check_set & (XT_RECENT_SET | XT_RECENT_REMOVE)) && (info->seconds || info->hit_count || (info->check_set & XT_RECENT_MODIFIERS))) return -EINVAL; if ((info->check_set & XT_RECENT_REAP) && !info->seconds) return -EINVAL; if (info->hit_count >= XT_RECENT_MAX_NSTAMPS) { pr_info_ratelimited("hitcount (%u) 
is larger than allowed maximum (%u)\n", info->hit_count, XT_RECENT_MAX_NSTAMPS - 1); return -EINVAL; } ret = xt_check_proc_name(info->name, sizeof(info->name)); if (ret) return ret; if (ip_pkt_list_tot && info->hit_count < ip_pkt_list_tot) nstamp_mask = roundup_pow_of_two(ip_pkt_list_tot) - 1; else if (info->hit_count) nstamp_mask = roundup_pow_of_two(info->hit_count) - 1; else nstamp_mask = 32 - 1; mutex_lock(&recent_mutex); t = recent_table_lookup(recent_net, info->name); if (t != NULL) { if (nstamp_mask > t->nstamps_max_mask) { spin_lock_bh(&recent_lock); recent_table_flush(t); t->nstamps_max_mask = nstamp_mask; spin_unlock_bh(&recent_lock); } t->refcnt++; ret = 0; goto out; } t = kvzalloc_flex(*t, iphash, ip_list_hash_size); if (t == NULL) { ret = -ENOMEM; goto out; } t->refcnt = 1; t->nstamps_max_mask = nstamp_mask; memcpy(&t->mask, &info->mask, sizeof(t->mask)); strcpy(t->name, info->name); INIT_LIST_HEAD(&t->lru_list); for (i = 0; i < ip_list_hash_size; i++) INIT_LIST_HEAD(&t->iphash[i]); #ifdef CONFIG_PROC_FS uid = make_kuid(&init_user_ns, ip_list_uid); gid = make_kgid(&init_user_ns, ip_list_gid); if (!uid_valid(uid) || !gid_valid(gid)) { recent_table_free(t); ret = -EINVAL; goto out; } pde = proc_create_data(t->name, ip_list_perms, recent_net->xt_recent, &recent_mt_proc_ops, t); if (pde == NULL) { recent_table_free(t); ret = -ENOMEM; goto out; } proc_set_user(pde, uid, gid); #endif spin_lock_bh(&recent_lock); list_add_tail(&t->list, &recent_net->tables); spin_unlock_bh(&recent_lock); ret = 0; out: mutex_unlock(&recent_mutex); return ret; } static int recent_mt_check_v0(const struct xt_mtchk_param *par) { const struct xt_recent_mtinfo_v0 *info_v0 = par->matchinfo; struct xt_recent_mtinfo_v1 info_v1; /* Copy revision 0 structure to revision 1 */ memcpy(&info_v1, info_v0, sizeof(struct xt_recent_mtinfo)); /* Set default mask to ensure backward compatible behaviour */ memset(info_v1.mask.all, 0xFF, sizeof(info_v1.mask.all)); return recent_mt_check(par, 
&info_v1); } static int recent_mt_check_v1(const struct xt_mtchk_param *par) { return recent_mt_check(par, par->matchinfo); } static void recent_mt_destroy(const struct xt_mtdtor_param *par) { struct recent_net *recent_net = recent_pernet(par->net); const struct xt_recent_mtinfo_v1 *info = par->matchinfo; struct recent_table *t; mutex_lock(&recent_mutex); t = recent_table_lookup(recent_net, info->name); if (--t->refcnt == 0) { spin_lock_bh(&recent_lock); list_del(&t->list); spin_unlock_bh(&recent_lock); #ifdef CONFIG_PROC_FS if (recent_net->xt_recent != NULL) remove_proc_entry(t->name, recent_net->xt_recent); #endif recent_table_flush(t); recent_table_free(t); } mutex_unlock(&recent_mutex); } #ifdef CONFIG_PROC_FS struct recent_iter_state { const struct recent_table *table; unsigned int bucket; }; static void *recent_seq_start(struct seq_file *seq, loff_t *pos) __acquires(recent_lock) { struct recent_iter_state *st = seq->private; const struct recent_table *t = st->table; struct recent_entry *e; loff_t p = *pos; spin_lock_bh(&recent_lock); for (st->bucket = 0; st->bucket < ip_list_hash_size; st->bucket++) list_for_each_entry(e, &t->iphash[st->bucket], list) if (p-- == 0) return e; return NULL; } static void *recent_seq_next(struct seq_file *seq, void *v, loff_t *pos) { struct recent_iter_state *st = seq->private; const struct recent_table *t = st->table; const struct recent_entry *e = v; const struct list_head *head = e->list.next; (*pos)++; while (head == &t->iphash[st->bucket]) { if (++st->bucket >= ip_list_hash_size) return NULL; head = t->iphash[st->bucket].next; } return list_entry(head, struct recent_entry, list); } static void recent_seq_stop(struct seq_file *s, void *v) __releases(recent_lock) { spin_unlock_bh(&recent_lock); } static int recent_seq_show(struct seq_file *seq, void *v) { const struct recent_entry *e = v; struct recent_iter_state *st = seq->private; const struct recent_table *t = st->table; unsigned int i; i = (e->index - 1) & 
t->nstamps_max_mask; if (e->family == NFPROTO_IPV4) seq_printf(seq, "src=%pI4 ttl: %u last_seen: %lu oldest_pkt: %u", &e->addr.ip, e->ttl, e->stamps[i], e->index); else seq_printf(seq, "src=%pI6 ttl: %u last_seen: %lu oldest_pkt: %u", &e->addr.in6, e->ttl, e->stamps[i], e->index); for (i = 0; i < e->nstamps; i++) seq_printf(seq, "%s %lu", i ? "," : "", e->stamps[i]); seq_putc(seq, '\n'); return 0; } static const struct seq_operations recent_seq_ops = { .start = recent_seq_start, .next = recent_seq_next, .stop = recent_seq_stop, .show = recent_seq_show, }; static int recent_seq_open(struct inode *inode, struct file *file) { struct recent_iter_state *st; st = __seq_open_private(file, &recent_seq_ops, sizeof(*st)); if (st == NULL) return -ENOMEM; st->table = pde_data(inode); return 0; } static ssize_t recent_mt_proc_write(struct file *file, const char __user *input, size_t size, loff_t *loff) { struct recent_table *t = pde_data(file_inode(file)); struct recent_entry *e; char buf[sizeof("+b335:1d35:1e55:dead:c0de:1715:255.255.255.255")]; const char *c = buf; union nf_inet_addr addr = {}; u_int16_t family; bool add, succ; if (size == 0) return 0; if (size > sizeof(buf)) size = sizeof(buf); if (copy_from_user(buf, input, size) != 0) return -EFAULT; /* Strict protocol! 
*/
	if (*loff != 0)
		return -ESPIPE;
	switch (*c) {
	case '/': /* flush table */
		spin_lock_bh(&recent_lock);
		recent_table_flush(t);
		spin_unlock_bh(&recent_lock);
		return size;
	case '-': /* remove address */
		add = false;
		break;
	case '+': /* add address */
		add = true;
		break;
	default:
		pr_info_ratelimited("Need \"+ip\", \"-ip\" or \"/\"\n");
		return -EINVAL;
	}

	++c;
	--size;
	if (strnchr(c, size, ':') != NULL) {
		family = NFPROTO_IPV6;
		succ = in6_pton(c, size, (void *)&addr, '\n', NULL);
	} else {
		family = NFPROTO_IPV4;
		succ = in4_pton(c, size, (void *)&addr, '\n', NULL);
	}

	if (!succ)
		return -EINVAL;

	spin_lock_bh(&recent_lock);
	e = recent_entry_lookup(t, &addr, family, 0);
	if (e == NULL) {
		if (add)
			recent_entry_init(t, &addr, family, 0);
	} else {
		if (add)
			recent_entry_update(t, e);
		else
			recent_entry_remove(t, e);
	}
	spin_unlock_bh(&recent_lock);
	/* Note we removed one above */
	*loff += size + 1;
	return size + 1;
}

/*
 * proc file operations for a table entry: reads go through the seq_file
 * helpers, writes go to recent_mt_proc_write().
 */
static const struct proc_ops recent_mt_proc_ops = {
	.proc_open	= recent_seq_open,
	.proc_read	= seq_read,
	.proc_write	= recent_mt_proc_write,
	.proc_release	= seq_release_private,
	.proc_lseek	= seq_lseek,
};

/*
 * Create the "xt_recent" proc directory under this namespace's proc_net.
 * Returns 0 on success or -ENOMEM if proc_mkdir() fails.
 */
static int __net_init recent_proc_net_init(struct net *net)
{
	struct recent_net *recent_net = recent_pernet(net);

	recent_net->xt_recent = proc_mkdir("xt_recent", net->proc_net);
	if (!recent_net->xt_recent)
		return -ENOMEM;
	return 0;
}

/*
 * Remove the proc entry of every table still listed for this namespace,
 * clear the cached directory pointer, then remove the "xt_recent"
 * directory itself.
 */
static void __net_exit recent_proc_net_exit(struct net *net)
{
	struct recent_net *recent_net = recent_pernet(net);
	struct recent_table *t;

	/* recent_net_exit() is called before recent_mt_destroy(). Make sure
	 * that the parent xt_recent proc entry is empty before trying to
	 * remove it.
	 */
	spin_lock_bh(&recent_lock);
	list_for_each_entry(t, &recent_net->tables, list)
		remove_proc_entry(t->name, recent_net->xt_recent);

	recent_net->xt_recent = NULL;
	spin_unlock_bh(&recent_lock);

	remove_proc_entry("xt_recent", net->proc_net);
}
#else
/* Without CONFIG_PROC_FS the proc setup/teardown hooks are no-ops. */
static inline int recent_proc_net_init(struct net *net)
{
	return 0;
}

static inline void recent_proc_net_exit(struct net *net)
{
}
#endif /* CONFIG_PROC_FS */

/*
 * Per-namespace init: start with an empty table list, then set up the
 * proc directory. Returns the result of recent_proc_net_init().
 */
static int __net_init recent_net_init(struct net *net)
{
	struct recent_net *recent_net = recent_pernet(net);

	INIT_LIST_HEAD(&recent_net->tables);
	return recent_proc_net_init(net);
}

/* Per-namespace exit: only the proc entries are torn down here. */
static void __net_exit recent_net_exit(struct net *net)
{
	recent_proc_net_exit(net);
}

/* Per-namespace hooks; .size reserves one struct recent_net per namespace. */
static struct pernet_operations recent_net_ops = {
	.init	= recent_net_init,
	.exit	= recent_net_exit,
	.id	= &recent_net_id,
	.size	= sizeof(struct recent_net),
};

/*
 * "recent" match registrations: revisions 0 and 1, each for IPv4 and IPv6.
 * The revisions differ in matchsize and checkentry; match and destroy
 * callbacks are shared.
 */
static struct xt_match recent_mt_reg[] __read_mostly = {
	{
		.name       = "recent",
		.revision   = 0,
		.family     = NFPROTO_IPV4,
		.match      = recent_mt,
		.matchsize  = sizeof(struct xt_recent_mtinfo),
		.checkentry = recent_mt_check_v0,
		.destroy    = recent_mt_destroy,
		.me         = THIS_MODULE,
	},
	{
		.name       = "recent",
		.revision   = 0,
		.family     = NFPROTO_IPV6,
		.match      = recent_mt,
		.matchsize  = sizeof(struct xt_recent_mtinfo),
		.checkentry = recent_mt_check_v0,
		.destroy    = recent_mt_destroy,
		.me         = THIS_MODULE,
	},
	{
		.name       = "recent",
		.revision   = 1,
		.family     = NFPROTO_IPV4,
		.match      = recent_mt,
		.matchsize  = sizeof(struct xt_recent_mtinfo_v1),
		.checkentry = recent_mt_check_v1,
		.destroy    = recent_mt_destroy,
		.me         = THIS_MODULE,
	},
	{
		.name       = "recent",
		.revision   = 1,
		.family     = NFPROTO_IPV6,
		.match      = recent_mt,
		.matchsize  = sizeof(struct xt_recent_mtinfo_v1),
		.checkentry = recent_mt_check_v1,
		.destroy    = recent_mt_destroy,
		.me         = THIS_MODULE,
	}
};

/*
 * Module init. Rejects ip_list_tot == 0 and ip_pkt_list_tot values that do
 * not fit below XT_RECENT_MAX_NSTAMPS, derives the hash size as
 * 1 << fls(ip_list_tot), then registers the pernet operations followed by
 * the matches. The pernet registration is undone if match registration
 * fails.
 */
static int __init recent_mt_init(void)
{
	int err;

	BUILD_BUG_ON_NOT_POWER_OF_2(XT_RECENT_MAX_NSTAMPS);

	if (!ip_list_tot || ip_pkt_list_tot >= XT_RECENT_MAX_NSTAMPS)
		return -EINVAL;
	ip_list_hash_size = 1 << fls(ip_list_tot);

	err = register_pernet_subsys(&recent_net_ops);
	if (err)
		return err;
	err = xt_register_matches(recent_mt_reg, ARRAY_SIZE(recent_mt_reg));
	if (err)
		unregister_pernet_subsys(&recent_net_ops);
	return err;
}

/* Module exit: unregister in the reverse order of recent_mt_init(). */
static void __exit recent_mt_exit(void)
{
	xt_unregister_matches(recent_mt_reg, ARRAY_SIZE(recent_mt_reg));
	unregister_pernet_subsys(&recent_net_ops);
}

module_init(recent_mt_init);
module_exit(recent_mt_exit);
1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 1 1 1 1 1 1 1 1 2 2 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 
508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 
1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 // SPDX-License-Identifier: GPL-2.0-or-later /* * Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. * * HID driver for NVIDIA SHIELD peripherals. */ #include <linux/hid.h> #include <linux/idr.h> #include <linux/input-event-codes.h> #include <linux/input.h> #include <linux/jiffies.h> #include <linux/leds.h> #include <linux/module.h> #include <linux/power_supply.h> #include <linux/spinlock.h> #include <linux/timer.h> #include <linux/workqueue.h> #include "hid-ids.h" #define NOT_INIT_STR "NOT INITIALIZED" #define android_map_key(c) hid_map_usage(hi, usage, bit, max, EV_KEY, (c)) enum { HID_USAGE_ANDROID_PLAYPAUSE_BTN = 0xcd, /* Double-tap volume slider */ HID_USAGE_ANDROID_VOLUMEUP_BTN = 0xe9, HID_USAGE_ANDROID_VOLUMEDOWN_BTN = 0xea, HID_USAGE_ANDROID_SEARCH_BTN = 0x221, /* NVIDIA btn on Thunderstrike */ HID_USAGE_ANDROID_HOME_BTN = 0x223, HID_USAGE_ANDROID_BACK_BTN = 0x224, }; enum { SHIELD_FW_VERSION_INITIALIZED = 0, SHIELD_BOARD_INFO_INITIALIZED, SHIELD_BATTERY_STATS_INITIALIZED, SHIELD_CHARGER_STATE_INITIALIZED, }; enum { THUNDERSTRIKE_FW_VERSION_UPDATE = 0, THUNDERSTRIKE_BOARD_INFO_UPDATE, THUNDERSTRIKE_HAPTICS_UPDATE, THUNDERSTRIKE_LED_UPDATE, THUNDERSTRIKE_POWER_SUPPLY_STATS_UPDATE, }; enum { THUNDERSTRIKE_HOSTCMD_REPORT_SIZE = 33, THUNDERSTRIKE_HOSTCMD_REQ_REPORT_ID = 0x4, 
THUNDERSTRIKE_HOSTCMD_RESP_REPORT_ID = 0x3, }; enum { THUNDERSTRIKE_HOSTCMD_ID_FW_VERSION = 1, THUNDERSTRIKE_HOSTCMD_ID_LED = 6, THUNDERSTRIKE_HOSTCMD_ID_BATTERY, THUNDERSTRIKE_HOSTCMD_ID_BOARD_INFO = 16, THUNDERSTRIKE_HOSTCMD_ID_USB_INIT = 53, THUNDERSTRIKE_HOSTCMD_ID_HAPTICS = 57, THUNDERSTRIKE_HOSTCMD_ID_CHARGER, }; struct power_supply_dev { struct power_supply *psy; struct power_supply_desc desc; }; struct thunderstrike_psy_prop_values { int voltage_min; int voltage_now; int voltage_avg; int voltage_boot; int capacity; int status; int charge_type; int temp; }; static const enum power_supply_property thunderstrike_battery_props[] = { POWER_SUPPLY_PROP_STATUS, POWER_SUPPLY_PROP_CHARGE_TYPE, POWER_SUPPLY_PROP_PRESENT, POWER_SUPPLY_PROP_VOLTAGE_MIN, POWER_SUPPLY_PROP_VOLTAGE_MAX_DESIGN, POWER_SUPPLY_PROP_VOLTAGE_MIN_DESIGN, POWER_SUPPLY_PROP_VOLTAGE_NOW, POWER_SUPPLY_PROP_VOLTAGE_AVG, POWER_SUPPLY_PROP_VOLTAGE_BOOT, POWER_SUPPLY_PROP_CAPACITY, POWER_SUPPLY_PROP_SCOPE, POWER_SUPPLY_PROP_TEMP, POWER_SUPPLY_PROP_TEMP_MIN, POWER_SUPPLY_PROP_TEMP_MAX, POWER_SUPPLY_PROP_TEMP_ALERT_MIN, POWER_SUPPLY_PROP_TEMP_ALERT_MAX, }; enum thunderstrike_led_state { THUNDERSTRIKE_LED_OFF = 1, THUNDERSTRIKE_LED_ON = 8, } __packed; static_assert(sizeof(enum thunderstrike_led_state) == 1); struct thunderstrike_hostcmd_battery { __le16 voltage_avg; u8 reserved_at_10; __le16 thermistor; __le16 voltage_min; __le16 voltage_boot; __le16 voltage_now; u8 capacity; } __packed; enum thunderstrike_charger_type { THUNDERSTRIKE_CHARGER_TYPE_NONE = 0, THUNDERSTRIKE_CHARGER_TYPE_TRICKLE, THUNDERSTRIKE_CHARGER_TYPE_NORMAL, } __packed; static_assert(sizeof(enum thunderstrike_charger_type) == 1); enum thunderstrike_charger_state { THUNDERSTRIKE_CHARGER_STATE_UNKNOWN = 0, THUNDERSTRIKE_CHARGER_STATE_DISABLED, THUNDERSTRIKE_CHARGER_STATE_CHARGING, THUNDERSTRIKE_CHARGER_STATE_FULL, THUNDERSTRIKE_CHARGER_STATE_FAILED = 8, } __packed; static_assert(sizeof(enum thunderstrike_charger_state) == 1); struct 
thunderstrike_hostcmd_charger { u8 connected; enum thunderstrike_charger_type type; enum thunderstrike_charger_state state; } __packed; struct thunderstrike_hostcmd_board_info { __le16 revision; __le16 serial[7]; } __packed; struct thunderstrike_hostcmd_haptics { u8 motor_left; u8 motor_right; } __packed; struct thunderstrike_hostcmd_resp_report { u8 report_id; /* THUNDERSTRIKE_HOSTCMD_RESP_REPORT_ID */ u8 cmd_id; u8 reserved_at_10; union { struct thunderstrike_hostcmd_board_info board_info; struct thunderstrike_hostcmd_haptics motors; __le16 fw_version; enum thunderstrike_led_state led_state; struct thunderstrike_hostcmd_battery battery; struct thunderstrike_hostcmd_charger charger; u8 payload[30]; } __packed; } __packed; static_assert(sizeof(struct thunderstrike_hostcmd_resp_report) == THUNDERSTRIKE_HOSTCMD_REPORT_SIZE); struct thunderstrike_hostcmd_req_report { u8 report_id; /* THUNDERSTRIKE_HOSTCMD_REQ_REPORT_ID */ u8 cmd_id; u8 reserved_at_10; union { struct __packed { u8 update; enum thunderstrike_led_state state; } led; struct __packed { u8 update; struct thunderstrike_hostcmd_haptics motors; } haptics; } __packed; u8 reserved_at_30[27]; } __packed; static_assert(sizeof(struct thunderstrike_hostcmd_req_report) == THUNDERSTRIKE_HOSTCMD_REPORT_SIZE); /* Common struct for shield accessories. */ struct shield_device { struct hid_device *hdev; struct power_supply_dev battery_dev; unsigned long initialized_flags; const char *codename; u16 fw_version; struct { u16 revision; char serial_number[15]; } board_info; }; /* * Non-trivial to uniquely identify Thunderstrike controllers at initialization * time. Use an ID allocator to help with this. 
*/ static DEFINE_IDA(thunderstrike_ida); struct thunderstrike { struct shield_device base; int id; /* Sub-devices */ struct input_dev *haptics_dev; struct led_classdev led_dev; /* Resources */ void *req_report_dmabuf; unsigned long update_flags; struct thunderstrike_hostcmd_haptics haptics_val; spinlock_t haptics_update_lock; u8 led_state : 1; enum thunderstrike_led_state led_value; struct thunderstrike_psy_prop_values psy_stats; spinlock_t psy_stats_lock; struct timer_list psy_stats_timer; struct work_struct hostcmd_req_work; }; static inline void thunderstrike_hostcmd_req_report_init( struct thunderstrike_hostcmd_req_report *report, u8 cmd_id) { memset(report, 0, sizeof(*report)); report->report_id = THUNDERSTRIKE_HOSTCMD_REQ_REPORT_ID; report->cmd_id = cmd_id; } static inline void shield_strrev(char *dest, size_t len, u16 rev) { dest[0] = ('A' - 1) + (rev >> 8); snprintf(&dest[1], len - 1, "%02X", 0xff & rev); } static struct input_dev *shield_allocate_input_dev(struct hid_device *hdev, const char *name_suffix) { struct input_dev *idev; idev = input_allocate_device(); if (!idev) goto err_device; idev->id.bustype = hdev->bus; idev->id.vendor = hdev->vendor; idev->id.product = hdev->product; idev->id.version = hdev->version; idev->uniq = hdev->uniq; idev->name = devm_kasprintf(&hdev->dev, GFP_KERNEL, "%s %s", hdev->name, name_suffix); if (!idev->name) goto err_name; input_set_drvdata(idev, hdev); return idev; err_name: input_free_device(idev); err_device: return ERR_PTR(-ENOMEM); } static struct input_dev *shield_haptics_create( struct shield_device *dev, int (*play_effect)(struct input_dev *, void *, struct ff_effect *)) { struct input_dev *haptics; int ret; if (!IS_ENABLED(CONFIG_NVIDIA_SHIELD_FF)) return NULL; haptics = shield_allocate_input_dev(dev->hdev, "Haptics"); if (IS_ERR(haptics)) return haptics; input_set_capability(haptics, EV_FF, FF_RUMBLE); ret = input_ff_create_memless(haptics, NULL, play_effect); if (ret) goto err; ret = 
input_register_device(haptics); if (ret) goto err; return haptics; err: input_free_device(haptics); return ERR_PTR(ret); } static inline void thunderstrike_send_hostcmd_request(struct thunderstrike *ts) { struct thunderstrike_hostcmd_req_report *report = ts->req_report_dmabuf; struct shield_device *shield_dev = &ts->base; int ret; ret = hid_hw_raw_request(shield_dev->hdev, report->report_id, ts->req_report_dmabuf, THUNDERSTRIKE_HOSTCMD_REPORT_SIZE, HID_OUTPUT_REPORT, HID_REQ_SET_REPORT); if (ret < 0) { hid_err(shield_dev->hdev, "Failed to output Thunderstrike HOSTCMD request HID report due to %pe\n", ERR_PTR(ret)); } } static void thunderstrike_hostcmd_req_work_handler(struct work_struct *work) { struct thunderstrike *ts = container_of(work, struct thunderstrike, hostcmd_req_work); struct thunderstrike_hostcmd_req_report *report; unsigned long flags; report = ts->req_report_dmabuf; if (test_and_clear_bit(THUNDERSTRIKE_FW_VERSION_UPDATE, &ts->update_flags)) { thunderstrike_hostcmd_req_report_init( report, THUNDERSTRIKE_HOSTCMD_ID_FW_VERSION); thunderstrike_send_hostcmd_request(ts); } if (test_and_clear_bit(THUNDERSTRIKE_LED_UPDATE, &ts->update_flags)) { thunderstrike_hostcmd_req_report_init(report, THUNDERSTRIKE_HOSTCMD_ID_LED); report->led.update = 1; report->led.state = ts->led_value; thunderstrike_send_hostcmd_request(ts); } if (test_and_clear_bit(THUNDERSTRIKE_POWER_SUPPLY_STATS_UPDATE, &ts->update_flags)) { thunderstrike_hostcmd_req_report_init( report, THUNDERSTRIKE_HOSTCMD_ID_BATTERY); thunderstrike_send_hostcmd_request(ts); thunderstrike_hostcmd_req_report_init( report, THUNDERSTRIKE_HOSTCMD_ID_CHARGER); thunderstrike_send_hostcmd_request(ts); } if (test_and_clear_bit(THUNDERSTRIKE_BOARD_INFO_UPDATE, &ts->update_flags)) { thunderstrike_hostcmd_req_report_init( report, THUNDERSTRIKE_HOSTCMD_ID_BOARD_INFO); thunderstrike_send_hostcmd_request(ts); } if (test_and_clear_bit(THUNDERSTRIKE_HAPTICS_UPDATE, &ts->update_flags)) { thunderstrike_hostcmd_req_report_init( 
report, THUNDERSTRIKE_HOSTCMD_ID_HAPTICS); report->haptics.update = 1; spin_lock_irqsave(&ts->haptics_update_lock, flags); report->haptics.motors = ts->haptics_val; spin_unlock_irqrestore(&ts->haptics_update_lock, flags); thunderstrike_send_hostcmd_request(ts); } } static inline void thunderstrike_request_firmware_version(struct thunderstrike *ts) { set_bit(THUNDERSTRIKE_FW_VERSION_UPDATE, &ts->update_flags); schedule_work(&ts->hostcmd_req_work); } static inline void thunderstrike_request_board_info(struct thunderstrike *ts) { set_bit(THUNDERSTRIKE_BOARD_INFO_UPDATE, &ts->update_flags); schedule_work(&ts->hostcmd_req_work); } static inline int thunderstrike_update_haptics(struct thunderstrike *ts, struct thunderstrike_hostcmd_haptics *motors) { unsigned long flags; spin_lock_irqsave(&ts->haptics_update_lock, flags); ts->haptics_val = *motors; spin_unlock_irqrestore(&ts->haptics_update_lock, flags); set_bit(THUNDERSTRIKE_HAPTICS_UPDATE, &ts->update_flags); schedule_work(&ts->hostcmd_req_work); return 0; } static int thunderstrike_play_effect(struct input_dev *idev, void *data, struct ff_effect *effect) { struct hid_device *hdev = input_get_drvdata(idev); struct thunderstrike_hostcmd_haptics motors; struct shield_device *shield_dev; struct thunderstrike *ts; if (effect->type != FF_RUMBLE) return 0; shield_dev = hid_get_drvdata(hdev); ts = container_of(shield_dev, struct thunderstrike, base); /* Thunderstrike motor values range from 0 to 32 inclusively */ motors.motor_left = effect->u.rumble.strong_magnitude / 2047; motors.motor_right = effect->u.rumble.weak_magnitude / 2047; hid_dbg(hdev, "Thunderstrike FF_RUMBLE request, left: %u right: %u\n", motors.motor_left, motors.motor_right); return thunderstrike_update_haptics(ts, &motors); } static enum led_brightness thunderstrike_led_get_brightness(struct led_classdev *led) { struct hid_device *hdev = to_hid_device(led->dev->parent); struct shield_device *shield_dev = hid_get_drvdata(hdev); struct thunderstrike *ts; ts = 
container_of(shield_dev, struct thunderstrike, base); return ts->led_state; } static void thunderstrike_led_set_brightness(struct led_classdev *led, enum led_brightness value) { struct hid_device *hdev = to_hid_device(led->dev->parent); struct shield_device *shield_dev = hid_get_drvdata(hdev); struct thunderstrike *ts; ts = container_of(shield_dev, struct thunderstrike, base); switch (value) { case LED_OFF: ts->led_value = THUNDERSTRIKE_LED_OFF; break; default: ts->led_value = THUNDERSTRIKE_LED_ON; break; } set_bit(THUNDERSTRIKE_LED_UPDATE, &ts->update_flags); schedule_work(&ts->hostcmd_req_work); } static int thunderstrike_battery_get_property(struct power_supply *psy, enum power_supply_property psp, union power_supply_propval *val) { struct shield_device *shield_dev = power_supply_get_drvdata(psy); struct thunderstrike_psy_prop_values prop_values; struct thunderstrike *ts; int ret = 0; ts = container_of(shield_dev, struct thunderstrike, base); spin_lock(&ts->psy_stats_lock); prop_values = ts->psy_stats; spin_unlock(&ts->psy_stats_lock); switch (psp) { case POWER_SUPPLY_PROP_STATUS: val->intval = prop_values.status; break; case POWER_SUPPLY_PROP_CHARGE_TYPE: val->intval = prop_values.charge_type; break; case POWER_SUPPLY_PROP_PRESENT: val->intval = 1; break; case POWER_SUPPLY_PROP_VOLTAGE_MIN: val->intval = prop_values.voltage_min; break; case POWER_SUPPLY_PROP_VOLTAGE_MAX_DESIGN: val->intval = 2900000; /* 2.9 V */ break; case POWER_SUPPLY_PROP_VOLTAGE_MIN_DESIGN: val->intval = 2200000; /* 2.2 V */ break; case POWER_SUPPLY_PROP_VOLTAGE_NOW: val->intval = prop_values.voltage_now; break; case POWER_SUPPLY_PROP_VOLTAGE_AVG: val->intval = prop_values.voltage_avg; break; case POWER_SUPPLY_PROP_VOLTAGE_BOOT: val->intval = prop_values.voltage_boot; break; case POWER_SUPPLY_PROP_CAPACITY: val->intval = prop_values.capacity; break; case POWER_SUPPLY_PROP_SCOPE: val->intval = POWER_SUPPLY_SCOPE_DEVICE; break; case POWER_SUPPLY_PROP_TEMP: val->intval = prop_values.temp; 
break; case POWER_SUPPLY_PROP_TEMP_MIN: val->intval = 0; /* 0 C */ break; case POWER_SUPPLY_PROP_TEMP_MAX: val->intval = 400; /* 40 C */ break; case POWER_SUPPLY_PROP_TEMP_ALERT_MIN: val->intval = 15; /* 1.5 C */ break; case POWER_SUPPLY_PROP_TEMP_ALERT_MAX: val->intval = 380; /* 38 C */ break; default: ret = -EINVAL; break; } return ret; } static inline void thunderstrike_request_psy_stats(struct thunderstrike *ts) { set_bit(THUNDERSTRIKE_POWER_SUPPLY_STATS_UPDATE, &ts->update_flags); schedule_work(&ts->hostcmd_req_work); } static void thunderstrike_psy_stats_timer_handler(struct timer_list *timer) { struct thunderstrike *ts = container_of(timer, struct thunderstrike, psy_stats_timer); thunderstrike_request_psy_stats(ts); /* Query battery statistics from device every five minutes */ mod_timer(timer, jiffies + 300 * HZ); } static void thunderstrike_parse_fw_version_payload(struct shield_device *shield_dev, __le16 fw_version) { shield_dev->fw_version = le16_to_cpu(fw_version); set_bit(SHIELD_FW_VERSION_INITIALIZED, &shield_dev->initialized_flags); hid_dbg(shield_dev->hdev, "Thunderstrike firmware version 0x%04X\n", shield_dev->fw_version); } static void thunderstrike_parse_board_info_payload(struct shield_device *shield_dev, struct thunderstrike_hostcmd_board_info *board_info) { char board_revision_str[4]; int i; shield_dev->board_info.revision = le16_to_cpu(board_info->revision); for (i = 0; i < 7; ++i) { u16 val = le16_to_cpu(board_info->serial[i]); shield_dev->board_info.serial_number[2 * i] = val & 0xFF; shield_dev->board_info.serial_number[2 * i + 1] = val >> 8; } shield_dev->board_info.serial_number[14] = '\0'; set_bit(SHIELD_BOARD_INFO_INITIALIZED, &shield_dev->initialized_flags); shield_strrev(board_revision_str, 4, shield_dev->board_info.revision); hid_dbg(shield_dev->hdev, "Thunderstrike BOARD_REVISION_%s (0x%04X) S/N: %s\n", board_revision_str, shield_dev->board_info.revision, shield_dev->board_info.serial_number); } static inline void 
thunderstrike_parse_haptics_payload(struct shield_device *shield_dev, struct thunderstrike_hostcmd_haptics *haptics) { hid_dbg(shield_dev->hdev, "Thunderstrike haptics HOSTCMD response, left: %u right: %u\n", haptics->motor_left, haptics->motor_right); } static void thunderstrike_parse_led_payload(struct shield_device *shield_dev, enum thunderstrike_led_state led_state) { struct thunderstrike *ts = container_of(shield_dev, struct thunderstrike, base); switch (led_state) { case THUNDERSTRIKE_LED_OFF: ts->led_state = 0; break; case THUNDERSTRIKE_LED_ON: ts->led_state = 1; break; } hid_dbg(shield_dev->hdev, "Thunderstrike led HOSTCMD response, 0x%02X\n", led_state); } static void thunderstrike_parse_battery_payload( struct shield_device *shield_dev, struct thunderstrike_hostcmd_battery *battery) { struct thunderstrike *ts = container_of(shield_dev, struct thunderstrike, base); u16 hostcmd_voltage_boot = le16_to_cpu(battery->voltage_boot); u16 hostcmd_voltage_avg = le16_to_cpu(battery->voltage_avg); u16 hostcmd_voltage_min = le16_to_cpu(battery->voltage_min); u16 hostcmd_voltage_now = le16_to_cpu(battery->voltage_now); u16 hostcmd_thermistor = le16_to_cpu(battery->thermistor); int voltage_boot, voltage_avg, voltage_min, voltage_now; struct hid_device *hdev = shield_dev->hdev; u8 capacity = battery->capacity; int temp; /* Convert thunderstrike device values to µV and tenths of degree Celsius */ voltage_boot = hostcmd_voltage_boot * 1000; voltage_avg = hostcmd_voltage_avg * 1000; voltage_min = hostcmd_voltage_min * 1000; voltage_now = hostcmd_voltage_now * 1000; temp = (1378 - (int)hostcmd_thermistor) * 10 / 19; /* Copy converted values */ spin_lock(&ts->psy_stats_lock); ts->psy_stats.voltage_boot = voltage_boot; ts->psy_stats.voltage_avg = voltage_avg; ts->psy_stats.voltage_min = voltage_min; ts->psy_stats.voltage_now = voltage_now; ts->psy_stats.capacity = capacity; ts->psy_stats.temp = temp; spin_unlock(&ts->psy_stats_lock); set_bit(SHIELD_BATTERY_STATS_INITIALIZED, 
&shield_dev->initialized_flags); hid_dbg(hdev, "Thunderstrike battery HOSTCMD response, voltage_avg: %u voltage_now: %u\n", hostcmd_voltage_avg, hostcmd_voltage_now); hid_dbg(hdev, "Thunderstrike battery HOSTCMD response, voltage_boot: %u voltage_min: %u\n", hostcmd_voltage_boot, hostcmd_voltage_min); hid_dbg(hdev, "Thunderstrike battery HOSTCMD response, thermistor: %u\n", hostcmd_thermistor); hid_dbg(hdev, "Thunderstrike battery HOSTCMD response, capacity: %u%%\n", capacity); } static void thunderstrike_parse_charger_payload( struct shield_device *shield_dev, struct thunderstrike_hostcmd_charger *charger) { struct thunderstrike *ts = container_of(shield_dev, struct thunderstrike, base); int charge_type = POWER_SUPPLY_CHARGE_TYPE_UNKNOWN; struct hid_device *hdev = shield_dev->hdev; int status = POWER_SUPPLY_STATUS_UNKNOWN; switch (charger->type) { case THUNDERSTRIKE_CHARGER_TYPE_NONE: charge_type = POWER_SUPPLY_CHARGE_TYPE_NONE; break; case THUNDERSTRIKE_CHARGER_TYPE_TRICKLE: charge_type = POWER_SUPPLY_CHARGE_TYPE_TRICKLE; break; case THUNDERSTRIKE_CHARGER_TYPE_NORMAL: charge_type = POWER_SUPPLY_CHARGE_TYPE_STANDARD; break; default: hid_warn(hdev, "Unhandled Thunderstrike charger HOSTCMD type, %u\n", charger->type); break; } switch (charger->state) { case THUNDERSTRIKE_CHARGER_STATE_UNKNOWN: status = POWER_SUPPLY_STATUS_UNKNOWN; break; case THUNDERSTRIKE_CHARGER_STATE_DISABLED: /* Indicates charger is disconnected */ break; case THUNDERSTRIKE_CHARGER_STATE_CHARGING: status = POWER_SUPPLY_STATUS_CHARGING; break; case THUNDERSTRIKE_CHARGER_STATE_FULL: status = POWER_SUPPLY_STATUS_FULL; break; case THUNDERSTRIKE_CHARGER_STATE_FAILED: status = POWER_SUPPLY_STATUS_NOT_CHARGING; hid_err(hdev, "Thunderstrike device failed to charge\n"); break; default: hid_warn(hdev, "Unhandled Thunderstrike charger HOSTCMD state, %u\n", charger->state); break; } if (!charger->connected) status = POWER_SUPPLY_STATUS_DISCHARGING; spin_lock(&ts->psy_stats_lock); ts->psy_stats.charge_type = 
charge_type; ts->psy_stats.status = status; spin_unlock(&ts->psy_stats_lock); set_bit(SHIELD_CHARGER_STATE_INITIALIZED, &shield_dev->initialized_flags); hid_dbg(hdev, "Thunderstrike charger HOSTCMD response, connected: %u, type: %u, state: %u\n", charger->connected, charger->type, charger->state); } static inline void thunderstrike_device_init_info(struct shield_device *shield_dev) { struct thunderstrike *ts = container_of(shield_dev, struct thunderstrike, base); if (!test_bit(SHIELD_FW_VERSION_INITIALIZED, &shield_dev->initialized_flags)) thunderstrike_request_firmware_version(ts); if (!test_bit(SHIELD_BOARD_INFO_INITIALIZED, &shield_dev->initialized_flags)) thunderstrike_request_board_info(ts); if (!test_bit(SHIELD_BATTERY_STATS_INITIALIZED, &shield_dev->initialized_flags) || !test_bit(SHIELD_CHARGER_STATE_INITIALIZED, &shield_dev->initialized_flags)) thunderstrike_psy_stats_timer_handler(&ts->psy_stats_timer); } static int thunderstrike_parse_report(struct shield_device *shield_dev, struct hid_report *report, u8 *data, int size) { struct thunderstrike_hostcmd_resp_report *hostcmd_resp_report; struct hid_device *hdev = shield_dev->hdev; switch (report->id) { case THUNDERSTRIKE_HOSTCMD_RESP_REPORT_ID: if (size != THUNDERSTRIKE_HOSTCMD_REPORT_SIZE) { hid_err(hdev, "Encountered Thunderstrike HOSTCMD HID report with unexpected size %d\n", size); return -EINVAL; } hostcmd_resp_report = (struct thunderstrike_hostcmd_resp_report *)data; switch (hostcmd_resp_report->cmd_id) { case THUNDERSTRIKE_HOSTCMD_ID_FW_VERSION: thunderstrike_parse_fw_version_payload( shield_dev, hostcmd_resp_report->fw_version); break; case THUNDERSTRIKE_HOSTCMD_ID_LED: thunderstrike_parse_led_payload(shield_dev, hostcmd_resp_report->led_state); break; case THUNDERSTRIKE_HOSTCMD_ID_BATTERY: thunderstrike_parse_battery_payload(shield_dev, &hostcmd_resp_report->battery); break; case THUNDERSTRIKE_HOSTCMD_ID_BOARD_INFO: thunderstrike_parse_board_info_payload( shield_dev, 
&hostcmd_resp_report->board_info); break; case THUNDERSTRIKE_HOSTCMD_ID_HAPTICS: thunderstrike_parse_haptics_payload( shield_dev, &hostcmd_resp_report->motors); break; case THUNDERSTRIKE_HOSTCMD_ID_USB_INIT: /* May block HOSTCMD requests till received initially */ thunderstrike_device_init_info(shield_dev); break; case THUNDERSTRIKE_HOSTCMD_ID_CHARGER: /* May block HOSTCMD requests till received initially */ thunderstrike_device_init_info(shield_dev); thunderstrike_parse_charger_payload( shield_dev, &hostcmd_resp_report->charger); break; default: hid_warn(hdev, "Unhandled Thunderstrike HOSTCMD id %d\n", hostcmd_resp_report->cmd_id); return -ENOENT; } break; default: return 0; } return 0; } static inline int thunderstrike_led_create(struct thunderstrike *ts) { struct led_classdev *led = &ts->led_dev; led->name = devm_kasprintf(&ts->base.hdev->dev, GFP_KERNEL, "thunderstrike%d:blue:led", ts->id); if (!led->name) return -ENOMEM; led->max_brightness = 1; led->flags = LED_CORE_SUSPENDRESUME | LED_RETAIN_AT_SHUTDOWN; led->brightness_get = &thunderstrike_led_get_brightness; led->brightness_set = &thunderstrike_led_set_brightness; return led_classdev_register(&ts->base.hdev->dev, led); } static inline int thunderstrike_psy_create(struct shield_device *shield_dev) { struct thunderstrike *ts = container_of(shield_dev, struct thunderstrike, base); struct power_supply_config psy_cfg = { .drv_data = shield_dev, }; struct hid_device *hdev = shield_dev->hdev; int ret; /* * Set an initial capacity and temperature value to avoid prematurely * triggering alerts. Will be replaced by values queried from initial * HOSTCMD requests. 
*/ ts->psy_stats.capacity = 100; ts->psy_stats.temp = 182; shield_dev->battery_dev.desc.properties = thunderstrike_battery_props; shield_dev->battery_dev.desc.num_properties = ARRAY_SIZE(thunderstrike_battery_props); shield_dev->battery_dev.desc.get_property = thunderstrike_battery_get_property; shield_dev->battery_dev.desc.type = POWER_SUPPLY_TYPE_BATTERY; shield_dev->battery_dev.desc.name = devm_kasprintf(&ts->base.hdev->dev, GFP_KERNEL, "thunderstrike_%d", ts->id); if (!shield_dev->battery_dev.desc.name) return -ENOMEM; shield_dev->battery_dev.psy = power_supply_register( &hdev->dev, &shield_dev->battery_dev.desc, &psy_cfg); if (IS_ERR(shield_dev->battery_dev.psy)) { hid_err(hdev, "Failed to register Thunderstrike battery device\n"); return PTR_ERR(shield_dev->battery_dev.psy); } ret = power_supply_powers(shield_dev->battery_dev.psy, &hdev->dev); if (ret) { hid_err(hdev, "Failed to associate battery device to Thunderstrike\n"); goto err; } return 0; err: power_supply_unregister(shield_dev->battery_dev.psy); return ret; } static struct shield_device *thunderstrike_create(struct hid_device *hdev) { struct shield_device *shield_dev; struct thunderstrike *ts; int ret; ts = devm_kzalloc(&hdev->dev, sizeof(*ts), GFP_KERNEL); if (!ts) return ERR_PTR(-ENOMEM); ts->req_report_dmabuf = devm_kzalloc( &hdev->dev, THUNDERSTRIKE_HOSTCMD_REPORT_SIZE, GFP_KERNEL); if (!ts->req_report_dmabuf) return ERR_PTR(-ENOMEM); shield_dev = &ts->base; shield_dev->hdev = hdev; shield_dev->codename = "Thunderstrike"; spin_lock_init(&ts->haptics_update_lock); spin_lock_init(&ts->psy_stats_lock); INIT_WORK(&ts->hostcmd_req_work, thunderstrike_hostcmd_req_work_handler); hid_set_drvdata(hdev, shield_dev); ts->id = ida_alloc(&thunderstrike_ida, GFP_KERNEL); if (ts->id < 0) return ERR_PTR(ts->id); ts->haptics_dev = shield_haptics_create(shield_dev, thunderstrike_play_effect); if (IS_ERR(ts->haptics_dev)) { hid_err(hdev, "Failed to create Thunderstrike haptics instance\n"); ret = 
PTR_ERR(ts->haptics_dev); goto err_id; } ret = thunderstrike_psy_create(shield_dev); if (ret) { hid_err(hdev, "Failed to create Thunderstrike power supply instance\n"); goto err_haptics; } ret = thunderstrike_led_create(ts); if (ret) { hid_err(hdev, "Failed to create Thunderstrike LED instance\n"); goto err_psy; } timer_setup(&ts->psy_stats_timer, thunderstrike_psy_stats_timer_handler, 0); hid_info(hdev, "Registered Thunderstrike controller\n"); return shield_dev; err_psy: power_supply_unregister(shield_dev->battery_dev.psy); err_haptics: if (ts->haptics_dev) input_unregister_device(ts->haptics_dev); err_id: ida_free(&thunderstrike_ida, ts->id); return ERR_PTR(ret); } static void thunderstrike_destroy(struct thunderstrike *ts) { led_classdev_unregister(&ts->led_dev); power_supply_unregister(ts->base.battery_dev.psy); if (ts->haptics_dev) input_unregister_device(ts->haptics_dev); ida_free(&thunderstrike_ida, ts->id); } static int android_input_mapping(struct hid_device *hdev, struct hid_input *hi, struct hid_field *field, struct hid_usage *usage, unsigned long **bit, int *max) { if ((usage->hid & HID_USAGE_PAGE) != HID_UP_CONSUMER) return 0; switch (usage->hid & HID_USAGE) { case HID_USAGE_ANDROID_PLAYPAUSE_BTN: android_map_key(KEY_PLAYPAUSE); break; case HID_USAGE_ANDROID_VOLUMEUP_BTN: android_map_key(KEY_VOLUMEUP); break; case HID_USAGE_ANDROID_VOLUMEDOWN_BTN: android_map_key(KEY_VOLUMEDOWN); break; case HID_USAGE_ANDROID_SEARCH_BTN: android_map_key(BTN_Z); break; case HID_USAGE_ANDROID_HOME_BTN: android_map_key(BTN_MODE); break; case HID_USAGE_ANDROID_BACK_BTN: android_map_key(BTN_SELECT); break; default: return 0; } return 1; } static ssize_t firmware_version_show(struct device *dev, struct device_attribute *attr, char *buf) { struct hid_device *hdev = to_hid_device(dev); struct shield_device *shield_dev; int ret; shield_dev = hid_get_drvdata(hdev); if (test_bit(SHIELD_FW_VERSION_INITIALIZED, &shield_dev->initialized_flags)) ret = sysfs_emit(buf, "0x%04X\n", 
shield_dev->fw_version); else ret = sysfs_emit(buf, NOT_INIT_STR "\n"); return ret; } static DEVICE_ATTR_RO(firmware_version); static ssize_t hardware_version_show(struct device *dev, struct device_attribute *attr, char *buf) { struct hid_device *hdev = to_hid_device(dev); struct shield_device *shield_dev; char board_revision_str[4]; int ret; shield_dev = hid_get_drvdata(hdev); if (test_bit(SHIELD_BOARD_INFO_INITIALIZED, &shield_dev->initialized_flags)) { shield_strrev(board_revision_str, 4, shield_dev->board_info.revision); ret = sysfs_emit(buf, "%s BOARD_REVISION_%s (0x%04X)\n", shield_dev->codename, board_revision_str, shield_dev->board_info.revision); } else ret = sysfs_emit(buf, NOT_INIT_STR "\n"); return ret; } static DEVICE_ATTR_RO(hardware_version); static ssize_t serial_number_show(struct device *dev, struct device_attribute *attr, char *buf) { struct hid_device *hdev = to_hid_device(dev); struct shield_device *shield_dev; int ret; shield_dev = hid_get_drvdata(hdev); if (test_bit(SHIELD_BOARD_INFO_INITIALIZED, &shield_dev->initialized_flags)) ret = sysfs_emit(buf, "%s\n", shield_dev->board_info.serial_number); else ret = sysfs_emit(buf, NOT_INIT_STR "\n"); return ret; } static DEVICE_ATTR_RO(serial_number); static struct attribute *shield_device_attrs[] = { &dev_attr_firmware_version.attr, &dev_attr_hardware_version.attr, &dev_attr_serial_number.attr, NULL, }; ATTRIBUTE_GROUPS(shield_device); static int shield_raw_event(struct hid_device *hdev, struct hid_report *report, u8 *data, int size) { struct shield_device *dev = hid_get_drvdata(hdev); return thunderstrike_parse_report(dev, report, data, size); } static int shield_probe(struct hid_device *hdev, const struct hid_device_id *id) { struct shield_device *shield_dev = NULL; struct thunderstrike *ts; int ret; ret = hid_parse(hdev); if (ret) { hid_err(hdev, "Parse failed\n"); return ret; } switch (id->product) { case USB_DEVICE_ID_NVIDIA_THUNDERSTRIKE_CONTROLLER: shield_dev = thunderstrike_create(hdev); 
break; } if (unlikely(!shield_dev)) { hid_err(hdev, "Failed to identify SHIELD device\n"); return -ENODEV; } if (IS_ERR(shield_dev)) { hid_err(hdev, "Failed to create SHIELD device\n"); return PTR_ERR(shield_dev); } ts = container_of(shield_dev, struct thunderstrike, base); ret = hid_hw_start(hdev, HID_CONNECT_HIDINPUT); if (ret) { hid_err(hdev, "Failed to start HID device\n"); goto err_ts_create; } ret = hid_hw_open(hdev); if (ret) { hid_err(hdev, "Failed to open HID device\n"); goto err_stop; } thunderstrike_device_init_info(shield_dev); return ret; err_stop: hid_hw_stop(hdev); err_ts_create: thunderstrike_destroy(ts); return ret; } static void shield_remove(struct hid_device *hdev) { struct shield_device *dev = hid_get_drvdata(hdev); struct thunderstrike *ts; ts = container_of(dev, struct thunderstrike, base); hid_hw_close(hdev); thunderstrike_destroy(ts); timer_delete_sync(&ts->psy_stats_timer); cancel_work_sync(&ts->hostcmd_req_work); hid_hw_stop(hdev); } static const struct hid_device_id shield_devices[] = { { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_NVIDIA, USB_DEVICE_ID_NVIDIA_THUNDERSTRIKE_CONTROLLER) }, { HID_USB_DEVICE(USB_VENDOR_ID_NVIDIA, USB_DEVICE_ID_NVIDIA_THUNDERSTRIKE_CONTROLLER) }, { } }; MODULE_DEVICE_TABLE(hid, shield_devices); static struct hid_driver shield_driver = { .name = "shield", .id_table = shield_devices, .input_mapping = android_input_mapping, .probe = shield_probe, .remove = shield_remove, .raw_event = shield_raw_event, .driver = { .dev_groups = shield_device_groups, }, }; module_hid_driver(shield_driver); MODULE_AUTHOR("Rahul Rameshbabu <rrameshbabu@nvidia.com>"); MODULE_DESCRIPTION("HID Driver for NVIDIA SHIELD peripherals."); MODULE_LICENSE("GPL");
33 8 2 27 2 27 2 33 25 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef __FIRMWARE_LOADER_H #define __FIRMWARE_LOADER_H #include <linux/bitops.h> #include <linux/firmware.h> #include <linux/types.h> #include <linux/kref.h> #include <linux/list.h> #include <linux/completion.h> /** * enum fw_opt - options to control firmware loading behaviour * * @FW_OPT_UEVENT: Enables the fallback mechanism to send a kobject uevent * when the firmware is not found. Userspace is in charge to load the * firmware using the sysfs loading facility. * @FW_OPT_NOWAIT: Used to describe the firmware request is asynchronous. * @FW_OPT_USERHELPER: Enable the fallback mechanism, in case the direct * filesystem lookup fails at finding the firmware. For details refer to * firmware_fallback_sysfs(). * @FW_OPT_NO_WARN: Quiet, avoid printing warning messages. * @FW_OPT_NOCACHE: Disables firmware caching. Firmware caching is used to * cache the firmware upon suspend, so that upon resume races against the * firmware file lookup on storage is avoided. Used for calls where the * file may be too big, or where the driver takes charge of its own * firmware caching mechanism. * @FW_OPT_NOFALLBACK_SYSFS: Disable the sysfs fallback mechanism. Takes * precedence over &FW_OPT_UEVENT and &FW_OPT_USERHELPER. 
* @FW_OPT_FALLBACK_PLATFORM: Enable fallback to device fw copy embedded in * the platform's main firmware. If both this fallback and the sysfs * fallback are enabled, then this fallback will be tried first. * @FW_OPT_PARTIAL: Allow partial read of firmware instead of needing to read * entire file. */ enum fw_opt { FW_OPT_UEVENT = BIT(0), FW_OPT_NOWAIT = BIT(1), FW_OPT_USERHELPER = BIT(2), FW_OPT_NO_WARN = BIT(3), FW_OPT_NOCACHE = BIT(4), FW_OPT_NOFALLBACK_SYSFS = BIT(5), FW_OPT_FALLBACK_PLATFORM = BIT(6), FW_OPT_PARTIAL = BIT(7), }; enum fw_status { FW_STATUS_UNKNOWN, FW_STATUS_LOADING, FW_STATUS_DONE, FW_STATUS_ABORTED, }; /* * Concurrent request_firmware() for the same firmware need to be * serialized. struct fw_state is simple state machine which hold the * state of the firmware loading. */ struct fw_state { struct completion completion; enum fw_status status; }; struct fw_priv { struct kref ref; struct list_head list; struct firmware_cache *fwc; struct fw_state fw_st; void *data; size_t size; size_t allocated_size; size_t offset; u32 opt_flags; #ifdef CONFIG_FW_LOADER_PAGED_BUF bool is_paged_buf; struct page **pages; int nr_pages; int page_array_size; #endif #ifdef CONFIG_FW_LOADER_USER_HELPER bool need_uevent; struct list_head pending_list; #endif const char *fw_name; }; extern struct mutex fw_lock; extern struct firmware_cache fw_cache; extern bool fw_load_abort_all; static inline bool __fw_state_check(struct fw_priv *fw_priv, enum fw_status status) { struct fw_state *fw_st = &fw_priv->fw_st; return fw_st->status == status; } static inline int __fw_state_wait_common(struct fw_priv *fw_priv, long timeout) { struct fw_state *fw_st = &fw_priv->fw_st; long ret; ret = wait_for_completion_killable_timeout(&fw_st->completion, timeout); if (ret != 0 && fw_st->status == FW_STATUS_ABORTED) return -ENOENT; if (!ret) return -ETIMEDOUT; return ret < 0 ? 
ret : 0; } static inline void __fw_state_set(struct fw_priv *fw_priv, enum fw_status status) { struct fw_state *fw_st = &fw_priv->fw_st; WRITE_ONCE(fw_st->status, status); if (status == FW_STATUS_DONE || status == FW_STATUS_ABORTED) { #ifdef CONFIG_FW_LOADER_USER_HELPER /* * Doing this here ensures that the fw_priv is deleted from * the pending list in all abort/done paths. */ list_del_init(&fw_priv->pending_list); #endif complete_all(&fw_st->completion); } } static inline void fw_state_aborted(struct fw_priv *fw_priv) { __fw_state_set(fw_priv, FW_STATUS_ABORTED); } static inline bool fw_state_is_aborted(struct fw_priv *fw_priv) { return __fw_state_check(fw_priv, FW_STATUS_ABORTED); } static inline void fw_state_start(struct fw_priv *fw_priv) { __fw_state_set(fw_priv, FW_STATUS_LOADING); } static inline void fw_state_done(struct fw_priv *fw_priv) { __fw_state_set(fw_priv, FW_STATUS_DONE); } static inline bool fw_state_is_done(struct fw_priv *fw_priv) { return __fw_state_check(fw_priv, FW_STATUS_DONE); } static inline bool fw_state_is_loading(struct fw_priv *fw_priv) { return __fw_state_check(fw_priv, FW_STATUS_LOADING); } int alloc_lookup_fw_priv(const char *fw_name, struct firmware_cache *fwc, struct fw_priv **fw_priv, void *dbuf, size_t size, size_t offset, u32 opt_flags); int assign_fw(struct firmware *fw, struct device *device); void free_fw_priv(struct fw_priv *fw_priv); void fw_state_init(struct fw_priv *fw_priv); #ifdef CONFIG_FW_LOADER bool firmware_is_builtin(const struct firmware *fw); bool firmware_request_builtin_buf(struct firmware *fw, const char *name, void *buf, size_t size); #else /* module case */ static inline bool firmware_is_builtin(const struct firmware *fw) { return false; } static inline bool firmware_request_builtin_buf(struct firmware *fw, const char *name, void *buf, size_t size) { return false; } #endif #ifdef CONFIG_FW_LOADER_PAGED_BUF void fw_free_paged_buf(struct fw_priv *fw_priv); int fw_grow_paged_buf(struct fw_priv *fw_priv, int 
pages_needed); int fw_map_paged_buf(struct fw_priv *fw_priv); bool fw_is_paged_buf(struct fw_priv *fw_priv); #else static inline void fw_free_paged_buf(struct fw_priv *fw_priv) {} static inline int fw_grow_paged_buf(struct fw_priv *fw_priv, int pages_needed) { return -ENXIO; } static inline int fw_map_paged_buf(struct fw_priv *fw_priv) { return -ENXIO; } static inline bool fw_is_paged_buf(struct fw_priv *fw_priv) { return false; } #endif #endif /* __FIRMWARE_LOADER_H */
8 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 /* SPDX-License-Identifier: GPL-2.0 */ /* * BSD Process Accounting for Linux - Definitions * * Author: Marco van Wieringen (mvw@planets.elm.net) * * This header file contains the definitions needed to implement * BSD-style process accounting. The kernel accounting code and all * user-level programs that try to do something useful with the * process accounting log must include this file. * * Copyright (C) 1995 - 1997 Marco van Wieringen - ELM Consultancy B.V. * */ #ifndef _LINUX_ACCT_H #define _LINUX_ACCT_H #include <uapi/linux/acct.h> #ifdef CONFIG_BSD_PROCESS_ACCT struct pid_namespace; extern void acct_collect(long exitcode, int group_dead); extern void acct_process(void); extern void acct_exit_ns(struct pid_namespace *); #else #define acct_collect(x,y) do { } while (0) #define acct_process() do { } while (0) #define acct_exit_ns(ns) do { } while (0) #endif /* * ACCT_VERSION numbers as yet defined: * 0: old format (until 2.6.7) with 16 bit uid/gid * 1: extended variant (binary compatible on M68K) * 2: extended variant (binary compatible on everything except M68K) * 3: new binary incompatible format (64 bytes) * 4: new binary incompatible format (128 bytes) * 5: new binary incompatible format (128 bytes, second half) * */ #undef ACCT_VERSION #undef AHZ #ifdef CONFIG_BSD_PROCESS_ACCT_V3 #define ACCT_VERSION 3 #define AHZ 100 typedef struct acct_v3 acct_t; #else #ifdef CONFIG_M68K #define ACCT_VERSION 1 #else #define ACCT_VERSION 2 #endif #define AHZ (USER_HZ) typedef struct acct acct_t; #endif #include <linux/jiffies.h> /* * Yet another set of HZ to *HZ helper functions. * See <linux/jiffies.h> for the original. 
*/ static inline u32 jiffies_to_AHZ(unsigned long x) { #if (TICK_NSEC % (NSEC_PER_SEC / AHZ)) == 0 # if HZ < AHZ return x * (AHZ / HZ); # else return x / (HZ / AHZ); # endif #else u64 tmp = (u64)x * TICK_NSEC; do_div(tmp, (NSEC_PER_SEC / AHZ)); return (long)tmp; #endif } static inline u64 nsec_to_AHZ(u64 x) { #if (NSEC_PER_SEC % AHZ) == 0 do_div(x, (NSEC_PER_SEC / AHZ)); #elif (AHZ % 512) == 0 x *= AHZ/512; do_div(x, (NSEC_PER_SEC / 512)); #else /* * max relative error 5.7e-8 (1.8s per year) for AHZ <= 1024, * overflow after 64.99 years. * exact for AHZ=60, 72, 90, 120, 144, 180, 300, 600, 900, ... */ x *= 9; do_div(x, (unsigned long)((9ull * NSEC_PER_SEC + (AHZ/2)) / AHZ)); #endif return x; } #endif /* _LINUX_ACCT_H */
8 8 11 14 9 9 8 9 8 8 8 8 8 8 8 8 8 9 9 9 9 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 // SPDX-License-Identifier: GPL-2.0-only /* * LED support for the input layer * * Copyright 2010-2015 Samuel Thibault <samuel.thibault@ens-lyon.org> */ #include <linux/kernel.h> #include <linux/slab.h> #include <linux/module.h> #include <linux/init.h> #include <linux/leds.h> #include <linux/input.h> #if IS_ENABLED(CONFIG_VT) #define VT_TRIGGER(_name) .trigger = _name #else #define VT_TRIGGER(_name) .trigger = NULL #endif #if IS_ENABLED(CONFIG_SND_CTL_LED) #define AUDIO_TRIGGER(_name) .trigger = _name #else #define AUDIO_TRIGGER(_name) .trigger = NULL #endif static const struct { const char *name; const char *trigger; } input_led_info[LED_CNT] = { [LED_NUML] = { "numlock", VT_TRIGGER("kbd-numlock") }, [LED_CAPSL] = { "capslock", VT_TRIGGER("kbd-capslock") }, [LED_SCROLLL] = { "scrolllock", VT_TRIGGER("kbd-scrolllock") }, [LED_COMPOSE] = { "compose" }, [LED_KANA] = { "kana", VT_TRIGGER("kbd-kanalock") }, [LED_SLEEP] = { "sleep" } , [LED_SUSPEND] = { "suspend" }, [LED_MUTE] = { "mute", AUDIO_TRIGGER("audio-mute") }, [LED_MISC] = { "misc" }, [LED_MAIL] = { "mail" }, [LED_CHARGING] = { "charging" }, }; struct input_led { struct led_classdev cdev; 
struct input_handle *handle; unsigned int code; /* One of LED_* constants */ }; struct input_leds { struct input_handle handle; unsigned int num_leds; struct input_led leds[] __counted_by(num_leds); }; static enum led_brightness input_leds_brightness_get(struct led_classdev *cdev) { struct input_led *led = container_of(cdev, struct input_led, cdev); struct input_dev *input = led->handle->dev; return test_bit(led->code, input->led) ? cdev->max_brightness : 0; } static void input_leds_brightness_set(struct led_classdev *cdev, enum led_brightness brightness) { struct input_led *led = container_of(cdev, struct input_led, cdev); input_inject_event(led->handle, EV_LED, led->code, !!brightness); } static void input_leds_event(struct input_handle *handle, unsigned int type, unsigned int code, int value) { } static int input_leds_get_count(struct input_dev *dev) { unsigned int led_code; int count = 0; for_each_set_bit(led_code, dev->ledbit, LED_CNT) if (input_led_info[led_code].name) count++; return count; } static int input_leds_connect(struct input_handler *handler, struct input_dev *dev, const struct input_device_id *id) { struct input_leds *leds; struct input_led *led; unsigned int num_leds; unsigned int led_code; int led_no; int error; num_leds = input_leds_get_count(dev); if (!num_leds) return -ENXIO; leds = kzalloc_flex(*leds, leds, num_leds); if (!leds) return -ENOMEM; leds->num_leds = num_leds; leds->handle.dev = dev; leds->handle.handler = handler; leds->handle.name = "leds"; leds->handle.private = leds; error = input_register_handle(&leds->handle); if (error) goto err_free_mem; error = input_open_device(&leds->handle); if (error) goto err_unregister_handle; led_no = 0; for_each_set_bit(led_code, dev->ledbit, LED_CNT) { if (!input_led_info[led_code].name) continue; led = &leds->leds[led_no]; led->handle = &leds->handle; led->code = led_code; led->cdev.name = kasprintf(GFP_KERNEL, "%s::%s", dev_name(&dev->dev), input_led_info[led_code].name); if (!led->cdev.name) { 
error = -ENOMEM; goto err_unregister_leds; } led->cdev.max_brightness = 1; led->cdev.brightness_get = input_leds_brightness_get; led->cdev.brightness_set = input_leds_brightness_set; led->cdev.default_trigger = input_led_info[led_code].trigger; error = led_classdev_register(&dev->dev, &led->cdev); if (error) { dev_err(&dev->dev, "failed to register LED %s: %d\n", led->cdev.name, error); kfree(led->cdev.name); goto err_unregister_leds; } led_no++; } return 0; err_unregister_leds: while (--led_no >= 0) { struct input_led *led = &leds->leds[led_no]; led_classdev_unregister(&led->cdev); kfree(led->cdev.name); } input_close_device(&leds->handle); err_unregister_handle: input_unregister_handle(&leds->handle); err_free_mem: kfree(leds); return error; } static void input_leds_disconnect(struct input_handle *handle) { struct input_leds *leds = handle->private; int i; for (i = 0; i < leds->num_leds; i++) { struct input_led *led = &leds->leds[i]; led_classdev_unregister(&led->cdev); kfree(led->cdev.name); } input_close_device(handle); input_unregister_handle(handle); kfree(leds); } static const struct input_device_id input_leds_ids[] = { { .flags = INPUT_DEVICE_ID_MATCH_EVBIT, .evbit = { BIT_MASK(EV_LED) }, }, { }, }; MODULE_DEVICE_TABLE(input, input_leds_ids); static struct input_handler input_leds_handler = { .event = input_leds_event, .connect = input_leds_connect, .disconnect = input_leds_disconnect, .name = "leds", .id_table = input_leds_ids, }; static int __init input_leds_init(void) { return input_register_handler(&input_leds_handler); } module_init(input_leds_init); static void __exit input_leds_exit(void) { input_unregister_handler(&input_leds_handler); } module_exit(input_leds_exit); MODULE_AUTHOR("Samuel Thibault <samuel.thibault@ens-lyon.org>"); MODULE_AUTHOR("Dmitry Torokhov <dmitry.torokhov@gmail.com>"); MODULE_DESCRIPTION("Input -> LEDs Bridge"); MODULE_LICENSE("GPL v2");
24 24 10 10 10 3 10 3 3 3 2 10 3 10 10 10 10 3 10 2 2 2 2 2 2 2 9 9 9 9 2 9 32 33 33 33 33 33 33 10 10 38 15 14 15 2 33 32 32 1 32 21 38 14 38 6 5 20 4 32 32 32 32 32 32 32 32 32 32 32 32 32 32 32 32 32 32 12 20 32 32 32 31 32 32 32 32 7 32 32 32 32 22 1 1 20 1 1 1 1 7 7 7 7 7 7 7 7 39 38 5 4 4 34 33 2 32 33 33 33 20 33 33 33 7 7 7 7 26 26 24 24 24 19 6 17 17 17 19 1 2 26 2 2 2 7 7 2 1 2 2 2 6 1 7 7 9 9 33 7 6 7 5 39 39 40 39 40 39 39 6 5 5 5 5 3 5 6 24 24 24 21 24 21 7 20 20 10 16 16 16 16 1 16 16 16 15 7 10 5 5 5 5 5 5 4 4 1 5 6 10 10 10 5 5 6 6 10 10 10 2 10 9 1 9 9 10 10 10 10 6 6 5 5 5 4 9 9 9 9 2 2 2 10 5 10 10 10 10 9 9 2 2 2 10 10 10 2 2 2 10 2 2 2 2 22 22 22 22 2 2 2 25 25 12 12 12 8 12 7 12 25 25 8 25 12 12 9 12 25 22 23 22 1 22 23 22 22 22 21 21 21 21 21 21 21 13 2 13 9 9 9 7 5 9 8 9 9 2 2 2 2 14 2 2 21 22 22 6 5 6 6 6 1 1 6 2 2 2 2 2 2 10 9 9 9 7 7 9 9 8 6 5 4 10 6 3 3 3 3 15 15 15 12 12 15 2 15 10 15 15 3 3 3 3 3 3 3 1 1 3 1 1 1 3 20 1 21 21 23 23 20 20 24 39 16 40 24 40 1 40 39 23 20 2 1 1 40 40 40 40 39 40 40 39 39 20 20 20 17 40 40 40 2 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 
260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 
760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 
1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 
1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 
2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282 2283 2284 2285 2286 2287 2288 2289 2290 2291 2292 2293 2294 2295 2296 2297 2298 2299 2300 2301 2302 2303 2304 2305 2306 2307 2308 2309 2310 2311 2312 2313 2314 2315 2316 2317 2318 2319 2320 2321 2322 2323 2324 2325 2326 2327 2328 2329 2330 2331 2332 2333 2334 2335 2336 2337 2338 2339 2340 2341 2342 2343 2344 2345 2346 2347 2348 2349 2350 2351 2352 2353 2354 2355 2356 2357 2358 2359 2360 2361 2362 2363 2364 2365 2366 2367 2368 2369 2370 2371 2372 2373 2374 2375 2376 2377 2378 2379 2380 2381 2382 2383 2384 2385 2386 2387 2388 2389 2390 2391 2392 2393 2394 2395 2396 2397 2398 2399 2400 2401 2402 2403 2404 2405 2406 2407 
2408 2409 2410 2411 2412 2413 2414 2415 2416 2417 2418 2419 2420 2421 2422 2423 2424 2425 2426 2427 2428 2429 2430 2431 2432 2433 2434 2435 2436 2437 2438 2439 2440 2441 2442 2443 2444 2445 2446 2447 2448 2449 2450 2451 2452 2453 2454 2455 2456 2457 2458 2459 2460 2461 2462 2463 2464 2465 2466 2467 2468 2469 2470 2471 2472 2473 2474 2475 2476 2477 2478 2479 2480 2481 2482 2483 2484 2485 2486 2487 2488 2489 2490 2491 2492 2493 2494 2495 2496 2497 2498 2499 2500 2501 2502 2503 2504 2505 2506 2507 2508 2509 2510 2511 2512 2513 2514 2515 2516 2517 2518 2519 2520 2521 2522 2523 2524 2525 2526 2527 2528 2529 2530 2531 2532 2533 2534 2535 2536 2537 2538 2539 2540 2541 2542 2543 2544 2545 2546 2547 2548 2549 2550 2551 2552 2553 2554 2555 2556 2557 2558 2559 2560 2561 2562 2563 2564 2565 2566 2567 2568 2569 2570 2571 2572 2573 2574 2575 2576 2577 2578 2579 2580 2581 2582 2583 2584 2585 2586 2587 2588 2589 2590 2591 2592 2593 2594 2595 2596 2597 2598 2599 2600 2601 2602 2603 2604 2605 2606 2607 2608 2609 2610 2611 2612 2613 2614 2615 2616 2617 2618 2619 2620 2621 2622 2623 2624 2625 2626 2627 2628 2629 2630 2631 2632 2633 2634 2635 2636 2637 2638 2639 2640 2641 2642 2643 2644 2645 2646 2647 2648 2649 2650 2651 2652 2653 2654 2655 2656 2657 2658 2659 2660 2661 2662 2663 2664 2665 2666 2667 2668 2669 2670 2671 2672 2673 2674 2675 2676 2677 2678 2679 2680 2681 2682 2683 2684 2685 2686 2687 2688 2689 2690 2691 2692 2693 2694 2695 2696 2697 2698 2699 2700 2701 2702 2703 2704 2705 2706 2707 2708 2709 2710 2711 2712 2713 2714 2715 2716 2717 2718 2719 2720 2721 2722 2723 2724 2725 2726 2727 2728 2729 2730 2731 2732 2733 2734 2735 2736 2737 2738 2739 2740 2741 2742 2743 2744 2745 2746 2747 2748 2749 2750 2751 2752 2753 2754 2755 2756 2757 2758 2759 2760 2761 2762 2763 2764 2765 2766 2767 2768 2769 2770 2771 2772 2773 2774 2775 2776 2777 2778 2779 2780 2781 2782 2783 2784 2785 2786 2787 2788 2789 2790 2791 2792 2793 2794 2795 2796 2797 2798 2799 2800 2801 2802 2803 2804 2805 2806 2807 
2808 2809 2810 2811 2812 2813 2814 2815 2816 2817 2818 2819 2820 2821 2822 2823 2824 2825 2826 2827 2828 2829 2830 2831 2832 2833 2834 2835 2836 2837 2838 2839 2840 2841 2842 2843 2844 2845 2846 2847 2848 2849 2850 2851 2852 2853 2854 2855 2856 2857 2858 2859 2860 2861 2862 2863 2864 2865 2866 2867 2868 2869 2870 2871 2872 2873 2874 2875 2876 2877 2878 2879 2880 2881 2882 2883 2884 2885 2886 2887 2888 2889 2890 2891 2892 2893 2894 2895 2896 2897 2898 2899 2900 2901 2902 2903 2904 2905 2906 2907 2908 2909 2910 2911 2912 2913 2914 2915 2916 2917 2918 2919 /* * Copyright (c) 2016-2017, Mellanox Technologies. All rights reserved. * Copyright (c) 2016-2017, Dave Watson <davejwatson@fb.com>. All rights reserved. * Copyright (c) 2016-2017, Lance Chao <lancerchao@fb.com>. All rights reserved. * Copyright (c) 2016, Fridolin Pokorny <fridolin.pokorny@gmail.com>. All rights reserved. * Copyright (c) 2016, Nikos Mavrogiannopoulos <nmav@gnutls.org>. All rights reserved. * Copyright (c) 2018, Covalent IO, Inc. http://covalent.io * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenIB.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. 
* * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ #include <linux/bug.h> #include <linux/sched/signal.h> #include <linux/module.h> #include <linux/kernel.h> #include <linux/splice.h> #include <crypto/aead.h> #include <net/strparser.h> #include <net/tls.h> #include <trace/events/sock.h> #include "tls.h" struct tls_decrypt_arg { struct_group(inargs, bool zc; bool async; bool async_done; u8 tail; ); struct sk_buff *skb; }; struct tls_decrypt_ctx { struct sock *sk; u8 iv[TLS_MAX_IV_SIZE]; u8 aad[TLS_MAX_AAD_SIZE]; u8 tail; bool free_sgout; struct scatterlist sg[]; }; noinline void tls_err_abort(struct sock *sk, int err) { WARN_ON_ONCE(err >= 0); /* sk->sk_err should contain a positive error code. 
*/ WRITE_ONCE(sk->sk_err, -err); /* Paired with smp_rmb() in tcp_poll() */ smp_wmb(); sk_error_report(sk); } static int __skb_nsg(struct sk_buff *skb, int offset, int len, unsigned int recursion_level) { int start = skb_headlen(skb); int i, chunk = start - offset; struct sk_buff *frag_iter; int elt = 0; if (unlikely(recursion_level >= 24)) return -EMSGSIZE; if (chunk > 0) { if (chunk > len) chunk = len; elt++; len -= chunk; if (len == 0) return elt; offset += chunk; } for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { int end; WARN_ON(start > offset + len); end = start + skb_frag_size(&skb_shinfo(skb)->frags[i]); chunk = end - offset; if (chunk > 0) { if (chunk > len) chunk = len; elt++; len -= chunk; if (len == 0) return elt; offset += chunk; } start = end; } if (unlikely(skb_has_frag_list(skb))) { skb_walk_frags(skb, frag_iter) { int end, ret; WARN_ON(start > offset + len); end = start + frag_iter->len; chunk = end - offset; if (chunk > 0) { if (chunk > len) chunk = len; ret = __skb_nsg(frag_iter, offset - start, chunk, recursion_level + 1); if (unlikely(ret < 0)) return ret; elt += ret; len -= chunk; if (len == 0) return elt; offset += chunk; } start = end; } } BUG_ON(len); return elt; } /* Return the number of scatterlist elements required to completely map the * skb, or -EMSGSIZE if the recursion depth is exceeded. */ static int skb_nsg(struct sk_buff *skb, int offset, int len) { return __skb_nsg(skb, offset, len, 0); } static int tls_padding_length(struct tls_prot_info *prot, struct sk_buff *skb, struct tls_decrypt_arg *darg) { struct strp_msg *rxm = strp_msg(skb); struct tls_msg *tlm = tls_msg(skb); int sub = 0; /* Determine zero-padding length */ if (prot->version == TLS_1_3_VERSION) { int offset = rxm->full_len - TLS_TAG_SIZE - 1; char content_type = darg->zc ? 
darg->tail : 0; int err; while (content_type == 0) { if (offset < prot->prepend_size) return -EBADMSG; err = skb_copy_bits(skb, rxm->offset + offset, &content_type, 1); if (err) return err; if (content_type) break; sub++; offset--; } tlm->control = content_type; } return sub; } static void tls_decrypt_done(void *data, int err) { struct aead_request *aead_req = data; struct crypto_aead *aead = crypto_aead_reqtfm(aead_req); struct scatterlist *sgout = aead_req->dst; struct tls_sw_context_rx *ctx; struct tls_decrypt_ctx *dctx; struct tls_context *tls_ctx; struct scatterlist *sg; unsigned int pages; struct sock *sk; int aead_size; /* If requests get too backlogged crypto API returns -EBUSY and calls * ->complete(-EINPROGRESS) immediately followed by ->complete(0) * to make waiting for backlog to flush with crypto_wait_req() easier. * First wait converts -EBUSY -> -EINPROGRESS, and the second one * -EINPROGRESS -> 0. * We have a single struct crypto_async_request per direction, this * scheme doesn't help us, so just ignore the first ->complete(). 
*/ if (err == -EINPROGRESS) return; aead_size = sizeof(*aead_req) + crypto_aead_reqsize(aead); aead_size = ALIGN(aead_size, __alignof__(*dctx)); dctx = (void *)((u8 *)aead_req + aead_size); sk = dctx->sk; tls_ctx = tls_get_ctx(sk); ctx = tls_sw_ctx_rx(tls_ctx); /* Propagate if there was an err */ if (err) { if (err == -EBADMSG) TLS_INC_STATS(sock_net(sk), LINUX_MIB_TLSDECRYPTERROR); ctx->async_wait.err = err; tls_err_abort(sk, err); } /* Free the destination pages if skb was not decrypted inplace */ if (dctx->free_sgout) { /* Skip the first S/G entry as it points to AAD */ for_each_sg(sg_next(sgout), sg, UINT_MAX, pages) { if (!sg) break; put_page(sg_page(sg)); } } kfree(aead_req); if (atomic_dec_and_test(&ctx->decrypt_pending)) complete(&ctx->async_wait.completion); } static int tls_decrypt_async_wait(struct tls_sw_context_rx *ctx) { if (!atomic_dec_and_test(&ctx->decrypt_pending)) crypto_wait_req(-EINPROGRESS, &ctx->async_wait); atomic_inc(&ctx->decrypt_pending); __skb_queue_purge(&ctx->async_hold); return ctx->async_wait.err; } static int tls_do_decryption(struct sock *sk, struct scatterlist *sgin, struct scatterlist *sgout, char *iv_recv, size_t data_len, struct aead_request *aead_req, struct tls_decrypt_arg *darg) { struct tls_context *tls_ctx = tls_get_ctx(sk); struct tls_prot_info *prot = &tls_ctx->prot_info; struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx); int ret; aead_request_set_tfm(aead_req, ctx->aead_recv); aead_request_set_ad(aead_req, prot->aad_size); aead_request_set_crypt(aead_req, sgin, sgout, data_len + prot->tag_size, (u8 *)iv_recv); if (darg->async) { aead_request_set_callback(aead_req, CRYPTO_TFM_REQ_MAY_BACKLOG, tls_decrypt_done, aead_req); DEBUG_NET_WARN_ON_ONCE(atomic_read(&ctx->decrypt_pending) < 1); atomic_inc(&ctx->decrypt_pending); } else { DECLARE_CRYPTO_WAIT(wait); aead_request_set_callback(aead_req, CRYPTO_TFM_REQ_MAY_BACKLOG, crypto_req_done, &wait); ret = crypto_aead_decrypt(aead_req); if (ret == -EINPROGRESS || ret == 
-EBUSY) ret = crypto_wait_req(ret, &wait); return ret; } ret = crypto_aead_decrypt(aead_req); if (ret == -EINPROGRESS) return 0; if (ret == -EBUSY) { ret = tls_decrypt_async_wait(ctx); darg->async_done = true; /* all completions have run, we're not doing async anymore */ darg->async = false; return ret; } atomic_dec(&ctx->decrypt_pending); darg->async = false; return ret; } static void tls_trim_both_msgs(struct sock *sk, int target_size) { struct tls_context *tls_ctx = tls_get_ctx(sk); struct tls_prot_info *prot = &tls_ctx->prot_info; struct tls_sw_context_tx *ctx = tls_sw_ctx_tx(tls_ctx); struct tls_rec *rec = ctx->open_rec; sk_msg_trim(sk, &rec->msg_plaintext, target_size); if (target_size > 0) target_size += prot->overhead_size; sk_msg_trim(sk, &rec->msg_encrypted, target_size); } static int tls_alloc_encrypted_msg(struct sock *sk, int len) { struct tls_context *tls_ctx = tls_get_ctx(sk); struct tls_sw_context_tx *ctx = tls_sw_ctx_tx(tls_ctx); struct tls_rec *rec = ctx->open_rec; struct sk_msg *msg_en = &rec->msg_encrypted; return sk_msg_alloc(sk, msg_en, len, 0); } static int tls_clone_plaintext_msg(struct sock *sk, int required) { struct tls_context *tls_ctx = tls_get_ctx(sk); struct tls_prot_info *prot = &tls_ctx->prot_info; struct tls_sw_context_tx *ctx = tls_sw_ctx_tx(tls_ctx); struct tls_rec *rec = ctx->open_rec; struct sk_msg *msg_pl = &rec->msg_plaintext; struct sk_msg *msg_en = &rec->msg_encrypted; int skip, len; /* We add page references worth len bytes from encrypted sg * at the end of plaintext sg. It is guaranteed that msg_en * has enough required room (ensured by caller). */ len = required - msg_pl->sg.size; /* Skip initial bytes in msg_en's data to be able to use * same offset of both plain and encrypted data. 
*/ skip = prot->prepend_size + msg_pl->sg.size; return sk_msg_clone(sk, msg_pl, msg_en, skip, len); } static struct tls_rec *tls_get_rec(struct sock *sk) { struct tls_context *tls_ctx = tls_get_ctx(sk); struct tls_prot_info *prot = &tls_ctx->prot_info; struct tls_sw_context_tx *ctx = tls_sw_ctx_tx(tls_ctx); struct sk_msg *msg_pl, *msg_en; struct tls_rec *rec; int mem_size; mem_size = sizeof(struct tls_rec) + crypto_aead_reqsize(ctx->aead_send); rec = kzalloc(mem_size, sk->sk_allocation); if (!rec) return NULL; msg_pl = &rec->msg_plaintext; msg_en = &rec->msg_encrypted; sk_msg_init(msg_pl); sk_msg_init(msg_en); sg_init_table(rec->sg_aead_in, 2); sg_set_buf(&rec->sg_aead_in[0], rec->aad_space, prot->aad_size); sg_unmark_end(&rec->sg_aead_in[1]); sg_init_table(rec->sg_aead_out, 2); sg_set_buf(&rec->sg_aead_out[0], rec->aad_space, prot->aad_size); sg_unmark_end(&rec->sg_aead_out[1]); rec->sk = sk; return rec; } static void tls_free_rec(struct sock *sk, struct tls_rec *rec) { sk_msg_free(sk, &rec->msg_encrypted); sk_msg_free(sk, &rec->msg_plaintext); kfree(rec); } static void tls_free_open_rec(struct sock *sk) { struct tls_context *tls_ctx = tls_get_ctx(sk); struct tls_sw_context_tx *ctx = tls_sw_ctx_tx(tls_ctx); struct tls_rec *rec = ctx->open_rec; if (rec) { tls_free_rec(sk, rec); ctx->open_rec = NULL; } } int tls_tx_records(struct sock *sk, int flags) { struct tls_context *tls_ctx = tls_get_ctx(sk); struct tls_sw_context_tx *ctx = tls_sw_ctx_tx(tls_ctx); struct tls_rec *rec, *tmp; struct sk_msg *msg_en; int tx_flags, rc = 0; if (tls_is_partially_sent_record(tls_ctx)) { rec = list_first_entry(&ctx->tx_list, struct tls_rec, list); if (flags == -1) tx_flags = rec->tx_flags; else tx_flags = flags; rc = tls_push_partial_record(sk, tls_ctx, tx_flags); if (rc) goto tx_err; /* Full record has been transmitted. 
* Remove the head of tx_list */ list_del(&rec->list); sk_msg_free(sk, &rec->msg_plaintext); kfree(rec); } /* Tx all ready records */ list_for_each_entry_safe(rec, tmp, &ctx->tx_list, list) { if (READ_ONCE(rec->tx_ready)) { if (flags == -1) tx_flags = rec->tx_flags; else tx_flags = flags; msg_en = &rec->msg_encrypted; rc = tls_push_sg(sk, tls_ctx, &msg_en->sg.data[msg_en->sg.curr], 0, tx_flags); if (rc) goto tx_err; list_del(&rec->list); sk_msg_free(sk, &rec->msg_plaintext); kfree(rec); } else { break; } } tx_err: if (rc < 0 && rc != -EAGAIN) tls_err_abort(sk, rc); return rc; } static void tls_encrypt_done(void *data, int err) { struct tls_sw_context_tx *ctx; struct tls_context *tls_ctx; struct tls_prot_info *prot; struct tls_rec *rec = data; struct scatterlist *sge; struct sk_msg *msg_en; struct sock *sk; if (err == -EINPROGRESS) /* see the comment in tls_decrypt_done() */ return; msg_en = &rec->msg_encrypted; sk = rec->sk; tls_ctx = tls_get_ctx(sk); prot = &tls_ctx->prot_info; ctx = tls_sw_ctx_tx(tls_ctx); sge = sk_msg_elem(msg_en, msg_en->sg.curr); sge->offset -= prot->prepend_size; sge->length += prot->prepend_size; /* Check if error is previously set on socket */ if (err || sk->sk_err) { rec = NULL; /* If err is already set on socket, return the same code */ if (sk->sk_err) { ctx->async_wait.err = -sk->sk_err; } else { ctx->async_wait.err = err; tls_err_abort(sk, err); } } if (rec) { struct tls_rec *first_rec; /* Mark the record as ready for transmission */ smp_store_mb(rec->tx_ready, true); /* If received record is at head of tx_list, schedule tx */ first_rec = list_first_entry(&ctx->tx_list, struct tls_rec, list); if (rec == first_rec) { /* Schedule the transmission */ if (!test_and_set_bit(BIT_TX_SCHEDULED, &ctx->tx_bitmask)) schedule_delayed_work(&ctx->tx_work.work, 1); } } if (atomic_dec_and_test(&ctx->encrypt_pending)) complete(&ctx->async_wait.completion); } static int tls_encrypt_async_wait(struct tls_sw_context_tx *ctx) { if 
(!atomic_dec_and_test(&ctx->encrypt_pending)) crypto_wait_req(-EINPROGRESS, &ctx->async_wait); atomic_inc(&ctx->encrypt_pending); return ctx->async_wait.err; } static int tls_do_encryption(struct sock *sk, struct tls_context *tls_ctx, struct tls_sw_context_tx *ctx, struct aead_request *aead_req, size_t data_len, u32 start) { struct tls_prot_info *prot = &tls_ctx->prot_info; struct tls_rec *rec = ctx->open_rec; struct sk_msg *msg_en = &rec->msg_encrypted; struct scatterlist *sge = sk_msg_elem(msg_en, start); int rc, iv_offset = 0; /* For CCM based ciphers, first byte of IV is a constant */ switch (prot->cipher_type) { case TLS_CIPHER_AES_CCM_128: rec->iv_data[0] = TLS_AES_CCM_IV_B0_BYTE; iv_offset = 1; break; case TLS_CIPHER_SM4_CCM: rec->iv_data[0] = TLS_SM4_CCM_IV_B0_BYTE; iv_offset = 1; break; } memcpy(&rec->iv_data[iv_offset], tls_ctx->tx.iv, prot->iv_size + prot->salt_size); tls_xor_iv_with_seq(prot, rec->iv_data + iv_offset, tls_ctx->tx.rec_seq); sge->offset += prot->prepend_size; sge->length -= prot->prepend_size; msg_en->sg.curr = start; aead_request_set_tfm(aead_req, ctx->aead_send); aead_request_set_ad(aead_req, prot->aad_size); aead_request_set_crypt(aead_req, rec->sg_aead_in, rec->sg_aead_out, data_len, rec->iv_data); aead_request_set_callback(aead_req, CRYPTO_TFM_REQ_MAY_BACKLOG, tls_encrypt_done, rec); /* Add the record in tx_list */ list_add_tail((struct list_head *)&rec->list, &ctx->tx_list); DEBUG_NET_WARN_ON_ONCE(atomic_read(&ctx->encrypt_pending) < 1); atomic_inc(&ctx->encrypt_pending); rc = crypto_aead_encrypt(aead_req); if (rc == -EBUSY) { rc = tls_encrypt_async_wait(ctx); rc = rc ?: -EINPROGRESS; /* * The async callback tls_encrypt_done() has already * decremented encrypt_pending and restored the sge on * both success and error. Skip the synchronous cleanup * below on error, just remove the record and return. 
*/ if (rc != -EINPROGRESS) { list_del(&rec->list); return rc; } } if (!rc || rc != -EINPROGRESS) { atomic_dec(&ctx->encrypt_pending); sge->offset -= prot->prepend_size; sge->length += prot->prepend_size; } if (!rc) { WRITE_ONCE(rec->tx_ready, true); } else if (rc != -EINPROGRESS) { list_del(&rec->list); return rc; } /* Unhook the record from context if encryption is not failure */ ctx->open_rec = NULL; tls_advance_record_sn(sk, prot, &tls_ctx->tx); return rc; } static int tls_split_open_record(struct sock *sk, struct tls_rec *from, struct tls_rec **to, struct sk_msg *msg_opl, struct sk_msg *msg_oen, u32 split_point, u32 tx_overhead_size, u32 *orig_end) { u32 i, j, bytes = 0, apply = msg_opl->apply_bytes; struct scatterlist *sge, *osge, *nsge; u32 orig_size = msg_opl->sg.size; struct scatterlist tmp = { }; struct sk_msg *msg_npl; struct tls_rec *new; int ret; new = tls_get_rec(sk); if (!new) return -ENOMEM; ret = sk_msg_alloc(sk, &new->msg_encrypted, msg_opl->sg.size + tx_overhead_size, 0); if (ret < 0) { tls_free_rec(sk, new); return ret; } *orig_end = msg_opl->sg.end; i = msg_opl->sg.start; sge = sk_msg_elem(msg_opl, i); while (apply && sge->length) { if (sge->length > apply) { u32 len = sge->length - apply; get_page(sg_page(sge)); sg_set_page(&tmp, sg_page(sge), len, sge->offset + apply); sge->length = apply; bytes += apply; apply = 0; } else { apply -= sge->length; bytes += sge->length; } sk_msg_iter_var_next(i); if (i == msg_opl->sg.end) break; sge = sk_msg_elem(msg_opl, i); } msg_opl->sg.end = i; msg_opl->sg.curr = i; msg_opl->sg.copybreak = 0; msg_opl->apply_bytes = 0; msg_opl->sg.size = bytes; msg_npl = &new->msg_plaintext; msg_npl->apply_bytes = apply; msg_npl->sg.size = orig_size - bytes; j = msg_npl->sg.start; nsge = sk_msg_elem(msg_npl, j); if (tmp.length) { memcpy(nsge, &tmp, sizeof(*nsge)); sk_msg_iter_var_next(j); nsge = sk_msg_elem(msg_npl, j); } osge = sk_msg_elem(msg_opl, i); while (osge->length) { memcpy(nsge, osge, sizeof(*nsge)); 
sg_unmark_end(nsge); sk_msg_iter_var_next(i); sk_msg_iter_var_next(j); if (i == *orig_end) break; osge = sk_msg_elem(msg_opl, i); nsge = sk_msg_elem(msg_npl, j); } msg_npl->sg.end = j; msg_npl->sg.curr = j; msg_npl->sg.copybreak = 0; *to = new; return 0; } static void tls_merge_open_record(struct sock *sk, struct tls_rec *to, struct tls_rec *from, u32 orig_end) { struct sk_msg *msg_npl = &from->msg_plaintext; struct sk_msg *msg_opl = &to->msg_plaintext; struct scatterlist *osge, *nsge; u32 i, j; i = msg_opl->sg.end; sk_msg_iter_var_prev(i); j = msg_npl->sg.start; osge = sk_msg_elem(msg_opl, i); nsge = sk_msg_elem(msg_npl, j); if (sg_page(osge) == sg_page(nsge) && osge->offset + osge->length == nsge->offset) { osge->length += nsge->length; put_page(sg_page(nsge)); } msg_opl->sg.end = orig_end; msg_opl->sg.curr = orig_end; msg_opl->sg.copybreak = 0; msg_opl->apply_bytes = msg_opl->sg.size + msg_npl->sg.size; msg_opl->sg.size += msg_npl->sg.size; sk_msg_free(sk, &to->msg_encrypted); sk_msg_xfer_full(&to->msg_encrypted, &from->msg_encrypted); kfree(from); } static int tls_push_record(struct sock *sk, int flags, unsigned char record_type) { struct tls_context *tls_ctx = tls_get_ctx(sk); struct tls_prot_info *prot = &tls_ctx->prot_info; struct tls_sw_context_tx *ctx = tls_sw_ctx_tx(tls_ctx); struct tls_rec *rec = ctx->open_rec, *tmp = NULL; u32 i, split_point, orig_end; struct sk_msg *msg_pl, *msg_en; struct aead_request *req; bool split; int rc; if (!rec) return 0; msg_pl = &rec->msg_plaintext; msg_en = &rec->msg_encrypted; split_point = msg_pl->apply_bytes; split = split_point && split_point < msg_pl->sg.size; if (unlikely((!split && msg_pl->sg.size + prot->overhead_size > msg_en->sg.size) || (split && split_point + prot->overhead_size > msg_en->sg.size))) { split = true; split_point = msg_en->sg.size; } if (split) { rc = tls_split_open_record(sk, rec, &tmp, msg_pl, msg_en, split_point, prot->overhead_size, &orig_end); if (rc < 0) return rc; /* This can happen if above 
tls_split_open_record allocates * a single large encryption buffer instead of two smaller * ones. In this case adjust pointers and continue without * split. */ if (!msg_pl->sg.size) { tls_merge_open_record(sk, rec, tmp, orig_end); msg_pl = &rec->msg_plaintext; msg_en = &rec->msg_encrypted; split = false; } sk_msg_trim(sk, msg_en, msg_pl->sg.size + prot->overhead_size); } rec->tx_flags = flags; req = &rec->aead_req; i = msg_pl->sg.end; sk_msg_iter_var_prev(i); rec->content_type = record_type; if (prot->version == TLS_1_3_VERSION) { /* Add content type to end of message. No padding added */ sg_set_buf(&rec->sg_content_type, &rec->content_type, 1); sg_mark_end(&rec->sg_content_type); sg_chain(msg_pl->sg.data, msg_pl->sg.end + 1, &rec->sg_content_type); } else { sg_mark_end(sk_msg_elem(msg_pl, i)); } if (msg_pl->sg.end < msg_pl->sg.start) { sg_chain(&msg_pl->sg.data[msg_pl->sg.start], MAX_SKB_FRAGS - msg_pl->sg.start + 1, msg_pl->sg.data); } i = msg_pl->sg.start; sg_chain(rec->sg_aead_in, 2, &msg_pl->sg.data[i]); i = msg_en->sg.end; sk_msg_iter_var_prev(i); sg_mark_end(sk_msg_elem(msg_en, i)); i = msg_en->sg.start; sg_chain(rec->sg_aead_out, 2, &msg_en->sg.data[i]); tls_make_aad(rec->aad_space, msg_pl->sg.size + prot->tail_size, tls_ctx->tx.rec_seq, record_type, prot); tls_fill_prepend(tls_ctx, page_address(sg_page(&msg_en->sg.data[i])) + msg_en->sg.data[i].offset, msg_pl->sg.size + prot->tail_size, record_type); tls_ctx->pending_open_record_frags = false; rc = tls_do_encryption(sk, tls_ctx, ctx, req, msg_pl->sg.size + prot->tail_size, i); if (rc < 0) { if (rc != -EINPROGRESS) { tls_err_abort(sk, -EBADMSG); if (split) { tls_ctx->pending_open_record_frags = true; tls_merge_open_record(sk, rec, tmp, orig_end); } } ctx->async_capable = 1; return rc; } else if (split) { msg_pl = &tmp->msg_plaintext; msg_en = &tmp->msg_encrypted; sk_msg_trim(sk, msg_en, msg_pl->sg.size + prot->overhead_size); tls_ctx->pending_open_record_frags = true; ctx->open_rec = tmp; } return 
tls_tx_records(sk, flags); } static int bpf_exec_tx_verdict(struct sk_msg *msg, struct sock *sk, bool full_record, u8 record_type, ssize_t *copied, int flags) { struct tls_context *tls_ctx = tls_get_ctx(sk); struct tls_sw_context_tx *ctx = tls_sw_ctx_tx(tls_ctx); struct sk_msg msg_redir = { }; struct sk_psock *psock; struct sock *sk_redir; struct tls_rec *rec; bool enospc, policy, redir_ingress; int err = 0, send; u32 delta = 0; policy = !(flags & MSG_SENDPAGE_NOPOLICY); psock = sk_psock_get(sk); if (!psock || !policy) { err = tls_push_record(sk, flags, record_type); if (err && err != -EINPROGRESS && sk->sk_err == EBADMSG) { *copied -= sk_msg_free(sk, msg); tls_free_open_rec(sk); err = -sk->sk_err; } if (psock) sk_psock_put(sk, psock); return err; } more_data: enospc = sk_msg_full(msg); if (psock->eval == __SK_NONE) { delta = msg->sg.size; psock->eval = sk_psock_msg_verdict(sk, psock, msg); delta -= msg->sg.size; if ((s32)delta > 0) { /* It indicates that we executed bpf_msg_pop_data(), * causing the plaintext data size to decrease. * Therefore the encrypted data size also needs to * correspondingly decrease. We only need to subtract * delta to calculate the new ciphertext length since * ktls does not support block encryption. 
*/ struct sk_msg *enc = &ctx->open_rec->msg_encrypted; sk_msg_trim(sk, enc, enc->sg.size - delta); } } if (msg->cork_bytes && msg->cork_bytes > msg->sg.size && !enospc && !full_record) { err = -ENOSPC; goto out_err; } msg->cork_bytes = 0; send = msg->sg.size; if (msg->apply_bytes && msg->apply_bytes < send) send = msg->apply_bytes; switch (psock->eval) { case __SK_PASS: err = tls_push_record(sk, flags, record_type); if (err && err != -EINPROGRESS && sk->sk_err == EBADMSG) { *copied -= sk_msg_free(sk, msg); tls_free_open_rec(sk); err = -sk->sk_err; goto out_err; } break; case __SK_REDIRECT: redir_ingress = psock->redir_ingress; sk_redir = psock->sk_redir; memcpy(&msg_redir, msg, sizeof(*msg)); if (msg->apply_bytes < send) msg->apply_bytes = 0; else msg->apply_bytes -= send; sk_msg_return_zero(sk, msg, send); msg->sg.size -= send; release_sock(sk); err = tcp_bpf_sendmsg_redir(sk_redir, redir_ingress, &msg_redir, send, flags); lock_sock(sk); if (err < 0) { /* Regardless of whether the data represented by * msg_redir is sent successfully, we have already * uncharged it via sk_msg_return_zero(). The * msg->sg.size represents the remaining unprocessed * data, which needs to be uncharged here. 
*/ sk_mem_uncharge(sk, msg->sg.size); *copied -= sk_msg_free_nocharge(sk, &msg_redir); msg->sg.size = 0; } if (msg->sg.size == 0) tls_free_open_rec(sk); break; case __SK_DROP: default: sk_msg_free_partial(sk, msg, send); if (msg->apply_bytes < send) msg->apply_bytes = 0; else msg->apply_bytes -= send; if (msg->sg.size == 0) tls_free_open_rec(sk); *copied -= (send + delta); err = -EACCES; } if (likely(!err)) { bool reset_eval = !ctx->open_rec; rec = ctx->open_rec; if (rec) { msg = &rec->msg_plaintext; if (!msg->apply_bytes) reset_eval = true; } if (reset_eval) { psock->eval = __SK_NONE; if (psock->sk_redir) { sock_put(psock->sk_redir); psock->sk_redir = NULL; } } if (rec) goto more_data; } out_err: sk_psock_put(sk, psock); return err; } static int tls_sw_push_pending_record(struct sock *sk, int flags) { struct tls_context *tls_ctx = tls_get_ctx(sk); struct tls_sw_context_tx *ctx = tls_sw_ctx_tx(tls_ctx); struct tls_rec *rec = ctx->open_rec; struct sk_msg *msg_pl; size_t copied; if (!rec) return 0; msg_pl = &rec->msg_plaintext; copied = msg_pl->sg.size; if (!copied) return 0; return bpf_exec_tx_verdict(msg_pl, sk, true, TLS_RECORD_TYPE_DATA, &copied, flags); } static int tls_sw_sendmsg_splice(struct sock *sk, struct msghdr *msg, struct sk_msg *msg_pl, size_t try_to_copy, ssize_t *copied) { struct page *page = NULL, **pages = &page; do { ssize_t part; size_t off; part = iov_iter_extract_pages(&msg->msg_iter, &pages, try_to_copy, 1, 0, &off); if (part <= 0) return part ?: -EIO; if (WARN_ON_ONCE(!sendpage_ok(page))) { iov_iter_revert(&msg->msg_iter, part); return -EIO; } sk_msg_page_add(msg_pl, page, part, off); msg_pl->sg.copybreak = 0; msg_pl->sg.curr = msg_pl->sg.end; sk_mem_charge(sk, part); *copied += part; try_to_copy -= part; } while (try_to_copy && !sk_msg_full(msg_pl)); return 0; } static int tls_sw_sendmsg_locked(struct sock *sk, struct msghdr *msg, size_t size) { long timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT); struct tls_context *tls_ctx = 
tls_get_ctx(sk); struct tls_prot_info *prot = &tls_ctx->prot_info; struct tls_sw_context_tx *ctx = tls_sw_ctx_tx(tls_ctx); bool async_capable = ctx->async_capable; unsigned char record_type = TLS_RECORD_TYPE_DATA; bool is_kvec = iov_iter_is_kvec(&msg->msg_iter); bool eor = !(msg->msg_flags & MSG_MORE); size_t try_to_copy; ssize_t copied = 0; struct sk_msg *msg_pl, *msg_en; struct tls_rec *rec; int required_size; int num_async = 0; bool full_record; int record_room; int num_zc = 0; int orig_size; int ret = 0; if (!eor && (msg->msg_flags & MSG_EOR)) return -EINVAL; if (unlikely(msg->msg_controllen)) { ret = tls_process_cmsg(sk, msg, &record_type); if (ret) { if (ret == -EINPROGRESS) num_async++; else if (ret != -EAGAIN) goto end; } } while (msg_data_left(msg)) { if (sk->sk_err) { ret = -sk->sk_err; goto send_end; } if (ctx->open_rec) rec = ctx->open_rec; else rec = ctx->open_rec = tls_get_rec(sk); if (!rec) { ret = -ENOMEM; goto send_end; } msg_pl = &rec->msg_plaintext; msg_en = &rec->msg_encrypted; orig_size = msg_pl->sg.size; full_record = false; try_to_copy = msg_data_left(msg); record_room = tls_ctx->tx_max_payload_len - msg_pl->sg.size; if (try_to_copy >= record_room) { try_to_copy = record_room; full_record = true; } required_size = msg_pl->sg.size + try_to_copy + prot->overhead_size; if (!sk_stream_memory_free(sk)) goto wait_for_sndbuf; alloc_encrypted: ret = tls_alloc_encrypted_msg(sk, required_size); if (ret) { if (ret != -ENOSPC) goto wait_for_memory; /* Adjust try_to_copy according to the amount that was * actually allocated. 
The difference is due * to max sg elements limit */ try_to_copy -= required_size - msg_en->sg.size; full_record = true; } if (try_to_copy && (msg->msg_flags & MSG_SPLICE_PAGES)) { ret = tls_sw_sendmsg_splice(sk, msg, msg_pl, try_to_copy, &copied); if (ret < 0) goto send_end; tls_ctx->pending_open_record_frags = true; if (sk_msg_full(msg_pl)) { full_record = true; sk_msg_trim(sk, msg_en, msg_pl->sg.size + prot->overhead_size); } if (full_record || eor) goto copied; continue; } if (!is_kvec && (full_record || eor) && !async_capable) { u32 first = msg_pl->sg.end; ret = sk_msg_zerocopy_from_iter(sk, &msg->msg_iter, msg_pl, try_to_copy); if (ret) goto fallback_to_reg_send; num_zc++; copied += try_to_copy; sk_msg_sg_copy_set(msg_pl, first); ret = bpf_exec_tx_verdict(msg_pl, sk, full_record, record_type, &copied, msg->msg_flags); if (ret) { if (ret == -EINPROGRESS) num_async++; else if (ret == -ENOMEM) goto wait_for_memory; else if (ctx->open_rec && ret == -ENOSPC) { if (msg_pl->cork_bytes) { ret = 0; goto send_end; } goto rollback_iter; } else if (ret != -EAGAIN) goto send_end; } /* Transmit if any encryptions have completed */ if (test_and_clear_bit(BIT_TX_SCHEDULED, &ctx->tx_bitmask)) { cancel_delayed_work(&ctx->tx_work.work); tls_tx_records(sk, msg->msg_flags); } continue; rollback_iter: copied -= try_to_copy; sk_msg_sg_copy_clear(msg_pl, first); iov_iter_revert(&msg->msg_iter, msg_pl->sg.size - orig_size); fallback_to_reg_send: sk_msg_trim(sk, msg_pl, orig_size); } required_size = msg_pl->sg.size + try_to_copy; ret = tls_clone_plaintext_msg(sk, required_size); if (ret) { if (ret != -ENOSPC) goto send_end; /* Adjust try_to_copy according to the amount that was * actually allocated. 
The difference is due * to max sg elements limit */ try_to_copy -= required_size - msg_pl->sg.size; full_record = true; sk_msg_trim(sk, msg_en, msg_pl->sg.size + prot->overhead_size); } if (try_to_copy) { ret = sk_msg_memcopy_from_iter(sk, &msg->msg_iter, msg_pl, try_to_copy); if (ret < 0) goto trim_sgl; } /* Open records defined only if successfully copied, otherwise * we would trim the sg but not reset the open record frags. */ tls_ctx->pending_open_record_frags = true; copied += try_to_copy; copied: if (full_record || eor) { ret = bpf_exec_tx_verdict(msg_pl, sk, full_record, record_type, &copied, msg->msg_flags); if (ret) { if (ret == -EINPROGRESS) num_async++; else if (ret == -ENOMEM) goto wait_for_memory; else if (ret != -EAGAIN) { if (ret == -ENOSPC) ret = 0; goto send_end; } } /* Transmit if any encryptions have completed */ if (test_and_clear_bit(BIT_TX_SCHEDULED, &ctx->tx_bitmask)) { cancel_delayed_work(&ctx->tx_work.work); tls_tx_records(sk, msg->msg_flags); } } continue; wait_for_sndbuf: set_bit(SOCK_NOSPACE, &sk->sk_socket->flags); wait_for_memory: ret = sk_stream_wait_memory(sk, &timeo); if (ret) { trim_sgl: if (ctx->open_rec) tls_trim_both_msgs(sk, orig_size); goto send_end; } if (ctx->open_rec && msg_en->sg.size < required_size) goto alloc_encrypted; } send_end: if (!num_async) { goto end; } else if (num_zc || eor) { int err; /* Wait for pending encryptions to get completed */ err = tls_encrypt_async_wait(ctx); if (err) { ret = err; copied = 0; } } /* Transmit if any encryptions have completed */ if (test_and_clear_bit(BIT_TX_SCHEDULED, &ctx->tx_bitmask)) { cancel_delayed_work(&ctx->tx_work.work); tls_tx_records(sk, msg->msg_flags); } end: ret = sk_stream_error(sk, msg->msg_flags, ret); return copied > 0 ? 
copied : ret; } int tls_sw_sendmsg(struct sock *sk, struct msghdr *msg, size_t size) { struct tls_context *tls_ctx = tls_get_ctx(sk); int ret; if (msg->msg_flags & ~(MSG_MORE | MSG_DONTWAIT | MSG_NOSIGNAL | MSG_CMSG_COMPAT | MSG_SPLICE_PAGES | MSG_EOR | MSG_SENDPAGE_NOPOLICY)) return -EOPNOTSUPP; ret = mutex_lock_interruptible(&tls_ctx->tx_lock); if (ret) return ret; lock_sock(sk); ret = tls_sw_sendmsg_locked(sk, msg, size); release_sock(sk); mutex_unlock(&tls_ctx->tx_lock); return ret; } /* * Handle unexpected EOF during splice without SPLICE_F_MORE set. */ void tls_sw_splice_eof(struct socket *sock) { struct sock *sk = sock->sk; struct tls_context *tls_ctx = tls_get_ctx(sk); struct tls_sw_context_tx *ctx = tls_sw_ctx_tx(tls_ctx); struct tls_rec *rec; struct sk_msg *msg_pl; ssize_t copied = 0; bool retrying = false; int ret = 0; if (!ctx->open_rec) return; mutex_lock(&tls_ctx->tx_lock); lock_sock(sk); retry: /* same checks as in tls_sw_push_pending_record() */ rec = ctx->open_rec; if (!rec) goto unlock; msg_pl = &rec->msg_plaintext; if (msg_pl->sg.size == 0) goto unlock; /* Check the BPF advisor and perform transmission. 
*/ ret = bpf_exec_tx_verdict(msg_pl, sk, false, TLS_RECORD_TYPE_DATA, &copied, 0); switch (ret) { case 0: case -EAGAIN: if (retrying) goto unlock; retrying = true; goto retry; case -EINPROGRESS: break; default: goto unlock; } /* Wait for pending encryptions to get completed */ if (tls_encrypt_async_wait(ctx)) goto unlock; /* Transmit if any encryptions have completed */ if (test_and_clear_bit(BIT_TX_SCHEDULED, &ctx->tx_bitmask)) { cancel_delayed_work(&ctx->tx_work.work); tls_tx_records(sk, 0); } unlock: release_sock(sk); mutex_unlock(&tls_ctx->tx_lock); } static int tls_rx_rec_wait(struct sock *sk, struct sk_psock *psock, bool nonblock, bool released) { struct tls_context *tls_ctx = tls_get_ctx(sk); struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx); DEFINE_WAIT_FUNC(wait, woken_wake_function); int ret = 0; long timeo; /* a rekey is pending, let userspace deal with it */ if (unlikely(ctx->key_update_pending)) return -EKEYEXPIRED; timeo = sock_rcvtimeo(sk, nonblock); while (!tls_strp_msg_ready(ctx)) { if (!sk_psock_queue_empty(psock)) return 0; if (sk->sk_err) return sock_error(sk); if (ret < 0) return ret; if (!skb_queue_empty(&sk->sk_receive_queue)) { tls_strp_check_rcv(&ctx->strp); if (tls_strp_msg_ready(ctx)) break; } if (sk->sk_shutdown & RCV_SHUTDOWN) return 0; if (sock_flag(sk, SOCK_DONE)) return 0; if (!timeo) return -EAGAIN; released = true; add_wait_queue(sk_sleep(sk), &wait); sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk); ret = sk_wait_event(sk, &timeo, tls_strp_msg_ready(ctx) || !sk_psock_queue_empty(psock), &wait); sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk); remove_wait_queue(sk_sleep(sk), &wait); /* Handle signals */ if (signal_pending(current)) return sock_intr_errno(timeo); } if (unlikely(!tls_strp_msg_load(&ctx->strp, released))) return tls_rx_rec_wait(sk, psock, nonblock, false); return 1; } static int tls_setup_from_iter(struct iov_iter *from, int length, int *pages_used, struct scatterlist *to, int to_max_pages) { int rc = 0, i = 0, num_elem = 
*pages_used, maxpages;
	struct page *pages[MAX_SKB_FRAGS];
	unsigned int size = 0;
	ssize_t copied, use;
	size_t offset;

	while (length > 0) {
		i = 0;
		/* Entries still free in @to */
		maxpages = to_max_pages - num_elem;
		if (maxpages == 0) {
			rc = -EFAULT;
			goto out;
		}
		copied = iov_iter_get_pages2(from, pages,
					    length,
					    maxpages, &offset);
		if (copied <= 0) {
			rc = -EFAULT;
			goto out;
		}

		length -= copied;
		/* Total taken from @from so far, needed for the revert */
		size += copied;
		while (copied) {
			/* Only the first page of a batch starts at @offset */
			use = min_t(int, copied, PAGE_SIZE - offset);

			sg_set_page(&to[num_elem],
				    pages[i], use, offset);
			sg_unmark_end(&to[num_elem]);
			/* We do not uncharge memory from this API */

			offset = 0;
			copied -= use;

			i++;
			num_elem++;
		}
	}
	/* Mark the end in the last sg entry if newly added */
	if (num_elem > *pages_used)
		sg_mark_end(&to[num_elem - 1]);
out:
	if (rc)
		iov_iter_revert(from, size);
	*pages_used = num_elem;

	return rc;
}

/*
 * Allocate the skb that will receive the cleartext of @skb.
 *
 * The new skb is requested from alloc_skb_with_frags() with a zero header
 * length and @full_len bytes of data. The header of @skb is copied over,
 * len and data_len are set to @full_len and the strp_msg offset is reset
 * to 0.
 *
 * Returns the new skb, or NULL if the allocation failed.
 */
static struct sk_buff *
tls_alloc_clrtxt_skb(struct sock *sk, struct sk_buff *skb,
		     unsigned int full_len)
{
	struct strp_msg *clr_rxm;
	struct sk_buff *clr_skb;
	int err;

	clr_skb = alloc_skb_with_frags(0, full_len, TLS_PAGE_ORDER,
				       &err, sk->sk_allocation);
	if (!clr_skb)
		return NULL;

	skb_copy_header(clr_skb, skb);
	clr_skb->len = full_len;
	clr_skb->data_len = full_len;

	clr_rxm = strp_msg(clr_skb);
	clr_rxm->offset = 0;

	return clr_skb;
}

/* Decrypt handlers
 *
 * tls_decrypt_sw() and tls_decrypt_device() are decrypt handlers.
 * They must transform the darg in/out argument as follows:
 *       |          Input            |         Output
 * -------------------------------------------------------------------
 *    zc | Zero-copy decrypt allowed | Zero-copy performed
 * async | Async decrypt allowed     | Async crypto used / in progress
 *   skb |            *              | Output skb
 *
 * If ZC decryption was performed darg.skb will point to the input skb.
 */

/* This function decrypts the input skb into either out_iov or in out_sg
 * or in skb buffers itself. The input parameter 'darg->zc' indicates if
 * zero-copy mode needs to be tried or not. With zero-copy mode, either
 * out_iov or out_sg must be non-NULL.
In case both out_iov and out_sg are
 * NULL, then the decryption happens inside skb buffers itself, i.e.
 * zero-copy gets disabled and 'darg->zc' is updated.
 */
/* Return value: 0 on success; when darg->async is set on return from
 * tls_do_decryption(), the result of tls_strp_msg_hold() (or of
 * tls_decrypt_async_wait() if the hold failed). Negative errno otherwise,
 * e.g. -EBADMSG when skb_nsg() finds no segments and -ENOMEM when an
 * allocation fails.
 */
static int tls_decrypt_sg(struct sock *sk, struct iov_iter *out_iov,
			  struct scatterlist *out_sg,
			  struct tls_decrypt_arg *darg)
{
	struct tls_context *tls_ctx = tls_get_ctx(sk);
	struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx);
	struct tls_prot_info *prot = &tls_ctx->prot_info;
	int n_sgin, n_sgout, aead_size, err, pages = 0;
	struct sk_buff *skb = tls_strp_msg(ctx);
	const struct strp_msg *rxm = strp_msg(skb);
	const struct tls_msg *tlm = tls_msg(skb);
	struct aead_request *aead_req;
	struct scatterlist *sgin = NULL;
	struct scatterlist *sgout = NULL;
	const int data_len = rxm->full_len - prot->overhead_size;
	int tail_pages = !!prot->tail_size;
	struct tls_decrypt_ctx *dctx;
	struct sk_buff *clear_skb;
	int iv_offset = 0;
	u8 *mem;

	/* Count input segments for everything after the prepended header */
	n_sgin = skb_nsg(skb, rxm->offset + prot->prepend_size,
			 rxm->full_len - prot->prepend_size);
	if (n_sgin < 1)
		return n_sgin ?: -EBADMSG;

	if (darg->zc && (out_iov || out_sg)) {
		clear_skb = NULL;

		/* One entry for the AAD, plus one for the tail if any */
		if (out_iov)
			n_sgout = 1 + tail_pages +
				iov_iter_npages_cap(out_iov, INT_MAX, data_len);
		else
			n_sgout = sg_nents(out_sg);
	} else {
		darg->zc = false;

		clear_skb = tls_alloc_clrtxt_skb(sk, skb, rxm->full_len);
		if (!clear_skb)
			return -ENOMEM;

		n_sgout = 1 + skb_shinfo(clear_skb)->nr_frags;
	}

	/* Increment to accommodate AAD */
	n_sgin = n_sgin + 1;

	/* Allocate a single block of memory which contains
	 *   aead_req || tls_decrypt_ctx.
	 * Both structs are variable length.
*/
	aead_size = sizeof(*aead_req) + crypto_aead_reqsize(ctx->aead_recv);
	aead_size = ALIGN(aead_size, __alignof__(*dctx));
	mem = kmalloc(aead_size + struct_size(dctx, sg, size_add(n_sgin, n_sgout)),
		      sk->sk_allocation);
	if (!mem) {
		err = -ENOMEM;
		goto exit_free_skb;
	}

	/* Segment the allocated memory */
	aead_req = (struct aead_request *)mem;
	dctx = (struct tls_decrypt_ctx *)(mem + aead_size);
	dctx->sk = sk;
	/* dctx->sg[] holds the input entries followed by the output entries */
	sgin = &dctx->sg[0];
	sgout = &dctx->sg[n_sgin];

	/* For CCM based ciphers, first byte of nonce+iv is a constant */
	switch (prot->cipher_type) {
	case TLS_CIPHER_AES_CCM_128:
		dctx->iv[0] = TLS_AES_CCM_IV_B0_BYTE;
		iv_offset = 1;
		break;
	case TLS_CIPHER_SM4_CCM:
		dctx->iv[0] = TLS_SM4_CCM_IV_B0_BYTE;
		iv_offset = 1;
		break;
	}

	/* Prepare IV */
	if (prot->version == TLS_1_3_VERSION ||
	    prot->cipher_type == TLS_CIPHER_CHACHA20_POLY1305) {
		/* Salt and IV both come from the rx context */
		memcpy(&dctx->iv[iv_offset], tls_ctx->rx.iv,
		       prot->iv_size + prot->salt_size);
	} else {
		/* IV is read from the record, right after the TLS header;
		 * only the salt comes from the rx context.
		 */
		err = skb_copy_bits(skb, rxm->offset + TLS_HEADER_SIZE,
				    &dctx->iv[iv_offset] + prot->salt_size,
				    prot->iv_size);
		if (err < 0)
			goto exit_free;
		memcpy(&dctx->iv[iv_offset], tls_ctx->rx.iv, prot->salt_size);
	}
	tls_xor_iv_with_seq(prot, &dctx->iv[iv_offset], tls_ctx->rx.rec_seq);

	/* Prepare AAD */
	tls_make_aad(dctx->aad, rxm->full_len - prot->overhead_size +
		     prot->tail_size,
		     tls_ctx->rx.rec_seq, tlm->control, prot);

	/* Prepare sgin */
	sg_init_table(sgin, n_sgin);
	sg_set_buf(&sgin[0], dctx->aad, prot->aad_size);
	err = skb_to_sgvec(skb, &sgin[1],
			   rxm->offset + prot->prepend_size,
			   rxm->full_len - prot->prepend_size);
	if (err < 0)
		goto exit_free;

	/* Prepare sgout; entry 0 is the AAD in the first two cases */
	if (clear_skb) {
		sg_init_table(sgout, n_sgout);
		sg_set_buf(&sgout[0], dctx->aad, prot->aad_size);

		err = skb_to_sgvec(clear_skb, &sgout[1], prot->prepend_size,
				   data_len + prot->tail_size);
		if (err < 0)
			goto exit_free;
	} else if (out_iov) {
		sg_init_table(sgout, n_sgout);
		sg_set_buf(&sgout[0], dctx->aad, prot->aad_size);

		err = tls_setup_from_iter(out_iov, data_len, &pages, &sgout[1],
					  (n_sgout - 1 - tail_pages));
		if (err < 0)
goto exit_free_pages;

		if (prot->tail_size) {
			/* Chain the tail buffer after the last user page */
			sg_unmark_end(&sgout[pages]);
			sg_set_buf(&sgout[pages + 1], &dctx->tail,
				   prot->tail_size);
			sg_mark_end(&sgout[pages + 1]);
		}
	} else if (out_sg) {
		memcpy(sgout, out_sg, n_sgout * sizeof(*sgout));
	}
	/* Non-zero only when pages were taken from out_iov above */
	dctx->free_sgout = !!pages;

	/* Prepare and submit AEAD request */
	err = tls_do_decryption(sk, sgin, sgout, dctx->iv,
				data_len + prot->tail_size, aead_req, darg);
	if (err) {
		/* With async_done set, skip put_page() and kfree(mem) here */
		if (darg->async_done)
			goto exit_free_skb;
		goto exit_free_pages;
	}

	darg->skb = clear_skb ?: tls_strp_msg(ctx);
	/* Handed over to darg->skb; consume_skb() below must not free it */
	clear_skb = NULL;

	if (unlikely(darg->async)) {
		err = tls_strp_msg_hold(&ctx->strp, &ctx->async_hold);
		if (err) {
			err = tls_decrypt_async_wait(ctx);
			darg->async = false;
		}
		return err;
	}

	if (unlikely(darg->async_done))
		return 0;

	if (prot->tail_size)
		darg->tail = dctx->tail;

exit_free_pages:
	/* Release the pages in case iov was mapped to pages */
	for (; pages > 0; pages--)
		put_page(sg_page(&sgout[pages]));
exit_free:
	kfree(mem);
exit_free_skb:
	consume_skb(clear_skb);
	return err;
}

/*
 * Software decrypt handler.
 *
 * Decrypts the current record with tls_decrypt_sg() into msg->msg_iter.
 * For TLS 1.3, if zero-copy was used and darg->tail is not
 * TLS_RECORD_TYPE_DATA, zero-copy is turned off and the function calls
 * itself again. On success the padding length is subtracted from the
 * record's full_len.
 *
 * Returns 0 on success or a negative errno.
 */
static int
tls_decrypt_sw(struct sock *sk, struct tls_context *tls_ctx,
	       struct msghdr *msg, struct tls_decrypt_arg *darg)
{
	struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx);
	struct tls_prot_info *prot = &tls_ctx->prot_info;
	struct strp_msg *rxm;
	int pad, err;

	err = tls_decrypt_sg(sk, &msg->msg_iter, NULL, darg);
	if (err < 0) {
		if (err == -EBADMSG)
			TLS_INC_STATS(sock_net(sk), LINUX_MIB_TLSDECRYPTERROR);
		return err;
	}
	/* keep going even for ->async, the code below is TLS 1.3 */

	/* If opportunistic TLS 1.3 ZC failed retry without ZC */
	if (unlikely(darg->zc && prot->version == TLS_1_3_VERSION &&
		     darg->tail != TLS_RECORD_TYPE_DATA)) {
		darg->zc = false;
		if (!darg->tail)
			TLS_INC_STATS(sock_net(sk), LINUX_MIB_TLSRXNOPADVIOL);
		TLS_INC_STATS(sock_net(sk), LINUX_MIB_TLSDECRYPTRETRY);
		return tls_decrypt_sw(sk, tls_ctx, msg, darg);
	}

	pad = tls_padding_length(prot, darg->skb, darg);
	if (pad < 0) {
		/* Only free an skb that is not the strparser's message */
		if (darg->skb != tls_strp_msg(ctx))
			consume_skb(darg->skb);
		return pad;
	}

	rxm =
strp_msg(darg->skb);
	rxm->full_len -= pad;

	return 0;
}

/*
 * Device decrypt handler.
 *
 * Returns 0 when the record still has to be decrypted in software (rx is
 * not TLS_HW, or tls_device_decrypted() returned 0), 1 when the record
 * was handled here, and a negative errno on failure.
 *
 * When handled here, darg->async is cleared and darg->skb is set. In the
 * zero-copy case the payload is copied straight into @msg; otherwise the
 * record is detached from the strparser so the caller gets a real skb.
 */
static int
tls_decrypt_device(struct sock *sk, struct msghdr *msg,
		   struct tls_context *tls_ctx, struct tls_decrypt_arg *darg)
{
	struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx);
	struct tls_prot_info *prot = &tls_ctx->prot_info;
	struct strp_msg *rxm;
	int pad, err;

	if (tls_ctx->rx_conf != TLS_HW)
		return 0;

	err = tls_device_decrypted(sk, tls_ctx);
	if (err <= 0)
		return err;

	pad = tls_padding_length(prot, tls_strp_msg(ctx), darg);
	if (pad < 0)
		return pad;

	darg->async = false;
	darg->skb = tls_strp_msg(ctx);
	/* ->zc downgrade check, in case TLS 1.3 gets here */
	darg->zc &= !(prot->version == TLS_1_3_VERSION &&
		      tls_msg(darg->skb)->control != TLS_RECORD_TYPE_DATA);

	rxm = strp_msg(darg->skb);
	rxm->full_len -= pad;

	if (!darg->zc) {
		/* Non-ZC case needs a real skb */
		darg->skb = tls_strp_msg_detach(ctx);
		if (!darg->skb)
			return -ENOMEM;
	} else {
		unsigned int off, len;

		/* In ZC case nobody cares about the output skb.
		 * Just copy the data here. Note the skb is not fully trimmed.
*/
		off = rxm->offset + prot->prepend_size;
		len = rxm->full_len - prot->overhead_size;

		err = skb_copy_datagram_msg(darg->skb, off, msg, len);
		if (err)
			return err;
	}
	return 1;
}

/*
 * Note an incoming key update.
 *
 * For a handshake record with at least one byte, the first byte is read
 * and, if it is TLS_HANDSHAKE_KEYUPDATE, key_update_pending is set on the
 * rx context and the rekey-received counter is bumped.
 *
 * Returns 0, or the skb_copy_bits() error if the byte cannot be read.
 */
static int tls_check_pending_rekey(struct sock *sk, struct tls_context *ctx,
				   struct sk_buff *skb)
{
	const struct strp_msg *rxm = strp_msg(skb);
	const struct tls_msg *tlm = tls_msg(skb);
	char hs_type;
	int err;

	if (likely(tlm->control != TLS_RECORD_TYPE_HANDSHAKE))
		return 0;

	if (rxm->full_len < 1)
		return 0;

	err = skb_copy_bits(skb, rxm->offset, &hs_type, 1);
	if (err < 0) {
		DEBUG_NET_WARN_ON_ONCE(1);
		return err;
	}

	if (hs_type == TLS_HANDSHAKE_KEYUPDATE) {
		struct tls_sw_context_rx *rx_ctx = ctx->priv_ctx_rx;

		WRITE_ONCE(rx_ctx->key_update_pending, true);
		TLS_INC_STATS(sock_net(sk), LINUX_MIB_TLSRXREKEYRECEIVED);
	}

	return 0;
}

/*
 * Decrypt one record and prepare it for the reader.
 *
 * The device handler is tried first; software decryption is used only when
 * it returns 0. On success the output skb's offset and full_len are
 * adjusted past the record overhead, the rx record sequence number is
 * advanced and the record is checked for a key update.
 *
 * Returns a negative errno on failure, otherwise the result of
 * tls_check_pending_rekey().
 */
static int tls_rx_one_record(struct sock *sk, struct msghdr *msg,
			     struct tls_decrypt_arg *darg)
{
	struct tls_context *tls_ctx = tls_get_ctx(sk);
	struct tls_prot_info *prot = &tls_ctx->prot_info;
	struct strp_msg *rxm;
	int err;

	err = tls_decrypt_device(sk, msg, tls_ctx, darg);
	if (!err)
		err = tls_decrypt_sw(sk, tls_ctx, msg, darg);
	if (err < 0)
		return err;

	rxm = strp_msg(darg->skb);
	rxm->offset += prot->prepend_size;
	rxm->full_len -= prot->overhead_size;
	tls_advance_record_sn(sk, prot, &tls_ctx->rx);

	return tls_check_pending_rekey(sk, tls_ctx, darg->skb);
}

/*
 * Decrypt the current record into the caller-supplied scatterlist @sgout,
 * requesting zero-copy. Returns the result of tls_decrypt_sg().
 */
int decrypt_skb(struct sock *sk, struct scatterlist *sgout)
{
	struct tls_decrypt_arg darg = { .zc = true, };

	return tls_decrypt_sg(sk, NULL, sgout, &darg);
}

/* All records returned from a recvmsg() call must have the same type.
 * 0 is not a valid content type. Use it as "no type reported, yet".
*/
/* Returns 1 if the record may be delivered in this call, 0 if its type
 * differs from the type already reported in *@control, -EBADMSG if the
 * record's type is 0, and -EIO if a non-data type could not be reported
 * through the control message (put_cmsg() failed or MSG_CTRUNC is set).
 */
static int tls_record_content_type(struct msghdr *msg, struct tls_msg *tlm,
				   u8 *control)
{
	int err;

	if (!*control) {
		*control = tlm->control;
		if (!*control)
			return -EBADMSG;

		err = put_cmsg(msg, SOL_TLS, TLS_GET_RECORD_TYPE,
			       sizeof(*control), control);
		/* A put_cmsg() failure is ignored for data records */
		if (*control != TLS_RECORD_TYPE_DATA) {
			if (err || msg->msg_flags & MSG_CTRUNC)
				return -EIO;
		}
	} else if (*control != tlm->control) {
		return 0;
	}

	return 1;
}

/* Tell the strparser that the current record has been dealt with. */
static void tls_rx_rec_done(struct tls_sw_context_rx *ctx)
{
	tls_strp_msg_done(&ctx->strp);
}

/* This function traverses the rx_list in the tls receive context and copies
 * the decrypted records into the buffer provided by the caller (the
 * non-zero-copy path). Further, the records are removed from the rx_list if
 * it is not a peek case and the record has been consumed completely.
 *
 * The first @skip bytes of the list are passed over without copying, then
 * up to @len bytes are copied. Returns the number of bytes copied if that
 * is non-zero, otherwise 0 or the error that stopped the walk. *@more is
 * set (when @more is not NULL) if the walk stopped on an error or on a
 * record whose type does not match *@control.
 */
static int process_rx_list(struct tls_sw_context_rx *ctx,
			   struct msghdr *msg,
			   u8 *control,
			   size_t skip,
			   size_t len,
			   bool is_peek,
			   bool *more)
{
	struct sk_buff *skb = skb_peek(&ctx->rx_list);
	struct tls_msg *tlm;
	ssize_t copied = 0;
	int err;

	/* Step over whole records covered by @skip */
	while (skip && skb) {
		struct strp_msg *rxm = strp_msg(skb);
		tlm = tls_msg(skb);

		err = tls_record_content_type(msg, tlm, control);
		if (err <= 0)
			goto more;

		if (skip < rxm->full_len)
			break;

		skip = skip - rxm->full_len;
		skb = skb_peek_next(skb, &ctx->rx_list);
	}

	while (len && skb) {
		struct sk_buff *next_skb;
		struct strp_msg *rxm = strp_msg(skb);
		int chunk = min_t(unsigned int, rxm->full_len - skip, len);

		tlm = tls_msg(skb);

		err = tls_record_content_type(msg, tlm, control);
		if (err <= 0)
			goto more;

		err = skb_copy_datagram_msg(skb, rxm->offset + skip,
					    msg, chunk);
		if (err < 0)
			goto more;

		len = len - chunk;
		copied = copied + chunk;

		/* Consume the data from record if it is non-peek case*/
		if (!is_peek) {
			rxm->offset = rxm->offset + chunk;
			rxm->full_len = rxm->full_len - chunk;

			/* Return if there is unconsumed data in the record */
			if (rxm->full_len - skip)
				break;
		}

		/* The remaining skip-bytes must lie in 1st record in rx_list.
* So from the 2nd record, 'skip' should be 0.
		 */
		skip = 0;

		if (msg)
			msg->msg_flags |= MSG_EOR;

		next_skb = skb_peek_next(skb, &ctx->rx_list);

		if (!is_peek) {
			__skb_unlink(skb, &ctx->rx_list);
			consume_skb(skb);
		}

		skb = next_skb;
	}
	err = 0;

out:
	return copied ? : err;
more:
	if (more)
		*more = true;
	goto out;
}

/*
 * Decide whether to flush the socket backlog while reading.
 *
 * No flush is done, and false is returned, when @len_left does not exceed
 * @decrypted, or when fewer than 128K bytes were handled since the last
 * flush and tcp_inq() still reports more than one maximum-size record.
 * Otherwise @done is stored in *@flushed_at and the result of
 * sk_flush_backlog() is returned.
 */
static bool
tls_read_flush_backlog(struct sock *sk, struct tls_prot_info *prot,
		       size_t len_left, size_t decrypted, ssize_t done,
		       size_t *flushed_at)
{
	size_t max_rec;

	if (len_left <= decrypted)
		return false;

	max_rec = prot->overhead_size - prot->tail_size + TLS_MAX_PAYLOAD_SIZE;
	if (done - *flushed_at < SZ_128K && tcp_inq(sk) > max_rec)
		return false;

	*flushed_at = done;
	return sk_flush_backlog(sk);
}

/*
 * Claim the reader role for this socket.
 *
 * While ctx->reader_present is set, mark the context contended and sleep
 * on ctx->wq. Returns 0 with reader_present set on success, -EAGAIN when
 * the receive timeout runs out, sock_intr_errno() when a signal is
 * pending, or the sk_wait_event() error.
 */
static int tls_rx_reader_acquire(struct sock *sk, struct tls_sw_context_rx *ctx,
				 bool nonblock)
{
	long timeo;
	int ret;

	timeo = sock_rcvtimeo(sk, nonblock);

	while (unlikely(ctx->reader_present)) {
		DEFINE_WAIT_FUNC(wait, woken_wake_function);

		ctx->reader_contended = 1;

		add_wait_queue(&ctx->wq, &wait);
		ret = sk_wait_event(sk, &timeo,
				    !READ_ONCE(ctx->reader_present), &wait);
		remove_wait_queue(&ctx->wq, &wait);

		if (timeo <= 0)
			return -EAGAIN;
		if (signal_pending(current))
			return sock_intr_errno(timeo);
		if (ret < 0)
			return ret;
	}

	WRITE_ONCE(ctx->reader_present, 1);

	return 0;
}

/*
 * Take the socket lock and then the reader role. If the reader role cannot
 * be acquired the socket lock is dropped again and the error is returned.
 */
static int tls_rx_reader_lock(struct sock *sk, struct tls_sw_context_rx *ctx,
			      bool nonblock)
{
	int err;

	lock_sock(sk);
	err = tls_rx_reader_acquire(sk, ctx, nonblock);
	if (err)
		release_sock(sk);
	return err;
}

/*
 * Give up the reader role. If the context was contended, wake a sleeper on
 * ctx->wq, or clear the contended flag when nobody is waiting.
 */
static void tls_rx_reader_release(struct sock *sk, struct tls_sw_context_rx *ctx)
{
	if (unlikely(ctx->reader_contended)) {
		if (wq_has_sleeper(&ctx->wq))
			wake_up(&ctx->wq);
		else
			ctx->reader_contended = 0;

		WARN_ON_ONCE(!ctx->reader_present);
	}

	WRITE_ONCE(ctx->reader_present, 0);
}

/* Give up the reader role, then drop the socket lock. */
static void tls_rx_reader_unlock(struct sock *sk, struct tls_sw_context_rx *ctx)
{
	tls_rx_reader_release(sk, ctx);
	release_sock(sk);
}

/*
 * recvmsg() for software TLS.
 *
 * MSG_ERRQUEUE requests are handed to sock_recv_errqueue(). Otherwise
 * already-decrypted records on rx_list are delivered first, then further
 * records are decrypted and copied until @len is exhausted, the receive
 * target is met with no record ready, or a non-data record is delivered.
 *
 * Returns the number of bytes copied if that is non-zero, otherwise the
 * last error value (which may be 0).
 */
int tls_sw_recvmsg(struct sock *sk,
		   struct msghdr *msg,
		   size_t len,
		   int flags)
{
	struct
tls_context *tls_ctx = tls_get_ctx(sk);
	struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx);
	struct tls_prot_info *prot = &tls_ctx->prot_info;
	ssize_t decrypted = 0, async_copy_bytes = 0;
	struct sk_psock *psock;
	unsigned char control = 0;
	size_t flushed_at = 0;
	struct strp_msg *rxm;
	struct tls_msg *tlm;
	ssize_t copied = 0;
	ssize_t peeked = 0;
	bool async = false;
	int target, err;
	bool is_kvec = iov_iter_is_kvec(&msg->msg_iter);
	bool is_peek = flags & MSG_PEEK;
	bool rx_more = false;
	bool released = true;
	bool bpf_strp_enabled;
	bool zc_capable;

	if (unlikely(flags & MSG_ERRQUEUE))
		return sock_recv_errqueue(sk, msg, len, SOL_IP, IP_RECVERR);

	err = tls_rx_reader_lock(sk, ctx, flags & MSG_DONTWAIT);
	if (err < 0)
		return err;
	psock = sk_psock_get(sk);
	bpf_strp_enabled = sk_psock_strp_enabled(psock);

	/* If crypto failed the connection is broken */
	err = ctx->async_wait.err;
	if (err)
		goto end;

	/* Process pending decrypted records. It must be non-zero-copy */
	err = process_rx_list(ctx, msg, &control, 0, len, is_peek, &rx_more);
	if (err < 0)
		goto end;

	/* process_rx_list() will set @control if it processed any records */
	copied = err;
	if (len <= copied || rx_more ||
	    (control && control != TLS_RECORD_TYPE_DATA))
		goto end;

	target = sock_rcvlowat(sk, flags & MSG_WAITALL, len);
	len = len - copied;

	/* Zero-copy is ruled out by a BPF strparser, kvec iterators and peek */
	zc_capable = !bpf_strp_enabled && !is_kvec && !is_peek &&
		ctx->zc_capable;
	decrypted = 0;
	while (len && (decrypted + copied < target || tls_strp_msg_ready(ctx))) {
		struct tls_decrypt_arg darg;
		int to_decrypt, chunk;

		err = tls_rx_rec_wait(sk, psock, flags & MSG_DONTWAIT,
				      released);
		if (err <= 0) {
			/* No TLS record; try the psock queue instead */
			if (psock) {
				chunk = sk_msg_recvmsg(sk, psock, msg, len,
						       flags);
				if (chunk > 0) {
					decrypted += chunk;
					len -= chunk;
					continue;
				}
			}
			goto recv_end;
		}

		memset(&darg.inargs, 0, sizeof(darg.inargs));

		rxm = strp_msg(tls_strp_msg(ctx));
		tlm = tls_msg(tls_strp_msg(ctx));

		to_decrypt = rxm->full_len - prot->overhead_size;

		/* Zero-copy only when the whole data record fits in @len */
		if (zc_capable && to_decrypt <= len &&
		    tlm->control == TLS_RECORD_TYPE_DATA)
			darg.zc
= true;

		/* Do not use async mode if record is non-data */
		if (tlm->control == TLS_RECORD_TYPE_DATA && !bpf_strp_enabled)
			darg.async = ctx->async_capable;
		else
			darg.async = false;

		err = tls_rx_one_record(sk, msg, &darg);
		if (err < 0) {
			tls_err_abort(sk, -EBADMSG);
			goto recv_end;
		}

		/* Sticky: once any record went async, the rest are queued too */
		async |= darg.async;

		/* If the type of records being processed is not known yet,
		 * set it to record type just dequeued. If it is already known,
		 * but does not match the record type just dequeued, go to end.
		 * We always get record type here since for tls1.2, record type
		 * is known just after record is dequeued from stream parser.
		 * For tls1.3, we disable async.
		 */
		err = tls_record_content_type(msg, tls_msg(darg.skb), &control);
		if (err <= 0) {
			DEBUG_NET_WARN_ON_ONCE(darg.zc);
			tls_rx_rec_done(ctx);
put_on_rx_list_err:
			/* Keep the decrypted record for a later call */
			__skb_queue_tail(&ctx->rx_list, darg.skb);
			goto recv_end;
		}

		/* periodically flush backlog, and feed strparser */
		released = tls_read_flush_backlog(sk, prot, len, to_decrypt,
						  decrypted + copied,
						  &flushed_at);

		/* TLS 1.3 may have updated the length by more than overhead */
		rxm = strp_msg(darg.skb);
		chunk = rxm->full_len;
		tls_rx_rec_done(ctx);

		if (!darg.zc) {
			bool partially_consumed = chunk > len;
			struct sk_buff *skb = darg.skb;

			DEBUG_NET_WARN_ON_ONCE(darg.skb == ctx->strp.anchor);

			if (async) {
				/* TLS 1.2-only, to_decrypt must be text len */
				chunk = min_t(int, to_decrypt, len);
				async_copy_bytes += chunk;
put_on_rx_list:
				decrypted += chunk;
				len -= chunk;
				__skb_queue_tail(&ctx->rx_list, skb);
				if (unlikely(control != TLS_RECORD_TYPE_DATA))
					break;
				continue;
			}

			if (bpf_strp_enabled) {
				released = true;
				err = sk_psock_tls_strp_read(psock, skb);
				if (err != __SK_PASS) {
					/* Mark the record as fully consumed */
					rxm->offset = rxm->offset + rxm->full_len;
					rxm->full_len = 0;
					if (err == __SK_DROP)
						consume_skb(skb);
					continue;
				}
			}

			if (partially_consumed)
				chunk = len;

			err = skb_copy_datagram_msg(skb, rxm->offset,
						    msg, chunk);
			if (err < 0)
				goto put_on_rx_list_err;

			if (is_peek) {
				peeked += chunk;
				goto put_on_rx_list;
			}

			if
(partially_consumed) {
				/* Keep the unread remainder on rx_list */
				rxm->offset += chunk;
				rxm->full_len -= chunk;
				goto put_on_rx_list;
			}

			consume_skb(skb);
		}

		decrypted += chunk;
		len -= chunk;

		/* Return full control message to userspace before trying
		 * to parse another message type
		 */
		msg->msg_flags |= MSG_EOR;
		if (control != TLS_RECORD_TYPE_DATA)
			break;
	}

recv_end:
	if (async) {
		int ret;

		/* Wait for all previously submitted records to be decrypted */
		ret = tls_decrypt_async_wait(ctx);

		if (ret) {
			/* Do not overwrite an earlier, real error */
			if (err >= 0 || err == -EINPROGRESS)
				err = ret;
			goto end;
		}

		/* Drain records from the rx_list & copy if required */
		if (is_peek)
			err = process_rx_list(ctx, msg, &control, copied + peeked,
					      decrypted - peeked, is_peek, NULL);
		else
			err = process_rx_list(ctx, msg, &control, 0,
					      async_copy_bytes, is_peek, NULL);

		/* we could have copied less than we wanted, and possibly nothing */
		decrypted += max(err, 0) - async_copy_bytes;
	}

	copied += decrypted;

end:
	tls_rx_reader_unlock(sk, ctx);
	if (psock)
		sk_psock_put(sk, psock);
	return copied ? : err;
}

/*
 * splice_read() for software TLS.
 *
 * Takes one record, either the head of rx_list or a freshly decrypted one,
 * and splices up to @len bytes of it into @pipe. Only data records are
 * accepted; any other type is put back on rx_list and -EINVAL is returned.
 * A record that is not fully consumed is also put back on rx_list.
 *
 * Returns the skb_splice_bits() result if non-zero, otherwise the last
 * error value. @ppos is not used.
 */
ssize_t tls_sw_splice_read(struct socket *sock,  loff_t *ppos,
			   struct pipe_inode_info *pipe,
			   size_t len, unsigned int flags)
{
	struct tls_context *tls_ctx = tls_get_ctx(sock->sk);
	struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx);
	struct strp_msg *rxm = NULL;
	struct sock *sk = sock->sk;
	struct tls_msg *tlm;
	struct sk_buff *skb;
	ssize_t copied = 0;
	int chunk;
	int err;

	err = tls_rx_reader_lock(sk, ctx, flags & SPLICE_F_NONBLOCK);
	if (err < 0)
		return err;

	if (!skb_queue_empty(&ctx->rx_list)) {
		skb = __skb_dequeue(&ctx->rx_list);
	} else {
		struct tls_decrypt_arg darg;

		err = tls_rx_rec_wait(sk, NULL, flags & SPLICE_F_NONBLOCK,
				      true);
		if (err <= 0)
			goto splice_read_end;

		/* Zeroed input args: neither zero-copy nor async requested */
		memset(&darg.inargs, 0, sizeof(darg.inargs));

		err = tls_rx_one_record(sk, NULL, &darg);
		if (err < 0) {
			tls_err_abort(sk, -EBADMSG);
			goto splice_read_end;
		}

		tls_rx_rec_done(ctx);
		skb = darg.skb;
	}

	rxm = strp_msg(skb);
	tlm = tls_msg(skb);

	/* splice does not support reading control messages */
	if (tlm->control !=
TLS_RECORD_TYPE_DATA) {
		err = -EINVAL;
		goto splice_requeue;
	}

	chunk = min_t(unsigned int, rxm->full_len, len);
	copied = skb_splice_bits(skb, sk, rxm->offset, pipe, chunk, flags);
	if (copied < 0)
		goto splice_requeue;

	/* chunk < full_len implies chunk == len: advance by len and requeue */
	if (chunk < rxm->full_len) {
		rxm->offset += len;
		rxm->full_len -= len;
		goto splice_requeue;
	}

	consume_skb(skb);

splice_read_end:
	tls_rx_reader_unlock(sk, ctx);
	return copied ? : err;

splice_requeue:
	__skb_queue_head(&ctx->rx_list, skb);
	goto splice_read_end;
}

/*
 * read_sock() for software TLS.
 *
 * Feeds decrypted data records to @read_actor, taking records from rx_list
 * first and decrypting new ones otherwise, without blocking. Returns
 * -EINVAL if a psock is attached to the socket. A non-data record, or a
 * record the actor did not fully consume, is put back at the head of
 * rx_list.
 *
 * The socket lock is not taken here; only the reader role is acquired and
 * released. NOTE(review): presumably the caller holds the socket lock --
 * confirm against callers.
 *
 * Returns the number of bytes consumed by the actor if non-zero, otherwise
 * the last error value.
 */
int tls_sw_read_sock(struct sock *sk, read_descriptor_t *desc,
		     sk_read_actor_t read_actor)
{
	struct tls_context *tls_ctx = tls_get_ctx(sk);
	struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx);
	struct tls_prot_info *prot = &tls_ctx->prot_info;
	struct strp_msg *rxm = NULL;
	struct sk_buff *skb = NULL;
	struct sk_psock *psock;
	size_t flushed_at = 0;
	bool released = true;
	struct tls_msg *tlm;
	ssize_t copied = 0;
	ssize_t decrypted;
	int err, used;

	psock = sk_psock_get(sk);
	if (psock) {
		sk_psock_put(sk, psock);
		return -EINVAL;
	}
	err = tls_rx_reader_acquire(sk, ctx, true);
	if (err < 0)
		return err;

	/* If crypto failed the connection is broken */
	err = ctx->async_wait.err;
	if (err)
		goto read_sock_end;

	decrypted = 0;
	do {
		if (!skb_queue_empty(&ctx->rx_list)) {
			skb = __skb_dequeue(&ctx->rx_list);
			rxm = strp_msg(skb);
			tlm = tls_msg(skb);
		} else {
			struct tls_decrypt_arg darg;

			err = tls_rx_rec_wait(sk, NULL, true, released);
			if (err <= 0)
				goto read_sock_end;

			memset(&darg.inargs, 0, sizeof(darg.inargs));

			err = tls_rx_one_record(sk, NULL, &darg);
			if (err < 0) {
				tls_err_abort(sk, -EBADMSG);
				goto read_sock_end;
			}

			released = tls_read_flush_backlog(sk, prot, INT_MAX,
							  0, decrypted,
							  &flushed_at);
			skb = darg.skb;
			rxm = strp_msg(skb);
			tlm = tls_msg(skb);
			decrypted += rxm->full_len;

			tls_rx_rec_done(ctx);
		}

		/* read_sock does not support reading control messages */
		if (tlm->control != TLS_RECORD_TYPE_DATA) {
			err = -EINVAL;
			goto read_sock_requeue;
		}

		used = read_actor(desc, skb, rxm->offset, rxm->full_len);
		if (used <= 0) {
if (!copied)
				err = used;
			goto read_sock_requeue;
		}
		copied += used;
		if (used < rxm->full_len) {
			/* Partially consumed: advance past what was used */
			rxm->offset += used;
			rxm->full_len -= used;
			if (!desc->count)
				goto read_sock_requeue;
		} else {
			consume_skb(skb);
			/* Clearing skb ends the loop */
			if (!desc->count)
				skb = NULL;
		}
	} while (skb);

read_sock_end:
	tls_rx_reader_release(sk, ctx);
	return copied ? : err;

read_sock_requeue:
	__skb_queue_head(&ctx->rx_list, skb);
	goto read_sock_end;
}

/*
 * Report whether a read could return data: true if the psock ingress list
 * is non-empty, the strparser has a record ready, or rx_list is non-empty.
 */
bool tls_sw_sock_is_readable(struct sock *sk)
{
	struct tls_context *tls_ctx = tls_get_ctx(sk);
	struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx);
	bool ingress_empty = true;
	struct sk_psock *psock;

	rcu_read_lock();
	psock = sk_psock(sk);
	if (psock)
		ingress_empty = list_empty(&psock->ingress_msg);
	rcu_read_unlock();

	return !ingress_empty || tls_strp_msg_ready(ctx) ||
		!skb_queue_empty(&ctx->rx_list);
}

/*
 * Parse the TLS record header at strp->stm.offset in @skb.
 *
 * Returns the full record size (header plus payload length from the
 * header), 0 if the header is not completely available yet, or a negative
 * errno after aborting the strparser: -EINVAL for an oversized prepend or
 * an unexpected version, -EMSGSIZE for a payload that is too long,
 * -EBADMSG for one shorter than the cipher overhead.
 */
int tls_rx_msg_size(struct tls_strparser *strp, struct sk_buff *skb)
{
	struct tls_context *tls_ctx = tls_get_ctx(strp->sk);
	struct tls_prot_info *prot = &tls_ctx->prot_info;
	char header[TLS_HEADER_SIZE + TLS_MAX_IV_SIZE];
	size_t cipher_overhead;
	size_t data_len = 0;
	int ret;

	/* Verify that we have a full TLS header, or wait for more data */
	if (strp->stm.offset + prot->prepend_size > skb->len)
		return 0;

	/* Sanity-check size of on-stack buffer.
*/
	if (WARN_ON(prot->prepend_size > sizeof(header))) {
		ret = -EINVAL;
		goto read_failure;
	}

	/* Linearize header to local buffer */
	ret = skb_copy_bits(skb, strp->stm.offset, header, prot->prepend_size);
	if (ret < 0)
		goto read_failure;

	/* Byte 0 is stored as the mark; bytes 3 and 4 hold the length, high
	 * byte first.
	 */
	strp->mark = header[0];

	data_len = ((header[4] & 0xFF) | (header[3] << 8));

	cipher_overhead = prot->tag_size;
	if (prot->version != TLS_1_3_VERSION &&
	    prot->cipher_type != TLS_CIPHER_CHACHA20_POLY1305)
		cipher_overhead += prot->iv_size;

	if (data_len > TLS_MAX_PAYLOAD_SIZE + cipher_overhead +
	    prot->tail_size) {
		ret = -EMSGSIZE;
		goto read_failure;
	}
	if (data_len < cipher_overhead) {
		ret = -EBADMSG;
		goto read_failure;
	}

	/* Note that both TLS1.3 and TLS1.2 use TLS_1_2 version here */
	if (header[1] != TLS_1_2_VERSION_MINOR ||
	    header[2] != TLS_1_2_VERSION_MAJOR) {
		ret = -EINVAL;
		goto read_failure;
	}

	tls_device_rx_resync_new_rec(strp->sk, data_len + TLS_HEADER_SIZE,
				     TCP_SKB_CB(skb)->seq + strp->stm.offset);
	return data_len + TLS_HEADER_SIZE;

read_failure:
	tls_strp_abort_strp(strp, ret);

	return ret;
}

/*
 * Called by the strparser with a record ready: invoke the data_ready
 * callback that was saved in the rx context.
 */
void tls_rx_msg_ready(struct tls_strparser *strp)
{
	struct tls_sw_context_rx *ctx;

	ctx = container_of(strp, struct tls_sw_context_rx, strp);
	ctx->saved_data_ready(strp->sk);
}

/*
 * sk_data_ready replacement installed by tls_sw_strparser_arm().
 *
 * Runs the strparser with sk_allocation temporarily switched to
 * GFP_ATOMIC, then calls the saved data_ready callback if a psock is
 * attached and its ingress list is not empty.
 */
static void tls_data_ready(struct sock *sk)
{
	struct tls_context *tls_ctx = tls_get_ctx(sk);
	struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx);
	struct sk_psock *psock;
	gfp_t alloc_save;

	trace_sk_data_ready(sk);

	alloc_save = sk->sk_allocation;
	sk->sk_allocation = GFP_ATOMIC;
	tls_strp_data_ready(&ctx->strp);
	sk->sk_allocation = alloc_save;

	psock = sk_psock_get(sk);
	if (psock) {
		if (!list_empty(&psock->ingress_msg))
			ctx->saved_data_ready(sk);
		sk_psock_put(sk, psock);
	}
}

/*
 * Stop the tx work for good. BIT_TX_CLOSING makes tx_work_handler() return
 * early, and BIT_TX_SCHEDULED is left set so that tls_sw_write_space()
 * does not schedule the work again; the delayed work is then disabled
 * synchronously.
 */
void tls_sw_cancel_work_tx(struct tls_context *tls_ctx)
{
	struct tls_sw_context_tx *ctx = tls_sw_ctx_tx(tls_ctx);

	set_bit(BIT_TX_CLOSING, &ctx->tx_bitmask);
	set_bit(BIT_TX_SCHEDULED, &ctx->tx_bitmask);
	disable_delayed_work_sync(&ctx->tx_work.work);
}

/*
 * Release the tx-side resources of @sk: wait for async encryptions, push
 * out what can be sent, free the records left on tx_list, then free the
 * send AEAD and the open record. The tx context itself is not freed here.
 */
void tls_sw_release_resources_tx(struct sock *sk)
{
struct tls_context *tls_ctx = tls_get_ctx(sk);
	struct tls_sw_context_tx *ctx = tls_sw_ctx_tx(tls_ctx);
	struct tls_rec *rec, *tmp;

	/* Wait for any pending async encryptions to complete */
	tls_encrypt_async_wait(ctx);

	tls_tx_records(sk, -1);

	/* Free up un-sent records in tx_list. First, free
	 * the partially sent record if any at head of tx_list.
	 */
	if (tls_ctx->partially_sent_record) {
		tls_free_partial_record(sk, tls_ctx);
		rec = list_first_entry(&ctx->tx_list,
				       struct tls_rec, list);
		list_del(&rec->list);
		/* Unlike the loop below, msg_encrypted is not freed here */
		sk_msg_free(sk, &rec->msg_plaintext);
		kfree(rec);
	}

	list_for_each_entry_safe(rec, tmp, &ctx->tx_list, list) {
		list_del(&rec->list);
		sk_msg_free(sk, &rec->msg_encrypted);
		sk_msg_free(sk, &rec->msg_plaintext);
		kfree(rec);
	}

	crypto_free_aead(ctx->aead_send);
	tls_free_open_rec(sk);
}

/* Free the software tx context of @tls_ctx. */
void tls_sw_free_ctx_tx(struct tls_context *tls_ctx)
{
	struct tls_sw_context_tx *ctx = tls_sw_ctx_tx(tls_ctx);

	kfree(ctx);
}

/*
 * Release the rx-side resources of @sk. Nothing is done unless a receive
 * AEAD was allocated. Otherwise rx_list is purged, the AEAD is freed, the
 * strparser is stopped and the saved sk_data_ready callback, if any, is
 * put back. The rx context itself is not freed here.
 */
void tls_sw_release_resources_rx(struct sock *sk)
{
	struct tls_context *tls_ctx = tls_get_ctx(sk);
	struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx);

	if (ctx->aead_recv) {
		__skb_queue_purge(&ctx->rx_list);
		crypto_free_aead(ctx->aead_recv);
		tls_strp_stop(&ctx->strp);
		/* If tls_sw_strparser_arm() was not called (cleanup paths)
		 * we still want to tls_strp_stop(), but sk->sk_data_ready was
		 * never swapped.
*/
		if (ctx->saved_data_ready) {
			write_lock_bh(&sk->sk_callback_lock);
			sk->sk_data_ready = ctx->saved_data_ready;
			write_unlock_bh(&sk->sk_callback_lock);
		}
	}
}

/* Finish off the strparser of the rx context via tls_strp_done(). */
void tls_sw_strparser_done(struct tls_context *tls_ctx)
{
	struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx);

	tls_strp_done(&ctx->strp);
}

/* Free the software rx context of @tls_ctx. */
void tls_sw_free_ctx_rx(struct tls_context *tls_ctx)
{
	struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx);

	kfree(ctx);
}

/* Release the rx-side resources of @sk, then free the rx context. */
void tls_sw_free_resources_rx(struct sock *sk)
{
	struct tls_context *tls_ctx = tls_get_ctx(sk);

	tls_sw_release_resources_rx(sk);
	tls_sw_free_ctx_rx(tls_ctx);
}

/* The work handler to transmit the encrypted records in tx_list.
 *
 * Does nothing if the TLS context is gone, if BIT_TX_CLOSING is set, or if
 * BIT_TX_SCHEDULED was not set. If tx_lock cannot be taken without
 * blocking, the work is scheduled again instead of waiting for the lock.
 */
static void tx_work_handler(struct work_struct *work)
{
	struct delayed_work *delayed_work = to_delayed_work(work);
	struct tx_work *tx_work = container_of(delayed_work,
					       struct tx_work, work);
	struct sock *sk = tx_work->sk;
	struct tls_context *tls_ctx = tls_get_ctx(sk);
	struct tls_sw_context_tx *ctx;

	if (unlikely(!tls_ctx))
		return;

	ctx = tls_sw_ctx_tx(tls_ctx);
	if (test_bit(BIT_TX_CLOSING, &ctx->tx_bitmask))
		return;

	if (!test_and_clear_bit(BIT_TX_SCHEDULED, &ctx->tx_bitmask))
		return;

	if (mutex_trylock(&tls_ctx->tx_lock)) {
		lock_sock(sk);
		tls_tx_records(sk, -1);
		release_sock(sk);
		mutex_unlock(&tls_ctx->tx_lock);
	} else if (!test_and_set_bit(BIT_TX_SCHEDULED, &ctx->tx_bitmask)) {
		/* Someone is holding the tx_lock, they will likely run Tx
		 * and cancel the work on their way out of the lock section.
		 * Schedule a long delay just in case.
*/
		schedule_delayed_work(&ctx->tx_work.work, msecs_to_jiffies(10));
	}
}

/*
 * Return the tx_ready flag of the first record on tx_list, or false if the
 * list is empty.
 */
static bool tls_is_tx_ready(struct tls_sw_context_tx *ctx)
{
	struct tls_rec *rec;

	rec = list_first_entry_or_null(&ctx->tx_list, struct tls_rec, list);
	if (!rec)
		return false;

	return READ_ONCE(rec->tx_ready);
}

/*
 * Schedule the tx work immediately if the first queued record is ready and
 * the work is not already marked as scheduled.
 */
void tls_sw_write_space(struct sock *sk, struct tls_context *ctx)
{
	struct tls_sw_context_tx *tx_ctx = tls_sw_ctx_tx(ctx);

	/* Schedule the transmission if tx list is ready */
	if (tls_is_tx_ready(tx_ctx) &&
	    !test_and_set_bit(BIT_TX_SCHEDULED, &tx_ctx->tx_bitmask))
		schedule_delayed_work(&tx_ctx->tx_work.work, 0);
}

/*
 * Hook the TLS receive path into the socket: remember the current
 * sk_data_ready callback in the rx context and install tls_data_ready()
 * in its place, under sk_callback_lock.
 */
void tls_sw_strparser_arm(struct sock *sk, struct tls_context *tls_ctx)
{
	struct tls_sw_context_rx *rx_ctx = tls_sw_ctx_rx(tls_ctx);

	write_lock_bh(&sk->sk_callback_lock);
	rx_ctx->saved_data_ready = sk->sk_data_ready;
	sk->sk_data_ready = tls_data_ready;
	write_unlock_bh(&sk->sk_callback_lock);
}

/*
 * Recompute rx zc_capable: set when rx_no_pad is enabled or the protocol
 * version is not TLS 1.3.
 */
void tls_update_rx_zc_capable(struct tls_context *tls_ctx)
{
	struct tls_sw_context_rx *rx_ctx = tls_sw_ctx_rx(tls_ctx);

	rx_ctx->zc_capable = tls_ctx->rx_no_pad ||
		tls_ctx->prot_info.version != TLS_1_3_VERSION;
}

/*
 * Allocate the software tx context, or reuse ctx->priv_ctx_tx if one is
 * already attached, and initialise its wait object, pending counter,
 * tx_list and delayed work.
 *
 * Returns the context, or NULL if the allocation failed. The caller is
 * responsible for storing the result in ctx->priv_ctx_tx.
 */
static struct tls_sw_context_tx *init_ctx_tx(struct tls_context *ctx, struct sock *sk)
{
	struct tls_sw_context_tx *sw_ctx_tx;

	if (!ctx->priv_ctx_tx) {
		sw_ctx_tx = kzalloc_obj(*sw_ctx_tx);
		if (!sw_ctx_tx)
			return NULL;
	} else {
		sw_ctx_tx = ctx->priv_ctx_tx;
	}

	crypto_init_wait(&sw_ctx_tx->async_wait);
	atomic_set(&sw_ctx_tx->encrypt_pending, 1);
	INIT_LIST_HEAD(&sw_ctx_tx->tx_list);
	INIT_DELAYED_WORK(&sw_ctx_tx->tx_work.work, tx_work_handler);
	sw_ctx_tx->tx_work.sk = sk;

	return sw_ctx_tx;
}

/*
 * Allocate the software rx context, or reuse ctx->priv_ctx_rx if one is
 * already attached, and initialise its wait object, pending counter, wait
 * queue and skb queues.
 *
 * Returns the context, or NULL if the allocation failed. The caller is
 * responsible for storing the result in ctx->priv_ctx_rx.
 */
static struct tls_sw_context_rx *init_ctx_rx(struct tls_context *ctx)
{
	struct tls_sw_context_rx *sw_ctx_rx;

	if (!ctx->priv_ctx_rx) {
		sw_ctx_rx = kzalloc_obj(*sw_ctx_rx);
		if (!sw_ctx_rx)
			return NULL;
	} else {
		sw_ctx_rx = ctx->priv_ctx_rx;
	}

	crypto_init_wait(&sw_ctx_rx->async_wait);
	atomic_set(&sw_ctx_rx->decrypt_pending, 1);
	init_waitqueue_head(&sw_ctx_rx->wq);
skb_queue_head_init(&sw_ctx_rx->rx_list);
	skb_queue_head_init(&sw_ctx_rx->async_hold);

	return sw_ctx_rx;
}

/*
 * Fill @prot with the record-layout sizes for the given crypto parameters.
 *
 * For TLS 1.3 the nonce is not prepended to the record (prepend_size is
 * just the header), the AAD is TLS_HEADER_SIZE bytes and there is a
 * one-byte tail; for other versions the AAD is TLS_AAD_SPACE_SIZE bytes
 * and there is no tail.
 *
 * Returns 0, or -EINVAL if the nonce or AAD size exceeds the compile-time
 * maximum. Note that aad_size and tail_size have already been written to
 * @prot when -EINVAL is returned.
 */
int init_prot_info(struct tls_prot_info *prot,
		   const struct tls_crypto_info *crypto_info,
		   const struct tls_cipher_desc *cipher_desc)
{
	u16 nonce_size = cipher_desc->nonce;

	if (crypto_info->version == TLS_1_3_VERSION) {
		nonce_size = 0;
		prot->aad_size = TLS_HEADER_SIZE;
		prot->tail_size = 1;
	} else {
		prot->aad_size = TLS_AAD_SPACE_SIZE;
		prot->tail_size = 0;
	}

	/* Sanity-check the sizes for stack allocations. */
	if (nonce_size > TLS_MAX_IV_SIZE || prot->aad_size > TLS_MAX_AAD_SIZE)
		return -EINVAL;

	prot->version = crypto_info->version;
	prot->cipher_type = crypto_info->cipher_type;
	prot->prepend_size = TLS_HEADER_SIZE + nonce_size;
	prot->tag_size = cipher_desc->tag;
	prot->overhead_size = prot->prepend_size + prot->tag_size + prot->tail_size;
	prot->iv_size = cipher_desc->iv;
	prot->salt_size = cipher_desc->salt;
	prot->rec_seq_size = cipher_desc->rec_seq;

	return 0;
}

/*
 * Complete an rx key update: clear key_update_pending and call the saved
 * data_ready callback.
 */
static void tls_finish_key_update(struct sock *sk, struct tls_context *tls_ctx)
{
	struct tls_sw_context_rx *ctx = tls_ctx->priv_ctx_rx;

	WRITE_ONCE(ctx->key_update_pending, false);
	/* wake-up pre-existing poll() */
	ctx->saved_data_ready(sk);
}

/*
 * Set up, or rekey, software crypto for one direction of @sk.
 *
 * @tx selects the transmit (non-zero) or receive (zero) direction.
 * @new_crypto_info is NULL for the initial setup, in which case the
 * crypto info already stored in the TLS context is used and the private
 * sw context is initialised; non-NULL means a rekey with the given
 * parameters.
 *
 * Returns 0 on success or a negative errno. On a failed initial setup the
 * private sw context is freed again.
 */
int tls_set_sw_offload(struct sock *sk, int tx,
		       struct tls_crypto_info *new_crypto_info)
{
	struct tls_crypto_info *crypto_info, *src_crypto_info;
	struct tls_sw_context_tx *sw_ctx_tx = NULL;
	struct tls_sw_context_rx *sw_ctx_rx = NULL;
	const struct tls_cipher_desc *cipher_desc;
	char *iv, *rec_seq, *key, *salt;
	struct cipher_context *cctx;
	struct tls_prot_info *prot;
	struct crypto_aead **aead;
	struct tls_context *ctx;
	struct crypto_tfm *tfm;
	int rc = 0;

	ctx = tls_get_ctx(sk);
	prot = &ctx->prot_info;

	/* new_crypto_info != NULL means rekey */
	if (!new_crypto_info) {
		if (tx) {
			ctx->priv_ctx_tx = init_ctx_tx(ctx, sk);
			if (!ctx->priv_ctx_tx)
				return -ENOMEM;
		} else {
			ctx->priv_ctx_rx = init_ctx_rx(ctx);
			if (!ctx->priv_ctx_rx)
				return -ENOMEM;
		}
	}

	if (tx) {
		sw_ctx_tx =
ctx->priv_ctx_tx; crypto_info = &ctx->crypto_send.info; cctx = &ctx->tx; aead = &sw_ctx_tx->aead_send; } else { sw_ctx_rx = ctx->priv_ctx_rx; crypto_info = &ctx->crypto_recv.info; cctx = &ctx->rx; aead = &sw_ctx_rx->aead_recv; } src_crypto_info = new_crypto_info ?: crypto_info; cipher_desc = get_cipher_desc(src_crypto_info->cipher_type); if (!cipher_desc) { rc = -EINVAL; goto free_priv; } rc = init_prot_info(prot, src_crypto_info, cipher_desc); if (rc) goto free_priv; iv = crypto_info_iv(src_crypto_info, cipher_desc); key = crypto_info_key(src_crypto_info, cipher_desc); salt = crypto_info_salt(src_crypto_info, cipher_desc); rec_seq = crypto_info_rec_seq(src_crypto_info, cipher_desc); if (!*aead) { *aead = crypto_alloc_aead(cipher_desc->cipher_name, 0, 0); if (IS_ERR(*aead)) { rc = PTR_ERR(*aead); *aead = NULL; goto free_priv; } } ctx->push_pending_record = tls_sw_push_pending_record; /* setkey is the last operation that could fail during a * rekey. if it succeeds, we can start modifying the * context. 
*/ rc = crypto_aead_setkey(*aead, key, cipher_desc->key); if (rc) { if (new_crypto_info) goto out; else goto free_aead; } if (!new_crypto_info) { rc = crypto_aead_setauthsize(*aead, prot->tag_size); if (rc) goto free_aead; } if (!tx && !new_crypto_info) { tfm = crypto_aead_tfm(sw_ctx_rx->aead_recv); tls_update_rx_zc_capable(ctx); sw_ctx_rx->async_capable = src_crypto_info->version != TLS_1_3_VERSION && !!(tfm->__crt_alg->cra_flags & CRYPTO_ALG_ASYNC); rc = tls_strp_init(&sw_ctx_rx->strp, sk); if (rc) goto free_aead; } memcpy(cctx->iv, salt, cipher_desc->salt); memcpy(cctx->iv + cipher_desc->salt, iv, cipher_desc->iv); memcpy(cctx->rec_seq, rec_seq, cipher_desc->rec_seq); if (new_crypto_info) { unsafe_memcpy(crypto_info, new_crypto_info, cipher_desc->crypto_info, /* size was checked in do_tls_setsockopt_conf */); memzero_explicit(new_crypto_info, cipher_desc->crypto_info); if (!tx) tls_finish_key_update(sk, ctx); } goto out; free_aead: crypto_free_aead(*aead); *aead = NULL; free_priv: if (!new_crypto_info) { if (tx) { kfree(ctx->priv_ctx_tx); ctx->priv_ctx_tx = NULL; } else { kfree(ctx->priv_ctx_rx); ctx->priv_ctx_rx = NULL; } } out: return rc; }
25 25 25 25 25 25 13 11 8 25 25 13 25 13 25 13 25 25 25 13 25 9 25 25 8 8 8 8 8 8 8 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 
507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 
1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 // SPDX-License-Identifier: GPL-2.0 /* * Functions related to sysfs handling */ #include <linux/kernel.h> #include <linux/slab.h> #include <linux/module.h> #include <linux/bio.h> #include <linux/blkdev.h> #include <linux/backing-dev.h> #include <linux/blktrace_api.h> #include <linux/debugfs.h> #include "blk.h" #include "blk-mq.h" #include "blk-mq-debugfs.h" #include "blk-mq-sched.h" #include "blk-rq-qos.h" #include "blk-wbt.h" #include "blk-cgroup.h" #include "blk-throttle.h" struct queue_sysfs_entry { struct attribute attr; ssize_t (*show)(struct gendisk *disk, char *page); ssize_t (*show_limit)(struct gendisk *disk, char *page); ssize_t (*store)(struct gendisk *disk, const char *page, size_t count); int (*store_limit)(struct gendisk *disk, const char *page, size_t count, struct queue_limits *lim); }; static ssize_t queue_var_show(unsigned long var, char *page) { return sysfs_emit(page, "%lu\n", var); } static ssize_t queue_var_store(unsigned long *var, const char *page, size_t count) { int err; unsigned long v; err = kstrtoul(page, 10, &v); if (err || v > UINT_MAX) return -EINVAL; *var = v; return count; } static ssize_t queue_requests_show(struct gendisk *disk, char *page) { ssize_t ret; mutex_lock(&disk->queue->elevator_lock); ret = queue_var_show(disk->queue->nr_requests, page); mutex_unlock(&disk->queue->elevator_lock); return ret; } static ssize_t queue_requests_store(struct gendisk *disk, const char *page, size_t count) { struct request_queue *q = disk->queue; struct blk_mq_tag_set *set = q->tag_set; struct elevator_tags *et = NULL; unsigned int memflags; unsigned long nr; int ret; ret = queue_var_store(&nr, page, count); if 
(ret < 0) return ret; /* * Serialize updating nr_requests with concurrent queue_requests_store() * and switching elevator. * * Use trylock to avoid circular lock dependency with kernfs active * reference during concurrent disk deletion: * update_nr_hwq_lock -> kn->active (via del_gendisk -> kobject_del) * kn->active -> update_nr_hwq_lock (via this sysfs write path) */ if (!down_write_trylock(&set->update_nr_hwq_lock)) return -EBUSY; if (nr == q->nr_requests) goto unlock; if (nr < BLKDEV_MIN_RQ) nr = BLKDEV_MIN_RQ; /* * Switching elevator is protected by update_nr_hwq_lock: * - read lock is held from elevator sysfs attribute; * - write lock is held from updating nr_hw_queues; * Hence it's safe to access q->elevator here with write lock held. */ if (nr <= set->reserved_tags || (q->elevator && nr > MAX_SCHED_RQ) || (!q->elevator && nr > set->queue_depth)) { ret = -EINVAL; goto unlock; } if (!blk_mq_is_shared_tags(set->flags) && q->elevator && nr > q->elevator->et->nr_requests) { /* * Tags will grow, allocate memory before freezing queue to * prevent deadlock. 
*/ et = blk_mq_alloc_sched_tags(set, q->nr_hw_queues, nr); if (!et) { ret = -ENOMEM; goto unlock; } } memflags = blk_mq_freeze_queue(q); mutex_lock(&q->elevator_lock); et = blk_mq_update_nr_requests(q, et, nr); mutex_unlock(&q->elevator_lock); blk_mq_unfreeze_queue(q, memflags); if (et) blk_mq_free_sched_tags(et, set); unlock: up_write(&set->update_nr_hwq_lock); return ret; } static ssize_t queue_async_depth_show(struct gendisk *disk, char *page) { guard(mutex)(&disk->queue->elevator_lock); return queue_var_show(disk->queue->async_depth, page); } static ssize_t queue_async_depth_store(struct gendisk *disk, const char *page, size_t count) { struct request_queue *q = disk->queue; unsigned int memflags; unsigned long nr; int ret; if (!queue_is_mq(q)) return -EINVAL; ret = queue_var_store(&nr, page, count); if (ret < 0) return ret; if (nr == 0) return -EINVAL; memflags = blk_mq_freeze_queue(q); scoped_guard(mutex, &q->elevator_lock) { if (q->elevator) { q->async_depth = min(q->nr_requests, nr); if (q->elevator->type->ops.depth_updated) q->elevator->type->ops.depth_updated(q); } else { ret = -EINVAL; } } blk_mq_unfreeze_queue(q, memflags); return ret; } static ssize_t queue_ra_show(struct gendisk *disk, char *page) { ssize_t ret; mutex_lock(&disk->queue->limits_lock); ret = queue_var_show(disk->bdi->ra_pages << (PAGE_SHIFT - 10), page); mutex_unlock(&disk->queue->limits_lock); return ret; } static ssize_t queue_ra_store(struct gendisk *disk, const char *page, size_t count) { unsigned long ra_kb; ssize_t ret; struct request_queue *q = disk->queue; ret = queue_var_store(&ra_kb, page, count); if (ret < 0) return ret; /* * The ->ra_pages change below is protected by ->limits_lock because it * is usually calculated from the queue limits by * queue_limits_commit_update(). * * bdi->ra_pages reads are not serialized against bdi->ra_pages writes. * Use WRITE_ONCE() to write bdi->ra_pages once. 
*/ mutex_lock(&q->limits_lock); WRITE_ONCE(disk->bdi->ra_pages, ra_kb >> (PAGE_SHIFT - 10)); mutex_unlock(&q->limits_lock); return ret; } #define QUEUE_SYSFS_LIMIT_SHOW(_field) \ static ssize_t queue_##_field##_show(struct gendisk *disk, char *page) \ { \ return queue_var_show(disk->queue->limits._field, page); \ } QUEUE_SYSFS_LIMIT_SHOW(max_segments) QUEUE_SYSFS_LIMIT_SHOW(max_discard_segments) QUEUE_SYSFS_LIMIT_SHOW(max_integrity_segments) QUEUE_SYSFS_LIMIT_SHOW(max_segment_size) QUEUE_SYSFS_LIMIT_SHOW(max_write_streams) QUEUE_SYSFS_LIMIT_SHOW(write_stream_granularity) QUEUE_SYSFS_LIMIT_SHOW(logical_block_size) QUEUE_SYSFS_LIMIT_SHOW(physical_block_size) QUEUE_SYSFS_LIMIT_SHOW(chunk_sectors) QUEUE_SYSFS_LIMIT_SHOW(io_min) QUEUE_SYSFS_LIMIT_SHOW(io_opt) QUEUE_SYSFS_LIMIT_SHOW(discard_granularity) QUEUE_SYSFS_LIMIT_SHOW(zone_write_granularity) QUEUE_SYSFS_LIMIT_SHOW(virt_boundary_mask) QUEUE_SYSFS_LIMIT_SHOW(dma_alignment) QUEUE_SYSFS_LIMIT_SHOW(max_open_zones) QUEUE_SYSFS_LIMIT_SHOW(max_active_zones) QUEUE_SYSFS_LIMIT_SHOW(atomic_write_unit_min) QUEUE_SYSFS_LIMIT_SHOW(atomic_write_unit_max) #define QUEUE_SYSFS_LIMIT_SHOW_SECTORS_TO_BYTES(_field) \ static ssize_t queue_##_field##_show(struct gendisk *disk, char *page) \ { \ return sysfs_emit(page, "%llu\n", \ (unsigned long long)disk->queue->limits._field << \ SECTOR_SHIFT); \ } QUEUE_SYSFS_LIMIT_SHOW_SECTORS_TO_BYTES(max_discard_sectors) QUEUE_SYSFS_LIMIT_SHOW_SECTORS_TO_BYTES(max_hw_discard_sectors) QUEUE_SYSFS_LIMIT_SHOW_SECTORS_TO_BYTES(max_write_zeroes_sectors) QUEUE_SYSFS_LIMIT_SHOW_SECTORS_TO_BYTES(max_hw_wzeroes_unmap_sectors) QUEUE_SYSFS_LIMIT_SHOW_SECTORS_TO_BYTES(max_wzeroes_unmap_sectors) QUEUE_SYSFS_LIMIT_SHOW_SECTORS_TO_BYTES(atomic_write_max_sectors) QUEUE_SYSFS_LIMIT_SHOW_SECTORS_TO_BYTES(atomic_write_boundary_sectors) QUEUE_SYSFS_LIMIT_SHOW_SECTORS_TO_BYTES(max_zone_append_sectors) #define QUEUE_SYSFS_LIMIT_SHOW_SECTORS_TO_KB(_field) \ static ssize_t queue_##_field##_show(struct gendisk *disk, char 
*page) \ { \ return queue_var_show(disk->queue->limits._field >> 1, page); \ } QUEUE_SYSFS_LIMIT_SHOW_SECTORS_TO_KB(max_sectors) QUEUE_SYSFS_LIMIT_SHOW_SECTORS_TO_KB(max_hw_sectors) #define QUEUE_SYSFS_SHOW_CONST(_name, _val) \ static ssize_t queue_##_name##_show(struct gendisk *disk, char *page) \ { \ return sysfs_emit(page, "%d\n", _val); \ } /* deprecated fields */ QUEUE_SYSFS_SHOW_CONST(discard_zeroes_data, 0) QUEUE_SYSFS_SHOW_CONST(write_same_max, 0) QUEUE_SYSFS_SHOW_CONST(poll_delay, -1) static int queue_max_discard_sectors_store(struct gendisk *disk, const char *page, size_t count, struct queue_limits *lim) { unsigned long max_discard_bytes; ssize_t ret; ret = queue_var_store(&max_discard_bytes, page, count); if (ret < 0) return ret; if (max_discard_bytes & (disk->queue->limits.discard_granularity - 1)) return -EINVAL; if ((max_discard_bytes >> SECTOR_SHIFT) > UINT_MAX) return -EINVAL; lim->max_user_discard_sectors = max_discard_bytes >> SECTOR_SHIFT; return 0; } static int queue_max_wzeroes_unmap_sectors_store(struct gendisk *disk, const char *page, size_t count, struct queue_limits *lim) { unsigned long max_zeroes_bytes, max_hw_zeroes_bytes; ssize_t ret; ret = queue_var_store(&max_zeroes_bytes, page, count); if (ret < 0) return ret; max_hw_zeroes_bytes = lim->max_hw_wzeroes_unmap_sectors << SECTOR_SHIFT; if (max_zeroes_bytes != 0 && max_zeroes_bytes != max_hw_zeroes_bytes) return -EINVAL; lim->max_user_wzeroes_unmap_sectors = max_zeroes_bytes >> SECTOR_SHIFT; return 0; } static int queue_max_sectors_store(struct gendisk *disk, const char *page, size_t count, struct queue_limits *lim) { unsigned long max_sectors_kb; ssize_t ret; ret = queue_var_store(&max_sectors_kb, page, count); if (ret < 0) return ret; lim->max_user_sectors = max_sectors_kb << 1; return 0; } static ssize_t queue_feature_store(struct gendisk *disk, const char *page, size_t count, struct queue_limits *lim, blk_features_t feature) { unsigned long val; ssize_t ret; ret = 
queue_var_store(&val, page, count); if (ret < 0) return ret; if (val) lim->features |= feature; else lim->features &= ~feature; return 0; } #define QUEUE_SYSFS_FEATURE(_name, _feature) \ static ssize_t queue_##_name##_show(struct gendisk *disk, char *page) \ { \ return sysfs_emit(page, "%u\n", \ !!(disk->queue->limits.features & _feature)); \ } \ static int queue_##_name##_store(struct gendisk *disk, \ const char *page, size_t count, struct queue_limits *lim) \ { \ return queue_feature_store(disk, page, count, lim, _feature); \ } QUEUE_SYSFS_FEATURE(rotational, BLK_FEAT_ROTATIONAL) QUEUE_SYSFS_FEATURE(add_random, BLK_FEAT_ADD_RANDOM) QUEUE_SYSFS_FEATURE(iostats, BLK_FEAT_IO_STAT) QUEUE_SYSFS_FEATURE(stable_writes, BLK_FEAT_STABLE_WRITES); #define QUEUE_SYSFS_FEATURE_SHOW(_name, _feature) \ static ssize_t queue_##_name##_show(struct gendisk *disk, char *page) \ { \ return sysfs_emit(page, "%u\n", \ !!(disk->queue->limits.features & _feature)); \ } QUEUE_SYSFS_FEATURE_SHOW(fua, BLK_FEAT_FUA); QUEUE_SYSFS_FEATURE_SHOW(dax, BLK_FEAT_DAX); static ssize_t queue_poll_show(struct gendisk *disk, char *page) { if (queue_is_mq(disk->queue)) return sysfs_emit(page, "%u\n", blk_mq_can_poll(disk->queue)); return sysfs_emit(page, "%u\n", !!(disk->queue->limits.features & BLK_FEAT_POLL)); } static ssize_t queue_zoned_show(struct gendisk *disk, char *page) { if (blk_queue_is_zoned(disk->queue)) return sysfs_emit(page, "host-managed\n"); return sysfs_emit(page, "none\n"); } static ssize_t queue_nr_zones_show(struct gendisk *disk, char *page) { return queue_var_show(disk_nr_zones(disk), page); } static ssize_t queue_zoned_qd1_writes_show(struct gendisk *disk, char *page) { return queue_var_show(!!blk_queue_zoned_qd1_writes(disk->queue), page); } static ssize_t queue_zoned_qd1_writes_store(struct gendisk *disk, const char *page, size_t count) { struct request_queue *q = disk->queue; unsigned long qd1_writes; unsigned int memflags; ssize_t ret; ret = queue_var_store(&qd1_writes, page, 
count); if (ret < 0) return ret; memflags = blk_mq_freeze_queue(q); blk_mq_quiesce_queue(q); if (qd1_writes) blk_queue_flag_set(QUEUE_FLAG_ZONED_QD1_WRITES, q); else blk_queue_flag_clear(QUEUE_FLAG_ZONED_QD1_WRITES, q); blk_mq_unquiesce_queue(q); blk_mq_unfreeze_queue(q, memflags); return count; } static ssize_t queue_iostats_passthrough_show(struct gendisk *disk, char *page) { return queue_var_show(!!blk_queue_passthrough_stat(disk->queue), page); } static int queue_iostats_passthrough_store(struct gendisk *disk, const char *page, size_t count, struct queue_limits *lim) { unsigned long ios; ssize_t ret; ret = queue_var_store(&ios, page, count); if (ret < 0) return ret; if (ios) lim->flags |= BLK_FLAG_IOSTATS_PASSTHROUGH; else lim->flags &= ~BLK_FLAG_IOSTATS_PASSTHROUGH; return 0; } static ssize_t queue_nomerges_show(struct gendisk *disk, char *page) { return queue_var_show((blk_queue_nomerges(disk->queue) << 1) | blk_queue_noxmerges(disk->queue), page); } static ssize_t queue_nomerges_store(struct gendisk *disk, const char *page, size_t count) { unsigned long nm; struct request_queue *q = disk->queue; ssize_t ret = queue_var_store(&nm, page, count); if (ret < 0) return ret; blk_queue_flag_clear(QUEUE_FLAG_NOMERGES, q); blk_queue_flag_clear(QUEUE_FLAG_NOXMERGES, q); if (nm == 2) blk_queue_flag_set(QUEUE_FLAG_NOMERGES, q); else if (nm) blk_queue_flag_set(QUEUE_FLAG_NOXMERGES, q); return ret; } static ssize_t queue_rq_affinity_show(struct gendisk *disk, char *page) { bool set = test_bit(QUEUE_FLAG_SAME_COMP, &disk->queue->queue_flags); bool force = test_bit(QUEUE_FLAG_SAME_FORCE, &disk->queue->queue_flags); return queue_var_show(set << force, page); } static ssize_t queue_rq_affinity_store(struct gendisk *disk, const char *page, size_t count) { ssize_t ret = -EINVAL; #ifdef CONFIG_SMP struct request_queue *q = disk->queue; unsigned long val; ret = queue_var_store(&val, page, count); if (ret < 0) return ret; /* * Here we update two queue flags each using atomic 
bitops, although * updating two flags isn't atomic it should be harmless as those flags * are accessed individually using atomic test_bit operation. So we * don't grab any lock while updating these flags. */ if (val == 2) { blk_queue_flag_set(QUEUE_FLAG_SAME_COMP, q); blk_queue_flag_set(QUEUE_FLAG_SAME_FORCE, q); } else if (val == 1) { blk_queue_flag_set(QUEUE_FLAG_SAME_COMP, q); blk_queue_flag_clear(QUEUE_FLAG_SAME_FORCE, q); } else if (val == 0) { blk_queue_flag_clear(QUEUE_FLAG_SAME_COMP, q); blk_queue_flag_clear(QUEUE_FLAG_SAME_FORCE, q); } #endif return ret; } static ssize_t queue_poll_delay_store(struct gendisk *disk, const char *page, size_t count) { return count; } static ssize_t queue_poll_store(struct gendisk *disk, const char *page, size_t count) { ssize_t ret = count; struct request_queue *q = disk->queue; if (!(q->limits.features & BLK_FEAT_POLL)) { ret = -EINVAL; goto out; } pr_info_ratelimited("writes to the poll attribute are ignored.\n"); pr_info_ratelimited("please use driver specific parameters instead.\n"); out: return ret; } static ssize_t queue_io_timeout_show(struct gendisk *disk, char *page) { return sysfs_emit(page, "%u\n", jiffies_to_msecs(READ_ONCE(disk->queue->rq_timeout))); } static ssize_t queue_io_timeout_store(struct gendisk *disk, const char *page, size_t count) { unsigned int val; int err; struct request_queue *q = disk->queue; err = kstrtou32(page, 10, &val); if (err || val == 0) return -EINVAL; blk_queue_rq_timeout(q, msecs_to_jiffies(val)); return count; } static ssize_t queue_wc_show(struct gendisk *disk, char *page) { if (blk_queue_write_cache(disk->queue)) return sysfs_emit(page, "write back\n"); return sysfs_emit(page, "write through\n"); } static int queue_wc_store(struct gendisk *disk, const char *page, size_t count, struct queue_limits *lim) { bool disable; if (!strncmp(page, "write back", 10)) { disable = false; } else if (!strncmp(page, "write through", 13) || !strncmp(page, "none", 4)) { disable = true; } else { return 
-EINVAL;
	}

	/* Record the choice in the limits; it is committed by the caller. */
	if (disable)
		lim->flags |= BLK_FLAG_WRITE_CACHE_DISABLED;
	else
		lim->flags &= ~BLK_FLAG_WRITE_CACHE_DISABLED;
	return 0;
}

/* Read-only (0444) entry wired to <prefix>_show via ->show. */
#define QUEUE_RO_ENTRY(_prefix, _name)				\
static const struct queue_sysfs_entry _prefix##_entry = {	\
	.attr	= { .name = _name, .mode = 0444 },		\
	.show	= _prefix##_show,				\
};

/* Read-write (0644) entry wired to <prefix>_show / <prefix>_store. */
#define QUEUE_RW_ENTRY(_prefix, _name)				\
static const struct queue_sysfs_entry _prefix##_entry = {	\
	.attr	= { .name = _name, .mode = 0644 },		\
	.show	= _prefix##_show,				\
	.store	= _prefix##_store,				\
};

/* Read-only (0444) entry wired to <prefix>_show via ->show_limit. */
#define QUEUE_LIM_RO_ENTRY(_prefix, _name)			\
static const struct queue_sysfs_entry _prefix##_entry = {	\
	.attr		= { .name = _name, .mode = 0444 },	\
	.show_limit	= _prefix##_show,			\
}

/* Read-write (0644) entry using the ->show_limit / ->store_limit callbacks. */
#define QUEUE_LIM_RW_ENTRY(_prefix, _name)			\
static const struct queue_sysfs_entry _prefix##_entry = {	\
	.attr		= { .name = _name, .mode = 0644 },	\
	.show_limit	= _prefix##_show,			\
	.store_limit	= _prefix##_store,			\
}

/* Entry definitions: handler prefix on the left, sysfs file name on the right. */
QUEUE_RW_ENTRY(queue_requests, "nr_requests");
QUEUE_RW_ENTRY(queue_async_depth, "async_depth");
QUEUE_RW_ENTRY(queue_ra, "read_ahead_kb");
QUEUE_LIM_RW_ENTRY(queue_max_sectors, "max_sectors_kb");
QUEUE_LIM_RO_ENTRY(queue_max_hw_sectors, "max_hw_sectors_kb");
QUEUE_LIM_RO_ENTRY(queue_max_segments, "max_segments");
QUEUE_LIM_RO_ENTRY(queue_max_integrity_segments, "max_integrity_segments");
QUEUE_LIM_RO_ENTRY(queue_max_segment_size, "max_segment_size");
QUEUE_LIM_RO_ENTRY(queue_max_write_streams, "max_write_streams");
QUEUE_LIM_RO_ENTRY(queue_write_stream_granularity, "write_stream_granularity");
QUEUE_RW_ENTRY(elv_iosched, "scheduler");

QUEUE_LIM_RO_ENTRY(queue_logical_block_size, "logical_block_size");
QUEUE_LIM_RO_ENTRY(queue_physical_block_size, "physical_block_size");
QUEUE_LIM_RO_ENTRY(queue_chunk_sectors, "chunk_sectors");
QUEUE_LIM_RO_ENTRY(queue_io_min, "minimum_io_size");
QUEUE_LIM_RO_ENTRY(queue_io_opt, "optimal_io_size");

QUEUE_LIM_RO_ENTRY(queue_max_discard_segments, "max_discard_segments");
QUEUE_LIM_RO_ENTRY(queue_discard_granularity, "discard_granularity");
QUEUE_LIM_RO_ENTRY(queue_max_hw_discard_sectors, "discard_max_hw_bytes");
QUEUE_LIM_RW_ENTRY(queue_max_discard_sectors, "discard_max_bytes");
QUEUE_RO_ENTRY(queue_discard_zeroes_data, "discard_zeroes_data");

QUEUE_LIM_RO_ENTRY(queue_atomic_write_max_sectors, "atomic_write_max_bytes");
QUEUE_LIM_RO_ENTRY(queue_atomic_write_boundary_sectors,
		"atomic_write_boundary_bytes");
QUEUE_LIM_RO_ENTRY(queue_atomic_write_unit_max, "atomic_write_unit_max_bytes");
QUEUE_LIM_RO_ENTRY(queue_atomic_write_unit_min, "atomic_write_unit_min_bytes");

QUEUE_RO_ENTRY(queue_write_same_max, "write_same_max_bytes");
QUEUE_LIM_RO_ENTRY(queue_max_write_zeroes_sectors, "write_zeroes_max_bytes");
QUEUE_LIM_RO_ENTRY(queue_max_hw_wzeroes_unmap_sectors,
		"write_zeroes_unmap_max_hw_bytes");
QUEUE_LIM_RW_ENTRY(queue_max_wzeroes_unmap_sectors,
		"write_zeroes_unmap_max_bytes");
QUEUE_LIM_RO_ENTRY(queue_max_zone_append_sectors, "zone_append_max_bytes");
QUEUE_LIM_RO_ENTRY(queue_zone_write_granularity, "zone_write_granularity");

QUEUE_LIM_RO_ENTRY(queue_zoned, "zoned");
QUEUE_RW_ENTRY(queue_zoned_qd1_writes, "zoned_qd1_writes");
QUEUE_RO_ENTRY(queue_nr_zones, "nr_zones");
QUEUE_LIM_RO_ENTRY(queue_max_open_zones, "max_open_zones");
QUEUE_LIM_RO_ENTRY(queue_max_active_zones, "max_active_zones");

QUEUE_RW_ENTRY(queue_nomerges, "nomerges");
QUEUE_LIM_RW_ENTRY(queue_iostats_passthrough, "iostats_passthrough");
QUEUE_RW_ENTRY(queue_rq_affinity, "rq_affinity");
QUEUE_RW_ENTRY(queue_poll, "io_poll");
QUEUE_RW_ENTRY(queue_poll_delay, "io_poll_delay");
QUEUE_LIM_RW_ENTRY(queue_wc, "write_cache");
QUEUE_LIM_RO_ENTRY(queue_fua, "fua");
QUEUE_LIM_RO_ENTRY(queue_dax, "dax");
QUEUE_RW_ENTRY(queue_io_timeout, "io_timeout");
QUEUE_LIM_RO_ENTRY(queue_virt_boundary_mask, "virt_boundary_mask");
QUEUE_LIM_RO_ENTRY(queue_dma_alignment, "dma_alignment");

/* legacy alias for logical_block_size: */
static const struct queue_sysfs_entry queue_hw_sector_size_entry = {
	.attr		= {.name = "hw_sector_size", .mode = 0444 },
	.show_limit	= queue_logical_block_size_show,
};

QUEUE_LIM_RW_ENTRY(queue_rotational, "rotational");
QUEUE_LIM_RW_ENTRY(queue_iostats, "iostats");
QUEUE_LIM_RW_ENTRY(queue_add_random, "add_random");
QUEUE_LIM_RW_ENTRY(queue_stable_writes, "stable_writes");

#ifdef CONFIG_BLK_WBT
/*
 * Parse @page as a signed base-10 64-bit value into *@var.
 * Returns 0 on success or the negative error from kstrtos64().
 */
static ssize_t queue_var_store64(s64 *var, const char *page)
{
	int err;
	s64 v;

	err = kstrtos64(page, 10, &v);
	if (err < 0)
		return err;

	*var = v;
	return 0;
}

/*
 * Show the writeback-throttling minimum latency divided by 1000, under
 * rqos_state_mutex. Fails with -EINVAL when the queue has no wbt rq_qos,
 * and prints 0 when wbt is disabled.
 */
static ssize_t queue_wb_lat_show(struct gendisk *disk, char *page)
{
	ssize_t ret;
	struct request_queue *q = disk->queue;

	mutex_lock(&disk->rqos_state_mutex);
	if (!wbt_rq_qos(q)) {
		ret = -EINVAL;
		goto out;
	}

	if (wbt_disabled(q)) {
		ret = sysfs_emit(page, "0\n");
		goto out;
	}

	ret = sysfs_emit(page, "%llu\n", div_u64(wbt_get_min_lat(q), 1000));
out:
	mutex_unlock(&disk->rqos_state_mutex);
	return ret;
}

/*
 * Store handler for "wbt_lat_usec": values below -1 are rejected with
 * -EINVAL; everything else is handed to wbt_set_lat(). Returns @count on
 * success or a negative errno.
 */
static ssize_t queue_wb_lat_store(struct gendisk *disk, const char *page,
				  size_t count)
{
	ssize_t ret;
	s64 val;

	ret = queue_var_store64(&val, page);
	if (ret < 0)
		return ret;
	if (val < -1)
		return -EINVAL;

	ret = wbt_set_lat(disk, val);
	return ret ? ret : count;
}

QUEUE_RW_ENTRY(queue_wb_lat, "wbt_lat_usec");
#endif

/* Common attributes for bio-based and request-based queues. */
static const struct attribute *const queue_attrs[] = {
	/*
	 * Attributes which are protected with q->limits_lock.
	 */
	&queue_max_hw_sectors_entry.attr,
	&queue_max_sectors_entry.attr,
	&queue_max_segments_entry.attr,
	&queue_max_discard_segments_entry.attr,
	&queue_max_integrity_segments_entry.attr,
	&queue_max_segment_size_entry.attr,
	&queue_max_write_streams_entry.attr,
	&queue_write_stream_granularity_entry.attr,
	&queue_hw_sector_size_entry.attr,
	&queue_logical_block_size_entry.attr,
	&queue_physical_block_size_entry.attr,
	&queue_chunk_sectors_entry.attr,
	&queue_io_min_entry.attr,
	&queue_io_opt_entry.attr,
	&queue_discard_granularity_entry.attr,
	&queue_max_discard_sectors_entry.attr,
	&queue_max_hw_discard_sectors_entry.attr,
	&queue_atomic_write_max_sectors_entry.attr,
	&queue_atomic_write_boundary_sectors_entry.attr,
	&queue_atomic_write_unit_min_entry.attr,
	&queue_atomic_write_unit_max_entry.attr,
	&queue_max_write_zeroes_sectors_entry.attr,
	&queue_max_hw_wzeroes_unmap_sectors_entry.attr,
	&queue_max_wzeroes_unmap_sectors_entry.attr,
	&queue_max_zone_append_sectors_entry.attr,
	&queue_zone_write_granularity_entry.attr,
	&queue_rotational_entry.attr,
	&queue_zoned_entry.attr,
	&queue_max_open_zones_entry.attr,
	&queue_max_active_zones_entry.attr,
	&queue_iostats_passthrough_entry.attr,
	&queue_iostats_entry.attr,
	&queue_stable_writes_entry.attr,
	&queue_add_random_entry.attr,
	&queue_wc_entry.attr,
	&queue_fua_entry.attr,
	&queue_dax_entry.attr,
	&queue_virt_boundary_mask_entry.attr,
	&queue_dma_alignment_entry.attr,
	&queue_ra_entry.attr,

	/*
	 * Attributes which don't require locking.
	 */
	&queue_discard_zeroes_data_entry.attr,
	&queue_write_same_max_entry.attr,
	&queue_nr_zones_entry.attr,
	&queue_nomerges_entry.attr,
	&queue_poll_entry.attr,
	&queue_poll_delay_entry.attr,
	&queue_zoned_qd1_writes_entry.attr,

	NULL,
};

/* Request-based queue attributes that are not relevant for bio-based queues. */
static const struct attribute *const blk_mq_queue_attrs[] = {
	/*
	 * Attributes which require some form of locking other than
	 * q->sysfs_lock.
*/
	&elv_iosched_entry.attr,
	&queue_requests_entry.attr,
	&queue_async_depth_entry.attr,
#ifdef CONFIG_BLK_WBT
	&queue_wb_lat_entry.attr,
#endif
	/*
	 * Attributes which don't require locking.
	 */
	&queue_rq_affinity_entry.attr,
	&queue_io_timeout_entry.attr,

	NULL,
};

/*
 * Visibility callback for the common attribute group: the open/active zone
 * limits and the QD=1 writes control are hidden on queues that are not
 * zoned. Every other attribute keeps its declared mode.
 */
static umode_t queue_attr_visible(struct kobject *kobj, const struct attribute *attr,
		int n)
{
	struct gendisk *disk = container_of(kobj, struct gendisk, queue_kobj);
	struct request_queue *q = disk->queue;

	if ((attr == &queue_max_open_zones_entry.attr ||
	     attr == &queue_max_active_zones_entry.attr ||
	     attr == &queue_zoned_qd1_writes_entry.attr) &&
	    !blk_queue_is_zoned(q))
		return 0;

	return attr->mode;
}

/*
 * Visibility callback for the blk-mq attribute group: everything is hidden
 * on non-mq queues, and "io_timeout" is hidden when the driver provides no
 * ->timeout operation.
 */
static umode_t blk_mq_queue_attr_visible(struct kobject *kobj,
					 const struct attribute *attr, int n)
{
	struct gendisk *disk = container_of(kobj, struct gendisk, queue_kobj);
	struct request_queue *q = disk->queue;

	if (!queue_is_mq(q))
		return 0;

	if (attr == &queue_io_timeout_entry.attr && !q->mq_ops->timeout)
		return 0;

	return attr->mode;
}

static const struct attribute_group queue_attr_group = {
	.attrs_const = queue_attrs,
	.is_visible_const = queue_attr_visible,
};

static const struct attribute_group blk_mq_queue_attr_group = {
	.attrs_const = blk_mq_queue_attrs,
	.is_visible_const = blk_mq_queue_attr_visible,
};

/* Map an embedded struct attribute back to its queue_sysfs_entry. */
#define to_queue(atr) container_of_const((atr), struct queue_sysfs_entry, attr)

/*
 * sysfs ->show dispatcher. Returns -EIO when the entry has neither a show
 * nor a show_limit callback. A show_limit callback takes precedence and is
 * run with q->limits_lock held; a plain show callback is called directly.
 */
static ssize_t
queue_attr_show(struct kobject *kobj, struct attribute *attr, char *page)
{
	struct queue_sysfs_entry *entry = to_queue(attr);
	struct gendisk *disk = container_of(kobj, struct gendisk, queue_kobj);

	if (!entry->show && !entry->show_limit)
		return -EIO;

	if (entry->show_limit) {
		ssize_t res;

		mutex_lock(&disk->queue->limits_lock);
		res = entry->show_limit(disk, page);
		mutex_unlock(&disk->queue->limits_lock);
		return res;
	}

	return entry->show(disk, page);
}

/*
 * sysfs ->store dispatcher. Returns -EIO when the entry has neither a store
 * nor a store_limit callback.
 *
 * A store_limit callback edits a private copy of the limits obtained from
 * queue_limits_start_update(). The update is cancelled when the callback
 * fails and committed via queue_limits_commit_update_frozen() otherwise, in
 * which case @length is returned. A plain store callback is called directly.
 */
static ssize_t
queue_attr_store(struct kobject *kobj, struct attribute *attr,
		    const char *page, size_t length)
{
	struct queue_sysfs_entry *entry = to_queue(attr);
	struct gendisk *disk = container_of(kobj, struct gendisk, queue_kobj);
	struct request_queue *q = disk->queue;

	if (!entry->store_limit && !entry->store)
		return -EIO;

	if (entry->store_limit) {
		ssize_t res;

		struct queue_limits lim = queue_limits_start_update(q);

		res = entry->store_limit(disk, page, length, &lim);
		if (res < 0) {
			queue_limits_cancel_update(q);
			return res;
		}

		res = queue_limits_commit_update_frozen(q, &lim);
		if (res)
			return res;
		return length;
	}

	return entry->store(disk, page, length);
}

static const struct sysfs_ops queue_sysfs_ops = {
	.show	= queue_attr_show,
	.store	= queue_attr_store,
};

static const struct attribute_group *blk_queue_attr_groups[] = {
	&queue_attr_group,
	&blk_mq_queue_attr_group,
	NULL
};

static void blk_queue_release(struct kobject *kobj)
{
	/* nothing to do here, all data is associated with the parent gendisk */
}

const struct kobj_type blk_queue_ktype = {
	.default_groups = blk_queue_attr_groups,
	.sysfs_ops	= &queue_sysfs_ops,
	.release	= blk_queue_release,
};

/*
 * Shut down block tracing and remove the queue's debugfs directory tree,
 * clearing the cached directory pointers, all under the debugfs lock.
 */
static void blk_debugfs_remove(struct gendisk *disk)
{
	struct request_queue *q = disk->queue;

	blk_debugfs_lock_nomemsave(q);
	blk_trace_shutdown(q);
	debugfs_remove_recursive(q->debugfs_dir);
	q->debugfs_dir = NULL;
	q->sched_debugfs_dir = NULL;
	q->rqos_debugfs_dir = NULL;
	blk_debugfs_unlock_nomemrestore(q);
}

/**
 * blk_register_queue - register a block layer queue with sysfs
 * @disk: Disk of which the request queue should be registered with sysfs.
 *
 * Returns 0 on success or a negative errno; on failure everything registered
 * by this function so far is torn down again.
 */
int blk_register_queue(struct gendisk *disk)
{
	struct request_queue *q = disk->queue;
	unsigned int memflags;
	int ret;

	ret = kobject_add(&disk->queue_kobj, &disk_to_dev(disk)->kobj, "queue");
	if (ret < 0)
		return ret;

	if (queue_is_mq(q)) {
		ret = blk_mq_sysfs_register(disk);
		if (ret)
			goto out_del_queue_kobj;
	}
	mutex_lock(&q->sysfs_lock);

	memflags = blk_debugfs_lock(q);
	q->debugfs_dir = debugfs_create_dir(disk->disk_name, blk_debugfs_root);
	if (queue_is_mq(q))
		blk_mq_debugfs_register(q);
	blk_debugfs_unlock(q, memflags);

	/*
	 * For blk-mq rotational zoned devices, default to using QD=1
	 * writes. For non-mq rotational zoned devices, the device driver can
	 * set an appropriate default.
	 */
	if (queue_is_mq(q) && blk_queue_rot(q) && blk_queue_is_zoned(q))
		blk_queue_flag_set(QUEUE_FLAG_ZONED_QD1_WRITES, q);

	ret = disk_register_independent_access_ranges(disk);
	if (ret)
		goto out_debugfs_remove;

	ret = blk_crypto_sysfs_register(disk);
	if (ret)
		goto out_unregister_ia_ranges;

	if (queue_is_mq(q))
		elevator_set_default(q);

	blk_queue_flag_set(QUEUE_FLAG_REGISTERED, q);
	wbt_init_enable_default(disk);

	/* Now everything is ready and send out KOBJ_ADD uevent */
	kobject_uevent(&disk->queue_kobj, KOBJ_ADD);
	if (q->elevator)
		kobject_uevent(&q->elevator->kobj, KOBJ_ADD);
	mutex_unlock(&q->sysfs_lock);

	/*
	 * SCSI probing may synchronously create and destroy a lot of
	 * request_queues for non-existent devices.  Shutting down a fully
	 * functional queue takes measureable wallclock time as RCU grace
	 * periods are involved.  To avoid excessive latency in these
	 * cases, a request_queue starts out in a degraded mode which is
	 * faster to shut down and is made fully functional here as
	 * request_queues for non-existent devices never get registered.
	 */
	blk_queue_flag_set(QUEUE_FLAG_INIT_DONE, q);
	percpu_ref_switch_to_percpu(&q->q_usage_counter);

	return ret;

	/* Error unwinding: undo the steps above in reverse order. */
out_unregister_ia_ranges:
	disk_unregister_independent_access_ranges(disk);
out_debugfs_remove:
	blk_debugfs_remove(disk);
	mutex_unlock(&q->sysfs_lock);
	if (queue_is_mq(q))
		blk_mq_sysfs_unregister(disk);
out_del_queue_kobj:
	kobject_del(&disk->queue_kobj);
	return ret;
}

/**
 * blk_unregister_queue - counterpart of blk_register_queue()
 * @disk: Disk of which the request queue should be unregistered from sysfs.
 *
 * Note: the caller is responsible for guaranteeing that this function is called
 * after blk_register_queue() has finished.
 */
void blk_unregister_queue(struct gendisk *disk)
{
	struct request_queue *q = disk->queue;

	if (WARN_ON(!q))
		return;

	/* Return early if disk->queue was never registered. */
	if (!blk_queue_registered(q))
		return;

	/*
	 * Since sysfs_remove_dir() prevents adding new directory entries
	 * before removal of existing entries starts, protect against
	 * concurrent elv_iosched_store() calls.
	 */
	mutex_lock(&q->sysfs_lock);
	blk_queue_flag_clear(QUEUE_FLAG_REGISTERED, q);
	mutex_unlock(&q->sysfs_lock);

	/*
	 * Remove the sysfs attributes before unregistering the queue data
	 * structures that can be modified through sysfs.
	 */
	if (queue_is_mq(q))
		blk_mq_sysfs_unregister(disk);
	blk_crypto_sysfs_unregister(disk);

	mutex_lock(&q->sysfs_lock);
	disk_unregister_independent_access_ranges(disk);
	mutex_unlock(&q->sysfs_lock);

	/* Now that we've deleted all child objects, we can delete the queue. */
	kobject_uevent(&disk->queue_kobj, KOBJ_REMOVE);
	kobject_del(&disk->queue_kobj);

	if (queue_is_mq(q))
		elevator_set_none(q);

	blk_debugfs_remove(disk);
}
4 4 4 3 1 1 2 2 2 4 3 1 2 1 1 3 8 1 7 1 6 8 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 // SPDX-License-Identifier: GPL-2.0-or-later /* * Copyright (C)2003-2006 Helsinki University of Technology * Copyright (C)2003-2006 USAGI/WIDE Project */ /* * Authors: * Noriaki TAKAMIYA @USAGI * Masahide NAKAMURA @USAGI */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/module.h> #include <linux/skbuff.h> #include <linux/time.h> #include <linux/ipv6.h> #include <linux/icmpv6.h> #include <net/sock.h> 
#include <net/ipv6.h>
#include <net/ip6_checksum.h>
#include <net/rawv6.h>
#include <net/xfrm.h>
#include <net/mip6.h>

/*
 * Number of padding bytes (0..7) to add after @len bytes so that the
 * resulting offset is congruent to @n modulo 8.
 */
static inline unsigned int calc_padlen(unsigned int len, unsigned int n)
{
	return (n - len + 16) & 0x7;
}

/*
 * Write @padlen bytes of option padding at @data: a single Pad1 option for
 * one byte, a PadN option with a zeroed payload for two or more bytes, and
 * nothing for zero.
 *
 * Returns the address just past the padding, or NULL if @data is NULL.
 */
static inline void *mip6_padn(__u8 *data, __u8 padlen)
{
	if (!data)
		return NULL;
	if (padlen == 1) {
		data[0] = IPV6_TLV_PAD1;
	} else if (padlen > 1) {
		data[0] = IPV6_TLV_PADN;
		/* PadN length field excludes the type and length bytes. */
		data[1] = padlen - 2;
		if (padlen > 2)
			memset(data+2, 0, data[1]);
	}
	return data + padlen;
}

/* Send an ICMPv6 Parameter Problem for @skb with the given code and offset. */
static inline void mip6_param_prob(struct sk_buff *skb, u8 code, int pos)
{
	icmpv6_send(skb, ICMPV6_PARAMPROB, code, pos);
}

/*
 * Smallest ip6mh_hdrlen value accepted by mip6_mh_filter() for the given
 * Mobility Header type.  Types not listed here yield 0.
 */
static int mip6_mh_len(int type)
{
	int len = 0;

	switch (type) {
	case IP6_MH_TYPE_BRR:
		len = 0;
		break;
	case IP6_MH_TYPE_HOTI:
	case IP6_MH_TYPE_COTI:
	case IP6_MH_TYPE_BU:
	case IP6_MH_TYPE_BACK:
		len = 1;
		break;
	case IP6_MH_TYPE_HOT:
	case IP6_MH_TYPE_COT:
	case IP6_MH_TYPE_BERROR:
		len = 2;
		break;
	}
	return len;
}

/*
 * Filter registered with rawv6_mh_filter_register() to sanity-check a
 * Mobility Header.
 *
 * Returns 0 if the header passes all checks and -1 otherwise.  For a too
 * short header or a payload protocol other than IPPROTO_NONE an ICMPv6
 * Parameter Problem pointing at the offending field is sent as well.
 */
static int mip6_mh_filter(struct sock *sk, struct sk_buff *skb)
{
	struct ip6_mh _hdr;
	const struct ip6_mh *mh;

	mh = skb_header_pointer(skb, skb_transport_offset(skb),
				sizeof(_hdr), &_hdr);
	if (!mh)
		return -1;

	/* (hdrlen + 1) * 8 bytes must not exceed the skb length. */
	if (((mh->ip6mh_hdrlen + 1) << 3) > skb->len)
		return -1;

	if (mh->ip6mh_hdrlen < mip6_mh_len(mh->ip6mh_type)) {
		net_dbg_ratelimited("mip6: MH message too short: %d vs >=%d\n",
				    mh->ip6mh_hdrlen,
				    mip6_mh_len(mh->ip6mh_type));
		mip6_param_prob(skb, 0, offsetof(struct ip6_mh, ip6mh_hdrlen) +
				skb_network_header_len(skb));
		return -1;
	}

	if (mh->ip6mh_proto != IPPROTO_NONE) {
		net_dbg_ratelimited("mip6: MH invalid payload proto = %d\n",
				    mh->ip6mh_proto);
		mip6_param_prob(skb, 0, offsetof(struct ip6_mh, ip6mh_proto) +
				skb_network_header_len(skb));
		return -1;
	}

	return 0;
}

/*
 * State remembered by mip6_report_rl_allow(): timestamp, incoming interface
 * and address pair of the last report that was allowed, protected by @lock.
 */
struct mip6_report_rate_limiter {
	spinlock_t lock;
	ktime_t stamp;
	int iif;
	struct in6_addr src;
	struct in6_addr dst;
};

static struct mip6_report_rate_limiter mip6_report_rl = {
	.lock = __SPIN_LOCK_UNLOCKED(mip6_report_rl.lock)
};

/*
 * xfrm input handler for the destination options header.
 *
 * Returns the header's nexthdr value, or -ENOENT when the state has a
 * care-of address that is not the unspecified address and does not equal
 * the packet's source address.
 */
static int mip6_destopt_input(struct xfrm_state *x, struct sk_buff *skb)
{
	const struct ipv6hdr *iph = ipv6_hdr(skb);
	struct ipv6_destopt_hdr *destopt = (struct ipv6_destopt_hdr *)skb->data;
	int err = destopt->nexthdr;

	spin_lock(&x->lock);
	if (!ipv6_addr_equal(&iph->saddr, (struct in6_addr *)x->coaddr) &&
	    !ipv6_addr_any((struct in6_addr *)x->coaddr))
		err = -ENOENT;
	spin_unlock(&x->lock);

	return err;
}

/* Destination Option Header is inserted.
 * IP Header's src address is replaced with Home Address Option in
 * Destination Option Header.
 *
 * Always returns 0.
 */
static int mip6_destopt_output(struct xfrm_state *x, struct sk_buff *skb)
{
	struct ipv6hdr *iph;
	struct ipv6_destopt_hdr *dstopt;
	struct ipv6_destopt_hao *hao;
	u8 nexthdr;
	int len;

	skb_push(skb, -skb_network_offset(skb));
	iph = ipv6_hdr(skb);

	/*
	 * NOTE(review): the byte at skb_mac_header() is treated as the
	 * next-header field of the preceding header - presumably set up by
	 * the xfrm output path; confirm against the caller.
	 */
	nexthdr = *skb_mac_header(skb);
	*skb_mac_header(skb) = IPPROTO_DSTOPTS;

	dstopt = (struct ipv6_destopt_hdr *)skb_transport_header(skb);
	dstopt->nexthdr = nexthdr;

	/* Pad after the fixed header so the HAO starts at offset 6 mod 8. */
	hao = mip6_padn((char *)(dstopt + 1),
			calc_padlen(sizeof(*dstopt), 6));

	hao->type = IPV6_TLV_HAO;
	BUILD_BUG_ON(sizeof(*hao) != 18);
	/* Option length excludes the type and length bytes. */
	hao->length = sizeof(*hao) - 2;

	len = ((char *)hao - (char *)dstopt) + sizeof(*hao);

	/* Save the original source in the HAO, then install the care-of address. */
	memcpy(&hao->addr, &iph->saddr, sizeof(hao->addr));
	spin_lock_bh(&x->lock);
	memcpy(&iph->saddr, x->coaddr, sizeof(iph->saddr));
	spin_unlock_bh(&x->lock);

	WARN_ON(len != x->props.header_len);
	/* hdrlen is expressed in 8-byte units, not counting the first one. */
	dstopt->hdrlen = (x->props.header_len >> 3) - 1;

	return 0;
}

/*
 * Decide whether a report for (@stamp, @dst, @src, @iif) may be sent.
 *
 * Returns 1 and records the tuple if it differs in any element from the last
 * recorded one; returns 0 if it is identical.
 */
static inline int mip6_report_rl_allow(ktime_t stamp,
				       const struct in6_addr *dst,
				       const struct in6_addr *src, int iif)
{
	int allow = 0;

	spin_lock_bh(&mip6_report_rl.lock);
	if (mip6_report_rl.stamp != stamp ||
	    mip6_report_rl.iif != iif ||
	    !ipv6_addr_equal(&mip6_report_rl.src, src) ||
	    !ipv6_addr_equal(&mip6_report_rl.dst, dst)) {
		mip6_report_rl.stamp = stamp;
		mip6_report_rl.iif = iif;
		mip6_report_rl.src = *src;
		mip6_report_rl.dst = *dst;
		allow = 1;
	}
	spin_unlock_bh(&mip6_report_rl.lock);
	return allow;
}

/*
 * xfrm reject handler for the destination options type.
 *
 * Builds a host-to-host selector from the packet and flow and passes it to
 * km_report(), together with the Home Address option's address when one is
 * found.  Nothing is reported (and 0 is returned) for Mobility Header flows
 * with a type up to IP6_MH_TYPE_MAX or when the rate limiter declines.
 *
 * Returns 0 or the value returned by km_report().
 */
static int mip6_destopt_reject(struct xfrm_state *x, struct sk_buff *skb,
			       const struct flowi *fl)
{
	struct net *net = xs_net(x);
	struct inet6_skb_parm *opt = (struct inet6_skb_parm *)skb->cb;
	const struct flowi6 *fl6 = &fl->u.ip6;
	struct ipv6_destopt_hao *hao = NULL;
	struct xfrm_selector sel;
	int offset;
	ktime_t stamp;
	int err = 0;

	if (unlikely(fl6->flowi6_proto == IPPROTO_MH &&
		     fl6->fl6_mh_type <= IP6_MH_TYPE_MAX))
		goto out;

	/* Locate the Home Address option, if the packet carried one. */
	if (likely(opt->dsthao)) {
		offset = ipv6_find_tlv(skb, opt->dsthao, IPV6_TLV_HAO);
		if (likely(offset >= 0))
			hao = (struct ipv6_destopt_hao *)
					(skb_network_header(skb) + offset);
	}

	stamp = skb_get_ktime(skb);

	if (!mip6_report_rl_allow(stamp, &ipv6_hdr(skb)->daddr,
				  hao ? &hao->addr : &ipv6_hdr(skb)->saddr,
				  opt->iif))
		goto out;

	memset(&sel, 0, sizeof(sel));
	memcpy(&sel.daddr, (xfrm_address_t *)&ipv6_hdr(skb)->daddr,
	       sizeof(sel.daddr));
	sel.prefixlen_d = 128;
	memcpy(&sel.saddr, (xfrm_address_t *)&ipv6_hdr(skb)->saddr,
	       sizeof(sel.saddr));
	sel.prefixlen_s = 128;
	sel.family = AF_INET6;
	sel.proto = fl6->flowi6_proto;
	/* Match a port exactly only when the flow actually carries one. */
	sel.dport = xfrm_flowi_dport(fl, &fl6->uli);
	if (sel.dport)
		sel.dport_mask = htons(~0);
	sel.sport = xfrm_flowi_sport(fl, &fl6->uli);
	if (sel.sport)
		sel.sport_mask = htons(~0);
	sel.ifindex = fl6->flowi6_oif;

	err = km_report(net, IPPROTO_DSTOPTS, &sel,
			(hao ? (xfrm_address_t *)&hao->addr : NULL));

 out:
	return err;
}

/*
 * Validate a new destination-options state and set its header length.
 *
 * Returns 0 on success, or -EINVAL (with an extack message) when the SPI is
 * non-zero or the mode is not XFRM_MODE_ROUTEOPTIMIZATION.
 */
static int mip6_destopt_init_state(struct xfrm_state *x,
				   struct netlink_ext_ack *extack)
{
	if (x->id.spi) {
		NL_SET_ERR_MSG(extack, "SPI must be 0");
		return -EINVAL;
	}
	if (x->props.mode != XFRM_MODE_ROUTEOPTIMIZATION) {
		NL_SET_ERR_MSG(extack, "XFRM mode must be XFRM_MODE_ROUTEOPTIMIZATION");
		return -EINVAL;
	}

	/* Fixed header + padding + Home Address option; expected to be 24. */
	x->props.header_len = sizeof(struct ipv6_destopt_hdr) +
		calc_padlen(sizeof(struct ipv6_destopt_hdr), 6) +
		sizeof(struct ipv6_destopt_hao);
	WARN_ON(x->props.header_len != 24);

	return 0;
}

/*
 * Do nothing about destroying since it has no specific operation for
 * destination options header unlike IPsec protocols.
 */
static void mip6_destopt_destroy(struct xfrm_state *x)
{
}

/* xfrm type describing the destination options handling above. */
static const struct xfrm_type mip6_destopt_type = {
	.owner		= THIS_MODULE,
	.proto		= IPPROTO_DSTOPTS,
	.flags		= XFRM_TYPE_NON_FRAGMENT | XFRM_TYPE_LOCAL_COADDR,
	.init_state	= mip6_destopt_init_state,
	.destructor	= mip6_destopt_destroy,
	.input		= mip6_destopt_input,
	.output		= mip6_destopt_output,
	.reject		= mip6_destopt_reject,
};

/*
 * xfrm input handler for the type 2 routing header.
 *
 * Returns the header's nexthdr value, or -ENOENT when the state has a
 * care-of address that is not the unspecified address and does not equal
 * the packet's destination address.
 */
static int mip6_rthdr_input(struct xfrm_state *x, struct sk_buff *skb)
{
	const struct ipv6hdr *iph = ipv6_hdr(skb);
	struct rt2_hdr *rt2 = (struct rt2_hdr *)skb->data;
	int err = rt2->rt_hdr.nexthdr;

	spin_lock(&x->lock);
	if (!ipv6_addr_equal(&iph->daddr, (struct in6_addr *)x->coaddr) &&
	    !ipv6_addr_any((struct in6_addr *)x->coaddr))
		err = -ENOENT;
	spin_unlock(&x->lock);

	return err;
}

/* Routing Header type 2 is inserted.
 * IP Header's dst address is replaced with Routing Header's Home Address.
 *
 * Always returns 0.
 */
static int mip6_rthdr_output(struct xfrm_state *x, struct sk_buff *skb)
{
	struct ipv6hdr *iph;
	struct rt2_hdr *rt2;
	u8 nexthdr;

	skb_push(skb, -skb_network_offset(skb));
	iph = ipv6_hdr(skb);

	/* Same next-header swap as in mip6_destopt_output(). */
	nexthdr = *skb_mac_header(skb);
	*skb_mac_header(skb) = IPPROTO_ROUTING;

	rt2 = (struct rt2_hdr *)skb_transport_header(skb);
	rt2->rt_hdr.nexthdr = nexthdr;
	rt2->rt_hdr.hdrlen = (x->props.header_len >> 3) - 1;
	rt2->rt_hdr.type = IPV6_SRCRT_TYPE_2;
	rt2->rt_hdr.segments_left = 1;
	memset(&rt2->reserved, 0, sizeof(rt2->reserved));

	WARN_ON(rt2->rt_hdr.hdrlen != 2);

	/* Save the original destination, then install the care-of address. */
	memcpy(&rt2->addr, &iph->daddr, sizeof(rt2->addr));
	spin_lock_bh(&x->lock);
	memcpy(&iph->daddr, x->coaddr, sizeof(iph->daddr));
	spin_unlock_bh(&x->lock);

	return 0;
}

/*
 * Validate a new routing-header state and set its header length to
 * sizeof(struct rt2_hdr).
 *
 * Returns 0 on success, or -EINVAL (with an extack message) when the SPI is
 * non-zero or the mode is not XFRM_MODE_ROUTEOPTIMIZATION.
 */
static int mip6_rthdr_init_state(struct xfrm_state *x,
				 struct netlink_ext_ack *extack)
{
	if (x->id.spi) {
		NL_SET_ERR_MSG(extack, "SPI must be 0");
		return -EINVAL;
	}
	if (x->props.mode != XFRM_MODE_ROUTEOPTIMIZATION) {
		NL_SET_ERR_MSG(extack, "XFRM mode must be XFRM_MODE_ROUTEOPTIMIZATION");
		return -EINVAL;
	}

	x->props.header_len = sizeof(struct rt2_hdr);

	return 0;
}

/*
 * Do nothing about destroying since it has no specific operation for routing
 * header type 2 unlike IPsec protocols.
 */
static void mip6_rthdr_destroy(struct xfrm_state *x)
{
}

/* xfrm type describing the type 2 routing header handling above. */
static const struct xfrm_type mip6_rthdr_type = {
	.owner		= THIS_MODULE,
	.proto		= IPPROTO_ROUTING,
	.flags		= XFRM_TYPE_NON_FRAGMENT | XFRM_TYPE_REMOTE_COADDR,
	.init_state	= mip6_rthdr_init_state,
	.destructor	= mip6_rthdr_destroy,
	.input		= mip6_rthdr_input,
	.output		= mip6_rthdr_output,
};

/*
 * Module init: register both xfrm types and the raw-socket MH filter.
 *
 * Returns 0 on success.  Any failure unregisters what was registered so far
 * and returns -EAGAIN.
 */
static int __init mip6_init(void)
{
	pr_info("Mobile IPv6\n");

	if (xfrm_register_type(&mip6_destopt_type, AF_INET6) < 0) {
		pr_info("%s: can't add xfrm type(destopt)\n", __func__);
		goto mip6_destopt_xfrm_fail;
	}
	if (xfrm_register_type(&mip6_rthdr_type, AF_INET6) < 0) {
		pr_info("%s: can't add xfrm type(rthdr)\n", __func__);
		goto mip6_rthdr_xfrm_fail;
	}
	if (rawv6_mh_filter_register(mip6_mh_filter) < 0) {
		pr_info("%s: can't add rawv6 mh filter\n", __func__);
		goto mip6_rawv6_mh_fail;
	}

	return 0;

 mip6_rawv6_mh_fail:
	xfrm_unregister_type(&mip6_rthdr_type, AF_INET6);
 mip6_rthdr_xfrm_fail:
	xfrm_unregister_type(&mip6_destopt_type, AF_INET6);
 mip6_destopt_xfrm_fail:
	return -EAGAIN;
}

/* Module exit: undo mip6_init() in reverse order. */
static void __exit mip6_fini(void)
{
	if (rawv6_mh_filter_unregister(mip6_mh_filter) < 0)
		pr_info("%s: can't remove rawv6 mh filter\n", __func__);
	xfrm_unregister_type(&mip6_rthdr_type, AF_INET6);
	xfrm_unregister_type(&mip6_destopt_type, AF_INET6);
}

module_init(mip6_init);
module_exit(mip6_fini);

MODULE_DESCRIPTION("IPv6 Mobility driver");
MODULE_LICENSE("GPL");
MODULE_ALIAS_XFRM_TYPE(AF_INET6, XFRM_PROTO_DSTOPTS);
MODULE_ALIAS_XFRM_TYPE(AF_INET6, XFRM_PROTO_ROUTING);
1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 /* SPDX-License-Identifier: GPL-2.0 */ /* * Copyright (C) 1991, 1992 Linus Torvalds * Copyright (C) 2000, 2001, 2002 Andi Kleen, SuSE Labs */ #ifndef _ASM_X86_STACKTRACE_H #define _ASM_X86_STACKTRACE_H #include <linux/uaccess.h> #include <linux/ptrace.h> #include <asm/cpu_entry_area.h> #include <asm/switch_to.h> enum stack_type { STACK_TYPE_UNKNOWN, STACK_TYPE_TASK, STACK_TYPE_IRQ, STACK_TYPE_SOFTIRQ, STACK_TYPE_ENTRY, STACK_TYPE_EXCEPTION, STACK_TYPE_EXCEPTION_LAST = STACK_TYPE_EXCEPTION + N_EXCEPTION_STACKS-1, }; struct stack_info { enum stack_type type; unsigned long *begin, *end, *next_sp; }; bool in_task_stack(unsigned long *stack, struct task_struct *task, struct stack_info *info); bool in_entry_stack(unsigned long *stack, struct stack_info *info); int get_stack_info(unsigned long *stack, struct task_struct *task, struct stack_info *info, unsigned long *visit_mask); bool get_stack_info_noinstr(unsigned long *stack, struct task_struct *task, struct stack_info *info); static __always_inline bool get_stack_guard_info(unsigned long *stack, struct stack_info *info) { /* make sure it's not in the stack proper */ if (get_stack_info_noinstr(stack, current, info)) return false; /* but if it is in the page below it, we hit a guard */ return get_stack_info_noinstr((void *)stack + PAGE_SIZE, current, info); } const char *stack_type_name(enum stack_type type); static inline bool on_stack(struct stack_info *info, void *addr, size_t len) { void *begin = info->begin; void *end = info->end; return (info->type != STACK_TYPE_UNKNOWN && addr >= begin && addr < end && addr + len > begin && addr + len <= end); } #ifdef CONFIG_X86_32 
#define STACKSLOTS_PER_LINE 8 #else #define STACKSLOTS_PER_LINE 4 #endif #ifdef CONFIG_FRAME_POINTER static inline unsigned long * get_frame_pointer(struct task_struct *task, struct pt_regs *regs) { if (regs) return (unsigned long *)regs->bp; if (task == current) return __builtin_frame_address(0); return &((struct inactive_task_frame *)task->thread.sp)->bp; } #else static inline unsigned long * get_frame_pointer(struct task_struct *task, struct pt_regs *regs) { return NULL; } #endif /* CONFIG_FRAME_POINTER */ static inline unsigned long * get_stack_pointer(struct task_struct *task, struct pt_regs *regs) { if (regs) return (unsigned long *)regs->sp; if (task == current) return __builtin_frame_address(0); return (unsigned long *)task->thread.sp; } /* The form of the top of the frame on the stack */ struct stack_frame { struct stack_frame *next_frame; unsigned long return_address; }; struct stack_frame_ia32 { u32 next_frame; u32 return_address; }; void show_opcodes(struct pt_regs *regs, const char *loglvl); void show_ip(struct pt_regs *regs, const char *loglvl); #endif /* _ASM_X86_STACKTRACE_H */
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 /* SPDX-License-Identifier: GPL-2.0-only */ /* * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved. */ #ifndef __INODE_DOT_H__ #define __INODE_DOT_H__ #include <linux/fs.h> #include <linux/buffer_head.h> #include <linux/mm.h> #include "util.h" bool gfs2_release_folio(struct folio *folio, gfp_t gfp_mask); ssize_t gfs2_internal_read(struct gfs2_inode *ip, char *buf, loff_t *pos, size_t size); void gfs2_set_aops(struct inode *inode); static inline int gfs2_is_stuffed(const struct gfs2_inode *ip) { return !ip->i_height; } static inline int gfs2_is_jdata(const struct gfs2_inode *ip) { return ip->i_diskflags & GFS2_DIF_JDATA; } static inline bool gfs2_is_ordered(const struct gfs2_sbd *sdp) { return sdp->sd_args.ar_data == GFS2_DATA_ORDERED; } static inline bool gfs2_is_writeback(const struct gfs2_sbd *sdp) { return sdp->sd_args.ar_data == GFS2_DATA_WRITEBACK; } static inline int gfs2_is_dir(const struct gfs2_inode *ip) { return S_ISDIR(ip->i_inode.i_mode); } static inline void gfs2_set_inode_blocks(struct inode *inode, u64 blocks) { inode->i_blocks = blocks << (inode->i_blkbits - SECTOR_SHIFT); } static inline u64 gfs2_get_inode_blocks(const struct inode *inode) { return inode->i_blocks >> (inode->i_blkbits - SECTOR_SHIFT); } static inline void gfs2_add_inode_blocks(struct inode *inode, s64 change) { change <<= inode->i_blkbits - SECTOR_SHIFT; gfs2_assert(GFS2_SB(inode), (change >= 0 || inode->i_blocks >= -change)); inode->i_blocks += change; } static inline int 
gfs2_check_inum(const struct gfs2_inode *ip, u64 no_addr, u64 no_formal_ino) { return ip->i_no_addr == no_addr && ip->i_no_formal_ino == no_formal_ino; } static inline void gfs2_inum_out(const struct gfs2_inode *ip, struct gfs2_dirent *dent) { dent->de_inum.no_formal_ino = cpu_to_be64(ip->i_no_formal_ino); dent->de_inum.no_addr = cpu_to_be64(ip->i_no_addr); } static inline int gfs2_check_internal_file_size(struct inode *inode, u64 minsize, u64 maxsize) { u64 size = i_size_read(inode); if (size < minsize || size > maxsize) goto err; if (size & (BIT(inode->i_blkbits) - 1)) goto err; return 0; err: gfs2_consist_inode(GFS2_I(inode)); return -EIO; } void gfs2_setup_inode(struct inode *inode); struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned type, u64 no_addr, u64 no_formal_ino, unsigned int blktype); struct inode *gfs2_lookup_by_inum(struct gfs2_sbd *sdp, u64 no_addr, u64 no_formal_ino, unsigned int blktype); int gfs2_dinode_dealloc(struct gfs2_inode *ip); struct inode *gfs2_lookupi(struct inode *dir, const struct qstr *name, int is_root); int gfs2_permission(struct mnt_idmap *idmap, struct inode *inode, int mask); struct inode *gfs2_lookup_meta(struct inode *dip, const char *name); void gfs2_dinode_out(const struct gfs2_inode *ip, void *buf); int gfs2_open_common(struct inode *inode, struct file *file); loff_t gfs2_seek_data(struct file *file, loff_t offset); loff_t gfs2_seek_hole(struct file *file, loff_t offset); extern const struct file_operations gfs2_file_fops_nolock; extern const struct file_operations gfs2_dir_fops_nolock; int gfs2_fileattr_get(struct dentry *dentry, struct file_kattr *fa); int gfs2_fileattr_set(struct mnt_idmap *idmap, struct dentry *dentry, struct file_kattr *fa); void gfs2_set_inode_flags(struct inode *inode); #ifdef CONFIG_GFS2_FS_LOCKING_DLM extern const struct file_operations gfs2_file_fops; extern const struct file_operations gfs2_dir_fops; static inline int gfs2_localflocks(const struct gfs2_sbd *sdp) { return 
sdp->sd_args.ar_localflocks; } #else /* Single node only */ #define gfs2_file_fops gfs2_file_fops_nolock #define gfs2_dir_fops gfs2_dir_fops_nolock static inline int gfs2_localflocks(const struct gfs2_sbd *sdp) { return 1; } #endif /* CONFIG_GFS2_FS_LOCKING_DLM */ #endif /* __INODE_DOT_H__ */
6 8 5 3 2 6 5 5 5 5 5 5 3 5 2 1 1 1 1 1 1 2 4 2 4 4 3 3 3 3 3 3 2 2 2 1 2 3 1 4 3 2 2 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 // SPDX-License-Identifier: GPL-2.0-only #include <linux/module.h> #include <linux/inet_diag.h> #include <linux/sock_diag.h> #include <net/inet_sock.h> #include <net/raw.h> #include <net/rawv6.h> #ifdef pr_fmt # undef pr_fmt #endif #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt static struct raw_hashinfo * raw_get_hashinfo(const struct inet_diag_req_v2 *r) { if (r->sdiag_family == AF_INET) { return &raw_v4_hashinfo; #if IS_ENABLED(CONFIG_IPV6) } else if (r->sdiag_family == AF_INET6) { return &raw_v6_hashinfo; #endif } else { return ERR_PTR(-EINVAL); } } /* * Due to requirement of not breaking user API we can't simply * rename @pad field in inet_diag_req_v2 structure, instead * use helper to figure it out. 
*/ static bool raw_lookup(struct net *net, const struct sock *sk, const struct inet_diag_req_v2 *req) { struct inet_diag_req_raw *r = (void *)req; if (r->sdiag_family == AF_INET) return raw_v4_match(net, sk, r->sdiag_raw_protocol, r->id.idiag_dst[0], r->id.idiag_src[0], r->id.idiag_if, 0); #if IS_ENABLED(CONFIG_IPV6) else return raw_v6_match(net, sk, r->sdiag_raw_protocol, (const struct in6_addr *)r->id.idiag_src, (const struct in6_addr *)r->id.idiag_dst, r->id.idiag_if, 0); #endif return false; } static struct sock *raw_sock_get(struct net *net, const struct inet_diag_req_v2 *r) { struct raw_hashinfo *hashinfo = raw_get_hashinfo(r); struct hlist_head *hlist; struct sock *sk; int slot; if (IS_ERR(hashinfo)) return ERR_CAST(hashinfo); rcu_read_lock(); for (slot = 0; slot < RAW_HTABLE_SIZE; slot++) { hlist = &hashinfo->ht[slot]; sk_for_each_rcu(sk, hlist) { if (raw_lookup(net, sk, r)) { /* * Grab it and keep until we fill * diag message to be reported, so * caller should call sock_put then. 
*/ if (refcount_inc_not_zero(&sk->sk_refcnt)) goto out_unlock; } } } sk = ERR_PTR(-ENOENT); out_unlock: rcu_read_unlock(); return sk; } static int raw_diag_dump_one(struct netlink_callback *cb, const struct inet_diag_req_v2 *r) { struct sk_buff *in_skb = cb->skb; struct sk_buff *rep; struct sock *sk; struct net *net; int err; net = sock_net(in_skb->sk); sk = raw_sock_get(net, r); if (IS_ERR(sk)) return PTR_ERR(sk); rep = nlmsg_new(nla_total_size(sizeof(struct inet_diag_msg)) + inet_diag_msg_attrs_size() + nla_total_size(sizeof(struct inet_diag_meminfo)) + 64, GFP_KERNEL); if (!rep) { sock_put(sk); return -ENOMEM; } err = inet_sk_diag_fill(sk, NULL, rep, cb, r, 0, netlink_net_capable(in_skb, CAP_NET_ADMIN)); sock_put(sk); if (err < 0) { kfree_skb(rep); return err; } err = nlmsg_unicast(net->diag_nlsk, rep, NETLINK_CB(in_skb).portid); return err; } static int sk_diag_dump(struct sock *sk, struct sk_buff *skb, struct netlink_callback *cb, const struct inet_diag_req_v2 *r, bool net_admin) { if (!inet_diag_bc_sk(cb->data, sk)) return 0; return inet_sk_diag_fill(sk, NULL, skb, cb, r, NLM_F_MULTI, net_admin); } static void raw_diag_dump(struct sk_buff *skb, struct netlink_callback *cb, const struct inet_diag_req_v2 *r) { bool net_admin = netlink_net_capable(cb->skb, CAP_NET_ADMIN); struct raw_hashinfo *hashinfo = raw_get_hashinfo(r); struct net *net = sock_net(skb->sk); int num, s_num, slot, s_slot; struct hlist_head *hlist; struct sock *sk = NULL; if (IS_ERR(hashinfo)) return; s_slot = cb->args[0]; num = s_num = cb->args[1]; rcu_read_lock(); for (slot = s_slot; slot < RAW_HTABLE_SIZE; s_num = 0, slot++) { num = 0; hlist = &hashinfo->ht[slot]; sk_for_each_rcu(sk, hlist) { struct inet_sock *inet = inet_sk(sk); if (!net_eq(sock_net(sk), net)) continue; if (num < s_num) goto next; if (sk->sk_family != r->sdiag_family) goto next; if (r->id.idiag_sport != inet->inet_sport && r->id.idiag_sport) goto next; if (r->id.idiag_dport != inet->inet_dport && r->id.idiag_dport) goto 
next; if (sk_diag_dump(sk, skb, cb, r, net_admin) < 0) goto out_unlock; next: num++; } } out_unlock: rcu_read_unlock(); cb->args[0] = slot; cb->args[1] = num; } static void raw_diag_get_info(struct sock *sk, struct inet_diag_msg *r, void *info) { r->idiag_rqueue = sk_rmem_alloc_get(sk); r->idiag_wqueue = sk_wmem_alloc_get(sk); } #ifdef CONFIG_INET_DIAG_DESTROY static int raw_diag_destroy(struct sk_buff *in_skb, const struct inet_diag_req_v2 *r) { struct net *net = sock_net(in_skb->sk); struct sock *sk; int err; sk = raw_sock_get(net, r); if (IS_ERR(sk)) return PTR_ERR(sk); err = sock_diag_destroy(sk, ECONNABORTED); sock_put(sk); return err; } #endif static const struct inet_diag_handler raw_diag_handler = { .owner = THIS_MODULE, .dump = raw_diag_dump, .dump_one = raw_diag_dump_one, .idiag_get_info = raw_diag_get_info, .idiag_type = IPPROTO_RAW, .idiag_info_size = 0, #ifdef CONFIG_INET_DIAG_DESTROY .destroy = raw_diag_destroy, #endif }; static void __always_unused __check_inet_diag_req_raw(void) { /* * Make sure the two structures are identical, * except the @pad field. 
*/ #define __offset_mismatch(m1, m2) \ (offsetof(struct inet_diag_req_v2, m1) != \ offsetof(struct inet_diag_req_raw, m2)) BUILD_BUG_ON(sizeof(struct inet_diag_req_v2) != sizeof(struct inet_diag_req_raw)); BUILD_BUG_ON(__offset_mismatch(sdiag_family, sdiag_family)); BUILD_BUG_ON(__offset_mismatch(sdiag_protocol, sdiag_protocol)); BUILD_BUG_ON(__offset_mismatch(idiag_ext, idiag_ext)); BUILD_BUG_ON(__offset_mismatch(pad, sdiag_raw_protocol)); BUILD_BUG_ON(__offset_mismatch(idiag_states, idiag_states)); BUILD_BUG_ON(__offset_mismatch(id, id)); #undef __offset_mismatch } static int __init raw_diag_init(void) { return inet_diag_register(&raw_diag_handler); } static void __exit raw_diag_exit(void) { inet_diag_unregister(&raw_diag_handler); } module_init(raw_diag_init); module_exit(raw_diag_exit); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("RAW socket monitoring via SOCK_DIAG"); MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK, NETLINK_SOCK_DIAG, 2-255 /* AF_INET - IPPROTO_RAW */); MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK, NETLINK_SOCK_DIAG, 10-255 /* AF_INET6 - IPPROTO_RAW */);
6 6 1 22 1 22 21 21 19 19 19 19 6 6 18 18 18 18 16 15 15 12 15 2 13 13 13 13 13 13 13 13 13 13 9 22 2 2 125 122 122 7 122 120 120 120 4 4 4 4 4 4 2 2 2 2 2 2 2 2 15 15 15 15 12 12 11 5 5 5 4 4 4 4 4 2 2 1 3 36 35 36 36 1 105 105 105 105 200 197 372 15 15 15 15 9 9 46 46 44 46 43 339 9 357 322 321 322 319 322 3 3 322 322 169 116 5 156 156 5 5 156 5 3 162 162 43 44 161 150 2 135 135 135 133 135 2 135 3 3 2 2 9 9 9 1 3 9 15 15 12 3 9 15 15 12 15 14 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 
415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 
915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 
1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 // SPDX-License-Identifier: GPL-2.0-or-later /* SCTP kernel implementation * Copyright (c) 1999-2000 Cisco, Inc. * Copyright (c) 1999-2001 Motorola, Inc. * Copyright (c) 2001-2003 International Business Machines, Corp. * Copyright (c) 2001 Intel Corp. * Copyright (c) 2001 Nokia, Inc. * Copyright (c) 2001 La Monte H.P. Yarroll * * This file is part of the SCTP kernel implementation * * These functions handle all input from the IP layer into SCTP. * * Please send any bug reports or fixes you make to the * email address(es): * lksctp developers <linux-sctp@vger.kernel.org> * * Written or modified by: * La Monte H.P. Yarroll <piggy@acm.org> * Karl Knutson <karl@athena.chicago.il.us> * Xingang Guo <xingang.guo@intel.com> * Jon Grimm <jgrimm@us.ibm.com> * Hui Huang <hui.huang@nokia.com> * Daisy Chang <daisyc@us.ibm.com> * Sridhar Samudrala <sri@us.ibm.com> * Ardelle Fan <ardelle.fan@intel.com> */ #include <linux/types.h> #include <linux/list.h> /* For struct list_head */ #include <linux/socket.h> #include <linux/ip.h> #include <linux/time.h> /* For struct timeval */ #include <linux/slab.h> #include <net/ip.h> #include <net/icmp.h> #include <net/snmp.h> #include <net/sock.h> #include <net/xfrm.h> #include <net/sctp/sctp.h> #include <net/sctp/sm.h> #include <net/sctp/checksum.h> #include <net/net_namespace.h> #include <linux/rhashtable.h> #include <net/sock_reuseport.h> /* Forward declarations for internal helpers. 
*/
static int sctp_rcv_ootb(struct sk_buff *);
static struct sctp_association *__sctp_rcv_lookup(struct net *net,
				      struct sk_buff *skb,
				      const union sctp_addr *paddr,
				      const union sctp_addr *laddr,
				      struct sctp_transport **transportp,
				      int dif, int sdif);
static struct sctp_endpoint *__sctp_rcv_lookup_endpoint(
					struct net *net, struct sk_buff *skb,
					const union sctp_addr *laddr,
					const union sctp_addr *daddr,
					int dif, int sdif);
static struct sctp_association *__sctp_lookup_association(
					struct net *net,
					const union sctp_addr *local,
					const union sctp_addr *peer,
					struct sctp_transport **pt,
					int dif, int sdif);

static int sctp_add_backlog(struct sock *sk, struct sk_buff *skb);


/* Verify the checksum of an SCTP packet.
 *
 * Compares the checksum stored in the SCTP header with the value computed
 * by sctp_compute_cksum().  Returns 0 when they match; on mismatch bumps
 * SCTP_MIB_CHECKSUMERRORS and returns -1.
 */
static inline int sctp_rcv_checksum(struct net *net, struct sk_buff *skb)
{
	struct sctphdr *sh = sctp_hdr(skb);
	__le32 cmp = sh->checksum;
	__le32 val = sctp_compute_cksum(skb, 0);

	if (val != cmp) {
		/* CRC failure, dump it. */
		__SCTP_INC_STATS(net, SCTP_MIB_CHECKSUMERRORS);
		return -1;
	}
	return 0;
}

/*
 * This is the routine which IP calls when receiving an SCTP packet.
 *
 * The packet is validated, matched to an association (or, failing that,
 * to an endpoint), wrapped in a chunk and then either pushed onto the
 * receiver's inqueue or parked on the socket backlog when the socket is
 * busy.  Always returns 0; packets that cannot be processed are freed
 * and counted in SCTP_MIB_IN_PKT_DISCARDS.
 */
int sctp_rcv(struct sk_buff *skb)
{
	struct sock *sk;
	struct sctp_association *asoc;
	struct sctp_endpoint *ep = NULL;
	struct sctp_ep_common *rcvr;
	struct sctp_transport *transport = NULL;
	struct sctp_chunk *chunk;
	union sctp_addr src;
	union sctp_addr dest;
	int family;
	struct sctp_af *af;
	struct net *net = dev_net(skb->dev);
	bool is_gso = skb_is_gso(skb) && skb_is_gso_sctp(skb);
	int dif, sdif;

	if (skb->pkt_type != PACKET_HOST)
		goto discard_it;

	__SCTP_INC_STATS(net, SCTP_MIB_INSCTPPACKS);

	/* If packet is too small to contain a single chunk, let's not
	 * waste time on it anymore.
	 */
	if (skb->len < sizeof(struct sctphdr) + sizeof(struct sctp_chunkhdr) +
		       skb_transport_offset(skb))
		goto discard_it;

	/* If the packet is fragmented and we need to do crc checking,
	 * it's better to just linearize it otherwise crc computing
	 * takes longer.
	 */
	if (((!is_gso || skb_cloned(skb)) && skb_linearize(skb)) ||
	    !pskb_may_pull(skb, sizeof(struct sctphdr)))
		goto discard_it;

	/* Pull up the IP header. */
	__skb_pull(skb, skb_transport_offset(skb));

	skb->csum_valid = 0; /* Previous value not applicable */
	/* The software checksum is only computed when the skb does not
	 * already carry a "checksum unnecessary" mark, checksumming is not
	 * disabled and the packet is not an SCTP GSO frame.
	 */
	if (skb_csum_unnecessary(skb))
		__skb_decr_checksum_unnecessary(skb);
	else if (!sctp_checksum_disable &&
		 !is_gso &&
		 sctp_rcv_checksum(net, skb) < 0)
		goto discard_it;
	skb->csum_valid = 1;

	/* Step past the SCTP common header; skb->data now points at the
	 * first chunk.
	 */
	__skb_pull(skb, sizeof(struct sctphdr));

	family = ipver2af(ip_hdr(skb)->version);
	af = sctp_get_af_specific(family);
	if (unlikely(!af))
		goto discard_it;
	SCTP_INPUT_CB(skb)->af = af;

	/* Initialize local addresses for lookups. */
	af->from_skb(&src, skb, 1);
	af->from_skb(&dest, skb, 0);
	dif = af->skb_iif(skb);
	sdif = af->skb_sdif(skb);

	/* If the packet is to or from a non-unicast address,
	 * silently discard the packet.
	 *
	 * This is not clearly defined in the RFC except in section
	 * 8.4 - OOTB handling.  However, based on the book "Stream Control
	 * Transmission Protocol" 2.1, "It is important to note that the
	 * IP address of an SCTP transport address must be a routable
	 * unicast address.  In other words, IP multicast addresses and
	 * IP broadcast addresses cannot be used in an SCTP transport
	 * address."
	 */
	if (!af->addr_valid(&src, NULL, skb) ||
	    !af->addr_valid(&dest, NULL, skb))
		goto discard_it;

	asoc = __sctp_rcv_lookup(net, skb, &src, &dest, &transport, dif, sdif);

	/* No association: fall back to an endpoint lookup. */
	if (!asoc)
		ep = __sctp_rcv_lookup_endpoint(net, skb, &dest, &src, dif, sdif);

	/* Retrieve the common input handling substructure. */
	rcvr = asoc ? &asoc->base : &ep->base;
	sk = rcvr->sk;

	/*
	 * RFC 2960, 8.4 - Handle "Out of the blue" Packets.
	 * An SCTP packet is called an "out of the blue" (OOTB)
	 * packet if it is correctly formed, i.e., passed the
	 * receiver's checksum check, but the receiver is not
	 * able to identify the association to which this
	 * packet belongs.
	 */
	if (!asoc) {
		if (sctp_rcv_ootb(skb)) {
			__SCTP_INC_STATS(net, SCTP_MIB_OUTOFBLUES);
			goto discard_release;
		}
	}

	if (!xfrm_policy_check(sk, XFRM_POLICY_IN, skb, family))
		goto discard_release;
	nf_reset_ct(skb);

	if (sk_filter(sk, skb) || skb->len < sizeof(struct sctp_chunkhdr))
		goto discard_release;

	/* Create an SCTP packet structure. */
	chunk = sctp_chunkify(skb, asoc, sk, GFP_ATOMIC);
	if (!chunk)
		goto discard_release;
	SCTP_INPUT_CB(skb)->chunk = chunk;

	/* Remember what endpoint is to handle this packet. */
	chunk->rcvr = rcvr;

	/* Remember the SCTP header. */
	chunk->sctp_hdr = sctp_hdr(skb);

	/* Set the source and destination addresses of the incoming chunk.  */
	sctp_init_addrs(chunk, &src, &dest);

	/* Remember where we came from.  */
	chunk->transport = transport;

	/* Acquire access to the sock lock. Note: We are safe from other
	 * bottom halves on this lock, but a user may be in the lock too,
	 * so check if it is busy.
	 */
	bh_lock_sock(sk);

	if (sk != rcvr->sk) {
		/* Our cached sk is different from the rcvr->sk.  This is
		 * because migrate()/accept() may have moved the association
		 * to a new socket and released all the sockets.  So now we
		 * are holding a lock on the old socket while the user may
		 * be doing something with the new socket.  Switch our view
		 * of the current sk.
		 */
		bh_unlock_sock(sk);
		sk = rcvr->sk;
		bh_lock_sock(sk);
	}

	if (sock_owned_by_user(sk) || !sctp_newsk_ready(sk)) {
		/* Socket is busy (or not ready yet): defer to the backlog. */
		if (sctp_add_backlog(sk, skb)) {
			bh_unlock_sock(sk);
			sctp_chunk_free(chunk);
			skb = NULL; /* sctp_chunk_free already freed the skb */
			goto discard_release;
		}
		__SCTP_INC_STATS(net, SCTP_MIB_IN_PKT_BACKLOG);
	} else {
		__SCTP_INC_STATS(net, SCTP_MIB_IN_PKT_SOFTIRQ);
		sctp_inq_push(&chunk->rcvr->inqueue, chunk);
	}

	bh_unlock_sock(sk);

	/* Release the asoc/ep ref we took in the lookup calls. */
	if (transport)
		sctp_transport_put(transport);
	else
		sctp_endpoint_put(ep);

	return 0;

discard_it:
	__SCTP_INC_STATS(net, SCTP_MIB_IN_PKT_DISCARDS);
	kfree_skb(skb);
	return 0;

discard_release:
	/* Release the asoc/ep ref we took in the lookup calls. */
	if (transport)
		sctp_transport_put(transport);
	else
		sctp_endpoint_put(ep);

	goto discard_it;
}

/* Process the backlog queue of the socket.  Every skb on
 * the backlog holds a ref on an association or endpoint.
 * We hold this ref throughout the state machine to make
 * sure that the structure we need is still around.
 *
 * Always returns 0.  The ref is dropped here unless the skb was queued
 * onto a backlog again.
 */
int sctp_backlog_rcv(struct sock *sk, struct sk_buff *skb)
{
	struct sctp_chunk *chunk = SCTP_INPUT_CB(skb)->chunk;
	struct sctp_inq *inqueue = &chunk->rcvr->inqueue;
	struct sctp_transport *t = chunk->transport;
	struct sctp_ep_common *rcvr = NULL;
	int backloged = 0;

	rcvr = chunk->rcvr;

	/* If the rcvr is dead then the association or endpoint
	 * has been deleted and we can safely drop the chunk
	 * and refs that we are holding.
	 */
	if (rcvr->dead) {
		sctp_chunk_free(chunk);
		goto done;
	}

	if (unlikely(rcvr->sk != sk)) {
		/* In this case, the association moved from one socket to
		 * another.  We are currently sitting on the backlog of the
		 * old socket, so we need to move.
		 * However, since we are here in the process context we
		 * need to make sure that the user doesn't own
		 * the new socket when we process the packet.
		 * If the new socket is user-owned, queue the chunk to the
		 * backlog of the new socket without dropping any refs.
		 * Otherwise, we can safely push the chunk on the inqueue.
		 */

		sk = rcvr->sk;
		local_bh_disable();
		bh_lock_sock(sk);

		if (sock_owned_by_user(sk) || !sctp_newsk_ready(sk)) {
			/* A non-zero return means the backlog rejected the
			 * skb, so the chunk is freed here.
			 */
			if (sk_add_backlog(sk, skb, READ_ONCE(sk->sk_rcvbuf)))
				sctp_chunk_free(chunk);
			else
				backloged = 1;
		} else
			sctp_inq_push(inqueue, chunk);

		bh_unlock_sock(sk);
		local_bh_enable();

		/* If the chunk was backloged again, don't drop refs */
		if (backloged)
			return 0;
	} else {
		if (!sctp_newsk_ready(sk)) {
			/* Requeue on the same socket; keep the refs if the
			 * backlog accepted the skb.
			 */
			if (!sk_add_backlog(sk, skb, READ_ONCE(sk->sk_rcvbuf)))
				return 0;
			sctp_chunk_free(chunk);
		} else {
			sctp_inq_push(inqueue, chunk);
		}
	}

done:
	/* Release the refs we took in sctp_add_backlog */
	if (SCTP_EP_TYPE_ASSOCIATION == rcvr->type)
		sctp_transport_put(t);
	else if (SCTP_EP_TYPE_SOCKET == rcvr->type)
		sctp_endpoint_put(sctp_ep(rcvr));
	else
		BUG();

	return 0;
}

/* Queue an skb on the socket backlog.
 *
 * Returns the result of sk_add_backlog().  When that result is 0, an
 * extra ref is taken on the chunk's transport (association receiver) or
 * endpoint (socket receiver); sctp_backlog_rcv() releases it.
 */
static int sctp_add_backlog(struct sock *sk, struct sk_buff *skb)
{
	struct sctp_chunk *chunk = SCTP_INPUT_CB(skb)->chunk;
	struct sctp_transport *t = chunk->transport;
	struct sctp_ep_common *rcvr = chunk->rcvr;
	int ret;

	ret = sk_add_backlog(sk, skb, READ_ONCE(sk->sk_rcvbuf));
	if (!ret) {
		/* Hold the assoc/ep while hanging on the backlog queue.
		 * This way, we know structures we need will not disappear
		 * from us
		 */
		if (SCTP_EP_TYPE_ASSOCIATION == rcvr->type)
			sctp_transport_hold(t);
		else if (SCTP_EP_TYPE_SOCKET == rcvr->type)
			sctp_endpoint_hold(sctp_ep(rcvr));
		else
			BUG();
	}
	return ret;

}

/* Handle icmp frag needed error.
 *
 * Does nothing when there is no transport or when both the current path
 * MTU and the probe size plus sctp_transport_pl_hlen() already fit
 * within @pmtu.  If the socket is owned by the user, the new value is
 * only recorded (mtu_info and the pmtu_pending flags) and no further
 * work is done here.
 */
void sctp_icmp_frag_needed(struct sock *sk, struct sctp_association *asoc,
			   struct sctp_transport *t, __u32 pmtu)
{
	if (!t ||
	    (t->pathmtu <= pmtu &&
	     t->pl.probe_size + sctp_transport_pl_hlen(t) <= pmtu))
		return;

	if (sock_owned_by_user(sk)) {
		atomic_set(&t->mtu_info, pmtu);
		asoc->pmtu_pending = 1;
		t->pmtu_pending = 1;
		return;
	}

	if (!(t->param_flags & SPP_PMTUD_ENABLE))
		/* We can't allow retransmitting in such case, as the
		 * retransmission would be sized just as before, and thus we
		 * would get another icmp, and retransmit again.
		 */
		return;

	/* Update transports view of the MTU. Return if no update was needed.
	 * If an update wasn't needed/possible, it also doesn't make sense to
	 * try to retransmit now.
	 */
	if (!sctp_transport_update_pmtu(t, pmtu))
		return;

	/* Update association pmtu. */
	sctp_assoc_sync_pmtu(asoc);

	/* Retransmit with the new pmtu setting. */
	sctp_retransmit(&asoc->outqueue, t, SCTP_RTXR_PMTUD);
}

/* Handle an ICMP redirect for transport @t.
 *
 * Ignored when the socket is owned by the user or no transport is given.
 * Otherwise the redirect is passed to the transport's dst entry, if
 * sctp_transport_dst_check() returns one.
 */
void sctp_icmp_redirect(struct sock *sk, struct sctp_transport *t,
			struct sk_buff *skb)
{
	struct dst_entry *dst;

	if (sock_owned_by_user(sk) || !t)
		return;
	dst = sctp_transport_dst_check(t);
	if (dst)
		dst->ops->redirect(dst, sk, skb);
}

/*
 * SCTP Implementer's Guide, 2.37 ICMP handling procedures
 *
 * ICMP8) If the ICMP code is a "Unrecognized next header type encountered"
 *        or a "Protocol Unreachable" treat this message as an abort
 *        with the T bit set.
 *
 * This function sends an event to the state machine, which will abort the
 * association.
 *
 * If the socket is owned by the user the event is not sent here; the
 * transport's proto_unreach_timer is armed instead (unless it is already
 * pending).
 */
void sctp_icmp_proto_unreachable(struct sock *sk,
			   struct sctp_association *asoc,
			   struct sctp_transport *t)
{
	if (sock_owned_by_user(sk)) {
		if (timer_pending(&t->proto_unreach_timer))
			return;
		else {
			/* Take a transport ref when mod_timer() returns 0. */
			if (!mod_timer(&t->proto_unreach_timer,
						jiffies + (HZ/20)))
				sctp_transport_hold(t);
		}
	} else {
		struct net *net = sock_net(sk);

		pr_debug("%s: unrecognized next header type "
			 "encountered!\n", __func__);

		/* Drop the timer's transport ref if a timer was cancelled. */
		if (timer_delete(&t->proto_unreach_timer))
			sctp_transport_put(t);

		sctp_do_sm(net, SCTP_EVENT_T_OTHER,
			   SCTP_ST_OTHER(SCTP_EVENT_ICMP_PROTO_UNREACH),
			   asoc->state, asoc->ep, asoc, t,
			   GFP_ATOMIC);
	}
}

/* Common lookup code for icmp/icmpv6 error handler.
 *
 * Finds the association the ICMP error refers to and validates the
 * verification tag carried in the embedded SCTP header.
 *
 * On success returns the association's socket with bh_lock_sock() taken,
 * and stores the association in *app and the transport in *tpp; the
 * caller is expected to finish with sctp_err_finish().  On failure
 * returns NULL with *app and *tpp set to NULL.
 */
struct sock *sctp_err_lookup(struct net *net, int family, struct sk_buff *skb,
			     struct sctphdr *sctphdr,
			     struct sctp_association **app,
			     struct sctp_transport **tpp)
{
	struct sctp_init_chunk *chunkhdr, _chunkhdr;
	union sctp_addr saddr;
	union sctp_addr daddr;
	struct sctp_af *af;
	struct sock *sk = NULL;
	struct sctp_association *asoc;
	struct sctp_transport *transport = NULL;
	__u32 vtag = ntohl(sctphdr->vtag);
	int sdif = inet_sdif(skb);
	int dif = inet_iif(skb);

	*app = NULL; *tpp = NULL;

	af = sctp_get_af_specific(family);
	if (unlikely(!af)) {
		return NULL;
	}

	/* Initialize local addresses for lookups. */
	af->from_skb(&saddr, skb, 1);
	af->from_skb(&daddr, skb, 0);

	/* Look for an association that matches the incoming ICMP error
	 * packet.
	 */
	asoc = __sctp_lookup_association(net, &saddr, &daddr, &transport, dif, sdif);
	if (!asoc)
		return NULL;

	sk = asoc->base.sk;

	/* RFC 4960, Appendix C. ICMP Handling
	 *
	 * ICMP6) An implementation MUST validate that the Verification Tag
	 * contained in the ICMP message matches the Verification Tag of
	 * the peer.  If the Verification Tag is not 0 and does NOT
	 * match, discard the ICMP message.  If it is 0 and the ICMP
	 * message contains enough bytes to verify that the chunk type is
	 * an INIT chunk and that the Initiate Tag matches the tag of the
	 * peer, continue with ICMP7.  If the ICMP message is too short
	 * or the chunk type or the Initiate Tag does not match, silently
	 * discard the packet.
	 */
	if (vtag == 0) {
		/* chunk header + first 4 octets of init header */
		chunkhdr = skb_header_pointer(skb, skb_transport_offset(skb) +
					      sizeof(struct sctphdr),
					      sizeof(struct sctp_chunkhdr) +
					      sizeof(__be32), &_chunkhdr);
		if (!chunkhdr ||
		    chunkhdr->chunk_hdr.type != SCTP_CID_INIT ||
		    ntohl(chunkhdr->init_hdr.init_tag) != asoc->c.my_vtag)
			goto out;

	} else if (vtag != asoc->c.peer_vtag) {
		goto out;
	}

	bh_lock_sock(sk);

	/* If too many ICMPs get dropped on busy
	 * servers this needs to be solved differently.
	 */
	if (sock_owned_by_user(sk))
		__NET_INC_STATS(net, LINUX_MIB_LOCKDROPPEDICMPS);

	*app = asoc;
	*tpp = transport;
	return sk;

out:
	/* Validation failed: drop the transport ref from the lookup. */
	sctp_transport_put(transport);
	return NULL;
}

/* Common cleanup code for icmp/icmpv6 error handler.
 *
 * Unlocks the socket and drops the transport reference; the counterpart
 * of a successful sctp_err_lookup().
 */
void sctp_err_finish(struct sock *sk, struct sctp_transport *t)
	__releases(&((__sk)->sk_lock.slock))
{
	bh_unlock_sock(sk);
	sctp_transport_put(t);
}

/* Act on an ICMPv4 error for transport @t.
 *
 * Frag-needed, protocol-unreachable and redirect messages are dispatched
 * to their dedicated handlers.  Other recognised types are converted to
 * an errno value, which is reported on the socket when it is not owned
 * by the user and has RECVERR set, and otherwise stored as a soft error.
 * Unrecognised types and out-of-range unreachable codes are ignored.
 */
static void sctp_v4_err_handle(struct sctp_transport *t, struct sk_buff *skb,
			       __u8 type, __u8 code, __u32 info)
{
	struct sctp_association *asoc = t->asoc;
	struct sock *sk = asoc->base.sk;
	int err = 0;

	switch (type) {
	case ICMP_PARAMETERPROB:
		err = EPROTO;
		break;
	case ICMP_DEST_UNREACH:
		/* The code indexes icmp_err_convert[] below. */
		if (code > NR_ICMP_UNREACH)
			return;
		if (code == ICMP_FRAG_NEEDED) {
			sctp_icmp_frag_needed(sk, asoc, t, SCTP_TRUNC4(info));
			return;
		}
		if (code == ICMP_PROT_UNREACH) {
			sctp_icmp_proto_unreachable(sk, asoc, t);
			return;
		}
		err = icmp_err_convert[code].errno;
		break;
	case ICMP_TIME_EXCEEDED:
		if (code == ICMP_EXC_FRAGTIME)
			return;

		err = EHOSTUNREACH;
		break;
	case ICMP_REDIRECT:
		sctp_icmp_redirect(sk, t, skb);
		return;
	default:
		return;
	}
	if (!sock_owned_by_user(sk) && inet_test_bit(RECVERR, sk)) {
		sk->sk_err = err;
		sk_error_report(sk);
	} else {  /* Only an error on timeout */
		WRITE_ONCE(sk->sk_err_soft, err);
	}
}

/*
 * This routine is called by the ICMP module when it gets some
 * sort of error condition.  If err < 0 then the socket should
 * be closed and the error returned to the user.  If err > 0
 * it's just the icmp type << 8 | icmp code.  After adjustment
 * header points to the first 8 bytes of the sctp header.  We need
 * to find the appropriate port.
 *
 * The locking strategy used here is very "optimistic". When
 * someone else accesses the socket the ICMP is just dropped
 * and for some paths there is no check at all.
 * A more general error queue to queue errors for later handling
 * is probably better.
* */ int sctp_v4_err(struct sk_buff *skb, __u32 info) { const struct iphdr *iph = (const struct iphdr *)skb->data; const int type = icmp_hdr(skb)->type; const int code = icmp_hdr(skb)->code; struct net *net = dev_net(skb->dev); struct sctp_transport *transport; struct sctp_association *asoc; __u16 saveip, savesctp; struct sock *sk; /* Fix up skb to look at the embedded net header. */ saveip = skb->network_header; savesctp = skb->transport_header; skb_reset_network_header(skb); skb_set_transport_header(skb, iph->ihl * 4); sk = sctp_err_lookup(net, AF_INET, skb, sctp_hdr(skb), &asoc, &transport); /* Put back, the original values. */ skb->network_header = saveip; skb->transport_header = savesctp; if (!sk) { __ICMP_INC_STATS(net, ICMP_MIB_INERRORS); return -ENOENT; } sctp_v4_err_handle(transport, skb, type, code, info); sctp_err_finish(sk, transport); return 0; } int sctp_udp_v4_err(struct sock *sk, struct sk_buff *skb) { struct net *net = dev_net(skb->dev); struct sctp_association *asoc; struct sctp_transport *t; struct icmphdr *hdr; __u32 info = 0; skb->transport_header += sizeof(struct udphdr); sk = sctp_err_lookup(net, AF_INET, skb, sctp_hdr(skb), &asoc, &t); if (!sk) { __ICMP_INC_STATS(net, ICMP_MIB_INERRORS); return -ENOENT; } skb->transport_header -= sizeof(struct udphdr); hdr = (struct icmphdr *)(skb_network_header(skb) - sizeof(struct icmphdr)); if (hdr->type == ICMP_REDIRECT) { /* can't be handled without outer iphdr known, leave it to udp_err */ sctp_err_finish(sk, t); return 0; } if (hdr->type == ICMP_DEST_UNREACH && hdr->code == ICMP_FRAG_NEEDED) info = ntohs(hdr->un.frag.mtu); sctp_v4_err_handle(t, skb, hdr->type, hdr->code, info); sctp_err_finish(sk, t); return 1; } /* * RFC 2960, 8.4 - Handle "Out of the blue" Packets. * * This function scans all the chunks in the OOTB packet to determine if * the packet should be discarded right away. 
If a response might be needed
 * for this packet, or, if further processing is possible, the packet will
 * be queued to a proper inqueue for the next phase of handling.
 *
 * Output:
 * Return 0 - If further processing is needed.
 * Return 1 - If the packet can be discarded right away.
 */
static int sctp_rcv_ootb(struct sk_buff *skb)
{
	struct sctp_chunkhdr *hdr, hdr_copy;
	int next, pos = 0;

	/* Visit every chunk in the packet, one header at a time. */
	for (;;) {
		/* Stop when a full chunk header no longer fits. */
		if (pos + sizeof(hdr_copy) > skb->len)
			return 0;

		hdr = skb_header_pointer(skb, pos, sizeof(*hdr), &hdr_copy);

		/* Stop when the chunk length is less than the minimum. */
		if (!hdr || ntohs(hdr->length) < sizeof(hdr_copy))
			return 0;

		/* Stop when the padded chunk would run past the packet. */
		next = pos + SCTP_PAD4(ntohs(hdr->length));
		if (next > skb->len)
			return 0;

		switch (hdr->type) {
		case SCTP_CID_ABORT:
			/* RFC 8.4, 2) If the OOTB packet contains an ABORT
			 * chunk, the receiver MUST silently discard the OOTB
			 * packet and take no further action.
			 */
		case SCTP_CID_SHUTDOWN_COMPLETE:
			/* RFC 8.4, 6) If the packet contains a SHUTDOWN
			 * COMPLETE chunk, the receiver should silently
			 * discard the packet and take no further action.
			 */
			return 1;
		case SCTP_CID_INIT:
			/* RFC 4460, 2.11.2
			 * Discard packets with an INIT chunk bundled as a
			 * subsequent chunk.  When INIT is first, the normal
			 * INIT processing will discard the chunk.
			 */
			if ((void *)hdr != skb->data)
				return 1;
			break;
		default:
			break;
		}

		pos = next;
		if (next >= skb->len)
			return 0;
	}
}

/* Insert endpoint into the hash table.
 *
 * For SO_REUSEPORT sockets the endpoint is first attached to the
 * reuseport group of a compatible endpoint already in the bucket, or a
 * new group is allocated when none is found.  Returns 0 on success or
 * the error reported by the bind-address check or the reuseport helpers.
 */
static int __sctp_hash_endpoint(struct sctp_endpoint *ep)
{
	struct sock *sk = ep->base.sk;
	struct net *net = sock_net(sk);
	struct sctp_hashbucket *head;
	int err = 0;

	ep->hashent = sctp_ep_hashfn(net, ep->base.bind_addr.port);
	head = &sctp_ep_hashtable[ep->hashent];

	write_lock(&head->lock);
	if (sk->sk_reuseport) {
		bool any = sctp_is_ep_boundall(sk);
		struct sctp_endpoint *ep2;
		struct list_head *list;
		int cnt = 0;

		/* Non-zero until an existing group has been joined. */
		err = 1;

		/* Count this endpoint's bound addresses. */
		list_for_each(list, &ep->base.bind_addr.address_list)
			cnt++;

		sctp_for_each_hentry(ep2, &head->chain) {
			struct sock *sk2 = ep2->base.sk;

			/* Only other reuseport sockets in the same netns and
			 * owned by the same uid are candidates.
			 */
			if (!net_eq(sock_net(sk2), net) || sk2 == sk ||
			    !uid_eq(sk_uid(sk2), sk_uid(sk)) ||
			    !sk2->sk_reuseport)
				continue;

			/* 0: join sk2's group; negative: fail; positive:
			 * keep searching.
			 */
			err = sctp_bind_addrs_check(sctp_sk(sk2),
						    sctp_sk(sk), cnt);
			if (!err) {
				err = reuseport_add_sock(sk, sk2, any);
				if (err)
					goto out;
				break;
			} else if (err < 0) {
				goto out;
			}
		}

		/* No group joined: start a new one for this socket. */
		if (err) {
			err = reuseport_alloc(sk, any);
			if (err)
				goto out;
		}
	}

	hlist_add_head(&ep->node, &head->chain);
out:
	write_unlock(&head->lock);
	return err;
}

/* Add an endpoint to the hash. Local BH-safe. */
int sctp_hash_endpoint(struct sctp_endpoint *ep)
{
	int err;

	local_bh_disable();
	err = __sctp_hash_endpoint(ep);
	local_bh_enable();

	return err;
}

/* Remove endpoint from the hash table.
 *
 * Also detaches the socket from its reuseport group when it has one.
 */
static void __sctp_unhash_endpoint(struct sctp_endpoint *ep)
{
	struct sock *sk = ep->base.sk;
	struct sctp_hashbucket *head;

	ep->hashent = sctp_ep_hashfn(sock_net(sk), ep->base.bind_addr.port);

	head = &sctp_ep_hashtable[ep->hashent];

	write_lock(&head->lock);
	if (rcu_access_pointer(sk->sk_reuseport_cb))
		reuseport_detach_sock(sk);
	hlist_del_init(&ep->node);
	write_unlock(&head->lock);
}

/* Remove endpoint from the hash.  Local BH-safe.
*/
void sctp_unhash_endpoint(struct sctp_endpoint *ep)
{
	local_bh_disable();
	__sctp_unhash_endpoint(ep);
	local_bh_enable();
}

/* Hash a (peer address, peer port, local port, netns) tuple.
 *
 * IPv6 addresses are folded with jhash() first; for any other family the
 * IPv4 address is used directly.  The peer port is always read through
 * the v4 member of the union.
 * NOTE(review): presumably the port sits at the same offset in the v4
 * and v6 members - confirm against union sctp_addr.
 */
static inline __u32 sctp_hashfn(const struct net *net, __be16 lport,
				const union sctp_addr *paddr, __u32 seed)
{
	__u32 addr;

	if (paddr->sa.sa_family == AF_INET6)
		addr = jhash(&paddr->v6.sin6_addr, 16, seed);
	else
		addr = (__force __u32)paddr->v4.sin_addr.s_addr;

	return  jhash_3words(addr, ((__force __u32)paddr->v4.sin_port) << 16 |
			     (__force __u32)lport, net_hash_mix(net), seed);
}

/* Look up an endpoint.
 *
 * Falls back to the endpoint of the per-net control socket when no
 * endpoint in the bucket matches.  For SO_REUSEPORT sockets the final
 * endpoint may be replaced by the one reuseport_select_sock() picks.
 * The returned endpoint is held; the caller must put it.
 */
static struct sctp_endpoint *__sctp_rcv_lookup_endpoint(
					struct net *net, struct sk_buff *skb,
					const union sctp_addr *laddr,
					const union sctp_addr *paddr,
					int dif, int sdif)
{
	struct sctp_hashbucket *head;
	struct sctp_endpoint *ep;
	struct sock *sk;
	__be16 lport;
	int hash;

	lport = laddr->v4.sin_port;
	hash = sctp_ep_hashfn(net, ntohs(lport));
	head = &sctp_ep_hashtable[hash];
	read_lock(&head->lock);
	sctp_for_each_hentry(ep, &head->chain) {
		if (sctp_endpoint_is_match(ep, net, laddr, dif, sdif))
			goto hit;
	}

	/* Nothing matched: use the control socket's endpoint. */
	ep = sctp_sk(net->sctp.ctl_sock)->ep;

hit:
	sk = ep->base.sk;
	if (sk->sk_reuseport) {
		__u32 phash = sctp_hashfn(net, lport, paddr, 0);

		sk = reuseport_select_sock(sk, phash, skb,
					   sizeof(struct sctphdr));
		if (sk)
			ep = sctp_sk(sk)->ep;
	}
	sctp_endpoint_hold(ep);
	read_unlock(&head->lock);
	return ep;
}

/* rhashtable for transport */
/* Lookup key: peer address, network namespace and local port. */
struct sctp_hash_cmp_arg {
	const union sctp_addr	*paddr;
	const struct net	*net;
	__be16			lport;
};

/* rhashtable compare callback: returns 0 when the transport matches the
 * key (same peer address, netns and local port) and 1 otherwise.  The
 * transport is held only while its association is inspected.
 */
static inline int sctp_hash_cmp(struct rhashtable_compare_arg *arg,
				const void *ptr)
{
	struct sctp_transport *t = (struct sctp_transport *)ptr;
	const struct sctp_hash_cmp_arg *x = arg->key;
	int err = 1;

	if (!sctp_cmp_addr_exact(&t->ipaddr, x->paddr))
		return err;
	/* Could not get a ref on the transport: treat as a mismatch. */
	if (!sctp_transport_hold(t))
		return err;

	if (!net_eq(t->asoc->base.net, x->net))
		goto out;
	if (x->lport != htons(t->asoc->base.bind_addr.port))
		goto out;

	err = 0;
out:
	sctp_transport_put(t);
	return err;
}

/* rhashtable object hash: derived from the transport's own fields. */
static inline __u32 sctp_hash_obj(const void *data, u32 len, u32 seed)
{
	const struct sctp_transport *t = data;

	return sctp_hashfn(t->asoc->base.net,
			   htons(t->asoc->base.bind_addr.port),
			   &t->ipaddr, seed);
}

/* rhashtable key hash: derived from a struct sctp_hash_cmp_arg. */
static inline __u32 sctp_hash_key(const void *data, u32 len, u32 seed)
{
	const struct sctp_hash_cmp_arg *x = data;

	return sctp_hashfn(x->net, x->lport, x->paddr, seed);
}

static const struct rhashtable_params sctp_hash_params = {
	.head_offset		= offsetof(struct sctp_transport, node),
	.hashfn			= sctp_hash_key,
	.obj_hashfn		= sctp_hash_obj,
	.obj_cmpfn		= sctp_hash_cmp,
	.automatic_shrinking	= true,
};

/* Initialise the global transport hashtable. */
int sctp_transport_hashtable_init(void)
{
	return rhltable_init(&sctp_transport_hashtable, &sctp_hash_params);
}

/* Destroy the global transport hashtable. */
void sctp_transport_hashtable_destroy(void)
{
	rhltable_destroy(&sctp_transport_hashtable);
}

/* Insert transport @t into the transport hashtable.
 *
 * Transports of temporary associations are not hashed (returns 0).
 * Returns -EEXIST when a transport with the same key already belongs to
 * the same endpoint, otherwise the result of the insertion.
 */
int sctp_hash_transport(struct sctp_transport *t)
{
	struct sctp_transport *transport;
	struct rhlist_head *tmp, *list;
	struct sctp_hash_cmp_arg arg;
	int err;

	if (t->asoc->temp)
		return 0;

	arg.net   = t->asoc->base.net;
	arg.paddr = &t->ipaddr;
	arg.lport = htons(t->asoc->base.bind_addr.port);

	rcu_read_lock();
	list = rhltable_lookup(&sctp_transport_hashtable, &arg,
			       sctp_hash_params);

	rhl_for_each_entry_rcu(transport, tmp, list, node)
		if (transport->asoc->ep == t->asoc->ep) {
			rcu_read_unlock();
			return -EEXIST;
		}
	rcu_read_unlock();

	err = rhltable_insert_key(&sctp_transport_hashtable, &arg,
				  &t->node, sctp_hash_params);
	if (err)
		pr_err_once("insert transport fail, errno %d\n", err);

	return err;
}

/* Remove transport @t from the hashtable; a no-op for transports of
 * temporary associations, which are never hashed.
 */
void sctp_unhash_transport(struct sctp_transport *t)
{
	if (t->asoc->temp)
		return;

	rhltable_remove(&sctp_transport_hashtable, &t->node,
			sctp_hash_params);
}

/* Check whether a socket bound to @bound_dev_if may accept traffic that
 * arrived on @dif / @sdif.  l3mdev_accept is read from the per-net
 * setting only when CONFIG_NET_L3_MASTER_DEV is enabled; otherwise it
 * stays true.
 */
bool sctp_sk_bound_dev_eq(struct net *net, int bound_dev_if, int dif, int sdif)
{
	bool l3mdev_accept = true;

#if IS_ENABLED(CONFIG_NET_L3_MASTER_DEV)
	l3mdev_accept = !!READ_ONCE(net->sctp.l3mdev_accept);
#endif
	return inet_bound_dev_eq(l3mdev_accept, bound_dev_if, dif, sdif);
}

/* return a transport with holding it */
struct sctp_transport *sctp_addrs_lookup_transport(
				struct net *net,
				const union sctp_addr *laddr,
				const union sctp_addr *paddr,
				int dif, int sdif)
{
	struct rhlist_head *tmp, *list;
	struct sctp_transport *t;
	int bound_dev_if;
	struct sctp_hash_cmp_arg arg = {
		.paddr = paddr,
		.net   = net,
		.lport = laddr->v4.sin_port,
	};

	list = rhltable_lookup(&sctp_transport_hashtable, &arg,
			       sctp_hash_params);

	rhl_for_each_entry_rcu(t, tmp, list, node) {
		/* Skip transports we cannot take a ref on. */
		if (!sctp_transport_hold(t))
			continue;

		/* Accept only if the bound device and the local address
		 * both match; the ref is kept for the caller.
		 */
		bound_dev_if = READ_ONCE(t->asoc->base.sk->sk_bound_dev_if);
		if (sctp_sk_bound_dev_eq(net, bound_dev_if, dif, sdif) &&
		    sctp_bind_addr_match(&t->asoc->base.bind_addr,
					 laddr, sctp_sk(t->asoc->base.sk)))
			return t;
		sctp_transport_put(t);
	}

	return NULL;
}

/* return a transport without holding it, as it's only used under sock lock */
struct sctp_transport *sctp_epaddr_lookup_transport(
				const struct sctp_endpoint *ep,
				const union sctp_addr *paddr)
{
	struct rhlist_head *tmp, *list;
	struct sctp_transport *t;
	struct sctp_hash_cmp_arg arg = {
		.paddr = paddr,
		.net   = ep->base.net,
		.lport = htons(ep->base.bind_addr.port),
	};

	list = rhltable_lookup(&sctp_transport_hashtable, &arg,
			       sctp_hash_params);

	rhl_for_each_entry_rcu(t, tmp, list, node)
		if (ep == t->asoc->ep)
			return t;

	return NULL;
}

/* Look up an association.
 *
 * Returns the association owning the transport found for the
 * (local, peer) address pair, and stores that held transport in *pt.
 * Returns NULL, leaving *pt untouched, when no transport matches.
 */
static struct sctp_association *__sctp_lookup_association(
					struct net *net,
					const union sctp_addr *local,
					const union sctp_addr *peer,
					struct sctp_transport **pt,
					int dif, int sdif)
{
	struct sctp_transport *t;
	struct sctp_association *asoc = NULL;

	t = sctp_addrs_lookup_transport(net, local, peer, dif, sdif);
	if (!t)
		goto out;

	asoc = t->asoc;
	*pt = t;

out:
	return asoc;
}

/* Look up an association.
protected by RCU read lock */
static
struct sctp_association *sctp_lookup_association(struct net *net,
						 const union sctp_addr *laddr,
						 const union sctp_addr *paddr,
						 struct sctp_transport **transportp,
						 int dif, int sdif)
{
	struct sctp_association *asoc;

	rcu_read_lock();
	asoc = __sctp_lookup_association(net, laddr, paddr, transportp, dif, sdif);
	rcu_read_unlock();

	return asoc;
}

/* Is there an association matching the given local and peer addresses? */
bool sctp_has_association(struct net *net,
			  const union sctp_addr *laddr,
			  const union sctp_addr *paddr,
			  int dif, int sdif)
{
	struct sctp_transport *transport;

	if (sctp_lookup_association(net, laddr, paddr, &transport, dif, sdif)) {
		/* Only existence matters: drop the ref from the lookup. */
		sctp_transport_put(transport);
		return true;
	}

	return false;
}

/*
 * SCTP Implementors Guide, 2.18 Handling of address
 * parameters within the INIT or INIT-ACK.
 *
 * D) When searching for a matching TCB upon reception of an INIT
 *    or INIT-ACK chunk the receiver SHOULD use not only the
 *    source address of the packet (containing the INIT or
 *    INIT-ACK) but the receiver SHOULD also use all valid
 *    address parameters contained within the chunk.
 *
 * 2.18.3 Solution description
 *
 * This new text clearly specifies to an implementor the need
 * to look within the INIT or INIT-ACK. Any implementation that
 * does not do this, may not be able to establish associations
 * in certain circumstances.
 *
 */
static struct sctp_association *__sctp_rcv_init_lookup(struct net *net,
	struct sk_buff *skb,
	const union sctp_addr *laddr, struct sctp_transport **transportp,
	int dif, int sdif)
{
	struct sctp_association *asoc;
	union sctp_addr addr;
	union sctp_addr *paddr = &addr;
	struct sctphdr *sh = sctp_hdr(skb);
	union sctp_params params;
	struct sctp_init_chunk *init;
	struct sctp_af *af;

	/*
	 * This code will NOT touch anything inside the chunk--it is
	 * strictly READ-ONLY.
	 *
	 * RFC 2960 3  SCTP packet Format
	 *
	 * Multiple chunks can be bundled into one SCTP packet up to
	 * the MTU size, except for the INIT, INIT ACK, and SHUTDOWN
	 * COMPLETE chunks.  These chunks MUST NOT be bundled with any
	 * other chunk in a packet.  See Section 6.10 for more details
	 * on chunk bundling.
	 */

	/* Find the start of the TLVs and the end of the chunk.  This is
	 * the region we search for address parameters.
	 */
	init = (struct sctp_init_chunk *)skb->data;

	/* Walk the parameters looking for embedded addresses. */
	sctp_walk_params(params, init) {

		/* Note: Ignoring hostname addresses. */
		af = sctp_get_af_specific(param_type2af(params.p->type));
		if (!af)
			continue;

		/* Build a peer address from the parameter and the source
		 * port of the SCTP header; skip parameters that fail to
		 * convert.
		 */
		if (!af->from_addr_param(paddr, params.addr, sh->source, 0))
			continue;

		asoc = __sctp_lookup_association(net, laddr, paddr, transportp, dif, sdif);
		if (asoc)
			return asoc;
	}

	return NULL;
}

/* ADD-IP, Section 5.2
 * When an endpoint receives an ASCONF Chunk from the remote peer
 * special procedures may be needed to identify the association the
 * ASCONF Chunk is associated with. To properly find the association
 * the following procedures SHOULD be followed:
 *
 * D2) If the association is not found, use the address found in the
 * Address Parameter TLV combined with the port number found in the
 * SCTP common header. If found proceed to rule D4.
 *
 * D2-ext) If more than one ASCONF Chunks are packed together, use the
 * address found in the ASCONF Address Parameter TLV of each of the
 * subsequent ASCONF Chunks. If found, proceed to rule D4.
*/
static struct sctp_association *__sctp_rcv_asconf_lookup(
					struct net *net,
					struct sctp_chunkhdr *ch,
					const union sctp_addr *laddr,
					__be16 peer_port,
					struct sctp_transport **transportp,
					int dif, int sdif)
{
	struct sctp_addip_chunk *asconf = (struct sctp_addip_chunk *)ch;
	struct sctp_af *af;
	union sctp_addr_param *param;
	union sctp_addr paddr;

	/* The chunk must hold the ADDIP header plus a parameter header. */
	if (ntohs(ch->length) < sizeof(*asconf) + sizeof(struct sctp_paramhdr))
		return NULL;

	/* Skip over the ADDIP header and find the Address parameter */
	param = (union sctp_addr_param *)(asconf + 1);

	af = sctp_get_af_specific(param_type2af(param->p.type));
	if (unlikely(!af))
		return NULL;

	if (!af->from_addr_param(&paddr, param, peer_port, 0))
		return NULL;

	return __sctp_lookup_association(net, laddr, &paddr, transportp, dif, sdif);
}


/* SCTP-AUTH, Section 6.3:
*    If the receiver does not find a STCB for a packet containing an AUTH
*    chunk as the first chunk and not a COOKIE-ECHO chunk as the second
*    chunk, it MUST use the chunks after the AUTH chunk to look up an existing
*    association.
*
* This means that any chunks that can help us identify the association need
* to be looked at to find this association.
*/
static struct sctp_association *__sctp_rcv_walk_lookup(struct net *net,
				      struct sk_buff *skb,
				      const union sctp_addr *laddr,
				      struct sctp_transport **transportp,
				      int dif, int sdif)
{
	struct sctp_association *asoc = NULL;
	struct sctp_chunkhdr *ch;
	int have_auth = 0;
	unsigned int chunk_num = 1;
	__u8 *ch_end;

	/* Walk through the chunks looking for AUTH or ASCONF chunks
	 * to help us find the association.
	 */
	ch = (struct sctp_chunkhdr *)skb->data;
	do {
		/* Break out if chunk length is less than minimal. */
		if (ntohs(ch->length) < sizeof(*ch))
			break;

		/* Break out if the padded chunk runs past the skb data. */
		ch_end = ((__u8 *)ch) + SCTP_PAD4(ntohs(ch->length));
		if (ch_end > skb_tail_pointer(skb))
			break;

		switch (ch->type) {
		case SCTP_CID_AUTH:
			/* Record the position of the AUTH chunk. */
			have_auth = chunk_num;
			break;

		case SCTP_CID_COOKIE_ECHO:
			/* If a packet arrives containing an AUTH chunk as
			 * a first chunk, a COOKIE-ECHO chunk as the second
			 * chunk, and possibly more chunks after them, and
			 * the receiver does not have an STCB for that
			 * packet, then authentication is based on
			 * the contents of the COOKIE- ECHO chunk.
			 */
			if (have_auth == 1 && chunk_num == 2)
				return NULL;
			break;

		case SCTP_CID_ASCONF:
			/* ASCONF is only used for the lookup after an AUTH
			 * chunk or when addip_noauth is set for this netns.
			 */
			if (have_auth || net->sctp.addip_noauth)
				asoc = __sctp_rcv_asconf_lookup(
						net, ch, laddr,
						sctp_hdr(skb)->source,
						transportp, dif, sdif);
			break;
		default:
			break;
		}

		if (asoc)
			break;

		ch = (struct sctp_chunkhdr *)ch_end;
		chunk_num++;
	} while (ch_end + sizeof(*ch) < skb_tail_pointer(skb));

	return asoc;
}

/*
 * There are circumstances when we need to look inside the SCTP packet
 * for information to help us find the association.   Examples
 * include looking inside of INIT/INIT-ACK chunks or after the AUTH
 * chunks.
 */
static struct sctp_association *__sctp_rcv_lookup_harder(struct net *net,
				      struct sk_buff *skb,
				      const union sctp_addr *laddr,
				      struct sctp_transport **transportp,
				      int dif, int sdif)
{
	struct sctp_chunkhdr *ch;

	/* We do not allow GSO frames here as we need to linearize and
	 * then cannot guarantee frame boundaries. This shouldn't be an
	 * issue as packets hitting this are mostly INIT or INIT-ACK and
	 * those cannot be on GSO-style anyway.
	 */
	if (skb_is_gso(skb) && skb_is_gso_sctp(skb))
		return NULL;

	ch = (struct sctp_chunkhdr *)skb->data;

	/* The code below will attempt to walk the chunk and extract
	 * parameter information.  Before we do that, we need to verify
	 * that the chunk length doesn't cause overflow.  Otherwise, we'll
	 * walk off the end.
	 */
	if (SCTP_PAD4(ntohs(ch->length)) > skb->len)
		return NULL;

	/* If this is INIT/INIT-ACK look inside the chunk too. */
	if (ch->type == SCTP_CID_INIT || ch->type == SCTP_CID_INIT_ACK)
		return __sctp_rcv_init_lookup(net, skb, laddr, transportp, dif, sdif);

	return __sctp_rcv_walk_lookup(net, skb, laddr, transportp, dif, sdif);
}

/* Lookup an association for an inbound skb.
 *
 * Tries the plain address-pair lookup first, then the deeper lookup that
 * inspects the chunks.  Emits a debug message and returns NULL when both
 * fail.
 */
static struct sctp_association *__sctp_rcv_lookup(struct net *net,
				      struct sk_buff *skb,
				      const union sctp_addr *paddr,
				      const union sctp_addr *laddr,
				      struct sctp_transport **transportp,
				      int dif, int sdif)
{
	struct sctp_association *asoc;

	asoc = __sctp_lookup_association(net, laddr, paddr, transportp, dif, sdif);
	if (asoc)
		goto out;

	/* Further lookup for INIT/INIT-ACK packets.
	 * SCTP Implementors Guide, 2.18 Handling of address
	 * parameters within the INIT or INIT-ACK.
	 */
	asoc = __sctp_rcv_lookup_harder(net, skb, laddr, transportp, dif, sdif);
	if (asoc)
		goto out;

	if (paddr->sa.sa_family == AF_INET)
		pr_debug("sctp: asoc not found for src:%pI4:%d dst:%pI4:%d\n",
			 &laddr->v4.sin_addr, ntohs(laddr->v4.sin_port),
			 &paddr->v4.sin_addr, ntohs(paddr->v4.sin_port));
	else
		pr_debug("sctp: asoc not found for src:%pI6:%d dst:%pI6:%d\n",
			 &laddr->v6.sin6_addr, ntohs(laddr->v6.sin6_port),
			 &paddr->v6.sin6_addr, ntohs(paddr->v6.sin6_port));

out:
	return asoc;
}
4 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 #ifndef __NET_TUN_PROTO_H #define __NET_TUN_PROTO_H #include <linux/if_ether.h> #include <linux/types.h> /* One byte protocol values as defined by VXLAN-GPE and NSH. These will * hopefully get a shared IANA registry. */ #define TUN_P_IPV4 0x01 #define TUN_P_IPV6 0x02 #define TUN_P_ETHERNET 0x03 #define TUN_P_NSH 0x04 #define TUN_P_MPLS_UC 0x05 static inline __be16 tun_p_to_eth_p(u8 proto) { switch (proto) { case TUN_P_IPV4: return htons(ETH_P_IP); case TUN_P_IPV6: return htons(ETH_P_IPV6); case TUN_P_ETHERNET: return htons(ETH_P_TEB); case TUN_P_NSH: return htons(ETH_P_NSH); case TUN_P_MPLS_UC: return htons(ETH_P_MPLS_UC); } return 0; } static inline u8 tun_p_from_eth_p(__be16 proto) { switch (proto) { case htons(ETH_P_IP): return TUN_P_IPV4; case htons(ETH_P_IPV6): return TUN_P_IPV6; case htons(ETH_P_TEB): return TUN_P_ETHERNET; case htons(ETH_P_NSH): return TUN_P_NSH; case htons(ETH_P_MPLS_UC): return TUN_P_MPLS_UC; } return 0; } #endif
40 3 3 3 3 3 3 3 3 3 40 40 40 40 3 3 3 3 3 3 3 3 3 3 3 4 4 4 4 4 4 4 4 4 4 4 4 4 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 6 6 6 4 2 1 1 2 6 6 6 2 6 3 3 3 3 3 3 3 6 3 6 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 37 37 37 37 37 3 37 26 36 36 36 36 36 34 34 34 34 34 34 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 
457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 
957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 
1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 // SPDX-License-Identifier: GPL-2.0-only /* * Copyright 2002-2005, Instant802 Networks, Inc. * Copyright 2005-2006, Devicescape Software, Inc. * Copyright 2006-2007 Jiri Benc <jbenc@suse.cz> * Copyright 2007-2008 Johannes Berg <johannes@sipsolutions.net> * Copyright 2013-2014 Intel Mobile Communications GmbH * Copyright 2015-2017 Intel Deutschland GmbH * Copyright 2018-2020, 2022-2025 Intel Corporation */ #include <crypto/utils.h> #include <linux/if_ether.h> #include <linux/etherdevice.h> #include <linux/list.h> #include <linux/rcupdate.h> #include <linux/rtnetlink.h> #include <linux/slab.h> #include <linux/export.h> #include <net/mac80211.h> #include <linux/unaligned.h> #include "ieee80211_i.h" #include "driver-ops.h" #include "debugfs_key.h" #include "aes_ccm.h" #include "aes_cmac.h" #include "aes_gmac.h" #include "aes_gcm.h" /** * DOC: Key handling basics * * Key handling in mac80211 is done based on per-interface (sub_if_data) * keys and per-station keys. Since each station belongs to an interface, * each station key also belongs to that interface. 
*
 * Hardware acceleration is done on a best-effort basis for algorithms
 * that are implemented in software,  for each key the hardware is asked
 * to enable that key for offloading but if it cannot do that the key is
 * simply kept for software encryption (unless it is for an algorithm
 * that isn't implemented in software).
 * There is currently no way of knowing whether a key is handled in SW
 * or HW except by looking into debugfs.
 *
 * All key management is internally protected by a mutex. Within all
 * other parts of mac80211, key references are, just as STA structure
 * references, protected by RCU. Note, however, that some things are
 * unprotected, namely the key->sta dereferences within the hardware
 * acceleration functions. This means that sta_info_destroy() must
 * remove the key which waits for an RCU grace period.
 */

/* Address printed in key error messages when a key has no station. */
static const u8 bcast_addr[ETH_ALEN] = { 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF };

/*
 * Apply @delta to crypto_tx_tailroom_needed_cnt of every VLAN interface
 * hanging off an AP interface. Does nothing for non-AP interfaces.
 */
static void
update_vlan_tailroom_need_count(struct ieee80211_sub_if_data *sdata, int delta)
{
	struct ieee80211_sub_if_data *vlan;

	if (sdata->vif.type != NL80211_IFTYPE_AP)
		return;

	/* crypto_tx_tailroom_needed_cnt is protected by this */
	lockdep_assert_wiphy(sdata->local->hw.wiphy);

	rcu_read_lock();

	list_for_each_entry_rcu(vlan, &sdata->u.ap.vlans, u.vlan.list)
		vlan->crypto_tx_tailroom_needed_cnt += delta;

	rcu_read_unlock();
}

/*
 * Bump the tailroom-needed counter of @sdata (and of its VLANs) by one,
 * calling synchronize_net() when the counter leaves zero.
 */
static void increment_tailroom_need_count(struct ieee80211_sub_if_data *sdata)
{
	/*
	 * When this count is zero, SKB resizing for allocating tailroom
	 * for IV or MMIC is skipped. But, this check has created two race
	 * cases in xmit path while transiting from zero count to one:
	 *
	 * 1. SKB resize was skipped because no key was added but just before
	 * the xmit key is added and SW encryption kicks off.
	 *
	 * 2. SKB resize was skipped because all the keys were hw planted but
	 * just before xmit one of the key is deleted and SW encryption kicks
	 * off.
	 *
	 * In both the above case SW encryption will find not enough space for
	 * tailroom and exits with WARN_ON. (See WARN_ONs at wpa.c)
	 *
	 * Solution has been explained at
	 * http://mid.gmane.org/1308590980.4322.19.camel@jlt3.sipsolutions.net
	 */

	lockdep_assert_wiphy(sdata->local->hw.wiphy);

	update_vlan_tailroom_need_count(sdata, 1);

	if (!sdata->crypto_tx_tailroom_needed_cnt++) {
		/*
		 * Flush all XMIT packets currently using HW encryption or no
		 * encryption at all if the count transition is from 0 -> 1.
		 */
		synchronize_net();
	}
}

/*
 * Lower the tailroom-needed counter of @sdata (and of its VLANs) by @delta.
 * Warns once if the counter is smaller than @delta.
 */
static void decrease_tailroom_need_count(struct ieee80211_sub_if_data *sdata,
					 int delta)
{
	lockdep_assert_wiphy(sdata->local->hw.wiphy);

	WARN_ON_ONCE(sdata->crypto_tx_tailroom_needed_cnt < delta);

	update_vlan_tailroom_need_count(sdata, -delta);
	sdata->crypto_tx_tailroom_needed_cnt -= delta;
}

/*
 * Ask the driver to take @key for hardware offload.
 *
 * Returns 0 when the key was uploaded, when its link is not active, or when
 * upload was not possible but the cipher is one of those listed below and
 * software crypto is permitted. Returns -EINVAL for a tainted key, for a
 * cipher not in that list, or when the hardware has SW_CRYPTO_CONTROL set
 * and the upload did not happen (except for the ret == 1 VLAN GTK case).
 */
static int ieee80211_key_enable_hw_accel(struct ieee80211_key *key)
{
	struct ieee80211_sub_if_data *sdata = key->sdata;
	struct sta_info *sta;
	int ret = -EOPNOTSUPP;

	might_sleep();
	lockdep_assert_wiphy(key->local->hw.wiphy);

	if (key->flags & KEY_FLAG_TAINTED) {
		/* If we get here, it's during resume and the key is
		 * tainted so shouldn't be used/programmed any more.
		 * However, its flags may still indicate that it was
		 * programmed into the device (since we're in resume)
		 * so clear that flag now to avoid trying to remove
		 * it again later.
		 */
		if (key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE &&
		    !(key->conf.flags & (IEEE80211_KEY_FLAG_GENERATE_MMIC |
					 IEEE80211_KEY_FLAG_PUT_MIC_SPACE |
					 IEEE80211_KEY_FLAG_RESERVE_TAILROOM)))
			increment_tailroom_need_count(sdata);

		key->flags &= ~KEY_FLAG_UPLOADED_TO_HARDWARE;
		return -EINVAL;
	}

	if (!key->local->ops->set_key)
		goto out_unsupported;

	sta = key->sta;

	/*
	 * If this is a per-STA GTK, check if it
	 * is supported; if not, return.
	 */
	if (sta && !(key->conf.flags & IEEE80211_KEY_FLAG_PAIRWISE) &&
	    !ieee80211_hw_check(&key->local->hw, SUPPORTS_PER_STA_GTK))
		goto out_unsupported;

	if (sta && !sta->uploaded)
		goto out_unsupported;

	if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN) {
		/*
		 * The driver doesn't know anything about VLAN interfaces.
		 * Hence, don't send GTKs for VLAN interfaces to the driver.
		 */
		if (!(key->conf.flags & IEEE80211_KEY_FLAG_PAIRWISE)) {
			/* ret == 1 makes the fallback below return 0. */
			ret = 1;
			goto out_unsupported;
		}
	}

	/* Keys bound to a link that is not active are not programmed now. */
	if (key->conf.link_id >= 0 && sdata->vif.active_links &&
	    !(sdata->vif.active_links & BIT(key->conf.link_id)))
		return 0;

	ret = drv_set_key(key->local, SET_KEY, sdata,
			  sta ? &sta->sta : NULL, &key->conf);

	if (!ret) {
		key->flags |= KEY_FLAG_UPLOADED_TO_HARDWARE;

		if (!(key->conf.flags & (IEEE80211_KEY_FLAG_GENERATE_MMIC |
					 IEEE80211_KEY_FLAG_PUT_MIC_SPACE |
					 IEEE80211_KEY_FLAG_RESERVE_TAILROOM)))
			decrease_tailroom_need_count(sdata, 1);

		WARN_ON((key->conf.flags & IEEE80211_KEY_FLAG_PUT_IV_SPACE) &&
			(key->conf.flags & IEEE80211_KEY_FLAG_GENERATE_IV));

		WARN_ON((key->conf.flags & IEEE80211_KEY_FLAG_PUT_MIC_SPACE) &&
			(key->conf.flags & IEEE80211_KEY_FLAG_GENERATE_MMIC));

		return 0;
	}

	if (ret != -ENOSPC && ret != -EOPNOTSUPP && ret != 1)
		sdata_err(sdata,
			  "failed to set key (%d, %pM) to hardware (%d)\n",
			  key->conf.keyidx,
			  sta ? sta->sta.addr : bcast_addr, ret);

 out_unsupported:
	switch (key->conf.cipher) {
	case WLAN_CIPHER_SUITE_WEP40:
	case WLAN_CIPHER_SUITE_WEP104:
	case WLAN_CIPHER_SUITE_TKIP:
	case WLAN_CIPHER_SUITE_CCMP:
	case WLAN_CIPHER_SUITE_CCMP_256:
	case WLAN_CIPHER_SUITE_GCMP:
	case WLAN_CIPHER_SUITE_GCMP_256:
	case WLAN_CIPHER_SUITE_AES_CMAC:
	case WLAN_CIPHER_SUITE_BIP_CMAC_256:
	case WLAN_CIPHER_SUITE_BIP_GMAC_128:
	case WLAN_CIPHER_SUITE_BIP_GMAC_256:
		/* all of these we can do in software - if driver can */
		if (ret == 1)
			return 0;
		if (ieee80211_hw_check(&key->local->hw, SW_CRYPTO_CONTROL))
			return -EINVAL;
		return 0;
	default:
		return -EINVAL;
	}
}

/*
 * Remove @key from the hardware if it is currently marked as uploaded.
 * A NULL key, a driver without set_key, or a key on an inactive link is a
 * no-op. Driver failure is only logged.
 */
static void ieee80211_key_disable_hw_accel(struct ieee80211_key *key)
{
	struct ieee80211_sub_if_data *sdata;
	struct sta_info *sta;
	int ret;

	might_sleep();

	if (!key || !key->local->ops->set_key)
		return;

	if (!(key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE))
		return;

	sta = key->sta;
	sdata = key->sdata;

	lockdep_assert_wiphy(key->local->hw.wiphy);

	if (key->conf.link_id >= 0 && sdata->vif.active_links &&
	    !(sdata->vif.active_links & BIT(key->conf.link_id)))
		return;

	/* Mirror of the decrement done when the key was uploaded. */
	if (!(key->conf.flags & (IEEE80211_KEY_FLAG_GENERATE_MMIC |
				 IEEE80211_KEY_FLAG_PUT_MIC_SPACE |
				 IEEE80211_KEY_FLAG_RESERVE_TAILROOM)))
		increment_tailroom_need_count(sdata);

	key->flags &= ~KEY_FLAG_UPLOADED_TO_HARDWARE;
	ret = drv_set_key(key->local, DISABLE_KEY, sdata,
			  sta ? &sta->sta : NULL, &key->conf);

	if (ret)
		sdata_err(sdata,
			  "failed to remove key (%d, %pM) from hardware (%d)\n",
			  key->conf.keyidx,
			  sta ? sta->sta.addr : bcast_addr, ret);
}

/*
 * Make @key the station's TX key: records its index in sta->ptk_idx and sets
 * WLAN_STA_USES_ENCRYPTION. WLAN_STA_BLOCK_BA is cleared when @force is set
 * or the hardware lacks AMPDU_KEYBORDER_SUPPORT. Always returns 0.
 */
static int _ieee80211_set_tx_key(struct ieee80211_key *key, bool force)
{
	struct sta_info *sta = key->sta;
	struct ieee80211_local *local = key->local;

	lockdep_assert_wiphy(local->hw.wiphy);

	set_sta_flag(sta, WLAN_STA_USES_ENCRYPTION);

	sta->ptk_idx = key->conf.keyidx;

	if (force || !ieee80211_hw_check(&local->hw, AMPDU_KEYBORDER_SUPPORT))
		clear_sta_flag(sta, WLAN_STA_BLOCK_BA);
	ieee80211_check_fast_xmit(sta);

	return 0;
}

/* Non-forcing wrapper around _ieee80211_set_tx_key(). */
int ieee80211_set_tx_key(struct ieee80211_key *key)
{
	return _ieee80211_set_tx_key(key, false);
}

/*
 * Prepare the station for a pairwise key (re)install: block/tear down
 * aggregation sessions as needed and, for a plain rekey of a key that is in
 * hardware, taint the old key so TX stops using it.
 */
static void ieee80211_pairwise_rekey(struct ieee80211_key *old,
				     struct ieee80211_key *new)
{
	struct ieee80211_local *local = new->local;
	struct sta_info *sta = new->sta;
	int i;

	lockdep_assert_wiphy(local->hw.wiphy);

	if (new->conf.flags & IEEE80211_KEY_FLAG_NO_AUTO_TX) {
		/* Extended Key ID key install, initial one or rekey */

		if (sta->ptk_idx != INVALID_PTK_KEYIDX &&
		    !ieee80211_hw_check(&local->hw, AMPDU_KEYBORDER_SUPPORT)) {
			/* Aggregation Sessions with Extended Key ID must not
			 * mix MPDUs with different keyIDs within one A-MPDU.
			 * Tear down running Tx aggregation sessions and block
			 * new Rx/Tx aggregation requests during rekey to
			 * ensure there are no A-MPDUs when the driver is not
			 * supporting A-MPDU key borders. (Blocking Tx only
			 * would be sufficient but WLAN_STA_BLOCK_BA gets the
			 * job done for the few ms we need it.)
			 */
			set_sta_flag(sta, WLAN_STA_BLOCK_BA);
			for (i = 0; i <  IEEE80211_NUM_TIDS; i++)
				__ieee80211_stop_tx_ba_session(sta, i,
							       AGG_STOP_LOCAL_REQUEST);
		}
	} else if (old) {
		/* Rekey without Extended Key ID.
		 * Aggregation sessions are OK when running on SW crypto.
		 * A broken remote STA may cause issues not observed with HW
		 * crypto, though.
		 */
		if (!(old->flags & KEY_FLAG_UPLOADED_TO_HARDWARE))
			return;

		/* Stop Tx till we are on the new key */
		old->flags |= KEY_FLAG_TAINTED;
		ieee80211_clear_fast_xmit(sta);
		if (ieee80211_hw_check(&local->hw, AMPDU_AGGREGATION)) {
			set_sta_flag(sta, WLAN_STA_BLOCK_BA);
			ieee80211_sta_tear_down_BA_sessions(sta,
							    AGG_STOP_LOCAL_REQUEST);
		}
		if (!wiphy_ext_feature_isset(local->hw.wiphy,
					     NL80211_EXT_FEATURE_CAN_REPLACE_PTK0)) {
			pr_warn_ratelimited("Rekeying PTK for STA %pM but driver can't safely do that.",
					    sta->sta.addr);
			/* Flushing the driver queues *may* help prevent
			 * the clear text leaks and freezes.
			 */
			ieee80211_flush_queues(local, old->sdata, false);
		}
	}
}

/*
 * Point the default unicast (@uni) and/or multicast (@multi) key at the key
 * stored under @idx; an @idx outside [0, NUM_DEFAULT_KEYS) clears them.
 */
static void __ieee80211_set_default_key(struct ieee80211_link_data *link,
					int idx, bool uni, bool multi)
{
	struct ieee80211_sub_if_data *sdata = link->sdata;
	struct ieee80211_key *key = NULL;

	lockdep_assert_wiphy(sdata->local->hw.wiphy);

	if (idx >= 0 && idx < NUM_DEFAULT_KEYS) {
		/* Prefer the interface key, fall back to the link's GTK. */
		key = wiphy_dereference(sdata->local->hw.wiphy,
					sdata->keys[idx]);
		if (!key)
			key = wiphy_dereference(sdata->local->hw.wiphy,
						link->gtk[idx]);
	}

	if (uni) {
		rcu_assign_pointer(sdata->default_unicast_key, key);
		ieee80211_check_fast_xmit_iface(sdata);
		if (sdata->vif.type != NL80211_IFTYPE_AP_VLAN)
			drv_set_default_unicast_key(sdata->local, sdata, idx);
	}

	if (multi)
		rcu_assign_pointer(link->default_multicast_key, key);

	ieee80211_debugfs_key_update_default(sdata);
}

/* Public entry point for __ieee80211_set_default_key(). */
void ieee80211_set_default_key(struct ieee80211_link_data *link, int idx,
			       bool uni, bool multi)
{
	lockdep_assert_wiphy(link->sdata->local->hw.wiphy);

	__ieee80211_set_default_key(link, idx, uni, multi);
}

/*
 * Set the link's default management key from link->gtk[@idx]; an @idx
 * outside the management key index range clears it.
 */
static void
__ieee80211_set_default_mgmt_key(struct ieee80211_link_data *link, int idx)
{
	struct ieee80211_sub_if_data *sdata = link->sdata;
	struct ieee80211_key *key = NULL;

	lockdep_assert_wiphy(sdata->local->hw.wiphy);

	if (idx >= NUM_DEFAULT_KEYS &&
	    idx < NUM_DEFAULT_KEYS + NUM_DEFAULT_MGMT_KEYS)
		key = wiphy_dereference(sdata->local->hw.wiphy,
					link->gtk[idx]);

	rcu_assign_pointer(link->default_mgmt_key, key);

	ieee80211_debugfs_key_update_default(sdata);
}

/* Public entry point for __ieee80211_set_default_mgmt_key(). */
void ieee80211_set_default_mgmt_key(struct ieee80211_link_data *link,
				    int idx)
{
	lockdep_assert_wiphy(link->sdata->local->hw.wiphy);

	__ieee80211_set_default_mgmt_key(link, idx);
}

/*
 * Set the link's default beacon key from link->gtk[@idx]; an @idx outside
 * the beacon key index range clears it.
 */
static void
__ieee80211_set_default_beacon_key(struct ieee80211_link_data *link, int idx)
{
	struct ieee80211_sub_if_data *sdata = link->sdata;
	struct ieee80211_key *key = NULL;

	lockdep_assert_wiphy(sdata->local->hw.wiphy);

	if (idx >= NUM_DEFAULT_KEYS + NUM_DEFAULT_MGMT_KEYS &&
	    idx < NUM_DEFAULT_KEYS + NUM_DEFAULT_MGMT_KEYS +
	    NUM_DEFAULT_BEACON_KEYS)
		key = wiphy_dereference(sdata->local->hw.wiphy,
					link->gtk[idx]);

	rcu_assign_pointer(link->default_beacon_key, key);

	ieee80211_debugfs_key_update_default(sdata);
}

/* Public entry point for __ieee80211_set_default_beacon_key(). */
void ieee80211_set_default_beacon_key(struct ieee80211_link_data *link,
				      int idx)
{
	lockdep_assert_wiphy(link->sdata->local->hw.wiphy);

	__ieee80211_set_default_beacon_key(link, idx);
}

/*
 * Swap @old for @new in the key tables (either may be NULL, not both).
 *
 * Handles hardware offload of the pair, list membership on sdata->key_list,
 * the station/interface/link key slots and any default-key pointers that
 * referenced @old. Returns 0 on success, -EINVAL for a link ID mismatch or
 * an out-of-range WEP/pairwise index, -ENOLINK when the link (or station
 * link) does not exist, or the error from enabling @new in hardware.
 */
static int ieee80211_key_replace(struct ieee80211_sub_if_data *sdata,
				 struct ieee80211_link_data *link,
				 struct sta_info *sta,
				 bool pairwise,
				 struct ieee80211_key *old,
				 struct ieee80211_key *new)
{
	struct link_sta_info *link_sta = sta ? &sta->deflink : NULL;
	int link_id;
	int idx;
	int ret = 0;
	bool defunikey, defmultikey, defmgmtkey, defbeaconkey;
	bool is_wep;

	lockdep_assert_wiphy(sdata->local->hw.wiphy);

	/* caller must provide at least one old/new */
	if (WARN_ON(!new && !old))
		return 0;

	if (new) {
		idx = new->conf.keyidx;
		is_wep = new->conf.cipher == WLAN_CIPHER_SUITE_WEP40 ||
			 new->conf.cipher == WLAN_CIPHER_SUITE_WEP104;
		link_id = new->conf.link_id;
	} else {
		idx = old->conf.keyidx;
		is_wep = old->conf.cipher == WLAN_CIPHER_SUITE_WEP40 ||
			 old->conf.cipher == WLAN_CIPHER_SUITE_WEP104;
		link_id = old->conf.link_id;
	}

	if (WARN(old && old->conf.link_id != link_id,
		 "old link ID %d doesn't match new link ID %d\n",
		 old->conf.link_id, link_id))
		return -EINVAL;

	if (link_id >= 0) {
		if (!link) {
			link = sdata_dereference(sdata->link[link_id], sdata);
			if (!link)
				return -ENOLINK;
		}

		if (sta) {
			link_sta = rcu_dereference_protected(sta->link[link_id],
							     lockdep_is_held(&sta->local->hw.wiphy->mtx));
			if (!link_sta)
				return -ENOLINK;
		}
	} else {
		link = &sdata->deflink;
	}

	if ((is_wep || pairwise) && idx >= NUM_DEFAULT_KEYS)
		return -EINVAL;

	WARN_ON(new && old && new->conf.keyidx != old->conf.keyidx);

	if (new && sta && pairwise) {
		/* Unicast rekey needs special handling. With Extended Key ID
		 * old is still NULL for the first rekey.
		 */
		ieee80211_pairwise_rekey(old, new);
	}

	if (old) {
		if (old->flags & KEY_FLAG_UPLOADED_TO_HARDWARE) {
			ieee80211_key_disable_hw_accel(old);

			if (new)
				ret = ieee80211_key_enable_hw_accel(new);
		}
	} else {
		if (!new->local->wowlan) {
			ret = ieee80211_key_enable_hw_accel(new);
		} else if (link_id < 0 || !sdata->vif.active_links ||
			 BIT(link_id) & sdata->vif.active_links) {
			/* wowlan set: only mark as uploaded, no driver call. */
			new->flags |= KEY_FLAG_UPLOADED_TO_HARDWARE;
			if (!(new->conf.flags & (IEEE80211_KEY_FLAG_GENERATE_MMIC |
						 IEEE80211_KEY_FLAG_PUT_MIC_SPACE |
						 IEEE80211_KEY_FLAG_RESERVE_TAILROOM)))
				decrease_tailroom_need_count(sdata, 1);
		}
	}

	if (ret)
		return ret;

	if (new)
		list_add_tail_rcu(&new->list, &sdata->key_list);

	if (sta) {
		if (pairwise) {
			rcu_assign_pointer(sta->ptk[idx], new);
			if (new &&
			    !(new->conf.flags & IEEE80211_KEY_FLAG_NO_AUTO_TX))
				_ieee80211_set_tx_key(new, true);
		} else {
			rcu_assign_pointer(link_sta->gtk[idx], new);
		}
		/* Only needed for transition from no key -> key.
		 * Still triggers unnecessary when using Extended Key ID
		 * and installing the second key ID the first time.
		 */
		if (new && !old)
			ieee80211_check_fast_rx(sta);
	} else {
		/* Remember which default pointers referenced the old key. */
		defunikey = old &&
			old == wiphy_dereference(sdata->local->hw.wiphy,
						 sdata->default_unicast_key);
		defmultikey = old &&
			old == wiphy_dereference(sdata->local->hw.wiphy,
						 link->default_multicast_key);
		defmgmtkey = old &&
			old == wiphy_dereference(sdata->local->hw.wiphy,
						 link->default_mgmt_key);
		defbeaconkey = old &&
			old == wiphy_dereference(sdata->local->hw.wiphy,
						 link->default_beacon_key);

		/* Removal: clear the defaults before the slot is emptied. */
		if (defunikey && !new)
			__ieee80211_set_default_key(link, -1, true, false);
		if (defmultikey && !new)
			__ieee80211_set_default_key(link, -1, false, true);
		if (defmgmtkey && !new)
			__ieee80211_set_default_mgmt_key(link, -1);
		if (defbeaconkey && !new)
			__ieee80211_set_default_beacon_key(link, -1);

		if (is_wep || pairwise)
			rcu_assign_pointer(sdata->keys[idx], new);
		else
			rcu_assign_pointer(link->gtk[idx], new);

		/* Replacement: re-point the defaults after the slot is set. */
		if (defunikey && new)
			__ieee80211_set_default_key(link, new->conf.keyidx,
						    true, false);
		if (defmultikey && new)
			__ieee80211_set_default_key(link, new->conf.keyidx,
						    false, true);
		if (defmgmtkey && new)
			__ieee80211_set_default_mgmt_key(link,
							 new->conf.keyidx);
		if (defbeaconkey && new)
			__ieee80211_set_default_beacon_key(link,
							   new->conf.keyidx);
	}

	if (old)
		list_del_rcu(&old->list);

	return 0;
}

/*
 * Allocate a key object and set up per-cipher state.
 *
 * @seq, when non-NULL, seeds the receive sequence/packet-number counters for
 * the ciphers handled below. @seq_len is not read by this function.
 * Returns the new key, or an ERR_PTR(): -EINVAL for an out-of-range @idx,
 * -ENOMEM on allocation failure, or the cipher setup error.
 */
struct ieee80211_key *
ieee80211_key_alloc(u32 cipher, int idx, size_t key_len,
		    const u8 *key_data,
		    size_t seq_len, const u8 *seq)
{
	struct ieee80211_key *key;
	int i, j, err;

	if (WARN_ON(idx < 0 ||
		    idx >= NUM_DEFAULT_KEYS + NUM_DEFAULT_MGMT_KEYS +
		    NUM_DEFAULT_BEACON_KEYS))
		return ERR_PTR(-EINVAL);

	key = kzalloc(sizeof(struct ieee80211_key) + key_len, GFP_KERNEL);
	if (!key)
		return ERR_PTR(-ENOMEM);

	/*
	 * Default to software encryption; we'll later upload the
	 * key to the hardware if possible.
	 */
	key->conf.flags = 0;
	key->flags = 0;

	key->conf.link_id = -1;
	key->conf.cipher = cipher;
	key->conf.keyidx = idx;
	key->conf.keylen = key_len;
	switch (cipher) {
	case WLAN_CIPHER_SUITE_WEP40:
	case WLAN_CIPHER_SUITE_WEP104:
		key->conf.iv_len = IEEE80211_WEP_IV_LEN;
		key->conf.icv_len = IEEE80211_WEP_ICV_LEN;
		break;
	case WLAN_CIPHER_SUITE_TKIP:
		key->conf.iv_len = IEEE80211_TKIP_IV_LEN;
		key->conf.icv_len = IEEE80211_TKIP_ICV_LEN;
		if (seq) {
			/* seq[0..1] -> iv16, seq[2..5] -> iv32 (little endian) */
			for (i = 0; i < IEEE80211_NUM_TIDS; i++) {
				key->u.tkip.rx[i].iv32 =
					get_unaligned_le32(&seq[2]);
				key->u.tkip.rx[i].iv16 =
					get_unaligned_le16(seq);
			}
		}
		spin_lock_init(&key->u.tkip.txlock);
		break;
	case WLAN_CIPHER_SUITE_CCMP:
		key->conf.iv_len = IEEE80211_CCMP_HDR_LEN;
		key->conf.icv_len = IEEE80211_CCMP_MIC_LEN;
		if (seq) {
			/* Store the PN with its byte order reversed. */
			for (i = 0; i < IEEE80211_NUM_TIDS + 1; i++)
				for (j = 0; j < IEEE80211_CCMP_PN_LEN; j++)
					key->u.ccmp.rx_pn[i][j] =
						seq[IEEE80211_CCMP_PN_LEN - j - 1];
		}
		/*
		 * Initialize AES key state here as an optimization so that
		 * it does not need to be initialized for every packet.
		 */
		key->u.ccmp.tfm = ieee80211_aes_key_setup_encrypt(
			key_data, key_len, IEEE80211_CCMP_MIC_LEN);
		if (IS_ERR(key->u.ccmp.tfm)) {
			err = PTR_ERR(key->u.ccmp.tfm);
			kfree(key);
			return ERR_PTR(err);
		}
		break;
	case WLAN_CIPHER_SUITE_CCMP_256:
		key->conf.iv_len = IEEE80211_CCMP_256_HDR_LEN;
		key->conf.icv_len = IEEE80211_CCMP_256_MIC_LEN;
		for (i = 0; seq && i < IEEE80211_NUM_TIDS + 1; i++)
			for (j = 0; j < IEEE80211_CCMP_256_PN_LEN; j++)
				key->u.ccmp.rx_pn[i][j] =
					seq[IEEE80211_CCMP_256_PN_LEN - j - 1];
		/* Initialize AES key state here as an optimization so that
		 * it does not need to be initialized for every packet.
		 */
		key->u.ccmp.tfm = ieee80211_aes_key_setup_encrypt(
			key_data, key_len, IEEE80211_CCMP_256_MIC_LEN);
		if (IS_ERR(key->u.ccmp.tfm)) {
			err = PTR_ERR(key->u.ccmp.tfm);
			kfree(key);
			return ERR_PTR(err);
		}
		break;
	case WLAN_CIPHER_SUITE_AES_CMAC:
	case WLAN_CIPHER_SUITE_BIP_CMAC_256:
		key->conf.iv_len = 0;
		if (cipher == WLAN_CIPHER_SUITE_AES_CMAC)
			key->conf.icv_len = sizeof(struct ieee80211_mmie);
		else
			key->conf.icv_len = sizeof(struct ieee80211_mmie_16);
		if (seq)
			for (j = 0; j < IEEE80211_CMAC_PN_LEN; j++)
				key->u.aes_cmac.rx_pn[j] =
					seq[IEEE80211_CMAC_PN_LEN - j - 1];
		/*
		 * Initialize AES key state here as an optimization so that
		 * it does not need to be initialized for every packet.
		 */
		err = aes_cmac_preparekey(&key->u.aes_cmac.key, key_data,
					  key_len);
		if (err) {
			kfree(key);
			return ERR_PTR(err);
		}
		break;
	case WLAN_CIPHER_SUITE_BIP_GMAC_128:
	case WLAN_CIPHER_SUITE_BIP_GMAC_256:
		key->conf.iv_len = 0;
		key->conf.icv_len = sizeof(struct ieee80211_mmie_16);
		if (seq)
			for (j = 0; j < IEEE80211_GMAC_PN_LEN; j++)
				key->u.aes_gmac.rx_pn[j] =
					seq[IEEE80211_GMAC_PN_LEN - j - 1];
		/* Initialize AES key state here as an optimization so that
		 * it does not need to be initialized for every packet.
		 */
		key->u.aes_gmac.tfm =
			ieee80211_aes_gmac_key_setup(key_data, key_len);
		if (IS_ERR(key->u.aes_gmac.tfm)) {
			err = PTR_ERR(key->u.aes_gmac.tfm);
			kfree(key);
			return ERR_PTR(err);
		}
		break;
	case WLAN_CIPHER_SUITE_GCMP:
	case WLAN_CIPHER_SUITE_GCMP_256:
		key->conf.iv_len = IEEE80211_GCMP_HDR_LEN;
		key->conf.icv_len = IEEE80211_GCMP_MIC_LEN;
		for (i = 0; seq && i < IEEE80211_NUM_TIDS + 1; i++)
			for (j = 0; j < IEEE80211_GCMP_PN_LEN; j++)
				key->u.gcmp.rx_pn[i][j] =
					seq[IEEE80211_GCMP_PN_LEN - j - 1];
		/* Initialize AES key state here as an optimization so that
		 * it does not need to be initialized for every packet.
		 */
		key->u.gcmp.tfm = ieee80211_aes_gcm_key_setup_encrypt(key_data,
								      key_len);
		if (IS_ERR(key->u.gcmp.tfm)) {
			err = PTR_ERR(key->u.gcmp.tfm);
			kfree(key);
			return ERR_PTR(err);
		}
		break;
	}
	/* Key material is copied only after cipher setup has succeeded. */
	memcpy(key->conf.key, key_data, key_len);
	INIT_LIST_HEAD(&key->list);

	return key;
}

/*
 * Release the per-cipher transform (for the ciphers that allocate one) and
 * free the key with kfree_sensitive().
 */
static void ieee80211_key_free_common(struct ieee80211_key *key)
{
	switch (key->conf.cipher) {
	case WLAN_CIPHER_SUITE_CCMP:
	case WLAN_CIPHER_SUITE_CCMP_256:
		ieee80211_aes_key_free(key->u.ccmp.tfm);
		break;
	case WLAN_CIPHER_SUITE_BIP_GMAC_128:
	case WLAN_CIPHER_SUITE_BIP_GMAC_256:
		ieee80211_aes_gmac_key_free(key->u.aes_gmac.tfm);
		break;
	case WLAN_CIPHER_SUITE_GCMP:
	case WLAN_CIPHER_SUITE_GCMP_256:
		ieee80211_aes_gcm_key_free(key->u.gcmp.tfm);
		break;
	}
	kfree_sensitive(key);
}

/*
 * Tear down a key. For a key that has key->local set, this removes its
 * debugfs entry and drops its tailroom reference, either immediately or via
 * the delayed work when @delay_tailroom is set. The key is then freed.
 */
static void __ieee80211_key_destroy(struct ieee80211_key *key,
				    bool delay_tailroom)
{
	if (key->local) {
		struct ieee80211_sub_if_data *sdata = key->sdata;

		ieee80211_debugfs_key_remove(key);

		if (delay_tailroom) {
			/* see ieee80211_delayed_tailroom_dec */
			sdata->crypto_tx_tailroom_pending_dec++;
			wiphy_delayed_work_queue(sdata->local->hw.wiphy,
						 &sdata->dec_tailroom_needed_wk,
						 HZ / 2);
		} else {
			decrease_tailroom_need_count(sdata, 1);
		}
	}

	ieee80211_key_free_common(key);
}

/* NULL-tolerant destroy that calls synchronize_net() before freeing. */
static void ieee80211_key_destroy(struct ieee80211_key *key,
				  bool delay_tailroom)
{
	if (!key)
		return;

	/*
	 * Synchronize so the TX path and rcu key iterators
	 * can no longer be using this key before we free/remove it.
	 */
	synchronize_net();

	__ieee80211_key_destroy(key, delay_tailroom);
}

/*
 * Free a key that has no sdata/local assigned; warns if either is set.
 * NULL is accepted.
 */
void ieee80211_key_free_unused(struct ieee80211_key *key)
{
	if (!key)
		return;

	WARN_ON(key->sdata || key->local);
	ieee80211_key_free_common(key);
}

/*
 * Return true when @old exists and holds the same key material as @new
 * (same length, equal bytes as judged by crypto_memneq()).
 */
static bool ieee80211_key_identical(struct ieee80211_sub_if_data *sdata,
				    struct ieee80211_key *old,
				    struct ieee80211_key *new)
{
	u8 tkip_old[WLAN_KEY_LEN_TKIP], tkip_new[WLAN_KEY_LEN_TKIP];
	u8 *tk_old, *tk_new;

	if (!old || new->conf.keylen != old->conf.keylen)
		return false;

	tk_old = old->conf.key;
	tk_new = new->conf.key;

	/*
	 * In station mode, don't compare the TX MIC key, as it's never used
	 * and offloaded rekeying may not care to send it to the host. This
	 * is the case in iwlwifi, for example.
	 */
	if (sdata->vif.type == NL80211_IFTYPE_STATION &&
	    new->conf.cipher == WLAN_CIPHER_SUITE_TKIP &&
	    new->conf.keylen == WLAN_KEY_LEN_TKIP &&
	    !(new->conf.flags & IEEE80211_KEY_FLAG_PAIRWISE)) {
		/* Compare copies with the 8-byte TX MIC key zeroed out. */
		memcpy(tkip_old, tk_old, WLAN_KEY_LEN_TKIP);
		memcpy(tkip_new, tk_new, WLAN_KEY_LEN_TKIP);
		memset(tkip_old + NL80211_TKIP_DATA_OFFSET_TX_MIC_KEY, 0, 8);
		memset(tkip_new + NL80211_TKIP_DATA_OFFSET_TX_MIC_KEY, 0, 8);
		tk_old = tkip_old;
		tk_new = tkip_new;
	}

	return !crypto_memneq(tk_old, tk_new, new->conf.keylen);
}

/*
 * Install @key on @link (and @sta, if given), replacing any key already in
 * the same slot.
 *
 * On every early-error path the not-yet-linked key is freed through
 * ieee80211_key_free_unused(). Returns 0 on success, -EOPNOTSUPP when the
 * rekey would change the cipher, -ENOLINK when the station has no such link,
 * -EALREADY when the same key material is already installed, or the error
 * from ieee80211_key_replace().
 */
int ieee80211_key_link(struct ieee80211_key *key,
		       struct ieee80211_link_data *link,
		       struct sta_info *sta)
{
	struct ieee80211_sub_if_data *sdata = link->sdata;
	static atomic_t key_color = ATOMIC_INIT(0);
	struct ieee80211_key *old_key = NULL;
	int idx = key->conf.keyidx;
	bool pairwise = key->conf.flags & IEEE80211_KEY_FLAG_PAIRWISE;
	/*
	 * We want to delay tailroom updates only for station - in that
	 * case it helps roaming speed, but in other cases it hurts and
	 * can cause warnings to appear.
	 */
	bool delay_tailroom = sdata->vif.type == NL80211_IFTYPE_STATION;
	int ret;

	lockdep_assert_wiphy(sdata->local->hw.wiphy);

	if (sta && pairwise) {
		struct ieee80211_key *alt_key;

		old_key = wiphy_dereference(sdata->local->hw.wiphy,
					    sta->ptk[idx]);
		alt_key = wiphy_dereference(sdata->local->hw.wiphy,
					    sta->ptk[idx ^ 1]);

		/* The rekey code assumes that the old and new key are using
		 * the same cipher. Enforce the assumption for pairwise keys.
		 */
		if ((alt_key && alt_key->conf.cipher != key->conf.cipher) ||
		    (old_key && old_key->conf.cipher != key->conf.cipher)) {
			ret = -EOPNOTSUPP;
			goto out;
		}
	} else if (sta) {
		struct link_sta_info *link_sta = &sta->deflink;
		int link_id = key->conf.link_id;

		if (link_id >= 0) {
			link_sta = rcu_dereference_protected(sta->link[link_id],
							     lockdep_is_held(&sta->local->hw.wiphy->mtx));
			if (!link_sta) {
				ret = -ENOLINK;
				goto out;
			}
		}

		old_key = wiphy_dereference(sdata->local->hw.wiphy,
					    link_sta->gtk[idx]);
	} else {
		if (idx < NUM_DEFAULT_KEYS)
			old_key = wiphy_dereference(sdata->local->hw.wiphy,
						    sdata->keys[idx]);
		if (!old_key)
			old_key = wiphy_dereference(sdata->local->hw.wiphy,
						    link->gtk[idx]);
	}

	/* Non-pairwise keys must also not switch the cipher on rekey */
	if (!pairwise) {
		if (old_key && old_key->conf.cipher != key->conf.cipher) {
			ret = -EOPNOTSUPP;
			goto out;
		}
	}

	/*
	 * Silently accept key re-installation without really installing the
	 * new version of the key to avoid nonce reuse or replay issues.
	 */
	if (ieee80211_key_identical(sdata, old_key, key)) {
		ret = -EALREADY;
		goto out;
	}

	key->local = sdata->local;
	key->sdata = sdata;
	key->sta = sta;

	/*
	 * Assign a unique ID to every key so we can easily prevent mixed
	 * key and fragment cache attacks.
	 */
	key->color = atomic_inc_return(&key_color);

	/* keep this flag for easier access later */
	if (sta && sta->sta.spp_amsdu)
		key->conf.flags |= IEEE80211_KEY_FLAG_SPP_AMSDU;

	increment_tailroom_need_count(sdata);

	ret = ieee80211_key_replace(sdata, link, sta, pairwise, old_key, key);

	if (!ret) {
		ieee80211_debugfs_key_add(key);
		ieee80211_key_destroy(old_key, delay_tailroom);
	} else {
		ieee80211_key_free(key, delay_tailroom);
	}

	/* The key is either linked or already freed: nothing left for "out". */
	key = NULL;

 out:
	ieee80211_key_free_unused(key);
	return ret;
}

/*
 * Unlink @key from wherever it is installed (if it has an sdata) and destroy
 * it. NULL is accepted.
 */
void ieee80211_key_free(struct ieee80211_key *key, bool delay_tailroom)
{
	if (!key)
		return;

	/*
	 * Replace key with nothingness if it was ever used.
	 */
	if (key->sdata)
		ieee80211_key_replace(key->sdata, NULL, key->sta,
				      key->conf.flags & IEEE80211_KEY_FLAG_PAIRWISE,
				      key, NULL);
	ieee80211_key_destroy(key, delay_tailroom);
}

/*
 * Reset the tailroom counters of @sdata (and of its VLANs for an AP) and, if
 * the interface is running, re-offer every key on its key list to the
 * hardware, re-counting tailroom for each key that is not tainted.
 */
void ieee80211_reenable_keys(struct ieee80211_sub_if_data *sdata)
{
	struct ieee80211_key *key;
	struct ieee80211_sub_if_data *vlan;

	lockdep_assert_wiphy(sdata->local->hw.wiphy);

	sdata->crypto_tx_tailroom_needed_cnt = 0;
	sdata->crypto_tx_tailroom_pending_dec = 0;

	if (sdata->vif.type == NL80211_IFTYPE_AP) {
		list_for_each_entry(vlan, &sdata->u.ap.vlans, u.vlan.list) {
			vlan->crypto_tx_tailroom_needed_cnt = 0;
			vlan->crypto_tx_tailroom_pending_dec = 0;
		}
	}

	if (ieee80211_sdata_running(sdata)) {
		list_for_each_entry(key, &sdata->key_list, list) {
			if (!(key->flags & KEY_FLAG_TAINTED))
				increment_tailroom_need_count(sdata);
			ieee80211_key_enable_hw_accel(key);
		}
	}
}

static void
ieee80211_key_iter(struct ieee80211_hw *hw,
		   struct ieee80211_vif *vif,
		   struct ieee80211_key *key,
		   void (*iter)(struct ieee80211_hw *hw,
				struct ieee80211_vif *vif,
				struct ieee80211_sta *sta,
				struct ieee80211_key_conf *key,
				void *data),
		   void *iter_data)
{
	/* skip keys of station in removal process */
	if (key->sta && key->sta->removed)
		return;
	if (!(key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE))
		return;
	iter(hw, vif, key->sta ?
&key->sta->sta : NULL, &key->conf, iter_data); } void ieee80211_iter_keys(struct ieee80211_hw *hw, struct ieee80211_vif *vif, void (*iter)(struct ieee80211_hw *hw, struct ieee80211_vif *vif, struct ieee80211_sta *sta, struct ieee80211_key_conf *key, void *data), void *iter_data) { struct ieee80211_local *local = hw_to_local(hw); struct ieee80211_key *key, *tmp; struct ieee80211_sub_if_data *sdata; lockdep_assert_wiphy(hw->wiphy); if (vif) { sdata = vif_to_sdata(vif); list_for_each_entry_safe(key, tmp, &sdata->key_list, list) ieee80211_key_iter(hw, vif, key, iter, iter_data); } else { list_for_each_entry(sdata, &local->interfaces, list) list_for_each_entry_safe(key, tmp, &sdata->key_list, list) ieee80211_key_iter(hw, &sdata->vif, key, iter, iter_data); } } EXPORT_SYMBOL(ieee80211_iter_keys); static void _ieee80211_iter_keys_rcu(struct ieee80211_hw *hw, struct ieee80211_sub_if_data *sdata, void (*iter)(struct ieee80211_hw *hw, struct ieee80211_vif *vif, struct ieee80211_sta *sta, struct ieee80211_key_conf *key, void *data), void *iter_data) { struct ieee80211_key *key; list_for_each_entry_rcu(key, &sdata->key_list, list) ieee80211_key_iter(hw, &sdata->vif, key, iter, iter_data); } void ieee80211_iter_keys_rcu(struct ieee80211_hw *hw, struct ieee80211_vif *vif, void (*iter)(struct ieee80211_hw *hw, struct ieee80211_vif *vif, struct ieee80211_sta *sta, struct ieee80211_key_conf *key, void *data), void *iter_data) { struct ieee80211_local *local = hw_to_local(hw); struct ieee80211_sub_if_data *sdata; if (vif) { sdata = vif_to_sdata(vif); _ieee80211_iter_keys_rcu(hw, sdata, iter, iter_data); } else { list_for_each_entry_rcu(sdata, &local->interfaces, list) _ieee80211_iter_keys_rcu(hw, sdata, iter, iter_data); } } EXPORT_SYMBOL(ieee80211_iter_keys_rcu); static void ieee80211_free_keys_iface(struct ieee80211_sub_if_data *sdata, struct list_head *keys) { struct ieee80211_key *key, *tmp; decrease_tailroom_need_count(sdata, sdata->crypto_tx_tailroom_pending_dec); 
sdata->crypto_tx_tailroom_pending_dec = 0; ieee80211_debugfs_key_remove_mgmt_default(sdata); ieee80211_debugfs_key_remove_beacon_default(sdata); list_for_each_entry_safe(key, tmp, &sdata->key_list, list) { ieee80211_key_replace(key->sdata, NULL, key->sta, key->conf.flags & IEEE80211_KEY_FLAG_PAIRWISE, key, NULL); list_add_tail(&key->list, keys); } ieee80211_debugfs_key_update_default(sdata); } void ieee80211_remove_link_keys(struct ieee80211_link_data *link, struct list_head *keys) { struct ieee80211_sub_if_data *sdata = link->sdata; struct ieee80211_local *local = sdata->local; struct ieee80211_key *key, *tmp; lockdep_assert_wiphy(local->hw.wiphy); list_for_each_entry_safe(key, tmp, &sdata->key_list, list) { if (key->conf.link_id != link->link_id) continue; ieee80211_key_replace(key->sdata, link, key->sta, key->conf.flags & IEEE80211_KEY_FLAG_PAIRWISE, key, NULL); list_add_tail(&key->list, keys); } } void ieee80211_free_key_list(struct ieee80211_local *local, struct list_head *keys) { struct ieee80211_key *key, *tmp; lockdep_assert_wiphy(local->hw.wiphy); list_for_each_entry_safe(key, tmp, keys, list) __ieee80211_key_destroy(key, false); } void ieee80211_free_keys(struct ieee80211_sub_if_data *sdata, bool force_synchronize) { struct ieee80211_local *local = sdata->local; struct ieee80211_sub_if_data *vlan; struct ieee80211_sub_if_data *master; struct ieee80211_key *key, *tmp; LIST_HEAD(keys); wiphy_delayed_work_cancel(local->hw.wiphy, &sdata->dec_tailroom_needed_wk); lockdep_assert_wiphy(local->hw.wiphy); ieee80211_free_keys_iface(sdata, &keys); if (sdata->vif.type == NL80211_IFTYPE_AP) { list_for_each_entry(vlan, &sdata->u.ap.vlans, u.vlan.list) ieee80211_free_keys_iface(vlan, &keys); } if (!list_empty(&keys) || force_synchronize) synchronize_net(); list_for_each_entry_safe(key, tmp, &keys, list) __ieee80211_key_destroy(key, false); if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN) { if (sdata->bss) { master = container_of(sdata->bss, struct ieee80211_sub_if_data, 
u.ap); WARN_ON_ONCE(sdata->crypto_tx_tailroom_needed_cnt != master->crypto_tx_tailroom_needed_cnt); } } else { WARN_ON_ONCE(sdata->crypto_tx_tailroom_needed_cnt || sdata->crypto_tx_tailroom_pending_dec); } if (sdata->vif.type == NL80211_IFTYPE_AP) { list_for_each_entry(vlan, &sdata->u.ap.vlans, u.vlan.list) WARN_ON_ONCE(vlan->crypto_tx_tailroom_needed_cnt || vlan->crypto_tx_tailroom_pending_dec); } } void ieee80211_free_sta_keys(struct ieee80211_local *local, struct sta_info *sta) { struct ieee80211_key *key; int i; lockdep_assert_wiphy(local->hw.wiphy); for (i = 0; i < ARRAY_SIZE(sta->deflink.gtk); i++) { key = wiphy_dereference(local->hw.wiphy, sta->deflink.gtk[i]); if (!key) continue; ieee80211_key_replace(key->sdata, NULL, key->sta, key->conf.flags & IEEE80211_KEY_FLAG_PAIRWISE, key, NULL); __ieee80211_key_destroy(key, key->sdata->vif.type == NL80211_IFTYPE_STATION); } for (i = 0; i < NUM_DEFAULT_KEYS; i++) { key = wiphy_dereference(local->hw.wiphy, sta->ptk[i]); if (!key) continue; ieee80211_key_replace(key->sdata, NULL, key->sta, key->conf.flags & IEEE80211_KEY_FLAG_PAIRWISE, key, NULL); __ieee80211_key_destroy(key, key->sdata->vif.type == NL80211_IFTYPE_STATION); } } void ieee80211_delayed_tailroom_dec(struct wiphy *wiphy, struct wiphy_work *wk) { struct ieee80211_sub_if_data *sdata; sdata = container_of(wk, struct ieee80211_sub_if_data, dec_tailroom_needed_wk.work); /* * The reason for the delayed tailroom needed decrementing is to * make roaming faster: during roaming, all keys are first deleted * and then new keys are installed. The first new key causes the * crypto_tx_tailroom_needed_cnt to go from 0 to 1, which invokes * the cost of synchronize_net() (which can be slow). Avoid this * by deferring the crypto_tx_tailroom_needed_cnt decrementing on * key removal for a while, so if we roam the value is larger than * zero and no 0->1 transition happens. 
* * The cost is that if the AP switching was from an AP with keys * to one without, we still allocate tailroom while it would no * longer be needed. However, in the typical (fast) roaming case * within an ESS this usually won't happen. */ decrease_tailroom_need_count(sdata, sdata->crypto_tx_tailroom_pending_dec); sdata->crypto_tx_tailroom_pending_dec = 0; } void ieee80211_gtk_rekey_notify(struct ieee80211_vif *vif, const u8 *bssid, const u8 *replay_ctr, gfp_t gfp) { struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif); trace_api_gtk_rekey_notify(sdata, bssid, replay_ctr); cfg80211_gtk_rekey_notify(sdata->dev, bssid, replay_ctr, gfp); } EXPORT_SYMBOL_GPL(ieee80211_gtk_rekey_notify); void ieee80211_get_key_rx_seq(struct ieee80211_key_conf *keyconf, int tid, struct ieee80211_key_seq *seq) { struct ieee80211_key *key; const u8 *pn; key = container_of(keyconf, struct ieee80211_key, conf); switch (key->conf.cipher) { case WLAN_CIPHER_SUITE_TKIP: if (WARN_ON(tid < 0 || tid >= IEEE80211_NUM_TIDS)) return; seq->tkip.iv32 = key->u.tkip.rx[tid].iv32; seq->tkip.iv16 = key->u.tkip.rx[tid].iv16; break; case WLAN_CIPHER_SUITE_CCMP: case WLAN_CIPHER_SUITE_CCMP_256: if (WARN_ON(tid < -1 || tid >= IEEE80211_NUM_TIDS)) return; if (tid < 0) pn = key->u.ccmp.rx_pn[IEEE80211_NUM_TIDS]; else pn = key->u.ccmp.rx_pn[tid]; memcpy(seq->ccmp.pn, pn, IEEE80211_CCMP_PN_LEN); break; case WLAN_CIPHER_SUITE_AES_CMAC: case WLAN_CIPHER_SUITE_BIP_CMAC_256: if (WARN_ON(tid != 0)) return; pn = key->u.aes_cmac.rx_pn; memcpy(seq->aes_cmac.pn, pn, IEEE80211_CMAC_PN_LEN); break; case WLAN_CIPHER_SUITE_BIP_GMAC_128: case WLAN_CIPHER_SUITE_BIP_GMAC_256: if (WARN_ON(tid != 0)) return; pn = key->u.aes_gmac.rx_pn; memcpy(seq->aes_gmac.pn, pn, IEEE80211_GMAC_PN_LEN); break; case WLAN_CIPHER_SUITE_GCMP: case WLAN_CIPHER_SUITE_GCMP_256: if (WARN_ON(tid < -1 || tid >= IEEE80211_NUM_TIDS)) return; if (tid < 0) pn = key->u.gcmp.rx_pn[IEEE80211_NUM_TIDS]; else pn = key->u.gcmp.rx_pn[tid]; memcpy(seq->gcmp.pn, pn, 
IEEE80211_GCMP_PN_LEN); break; } } EXPORT_SYMBOL(ieee80211_get_key_rx_seq); void ieee80211_set_key_rx_seq(struct ieee80211_key_conf *keyconf, int tid, struct ieee80211_key_seq *seq) { struct ieee80211_key *key; u8 *pn; key = container_of(keyconf, struct ieee80211_key, conf); switch (key->conf.cipher) { case WLAN_CIPHER_SUITE_TKIP: if (WARN_ON(tid < 0 || tid >= IEEE80211_NUM_TIDS)) return; key->u.tkip.rx[tid].iv32 = seq->tkip.iv32; key->u.tkip.rx[tid].iv16 = seq->tkip.iv16; break; case WLAN_CIPHER_SUITE_CCMP: case WLAN_CIPHER_SUITE_CCMP_256: if (WARN_ON(tid < -1 || tid >= IEEE80211_NUM_TIDS)) return; if (tid < 0) pn = key->u.ccmp.rx_pn[IEEE80211_NUM_TIDS]; else pn = key->u.ccmp.rx_pn[tid]; memcpy(pn, seq->ccmp.pn, IEEE80211_CCMP_PN_LEN); break; case WLAN_CIPHER_SUITE_AES_CMAC: case WLAN_CIPHER_SUITE_BIP_CMAC_256: if (WARN_ON(tid != 0)) return; pn = key->u.aes_cmac.rx_pn; memcpy(pn, seq->aes_cmac.pn, IEEE80211_CMAC_PN_LEN); break; case WLAN_CIPHER_SUITE_BIP_GMAC_128: case WLAN_CIPHER_SUITE_BIP_GMAC_256: if (WARN_ON(tid != 0)) return; pn = key->u.aes_gmac.rx_pn; memcpy(pn, seq->aes_gmac.pn, IEEE80211_GMAC_PN_LEN); break; case WLAN_CIPHER_SUITE_GCMP: case WLAN_CIPHER_SUITE_GCMP_256: if (WARN_ON(tid < -1 || tid >= IEEE80211_NUM_TIDS)) return; if (tid < 0) pn = key->u.gcmp.rx_pn[IEEE80211_NUM_TIDS]; else pn = key->u.gcmp.rx_pn[tid]; memcpy(pn, seq->gcmp.pn, IEEE80211_GCMP_PN_LEN); break; default: WARN_ON(1); break; } } EXPORT_SYMBOL_GPL(ieee80211_set_key_rx_seq); struct ieee80211_key_conf * ieee80211_gtk_rekey_add(struct ieee80211_vif *vif, u8 idx, u8 *key_data, u8 key_len, int link_id) { struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif); struct ieee80211_local *local = sdata->local; struct ieee80211_key *prev_key; struct ieee80211_key *key; int err; struct ieee80211_link_data *link_data = link_id < 0 ? 
&sdata->deflink : sdata_dereference(sdata->link[link_id], sdata); if (WARN_ON(!link_data)) return ERR_PTR(-EINVAL); if (WARN_ON(!local->wowlan)) return ERR_PTR(-EINVAL); if (WARN_ON(vif->type != NL80211_IFTYPE_STATION)) return ERR_PTR(-EINVAL); if (WARN_ON(idx >= NUM_DEFAULT_KEYS + NUM_DEFAULT_MGMT_KEYS + NUM_DEFAULT_BEACON_KEYS)) return ERR_PTR(-EINVAL); prev_key = wiphy_dereference(local->hw.wiphy, link_data->gtk[idx]); if (!prev_key) { if (idx < NUM_DEFAULT_KEYS) { for (int i = 0; i < NUM_DEFAULT_KEYS; i++) { if (i == idx) continue; prev_key = wiphy_dereference(local->hw.wiphy, link_data->gtk[i]); if (prev_key) break; } } else { /* For IGTK we have 4 and 5 and for BIGTK - 6 and 7 */ prev_key = wiphy_dereference(local->hw.wiphy, link_data->gtk[idx ^ 1]); } } if (WARN_ON(!prev_key)) return ERR_PTR(-EINVAL); if (WARN_ON(key_len < prev_key->conf.keylen)) return ERR_PTR(-EINVAL); key = ieee80211_key_alloc(prev_key->conf.cipher, idx, prev_key->conf.keylen, key_data, 0, NULL); if (IS_ERR(key)) return ERR_CAST(key); if (sdata->u.mgd.mfp != IEEE80211_MFP_DISABLED) key->conf.flags |= IEEE80211_KEY_FLAG_RX_MGMT; key->conf.link_id = link_data->link_id; err = ieee80211_key_link(key, link_data, NULL); if (err) return ERR_PTR(err); return &key->conf; } EXPORT_SYMBOL_GPL(ieee80211_gtk_rekey_add); void ieee80211_key_mic_failure(struct ieee80211_key_conf *keyconf) { struct ieee80211_key *key; key = container_of(keyconf, struct ieee80211_key, conf); switch (key->conf.cipher) { case WLAN_CIPHER_SUITE_AES_CMAC: case WLAN_CIPHER_SUITE_BIP_CMAC_256: key->u.aes_cmac.icverrors++; break; case WLAN_CIPHER_SUITE_BIP_GMAC_128: case WLAN_CIPHER_SUITE_BIP_GMAC_256: key->u.aes_gmac.icverrors++; break; default: /* ignore the others for now, we don't keep counters now */ break; } } EXPORT_SYMBOL_GPL(ieee80211_key_mic_failure); void ieee80211_key_replay(struct ieee80211_key_conf *keyconf) { struct ieee80211_key *key; key = container_of(keyconf, struct ieee80211_key, conf); switch 
(key->conf.cipher) { case WLAN_CIPHER_SUITE_CCMP: case WLAN_CIPHER_SUITE_CCMP_256: key->u.ccmp.replays++; break; case WLAN_CIPHER_SUITE_AES_CMAC: case WLAN_CIPHER_SUITE_BIP_CMAC_256: key->u.aes_cmac.replays++; break; case WLAN_CIPHER_SUITE_BIP_GMAC_128: case WLAN_CIPHER_SUITE_BIP_GMAC_256: key->u.aes_gmac.replays++; break; case WLAN_CIPHER_SUITE_GCMP: case WLAN_CIPHER_SUITE_GCMP_256: key->u.gcmp.replays++; break; } } EXPORT_SYMBOL_GPL(ieee80211_key_replay); int ieee80211_key_switch_links(struct ieee80211_sub_if_data *sdata, unsigned long del_links_mask, unsigned long add_links_mask) { struct ieee80211_key *key; int ret; list_for_each_entry(key, &sdata->key_list, list) { if (key->conf.link_id < 0 || !(del_links_mask & BIT(key->conf.link_id))) continue; /* shouldn't happen for per-link keys */ WARN_ON(key->sta); ieee80211_key_disable_hw_accel(key); } list_for_each_entry(key, &sdata->key_list, list) { if (key->conf.link_id < 0 || !(add_links_mask & BIT(key->conf.link_id))) continue; /* shouldn't happen for per-link keys */ WARN_ON(key->sta); ret = ieee80211_key_enable_hw_accel(key); if (ret) return ret; } return 0; }
1 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 1065 1063 1064 1063 1065 418 417 418 418 85 85 85 85 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 // SPDX-License-Identifier: GPL-2.0-or-later /* * INET An implementation of the TCP/IP protocol suite for the LINUX * operating system. INET is implemented using the BSD Socket * interface as the means of communication with the user level. * * This file implements the various access functions for the * PROC file system. This is very similar to the IPv4 version, * except it reports the sockets in the INET6 address family. * * Authors: David S. 
Miller (davem@caip.rutgers.edu) * YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org> */ #include <linux/socket.h> #include <linux/net.h> #include <linux/ipv6.h> #include <linux/proc_fs.h> #include <linux/seq_file.h> #include <linux/stddef.h> #include <linux/export.h> #include <net/net_namespace.h> #include <net/ip.h> #include <net/sock.h> #include <net/tcp.h> #include <net/udp.h> #include <net/transp_v6.h> #include <net/ipv6.h> #define MAX4(a, b, c, d) \ MAX_T(u32, MAX_T(u32, a, b), MAX_T(u32, c, d)) #define SNMP_MIB_MAX MAX4(UDP_MIB_MAX, TCP_MIB_MAX, \ IPSTATS_MIB_MAX, ICMP_MIB_MAX) static int sockstat6_seq_show(struct seq_file *seq, void *v) { struct net *net = seq->private; seq_printf(seq, "TCP6: inuse %d\n", sock_prot_inuse_get(net, &tcpv6_prot)); seq_printf(seq, "UDP6: inuse %d\n", sock_prot_inuse_get(net, &udpv6_prot)); seq_printf(seq, "RAW6: inuse %d\n", sock_prot_inuse_get(net, &rawv6_prot)); seq_printf(seq, "FRAG6: inuse %u memory %lu\n", atomic_read(&net->ipv6.fqdir->rhashtable.nelems), frag_mem_limit(net->ipv6.fqdir)); return 0; } static const struct snmp_mib snmp6_ipstats_list[] = { /* ipv6 mib according to RFC 2465 */ SNMP_MIB_ITEM("Ip6InReceives", IPSTATS_MIB_INPKTS), SNMP_MIB_ITEM("Ip6InHdrErrors", IPSTATS_MIB_INHDRERRORS), SNMP_MIB_ITEM("Ip6InTooBigErrors", IPSTATS_MIB_INTOOBIGERRORS), SNMP_MIB_ITEM("Ip6InNoRoutes", IPSTATS_MIB_INNOROUTES), SNMP_MIB_ITEM("Ip6InAddrErrors", IPSTATS_MIB_INADDRERRORS), SNMP_MIB_ITEM("Ip6InUnknownProtos", IPSTATS_MIB_INUNKNOWNPROTOS), SNMP_MIB_ITEM("Ip6InTruncatedPkts", IPSTATS_MIB_INTRUNCATEDPKTS), SNMP_MIB_ITEM("Ip6InDiscards", IPSTATS_MIB_INDISCARDS), SNMP_MIB_ITEM("Ip6InDelivers", IPSTATS_MIB_INDELIVERS), SNMP_MIB_ITEM("Ip6OutForwDatagrams", IPSTATS_MIB_OUTFORWDATAGRAMS), SNMP_MIB_ITEM("Ip6OutRequests", IPSTATS_MIB_OUTREQUESTS), SNMP_MIB_ITEM("Ip6OutDiscards", IPSTATS_MIB_OUTDISCARDS), SNMP_MIB_ITEM("Ip6OutNoRoutes", IPSTATS_MIB_OUTNOROUTES), SNMP_MIB_ITEM("Ip6ReasmTimeout", IPSTATS_MIB_REASMTIMEOUT), 
SNMP_MIB_ITEM("Ip6ReasmReqds", IPSTATS_MIB_REASMREQDS), SNMP_MIB_ITEM("Ip6ReasmOKs", IPSTATS_MIB_REASMOKS), SNMP_MIB_ITEM("Ip6ReasmFails", IPSTATS_MIB_REASMFAILS), SNMP_MIB_ITEM("Ip6FragOKs", IPSTATS_MIB_FRAGOKS), SNMP_MIB_ITEM("Ip6FragFails", IPSTATS_MIB_FRAGFAILS), SNMP_MIB_ITEM("Ip6FragCreates", IPSTATS_MIB_FRAGCREATES), SNMP_MIB_ITEM("Ip6InMcastPkts", IPSTATS_MIB_INMCASTPKTS), SNMP_MIB_ITEM("Ip6OutMcastPkts", IPSTATS_MIB_OUTMCASTPKTS), SNMP_MIB_ITEM("Ip6InOctets", IPSTATS_MIB_INOCTETS), SNMP_MIB_ITEM("Ip6OutOctets", IPSTATS_MIB_OUTOCTETS), SNMP_MIB_ITEM("Ip6InMcastOctets", IPSTATS_MIB_INMCASTOCTETS), SNMP_MIB_ITEM("Ip6OutMcastOctets", IPSTATS_MIB_OUTMCASTOCTETS), SNMP_MIB_ITEM("Ip6InBcastOctets", IPSTATS_MIB_INBCASTOCTETS), SNMP_MIB_ITEM("Ip6OutBcastOctets", IPSTATS_MIB_OUTBCASTOCTETS), /* IPSTATS_MIB_CSUMERRORS is not relevant in IPv6 (no checksum) */ SNMP_MIB_ITEM("Ip6InNoECTPkts", IPSTATS_MIB_NOECTPKTS), SNMP_MIB_ITEM("Ip6InECT1Pkts", IPSTATS_MIB_ECT1PKTS), SNMP_MIB_ITEM("Ip6InECT0Pkts", IPSTATS_MIB_ECT0PKTS), SNMP_MIB_ITEM("Ip6InCEPkts", IPSTATS_MIB_CEPKTS), SNMP_MIB_ITEM("Ip6OutTransmits", IPSTATS_MIB_OUTPKTS), }; static const struct snmp_mib snmp6_icmp6_list[] = { /* icmpv6 mib according to RFC 2466 */ SNMP_MIB_ITEM("Icmp6InMsgs", ICMP6_MIB_INMSGS), SNMP_MIB_ITEM("Icmp6InErrors", ICMP6_MIB_INERRORS), SNMP_MIB_ITEM("Icmp6OutMsgs", ICMP6_MIB_OUTMSGS), SNMP_MIB_ITEM("Icmp6OutErrors", ICMP6_MIB_OUTERRORS), SNMP_MIB_ITEM("Icmp6InCsumErrors", ICMP6_MIB_CSUMERRORS), /* ICMP6_MIB_RATELIMITHOST needs to be last, see snmp6_dev_seq_show(). 
*/ SNMP_MIB_ITEM("Icmp6OutRateLimitHost", ICMP6_MIB_RATELIMITHOST), }; static const struct snmp_mib snmp6_udp6_list[] = { SNMP_MIB_ITEM("Udp6InDatagrams", UDP_MIB_INDATAGRAMS), SNMP_MIB_ITEM("Udp6NoPorts", UDP_MIB_NOPORTS), SNMP_MIB_ITEM("Udp6InErrors", UDP_MIB_INERRORS), SNMP_MIB_ITEM("Udp6OutDatagrams", UDP_MIB_OUTDATAGRAMS), SNMP_MIB_ITEM("Udp6RcvbufErrors", UDP_MIB_RCVBUFERRORS), SNMP_MIB_ITEM("Udp6SndbufErrors", UDP_MIB_SNDBUFERRORS), SNMP_MIB_ITEM("Udp6InCsumErrors", UDP_MIB_CSUMERRORS), SNMP_MIB_ITEM("Udp6IgnoredMulti", UDP_MIB_IGNOREDMULTI), SNMP_MIB_ITEM("Udp6MemErrors", UDP_MIB_MEMERRORS), }; static void snmp6_seq_show_icmpv6msg(struct seq_file *seq, atomic_long_t *smib) { char name[32]; int i; /* print by name -- deprecated items */ for (i = 0; i < ICMP6MSG_MIB_MAX; i++) { const char *p = NULL; int icmptype; #define CASE(TYP, STR) case TYP: p = STR; break; icmptype = i & 0xff; switch (icmptype) { /* RFC 4293 v6 ICMPMsgStatsTable; named items for RFC 2466 compatibility */ CASE(ICMPV6_DEST_UNREACH, "DestUnreachs") CASE(ICMPV6_PKT_TOOBIG, "PktTooBigs") CASE(ICMPV6_TIME_EXCEED, "TimeExcds") CASE(ICMPV6_PARAMPROB, "ParmProblems") CASE(ICMPV6_ECHO_REQUEST, "Echos") CASE(ICMPV6_ECHO_REPLY, "EchoReplies") CASE(ICMPV6_MGM_QUERY, "GroupMembQueries") CASE(ICMPV6_MGM_REPORT, "GroupMembResponses") CASE(ICMPV6_MGM_REDUCTION, "GroupMembReductions") CASE(ICMPV6_MLD2_REPORT, "MLDv2Reports") CASE(NDISC_ROUTER_ADVERTISEMENT, "RouterAdvertisements") CASE(NDISC_ROUTER_SOLICITATION, "RouterSolicits") CASE(NDISC_NEIGHBOUR_ADVERTISEMENT, "NeighborAdvertisements") CASE(NDISC_NEIGHBOUR_SOLICITATION, "NeighborSolicits") CASE(NDISC_REDIRECT, "Redirects") } #undef CASE if (!p) /* don't print un-named types here */ continue; snprintf(name, sizeof(name), "Icmp6%s%s", i & 0x100 ? 
"Out" : "In", p); seq_printf(seq, "%-32s\t%lu\n", name, atomic_long_read(smib + i)); } /* print by number (nonzero only) - ICMPMsgStat format */ for (i = 0; i < ICMP6MSG_MIB_MAX; i++) { unsigned long val; val = atomic_long_read(smib + i); if (!val) continue; snprintf(name, sizeof(name), "Icmp6%sType%u", i & 0x100 ? "Out" : "In", i & 0xff); seq_printf(seq, "%-32s\t%lu\n", name, val); } } /* can be called either with percpu mib (pcpumib != NULL), * or shared one (smib != NULL) */ static void snmp6_seq_show_item(struct seq_file *seq, void __percpu *pcpumib, atomic_long_t *smib, const struct snmp_mib *itemlist, int cnt) { unsigned long buff[SNMP_MIB_MAX]; int i; if (pcpumib) { memset(buff, 0, sizeof(unsigned long) * cnt); snmp_get_cpu_field_batch_cnt(buff, itemlist, cnt, pcpumib); for (i = 0; i < cnt; i++) seq_printf(seq, "%-32s\t%lu\n", itemlist[i].name, buff[i]); } else { for (i = 0; i < cnt; i++) seq_printf(seq, "%-32s\t%lu\n", itemlist[i].name, atomic_long_read(smib + itemlist[i].entry)); } } static void snmp6_seq_show_item64(struct seq_file *seq, void __percpu *mib, const struct snmp_mib *itemlist, int cnt, size_t syncpoff) { u64 buff64[SNMP_MIB_MAX]; int i; memset(buff64, 0, sizeof(u64) * cnt); snmp_get_cpu_field64_batch_cnt(buff64, itemlist, cnt, mib, syncpoff); for (i = 0; i < cnt; i++) seq_printf(seq, "%-32s\t%llu\n", itemlist[i].name, buff64[i]); } static int snmp6_seq_show(struct seq_file *seq, void *v) { struct net *net = (struct net *)seq->private; snmp6_seq_show_item64(seq, net->mib.ipv6_statistics, snmp6_ipstats_list, ARRAY_SIZE(snmp6_ipstats_list), offsetof(struct ipstats_mib, syncp)); snmp6_seq_show_item(seq, net->mib.icmpv6_statistics, NULL, snmp6_icmp6_list, ARRAY_SIZE(snmp6_icmp6_list)); snmp6_seq_show_icmpv6msg(seq, net->mib.icmpv6msg_statistics->mibs); snmp6_seq_show_item(seq, net->mib.udp_stats_in6, NULL, snmp6_udp6_list, ARRAY_SIZE(snmp6_udp6_list)); return 0; } static int snmp6_dev_seq_show(struct seq_file *seq, void *v) { struct inet6_dev 
*idev = (struct inet6_dev *)seq->private; seq_printf(seq, "%-32s\t%u\n", "ifIndex", idev->dev->ifindex); snmp6_seq_show_item64(seq, idev->stats.ipv6, snmp6_ipstats_list, ARRAY_SIZE(snmp6_ipstats_list), offsetof(struct ipstats_mib, syncp)); /* Per idev icmp stats do not have ICMP6_MIB_RATELIMITHOST */ snmp6_seq_show_item(seq, NULL, idev->stats.icmpv6dev->mibs, snmp6_icmp6_list, ARRAY_SIZE(snmp6_icmp6_list) - 1); snmp6_seq_show_icmpv6msg(seq, idev->stats.icmpv6msgdev->mibs); return 0; } int snmp6_register_dev(struct inet6_dev *idev) { struct proc_dir_entry *p; struct net *net; if (!idev || !idev->dev) return -EINVAL; net = dev_net(idev->dev); if (!net->mib.proc_net_devsnmp6) return -ENOENT; p = proc_create_single_data(idev->dev->name, 0444, net->mib.proc_net_devsnmp6, snmp6_dev_seq_show, idev); if (!p) return -ENOMEM; idev->stats.proc_dir_entry = p; return 0; } int snmp6_unregister_dev(struct inet6_dev *idev) { struct net *net = dev_net(idev->dev); if (!net->mib.proc_net_devsnmp6) return -ENOENT; if (!idev->stats.proc_dir_entry) return -EINVAL; proc_remove(idev->stats.proc_dir_entry); idev->stats.proc_dir_entry = NULL; return 0; } static int __net_init ipv6_proc_init_net(struct net *net) { if (!proc_create_net_single("sockstat6", 0444, net->proc_net, sockstat6_seq_show, NULL)) return -ENOMEM; if (!proc_create_net_single("snmp6", 0444, net->proc_net, snmp6_seq_show, NULL)) goto proc_snmp6_fail; net->mib.proc_net_devsnmp6 = proc_mkdir("dev_snmp6", net->proc_net); if (!net->mib.proc_net_devsnmp6) goto proc_dev_snmp6_fail; return 0; proc_dev_snmp6_fail: remove_proc_entry("snmp6", net->proc_net); proc_snmp6_fail: remove_proc_entry("sockstat6", net->proc_net); return -ENOMEM; } static void __net_exit ipv6_proc_exit_net(struct net *net) { remove_proc_entry("sockstat6", net->proc_net); remove_proc_entry("dev_snmp6", net->proc_net); remove_proc_entry("snmp6", net->proc_net); } static struct pernet_operations ipv6_proc_ops = { .init = ipv6_proc_init_net, .exit = 
ipv6_proc_exit_net, }; int __init ipv6_misc_proc_init(void) { return register_pernet_subsys(&ipv6_proc_ops); } void ipv6_misc_proc_exit(void) { unregister_pernet_subsys(&ipv6_proc_ops); }
19 19 19 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 /* SPDX-License-Identifier: GPL-2.0-only */ /* * 9P Client Definitions * * Copyright (C) 2008 by Eric Van Hensbergen <ericvh@gmail.com> * Copyright (C) 2007 by Latchesar Ionkov <lucho@ionkov.net> */ #ifndef NET_9P_CLIENT_H #define NET_9P_CLIENT_H #include <linux/utsname.h> #include <linux/idr.h> #include <linux/tracepoint-defs.h> /* Number of requests per row */ #define P9_ROW_MAXTAG 255 /* DEFAULT MSIZE = 32 pages worth of payload + P9_HDRSZ + * room for write (16 extra) or read (11 extra) operands. 
*/
#define DEFAULT_MSIZE ((128 * 1024) + P9_IOHDRSZ)

/** enum p9_proto_versions - 9P protocol versions
 * @p9_proto_legacy: 9P Legacy mode, pre-9P2000.u
 * @p9_proto_2000u: 9P2000.u extension
 * @p9_proto_2000L: 9P2000.L extension
 */

enum p9_proto_versions {
	p9_proto_legacy,
	p9_proto_2000u,
	p9_proto_2000L,
};


/**
 * enum p9_trans_status - different states of underlying transports
 * @Connected: transport is connected and healthy
 * @BeginDisconnect: disconnect has been started but is not complete
 *	(presumably set via p9_client_begin_disconnect() - TODO confirm)
 * @Disconnected: transport has been disconnected
 * @Hung: transport is connected but wedged
 *
 * This enumeration details the various states a transport
 * instantiation can be in.
 */

enum p9_trans_status {
	Connected,
	BeginDisconnect,
	Disconnected,
	Hung,
};

/**
 * enum p9_req_status_t - status of a request
 * @REQ_STATUS_ALLOC: request has been allocated but not sent
 * @REQ_STATUS_UNSENT: request waiting to be sent
 * @REQ_STATUS_SENT: request sent to server
 * @REQ_STATUS_RCVD: response received from server
 * @REQ_STATUS_FLSHD: request has been flushed
 * @REQ_STATUS_ERROR: request encountered an error on the client side
 */

enum p9_req_status_t {
	REQ_STATUS_ALLOC,
	REQ_STATUS_UNSENT,
	REQ_STATUS_SENT,
	REQ_STATUS_RCVD,
	REQ_STATUS_FLSHD,
	REQ_STATUS_ERROR,
};

/**
 * struct p9_req_t - request slots
 * @status: status of this request slot
 * @t_err: transport error
 * @refcount: reference count of this request
 * @wq: wait_queue for the client to block on for this request
 * @tc: the request fcall structure
 * @rc: the response fcall structure
 * @req_list: link for higher level objects to chain requests
 */
struct p9_req_t {
	int status;
	int t_err;
	refcount_t refcount;
	wait_queue_head_t wq;
	struct p9_fcall tc;
	struct p9_fcall rc;
	struct list_head req_list;
};

/**
 * struct p9_client - per client instance state
 * @lock: protect @fids and @reqs
 * @msize: maximum data size negotiated by protocol
 * @proto_version: 9P protocol version to use
 * @trans_mod: module API instantiated with this client
 * @status: connection state
 * @trans: transport instance state and API
 * @fcall_cache: slab cache, presumably for fcall buffers (TODO confirm)
 * @trans_opts: transport-specific options: @trans_opts.fd holds the
 *	read/write file descriptors, @trans_opts.tcp the port and the
 *	privileged-port flag
 * @fids: All active FID handles
 * @reqs: All active requests.
 * @name: node name used as client id
 *
 * The client structure is used to keep track of various per-client
 * state that has been instantiated.
 */
struct p9_client {
	spinlock_t lock;
	unsigned int msize;
	unsigned char proto_version;
	struct p9_trans_module *trans_mod;
	enum p9_trans_status status;
	void *trans;
	struct kmem_cache *fcall_cache;

	union {
		struct {
			int rfd;
			int wfd;
		} fd;
		struct {
			u16 port;
			bool privport;

		} tcp;
	} trans_opts;

	struct idr fids;
	struct idr reqs;

	char name[__NEW_UTS_LEN + 1];
};

/**
 * struct p9_client_opts - holds client options during parsing
 * @msize: maximum data size negotiated by protocol
 * @proto_version: 9P protocol version to use
 * @trans_mod: module API instantiated with this client
 *
 * These parsed options get transferred into client in
 * apply_client_options()
 */
struct p9_client_opts {
	unsigned int msize;
	unsigned char proto_version;
	struct p9_trans_module *trans_mod;
};

/**
 * struct p9_fd_opts - per-transport options for fd transport
 * @rfd: file descriptor for reading (trans=fd)
 * @wfd: file descriptor for writing (trans=fd)
 * @port: port to connect to (trans=tcp)
 * @privport: port is privileged
 */
struct p9_fd_opts {
	int rfd;
	int wfd;
	u16 port;
	bool privport;
};

/**
 * struct p9_rdma_opts - Collection of mount options for rdma transport
 * @port: port of connection
 * @privport: Whether a privileged port may be used
 * @sq_depth: The requested depth of the SQ. This really doesn't need
 * to be any deeper than the number of threads used in the client
 * @rq_depth: The depth of the RQ. Should be greater than or equal to SQ depth
 * @timeout: Time to wait in msecs for CM events
 */
struct p9_rdma_opts {
	short port;
	bool privport;
	int sq_depth;
	int rq_depth;
	long timeout;
};

/**
 * struct p9_session_opts - holds parsed options for v9fs_session_info
 * @flags: session options of type &p9_session_flags
 * @nodev: set to 1 to disable device mapping
 * @debug: debug level
 * @afid: authentication handle
 * @cache: cache mode of type &p9_cache_bits
 * @cachetag: the tag of the cache associated with this session
 *	(only present with CONFIG_9P_FSCACHE)
 * @uname: string user name to mount hierarchy as
 * @aname: mount specifier for remote hierarchy
 * @dfltuid: default numeric userid to mount hierarchy as
 * @dfltgid: default numeric groupid to mount hierarchy as
 * @uid: if %V9FS_ACCESS_SINGLE, the numeric uid which mounted the hierarchy
 * @session_lock_timeout: retry interval for blocking locks
 *
 * This structure holds options which are parsed and will be transferred
 * to the v9fs_session_info structure when mounted, and therefore largely
 * duplicates struct v9fs_session_info.
 */
struct p9_session_opts {
	unsigned int flags;
	unsigned char nodev;
	unsigned short debug;
	unsigned int afid;
	unsigned int cache;
#ifdef CONFIG_9P_FSCACHE
	char *cachetag;
#endif
	char *uname;
	char *aname;
	kuid_t dfltuid;
	kgid_t dfltgid;
	kuid_t uid;
	long session_lock_timeout;
};

/* Used by mount API to store parsed mount options */
struct v9fs_context {
	struct p9_client_opts	client_opts;
	struct p9_fd_opts	fd_opts;
	struct p9_rdma_opts	rdma_opts;
	struct p9_session_opts	session_opts;
};

/*
 * enum fid_source - origin of a fid
 * NOTE(review): judging by the names this records whether a fid was looked
 * up via an inode, via a dentry, or some other way; no user is visible in
 * this header - confirm against the callers.
 */
enum fid_source {
	FID_FROM_OTHER,
	FID_FROM_INODE,
	FID_FROM_DENTRY,
};

/**
 * struct p9_fid - file system entity handle
 * @clnt: back pointer to instantiating &p9_client
 * @fid: numeric identifier for this handle
 * @count: reference count of this handle
 * @mode: current mode of this fid (enum?)
 * @qid: the &p9_qid server identifier this handle points to
 * @iounit: the server reported maximum transaction size for this file
 * @uid: the numeric uid of the local user who owns this handle
 * @rdir: readdir accounting structure (allocated on demand)
 * @dlist: per-dentry fid tracking
 * @ilist: presumably per-inode fid tracking (TODO confirm)
 *
 * TODO: This needs lots of explanation.
 */
struct p9_fid {
	struct p9_client *clnt;
	u32 fid;
	refcount_t count;
	int mode;
	struct p9_qid qid;
	u32 iounit;
	kuid_t uid;

	void *rdir;

	struct hlist_node dlist;	/* list of all fids attached to a dentry */
	struct hlist_node ilist;
};

/**
 * struct p9_dirent - directory entry structure
 * @qid: The p9 server qid for this dirent
 * @d_off: offset to the next dirent
 * @d_type: type of file
 * @d_name: file name
 */

struct p9_dirent {
	struct p9_qid qid;
	u64 d_off;
	unsigned char d_type;
	char d_name[256];
};

struct iov_iter;

int p9_show_client_options(struct seq_file *m, struct p9_client *clnt);
int p9_client_statfs(struct p9_fid *fid, struct p9_rstatfs *sb);
int p9_client_rename(struct p9_fid *fid, struct p9_fid *newdirfid,
		     const char *name);
int p9_client_renameat(struct p9_fid *olddirfid, const char *old_name,
		       struct p9_fid *newdirfid, const char *new_name);
struct p9_client *p9_client_create(struct fs_context *fc);
void p9_client_destroy(struct p9_client *clnt);
void p9_client_disconnect(struct p9_client *clnt);
void p9_client_begin_disconnect(struct p9_client *clnt);
struct p9_fid *p9_client_attach(struct p9_client *clnt, struct p9_fid *afid,
				const char *uname, kuid_t n_uname,
				const char *aname);
struct p9_fid *p9_client_walk(struct p9_fid *oldfid, uint16_t nwname,
		const unsigned char * const *wnames, int clone);
int p9_client_open(struct p9_fid *fid, int mode);
int p9_client_fcreate(struct p9_fid *fid, const char *name, u32 perm, int mode,
		      char *extension);
int p9_client_link(struct p9_fid *fid, struct p9_fid *oldfid,
		   const char *newname);
int p9_client_symlink(struct p9_fid *fid,
const char *name, const char *symname, kgid_t gid, struct p9_qid *qid); int p9_client_create_dotl(struct p9_fid *ofid, const char *name, u32 flags, u32 mode, kgid_t gid, struct p9_qid *qid); int p9_client_clunk(struct p9_fid *fid); int p9_client_fsync(struct p9_fid *fid, int datasync); int p9_client_remove(struct p9_fid *fid); int p9_client_unlinkat(struct p9_fid *dfid, const char *name, int flags); int p9_client_read(struct p9_fid *fid, u64 offset, struct iov_iter *to, int *err); int p9_client_read_once(struct p9_fid *fid, u64 offset, struct iov_iter *to, int *err); int p9_client_write(struct p9_fid *fid, u64 offset, struct iov_iter *from, int *err); struct netfs_io_subrequest; void p9_client_write_subreq(struct netfs_io_subrequest *subreq); int p9_client_readdir(struct p9_fid *fid, char *data, u32 count, u64 offset); int p9dirent_read(struct p9_client *clnt, char *buf, int len, struct p9_dirent *dirent); struct p9_wstat *p9_client_stat(struct p9_fid *fid); int p9_client_wstat(struct p9_fid *fid, struct p9_wstat *wst); int p9_client_setattr(struct p9_fid *fid, struct p9_iattr_dotl *attr); struct p9_stat_dotl *p9_client_getattr_dotl(struct p9_fid *fid, u64 request_mask); int p9_client_mknod_dotl(struct p9_fid *oldfid, const char *name, int mode, dev_t rdev, kgid_t gid, struct p9_qid *qid); int p9_client_mkdir_dotl(struct p9_fid *fid, const char *name, int mode, kgid_t gid, struct p9_qid *qid); int p9_client_lock_dotl(struct p9_fid *fid, struct p9_flock *flock, u8 *status); int p9_client_getlock_dotl(struct p9_fid *fid, struct p9_getlock *fl); void p9_fcall_fini(struct p9_fcall *fc); struct p9_req_t *p9_tag_lookup(struct p9_client *c, u16 tag); static inline void p9_req_get(struct p9_req_t *r) { refcount_inc(&r->refcount); } static inline int p9_req_try_get(struct p9_req_t *r) { return refcount_inc_not_zero(&r->refcount); } int p9_req_put(struct p9_client *c, struct p9_req_t *r); /* We cannot have the real tracepoints in header files, * use a wrapper function */ 
DECLARE_TRACEPOINT(9p_fid_ref); void do_trace_9p_fid_get(struct p9_fid *fid); void do_trace_9p_fid_put(struct p9_fid *fid); /* fid reference counting helpers: * - fids used for any length of time should always be referenced through * p9_fid_get(), and released with p9_fid_put() * - v9fs_fid_lookup() or similar will automatically call get for you * and also require a put * - the *_fid_add() helpers will stash the fid in the inode, * at which point it is the responsibility of evict_inode() * to call the put * - the last put will automatically send a clunk to the server */ static inline struct p9_fid *p9_fid_get(struct p9_fid *fid) { if (tracepoint_enabled(9p_fid_ref)) do_trace_9p_fid_get(fid); refcount_inc(&fid->count); return fid; } static inline int p9_fid_put(struct p9_fid *fid) { if (!fid || IS_ERR(fid)) return 0; if (tracepoint_enabled(9p_fid_ref)) do_trace_9p_fid_put(fid); if (!refcount_dec_and_test(&fid->count)) return 0; return p9_client_clunk(fid); } void p9_client_cb(struct p9_client *c, struct p9_req_t *req, int status); int p9_parse_header(struct p9_fcall *pdu, int32_t *size, int8_t *type, int16_t *tag, int rewind); int p9stat_read(struct p9_client *clnt, char *buf, int len, struct p9_wstat *st); void p9stat_free(struct p9_wstat *stbuf); int p9_is_proto_dotu(struct p9_client *clnt); int p9_is_proto_dotl(struct p9_client *clnt); struct p9_fid *p9_client_xattrwalk(struct p9_fid *file_fid, const char *attr_name, u64 *attr_size); int p9_client_xattrcreate(struct p9_fid *fid, const char *name, u64 attr_size, int flags); int p9_client_readlink(struct p9_fid *fid, char **target); int p9_client_init(void); void p9_client_exit(void); #endif /* NET_9P_CLIENT_H */
195 196 197 196 196 197 38 38 194 197 13 13 13 13 13 13 13 13 13 13 13 8 8 8 8 8 8 8 8 29 29 29 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228
// SPDX-License-Identifier: GPL-2.0
/*
 * Block stat tracking code
 *
 * Copyright (C) 2016 Jens Axboe
 */
#include <linux/kernel.h>
#include <linux/rculist.h>

#include "blk-stat.h"
#include "blk-mq.h"
#include "blk.h"

/*
 * Per-queue statistics state.
 *
 * @callbacks:  list of registered struct blk_stat_callback (linked through
 *              their ->list member); walked under RCU by blk_stat_add()
 * @lock:       taken around every update of @callbacks and @accounting
 * @accounting: counter raised by blk_stat_enable_accounting() and lowered
 *              by blk_stat_disable_accounting()
 */
struct blk_queue_stats {
	struct list_head callbacks;
	spinlock_t lock;
	int accounting;
};

/*
 * Reset @stat to the "no samples" state. min starts at all-ones so that
 * the first min() in blk_rq_stat_add() picks up the sample value.
 */
void blk_rq_stat_init(struct blk_rq_stat *stat)
{
	stat->min = -1ULL;
	stat->max = stat->nr_samples = stat->mean = 0;
	stat->batch = 0;
}

/*
 * Fold @src into @dst.
 *
 * src is a per-cpu stat, mean isn't initialized; its contribution to the
 * mean is taken from src->batch (the sum of its samples) instead.
 */
void blk_rq_stat_sum(struct blk_rq_stat *dst, struct blk_rq_stat *src)
{
	/*
	 * Nothing to do if src holds no samples; the same test also bails
	 * out if the combined sample count would not grow (wrap-around).
	 */
	if (dst->nr_samples + src->nr_samples <= dst->nr_samples)
		return;

	dst->min = min(dst->min, src->min);
	dst->max = max(dst->max, src->max);

	/* New mean = (sum of src samples + old mean * old count) / new count */
	dst->mean = div_u64(src->batch + dst->mean * dst->nr_samples,
				dst->nr_samples + src->nr_samples);

	dst->nr_samples += src->nr_samples;
}

/* Record one sample @value in @stat: update min/max, running sum and count. */
void blk_rq_stat_add(struct blk_rq_stat *stat, u64 value)
{
	stat->min = min(stat->min, value);
	stat->max = max(stat->max, value);
	stat->batch += value;
	stat->nr_samples++;
}

/*
 * Account request @rq, observed at time @now, in every active callback
 * registered on its queue. The sample is @now minus the request's
 * io_start_time_ns, clamped to 0 if @now lies before the start time.
 */
void blk_stat_add(struct request *rq, u64 now)
{
	struct request_queue *q = rq->q;
	struct blk_stat_callback *cb;
	struct blk_rq_stat *stat;
	int bucket, cpu;
	u64 value;

	value = (now >= rq->io_start_time_ns) ? now - rq->io_start_time_ns : 0;

	rcu_read_lock();
	cpu = get_cpu();
	list_for_each_entry_rcu(cb, &q->stats->callbacks, list) {
		if (!blk_stat_is_active(cb))
			continue;

		/* A negative bucket means the callback ignores this request */
		bucket = cb->bucket_fn(rq);
		if (bucket < 0)
			continue;

		stat = &per_cpu_ptr(cb->cpu_stat, cpu)[bucket];
		blk_rq_stat_add(stat, value);
	}
	put_cpu();
	rcu_read_unlock();
}

/*
 * Timer handler of a callback: rebuild cb->stat[] from scratch by summing
 * the per-cpu buckets of all online CPUs (resetting each per-cpu bucket
 * once it has been consumed), then invoke the owner's timer_fn.
 */
static void blk_stat_timer_fn(struct timer_list *t)
{
	struct blk_stat_callback *cb = timer_container_of(cb, t, timer);
	unsigned int bucket;
	int cpu;

	for (bucket = 0; bucket < cb->buckets; bucket++)
		blk_rq_stat_init(&cb->stat[bucket]);

	for_each_online_cpu(cpu) {
		struct blk_rq_stat *cpu_stat;

		cpu_stat = per_cpu_ptr(cb->cpu_stat, cpu);
		for (bucket = 0; bucket < cb->buckets; bucket++) {
			blk_rq_stat_sum(&cb->stat[bucket], &cpu_stat[bucket]);
			blk_rq_stat_init(&cpu_stat[bucket]);
		}
	}

	cb->timer_fn(cb);
}

/*
 * Allocate a statistics callback with @buckets buckets.
 *
 * @timer_fn:  invoked from the callback's timer after the per-cpu
 *             statistics have been gathered into cb->stat[]
 * @bucket_fn: maps a request to a bucket index; a negative return makes
 *             blk_stat_add() skip the request
 * @buckets:   number of entries in cb->stat[] and in each per-cpu array
 * @data:      stored in cb->data, not interpreted here
 *
 * Returns the new callback, or NULL if any allocation fails (everything
 * allocated up to that point is released). The timer is only set up here;
 * nothing in this function arms it.
 */
struct blk_stat_callback *
blk_stat_alloc_callback(void (*timer_fn)(struct blk_stat_callback *),
			int (*bucket_fn)(const struct request *),
			unsigned int buckets, void *data)
{
	struct blk_stat_callback *cb;

	cb = kmalloc_obj(*cb);
	if (!cb)
		return NULL;

	cb->stat = kmalloc_objs(struct blk_rq_stat, buckets);
	if (!cb->stat) {
		kfree(cb);
		return NULL;
	}
	cb->cpu_stat = __alloc_percpu(buckets * sizeof(struct blk_rq_stat),
				      __alignof__(struct blk_rq_stat));
	if (!cb->cpu_stat) {
		kfree(cb->stat);
		kfree(cb);
		return NULL;
	}

	cb->timer_fn = timer_fn;
	cb->bucket_fn = bucket_fn;
	cb->data = data;
	cb->buckets = buckets;
	timer_setup(&cb->timer, blk_stat_timer_fn, 0);

	return cb;
}

/*
 * Register @cb on @q: reset the per-cpu buckets of every possible CPU,
 * then publish the callback on the queue's list and set QUEUE_FLAG_STATS,
 * both under the stats lock.
 */
void blk_stat_add_callback(struct request_queue *q,
			   struct blk_stat_callback *cb)
{
	unsigned int bucket;
	unsigned long flags;
	int cpu;

	for_each_possible_cpu(cpu) {
		struct blk_rq_stat *cpu_stat;

		cpu_stat = per_cpu_ptr(cb->cpu_stat, cpu);
		for (bucket = 0; bucket < cb->buckets; bucket++)
			blk_rq_stat_init(&cpu_stat[bucket]);
	}

	spin_lock_irqsave(&q->stats->lock, flags);
	list_add_tail_rcu(&cb->list, &q->stats->callbacks);
	blk_queue_flag_set(QUEUE_FLAG_STATS, q);
	spin_unlock_irqrestore(&q->stats->lock, flags);
}

/*
 * Unregister @cb from @q. QUEUE_FLAG_STATS is cleared only if no callback
 * is left and accounting is not enabled. The callback's timer is deleted
 * synchronously after the stats lock has been dropped.
 */
void blk_stat_remove_callback(struct request_queue *q,
			      struct blk_stat_callback *cb)
{
	unsigned long flags;

	spin_lock_irqsave(&q->stats->lock, flags);
	list_del_rcu(&cb->list);
	if (list_empty(&q->stats->callbacks) && !q->stats->accounting)
		blk_queue_flag_clear(QUEUE_FLAG_STATS, q);
	spin_unlock_irqrestore(&q->stats->lock, flags);

	timer_delete_sync(&cb->timer);
}

/* RCU callback: release the per-cpu array, the bucket array and @cb itself. */
static void blk_stat_free_callback_rcu(struct rcu_head *head)
{
	struct blk_stat_callback *cb;

	cb = container_of(head, struct blk_stat_callback, rcu);
	free_percpu(cb->cpu_stat);
	kfree(cb->stat);
	kfree(cb);
}

/* Free @cb via call_rcu(); a NULL @cb is a no-op. */
void blk_stat_free_callback(struct blk_stat_callback *cb)
{
	if (cb)
		call_rcu(&cb->rcu, blk_stat_free_callback_rcu);
}

/*
 * Drop one accounting reference on @q. QUEUE_FLAG_STATS is cleared when
 * the counter reaches zero and no callbacks are registered.
 */
void blk_stat_disable_accounting(struct request_queue *q)
{
	unsigned long flags;

	spin_lock_irqsave(&q->stats->lock, flags);
	if (!--q->stats->accounting && list_empty(&q->stats->callbacks))
		blk_queue_flag_clear(QUEUE_FLAG_STATS, q);
	spin_unlock_irqrestore(&q->stats->lock, flags);
}
EXPORT_SYMBOL_GPL(blk_stat_disable_accounting);

/*
 * Take one accounting reference on @q. QUEUE_FLAG_STATS is set on the
 * 0 -> 1 transition if no callbacks are registered (registering a
 * callback sets the flag itself).
 */
void blk_stat_enable_accounting(struct request_queue *q)
{
	unsigned long flags;

	spin_lock_irqsave(&q->stats->lock, flags);
	if (!q->stats->accounting++ && list_empty(&q->stats->callbacks))
		blk_queue_flag_set(QUEUE_FLAG_STATS, q);
	spin_unlock_irqrestore(&q->stats->lock, flags);
}
EXPORT_SYMBOL_GPL(blk_stat_enable_accounting);

/*
 * Allocate and initialise an empty blk_queue_stats (no callbacks,
 * accounting counter 0). Returns NULL on allocation failure.
 */
struct blk_queue_stats *blk_alloc_queue_stats(void)
{
	struct blk_queue_stats *stats;

	stats = kmalloc_obj(*stats);
	if (!stats)
		return NULL;

	INIT_LIST_HEAD(&stats->callbacks);
	spin_lock_init(&stats->lock);
	stats->accounting = 0;

	return stats;
}

/*
 * Free @stats; NULL is accepted. Warns if callbacks are still registered
 * at this point.
 */
void blk_free_queue_stats(struct blk_queue_stats *stats)
{
	if (!stats)
		return;

	WARN_ON(!list_empty(&stats->callbacks));

	kfree(stats);
}
11 11 11 11 11 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 
524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 
1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247
// SPDX-License-Identifier: GPL-2.0
/*
 * This file contains functions which emulate a local clock-event
 * device via a broadcast event source.
 *
 * Copyright(C) 2005-2006, Linutronix GmbH, Thomas Gleixner <tglx@kernel.org>
 * Copyright(C) 2005-2007, Red Hat, Inc., Ingo Molnar
 * Copyright(C) 2006-2007, Timesys Corp., Thomas Gleixner
 */
#include <linux/cpu.h>
#include <linux/err.h>
#include <linux/hrtimer.h>
#include <linux/interrupt.h>
#include <linux/percpu.h>
#include <linux/profile.h>
#include <linux/sched.h>
#include <linux/smp.h>
#include <linux/module.h>

#include "tick-internal.h"

/*
 * Broadcast support for broken x86 hardware, where the local apic
 * timer stops in C3 state.
 */

static struct tick_device tick_broadcast_device;
static cpumask_var_t tick_broadcast_mask __cpumask_var_read_mostly;
static cpumask_var_t tick_broadcast_on __cpumask_var_read_mostly;
static cpumask_var_t tmpmask __cpumask_var_read_mostly;
static int tick_broadcast_forced;

static __cacheline_aligned_in_smp DEFINE_RAW_SPINLOCK(tick_broadcast_lock);

#ifdef CONFIG_TICK_ONESHOT
static DEFINE_PER_CPU(struct clock_event_device *, tick_oneshot_wakeup_device);

static void tick_broadcast_setup_oneshot(struct clock_event_device *bc, bool from_periodic);
static void tick_broadcast_clear_oneshot(int cpu);
static void tick_resume_broadcast_oneshot(struct clock_event_device *bc);
# ifdef CONFIG_HOTPLUG_CPU
static void tick_broadcast_oneshot_offline(unsigned int cpu);
# endif
#else
/* Without CONFIG_TICK_ONESHOT the oneshot helpers are stubs. */
static inline void
tick_broadcast_setup_oneshot(struct clock_event_device *bc, bool from_periodic) { BUG(); }
static inline void tick_broadcast_clear_oneshot(int cpu) { }
static inline void tick_resume_broadcast_oneshot(struct clock_event_device *bc) { }
# ifdef CONFIG_HOTPLUG_CPU
static inline void tick_broadcast_oneshot_offline(unsigned int cpu) { }
# endif
#endif

/*
 * Debugging: see timer_list.c
 */
struct tick_device *tick_get_broadcast_device(void)
{
	return &tick_broadcast_device;
}

/* Accessor for the (file-local) periodic broadcast mask. */
struct cpumask *tick_get_broadcast_mask(void)
{
	return tick_broadcast_mask;
}

static struct clock_event_device *tick_get_oneshot_wakeup_device(int cpu);

/* Const accessor for the per-CPU oneshot wakeup device of @cpu. */
const struct clock_event_device *tick_get_wakeup_device(int cpu)
{
	return tick_get_oneshot_wakeup_device(cpu);
}

/*
 * Start the device in periodic mode
 *
 * A NULL @bc is tolerated and ignored.
 */
static void tick_broadcast_start_periodic(struct clock_event_device *bc)
{
	if (bc) {
		bc->next_event_forced = 0;
		tick_setup_periodic(bc, 1);
	}
}

/*
 * Check, if the device can be utilized as broadcast device:
 *
 * Dummy, per-CPU and C3STOP devices are rejected, as is a device without
 * oneshot capability when the broadcast device already runs in oneshot
 * mode. Otherwise @newdev is accepted if there is no current device or
 * its rating is strictly higher.
 */
static bool tick_check_broadcast_device(struct clock_event_device *curdev,
					struct clock_event_device *newdev)
{
	if ((newdev->features & CLOCK_EVT_FEAT_DUMMY) ||
	    (newdev->features & CLOCK_EVT_FEAT_PERCPU) ||
	    (newdev->features & CLOCK_EVT_FEAT_C3STOP))
		return false;

	if (tick_broadcast_device.mode == TICKDEV_MODE_ONESHOT &&
	    !(newdev->features & CLOCK_EVT_FEAT_ONESHOT))
		return false;

	return !curdev || newdev->rating > curdev->rating;
}

#ifdef CONFIG_TICK_ONESHOT
static struct clock_event_device *tick_get_oneshot_wakeup_device(int cpu)
{
	return per_cpu(tick_oneshot_wakeup_device, cpu);
}

static void tick_oneshot_wakeup_handler(struct clock_event_device *wd)
{
	/*
	 * If we woke up early and the tick was reprogrammed in the
	 * meantime then this may be spurious but harmless.
	 */
	tick_receive_broadcast();
}

/*
 * Try to install @newdev as the oneshot wakeup device of @cpu.
 *
 * A NULL @newdev skips all checks and simply replaces the current device
 * with NULL. Otherwise the device must not be DUMMY or C3STOP, must be
 * both PERCPU and ONESHOT, must target exactly @cpu, must be rated higher
 * than the current device (if any) and its module must be pinnable.
 *
 * Returns true if the per-CPU slot was updated, false if @newdev was
 * rejected.
 */
static bool tick_set_oneshot_wakeup_device(struct clock_event_device *newdev,
					   int cpu)
{
	struct clock_event_device *curdev = tick_get_oneshot_wakeup_device(cpu);

	if (!newdev)
		goto set_device;

	if ((newdev->features & CLOCK_EVT_FEAT_DUMMY) ||
	    (newdev->features & CLOCK_EVT_FEAT_C3STOP))
		 return false;

	if (!(newdev->features & CLOCK_EVT_FEAT_PERCPU) ||
	    !(newdev->features & CLOCK_EVT_FEAT_ONESHOT))
		return false;

	if (!cpumask_equal(newdev->cpumask, cpumask_of(cpu)))
		return false;

	if (curdev && newdev->rating <= curdev->rating)
		return false;

	if (!try_module_get(newdev->owner))
		return false;

	newdev->event_handler = tick_oneshot_wakeup_handler;
set_device:
	clockevents_exchange_device(curdev, newdev);
	per_cpu(tick_oneshot_wakeup_device, cpu) = newdev;
	return true;
}
#else
/* Without CONFIG_TICK_ONESHOT there is never a oneshot wakeup device. */
static struct clock_event_device *tick_get_oneshot_wakeup_device(int cpu)
{
	return NULL;
}

static bool tick_set_oneshot_wakeup_device(struct clock_event_device *newdev,
					   int cpu)
{
	return false;
}
#endif

/*
 * Conditionally install/replace broadcast device
 *
 * @dev is first offered as per-CPU oneshot wakeup device for @cpu; only
 * if that is declined is it considered as the global broadcast device.
 */
void tick_install_broadcast_device(struct clock_event_device *dev, int cpu)
{
	struct clock_event_device *cur = tick_broadcast_device.evtdev;

	if (tick_set_oneshot_wakeup_device(dev, cpu))
		return;

	if (!tick_check_broadcast_device(cur, dev))
		return;

	if (!try_module_get(dev->owner))
		return;

	clockevents_exchange_device(cur, dev);
	if (cur)
		cur->event_handler = clockevents_handle_noop;
	tick_broadcast_device.evtdev = dev;
	if (!cpumask_empty(tick_broadcast_mask))
		tick_broadcast_start_periodic(dev);

	if (!(dev->features & CLOCK_EVT_FEAT_ONESHOT))
		return;

	/*
	 * If the system already runs in oneshot mode, switch the newly
	 * registered broadcast device to oneshot mode explicitly.
	 */
	if (tick_broadcast_oneshot_active()) {
		tick_broadcast_switch_to_oneshot();
		return;
	}

	/*
	 * Inform all cpus about this. We might be in a situation
	 * where we did not switch to oneshot mode because the per cpu
	 * devices are affected by CLOCK_EVT_FEAT_C3STOP and the lack
	 * of a oneshot capable broadcast device. Without that
	 * notification the system stays stuck in periodic mode
	 * forever.
	 */
	tick_clock_notify();
}

/*
 * Check, if the device is the broadcast device
 */
int tick_is_broadcast_device(struct clock_event_device *dev)
{
	return (dev && tick_broadcast_device.evtdev == dev);
}

/*
 * Update the frequency of @dev if it is the broadcast device.
 *
 * Returns -ENODEV when @dev is not the broadcast device, otherwise the
 * result of __clockevents_update_freq(), which is called with
 * tick_broadcast_lock held.
 */
int tick_broadcast_update_freq(struct clock_event_device *dev, u32 freq)
{
	int ret = -ENODEV;

	if (tick_is_broadcast_device(dev)) {
		raw_spin_lock(&tick_broadcast_lock);
		ret = __clockevents_update_freq(dev, freq);
		raw_spin_unlock(&tick_broadcast_lock);
	}
	return ret;
}

/*
 * Fallback ->broadcast callback: it cannot deliver anything and only
 * logs the failure once.
 */
static void err_broadcast(const struct cpumask *mask)
{
	pr_crit_once("Failed to broadcast timer tick. Some CPUs may be unresponsive.\n");
}

/*
 * Make sure @dev has a ->broadcast callback: keep an existing one,
 * otherwise use tick_broadcast, and if that still leaves it unset warn
 * once and fall back to err_broadcast().
 */
static void tick_device_setup_broadcast_func(struct clock_event_device *dev)
{
	if (!dev->broadcast)
		dev->broadcast = tick_broadcast;
	if (!dev->broadcast) {
		pr_warn_once("%s depends on broadcast, but no broadcast function available\n",
			     dev->name);
		dev->broadcast = err_broadcast;
	}
}

/*
 * Check, if the device is dysfunctional and a placeholder, which
 * needs to be handled by the broadcast device.
 *
 * Returns nonzero if the caller has to leave the per cpu device alone
 * because the tick is delivered by the broadcast device, 0 otherwise.
 */
int tick_device_uses_broadcast(struct clock_event_device *dev, int cpu)
{
	struct clock_event_device *bc = tick_broadcast_device.evtdev;
	unsigned long flags;
	int ret = 0;

	raw_spin_lock_irqsave(&tick_broadcast_lock, flags);

	/*
	 * Devices might be registered with both periodic and oneshot
	 * mode disabled. This signals, that the device needs to be
	 * operated from the broadcast device and is a placeholder for
	 * the cpu local device.
	 */
	if (!tick_device_is_functional(dev)) {
		dev->event_handler = tick_handle_periodic;
		tick_device_setup_broadcast_func(dev);
		cpumask_set_cpu(cpu, tick_broadcast_mask);
		if (tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC)
			tick_broadcast_start_periodic(bc);
		else
			tick_broadcast_setup_oneshot(bc, false);
		ret = 1;
	} else {
		/*
		 * Clear the broadcast bit for this cpu if the
		 * device is not power state affected.
		 */
		if (!(dev->features & CLOCK_EVT_FEAT_C3STOP))
			cpumask_clear_cpu(cpu, tick_broadcast_mask);
		else
			tick_device_setup_broadcast_func(dev);

		/*
		 * Clear the broadcast bit if the CPU is not in
		 * periodic broadcast on state.
		 */
		if (!cpumask_test_cpu(cpu, tick_broadcast_on))
			cpumask_clear_cpu(cpu, tick_broadcast_mask);

		switch (tick_broadcast_device.mode) {
		case TICKDEV_MODE_ONESHOT:
			/*
			 * If the system is in oneshot mode we can
			 * unconditionally clear the oneshot mask bit,
			 * because the CPU is running and therefore
			 * not in an idle state which causes the power
			 * state affected device to stop. Let the
			 * caller initialize the device.
			 */
			tick_broadcast_clear_oneshot(cpu);
			ret = 0;
			break;

		case TICKDEV_MODE_PERIODIC:
			/*
			 * If the system is in periodic mode, check
			 * whether the broadcast device can be
			 * switched off now.
			 */
			if (cpumask_empty(tick_broadcast_mask) && bc)
				clockevents_shutdown(bc);
			/*
			 * If we kept the cpu in the broadcast mask,
			 * tell the caller to leave the per cpu device
			 * in shutdown state. The periodic interrupt
			 * is delivered by the broadcast device, if
			 * the broadcast device exists and is not
			 * hrtimer based.
			 */
			if (bc && !(bc->features & CLOCK_EVT_FEAT_HRTIMER))
				ret = cpumask_test_cpu(cpu, tick_broadcast_mask);
			break;
		default:
			break;
		}
	}
	raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
	return ret;
}

/*
 * Run the event handler of this CPU's tick device.
 *
 * Returns -ENODEV if the CPU has no tick device, -EINVAL if the device
 * has no event handler, 0 after the handler has been invoked.
 */
int tick_receive_broadcast(void)
{
	struct tick_device *td = this_cpu_ptr(&tick_cpu_device);
	struct clock_event_device *evt = td->evtdev;

	if (!evt)
		return -ENODEV;

	if (!evt->event_handler)
		return -EINVAL;

	evt->event_handler(evt);
	return 0;
}

/*
 * Broadcast the event to the cpus, which are set in the mask (mangled).
 *
 * Returns true if the current CPU was in @mask and its local handler
 * still has to be run by the caller.
 */
static bool tick_do_broadcast(struct cpumask *mask)
{
	int cpu = smp_processor_id();
	struct tick_device *td;
	bool local = false;

	/*
	 * Check, if the current cpu is in the mask
	 */
	if (cpumask_test_cpu(cpu, mask)) {
		struct clock_event_device *bc = tick_broadcast_device.evtdev;

		cpumask_clear_cpu(cpu, mask);
		/*
		 * We only run the local handler, if the broadcast
		 * device is not hrtimer based. Otherwise we run into
		 * a hrtimer recursion.
		 *
		 * local timer_interrupt()
		 *   local_handler()
		 *     expire_hrtimers()
		 *       bc_handler()
		 *         local_handler()
		 *	     expire_hrtimers()
		 */
		local = !(bc->features & CLOCK_EVT_FEAT_HRTIMER);
	}

	if (!cpumask_empty(mask)) {
		/*
		 * It might be necessary to actually check whether the devices
		 * have different broadcast functions. For now, just use the
		 * one of the first device. This works as long as we have this
		 * misfeature only on x86 (lapic)
		 */
		td = &per_cpu(tick_cpu_device, cpumask_first(mask));
		td->evtdev->broadcast(mask);
	}
	return local;
}

/*
 * Periodic broadcast:
 * - invoke the broadcast handlers
 *
 * Only online CPUs that are set in tick_broadcast_mask are targeted.
 */
static bool tick_do_periodic_broadcast(void)
{
	cpumask_and(tmpmask, cpu_online_mask, tick_broadcast_mask);
	return tick_do_broadcast(tmpmask);
}

/*
 * Event handler for periodic broadcast ticks
 */
static void tick_handle_periodic_broadcast(struct clock_event_device *dev)
{
	struct tick_device *td = this_cpu_ptr(&tick_cpu_device);
	bool bc_local;

	raw_spin_lock(&tick_broadcast_lock);
	tick_broadcast_device.evtdev->next_event_forced = 0;

	/* Handle spurious interrupts gracefully */
	if (clockevent_state_shutdown(tick_broadcast_device.evtdev)) {
		raw_spin_unlock(&tick_broadcast_lock);
		return;
	}

	bc_local = tick_do_periodic_broadcast();

	/* A device in oneshot state has to be re-armed for the next tick */
	if (clockevent_state_oneshot(dev)) {
		ktime_t next = ktime_add_ns(dev->next_event, TICK_NSEC);

		clockevents_program_event(dev, next, true);
	}
	raw_spin_unlock(&tick_broadcast_lock);

	/*
	 * We run the handler of the local cpu after dropping
	 * tick_broadcast_lock because the handler might deadlock when
	 * trying to switch to oneshot mode.
	 */
	if (bc_local)
		td->evtdev->event_handler(td->evtdev);
}

/**
 * tick_broadcast_control - Enable/disable or force broadcast mode
 * @mode:	The selected broadcast mode
 *
 * Called when the system enters a state where affected tick devices
 * might stop. Note: TICK_BROADCAST_FORCE cannot be undone.
 */
void tick_broadcast_control(enum tick_broadcast_mode mode)
{
	struct clock_event_device *bc, *dev;
	struct tick_device *td;
	int cpu, bc_stopped;
	unsigned long flags;

	/* Protects also the local clockevent device. */
	raw_spin_lock_irqsave(&tick_broadcast_lock, flags);
	td = this_cpu_ptr(&tick_cpu_device);
	dev = td->evtdev;

	/*
	 * Is the device not affected by the powerstate ?
	 */
	if (!dev || !(dev->features & CLOCK_EVT_FEAT_C3STOP))
		goto out;

	if (!tick_device_is_functional(dev))
		goto out;

	cpu = smp_processor_id();
	bc = tick_broadcast_device.evtdev;
	/* Remember whether the mask was empty before this call changed it */
	bc_stopped = cpumask_empty(tick_broadcast_mask);

	switch (mode) {
	case TICK_BROADCAST_FORCE:
		tick_broadcast_forced = 1;
		fallthrough;
	case TICK_BROADCAST_ON:
		cpumask_set_cpu(cpu, tick_broadcast_on);
		if (!cpumask_test_and_set_cpu(cpu, tick_broadcast_mask)) {
			/*
			 * Only shutdown the cpu local device, if:
			 *
			 * - the broadcast device exists
			 * - the broadcast device is not a hrtimer based one
			 * - the broadcast device is in periodic mode to
			 *   avoid a hiccup during switch to oneshot mode
			 */
			if (bc && !(bc->features & CLOCK_EVT_FEAT_HRTIMER) &&
			    tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC)
				clockevents_shutdown(dev);
		}
		break;

	case TICK_BROADCAST_OFF:
		/* Forced broadcast mode cannot be switched off again */
		if (tick_broadcast_forced)
			break;
		cpumask_clear_cpu(cpu, tick_broadcast_on);
		if (cpumask_test_and_clear_cpu(cpu, tick_broadcast_mask)) {
			if (tick_broadcast_device.mode ==
			    TICKDEV_MODE_PERIODIC)
				tick_setup_periodic(dev, 0);
		}
		break;
	}

	/*
	 * Stop the broadcast device when the mask just became empty, start
	 * it when the mask just became non-empty.
	 */
	if (bc) {
		if (cpumask_empty(tick_broadcast_mask)) {
			if (!bc_stopped)
				clockevents_shutdown(bc);
		} else if (bc_stopped) {
			if (tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC)
				tick_broadcast_start_periodic(bc);
			else
				tick_broadcast_setup_oneshot(bc, false);
		}
	}
out:
	raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
}
EXPORT_SYMBOL_GPL(tick_broadcast_control);

/*
 * Set the periodic handler depending on broadcast on/off
 */
void tick_set_periodic_handler(struct clock_event_device *dev, int broadcast)
{
	if (!broadcast)
		dev->event_handler = tick_handle_periodic;
	else
		dev->event_handler = tick_handle_periodic_broadcast;
}

#ifdef CONFIG_HOTPLUG_CPU
/*
 * In periodic mode, shut the broadcast device down once no CPU is left
 * in the broadcast mask.
 */
static void tick_shutdown_broadcast(void)
{
	struct clock_event_device *bc = tick_broadcast_device.evtdev;

	if (tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC) {
		if (bc && cpumask_empty(tick_broadcast_mask))
			clockevents_shutdown(bc);
	}
}

/*
 * Remove a CPU from broadcasting
 */
void tick_broadcast_offline(unsigned int cpu)
{
	/* All mask updates below are serialized by tick_broadcast_lock */
	raw_spin_lock(&tick_broadcast_lock);
	/* Drop @cpu from the general broadcast mask and the "on" mask */
	cpumask_clear_cpu(cpu, tick_broadcast_mask);
	cpumask_clear_cpu(cpu, tick_broadcast_on);
	/* Drop @cpu from the oneshot specific state as well */
	tick_broadcast_oneshot_offline(cpu);
	/* Possibly stop the broadcast device if nobody is left */
	tick_shutdown_broadcast();
	raw_spin_unlock(&tick_broadcast_lock);
}

#endif

/*
 * Suspend broadcasting: shut down the broadcast device, if one is
 * installed. Runs under tick_broadcast_lock with interrupts disabled.
 */
void tick_suspend_broadcast(void)
{
	struct clock_event_device *bc;
	unsigned long flags;

	raw_spin_lock_irqsave(&tick_broadcast_lock, flags);

	bc = tick_broadcast_device.evtdev;
	if (bc)
		clockevents_shutdown(bc);

	raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
}

/*
 * This is called from tick_resume_local() on a resuming CPU. That's
 * called from the core resume function, tick_unfreeze() and the magic XEN
 * resume hackery.
 *
 * In none of these cases the broadcast device mode can change and the
 * bit of the resuming CPU in the broadcast mask is safe as well.
 *
 * Returns false when the broadcast device is in oneshot mode. Otherwise
 * returns whether the current CPU is set in tick_broadcast_mask. No lock
 * is taken here; see the reasoning above.
 */
bool tick_resume_check_broadcast(void)
{
	if (tick_broadcast_device.mode == TICKDEV_MODE_ONESHOT)
		return false;
	else
		return cpumask_test_cpu(smp_processor_id(), tick_broadcast_mask);
}

/*
 * Resume the broadcast device, if one is installed.
 *
 * The device is resumed via clockevents_tick_resume(). Afterwards, and only
 * when tick_broadcast_mask is not empty, broadcasting is restarted according
 * to the current broadcast mode: periodic broadcasting is started again, or
 * the device is handed to tick_resume_broadcast_oneshot().
 */
void tick_resume_broadcast(void)
{
	struct clock_event_device *bc;
	unsigned long flags;

	raw_spin_lock_irqsave(&tick_broadcast_lock, flags);

	bc = tick_broadcast_device.evtdev;

	if (bc) {
		clockevents_tick_resume(bc);

		switch (tick_broadcast_device.mode) {
		case TICKDEV_MODE_PERIODIC:
			if (!cpumask_empty(tick_broadcast_mask))
				tick_broadcast_start_periodic(bc);
			break;
		case TICKDEV_MODE_ONESHOT:
			if (!cpumask_empty(tick_broadcast_mask))
				tick_resume_broadcast_oneshot(bc);
			break;
		}
	}
	raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
}

#ifdef CONFIG_TICK_ONESHOT

/*
 * Oneshot broadcast state, one bit per CPU:
 *
 * - oneshot_mask: set when a CPU enters the oneshot broadcast state and
 *   cleared when it leaves it (see ___tick_broadcast_oneshot_control())
 * - pending_mask: set by the broadcast handler for CPUs whose event it
 *   found expired
 * - force_mask: CPUs which the next run of the broadcast handler has to
 *   wake up in any case
 */
static cpumask_var_t tick_broadcast_oneshot_mask __cpumask_var_read_mostly;
static cpumask_var_t tick_broadcast_pending_mask __cpumask_var_read_mostly;
static cpumask_var_t tick_broadcast_force_mask __cpumask_var_read_mostly;

/*
 * Exposed for debugging: see timer_list.c
 *
 * Returns the live oneshot broadcast mask itself, not a copy.
 */
struct cpumask *tick_get_broadcast_oneshot_mask(void)
{
	return tick_broadcast_oneshot_mask;
}

/*
 * Called before going idle
with interrupts disabled. Checks whether a
 * broadcast event from the other core is about to happen. We detected
 * that in tick_broadcast_oneshot_control(). The callsite can use this
 * to avoid a deep idle transition as we are about to get the
 * broadcast IPI right away.
 */
noinstr int tick_check_broadcast_expired(void)
{
	/*
	 * NOTE(review): the arch_test_bit() variant is presumably selected to
	 * keep instrumented bitops out of this noinstr function - confirm.
	 */
#ifdef _ASM_GENERIC_BITOPS_INSTRUMENTED_NON_ATOMIC_H
	return arch_test_bit(smp_processor_id(), cpumask_bits(tick_broadcast_force_mask));
#else
	return cpumask_test_cpu(smp_processor_id(), tick_broadcast_force_mask);
#endif
}

/*
 * Set broadcast interrupt affinity
 *
 * Nothing is done unless @bc advertises CLOCK_EVT_FEAT_DYNIRQ, and nothing
 * is done when the cpumask recorded in @bc already equals @cpumask.
 * Otherwise @cpumask is recorded in @bc and applied to the interrupt of the
 * device with irq_set_affinity().
 */
static void tick_broadcast_set_affinity(struct clock_event_device *bc,
					const struct cpumask *cpumask)
{
	if (!(bc->features & CLOCK_EVT_FEAT_DYNIRQ))
		return;

	if (cpumask_equal(bc->cpumask, cpumask))
		return;

	bc->cpumask = cpumask;
	irq_set_affinity(bc->irq, bc->cpumask);
}

/*
 * Program the broadcast device @bc to expire at @expires.
 *
 * @bc is switched to oneshot state first when it is not in that state yet.
 * After programming the event the broadcast interrupt is targeted at @cpu
 * (subject to the conditions in tick_broadcast_set_affinity()).
 */
static void tick_broadcast_set_event(struct clock_event_device *bc, int cpu,
				     ktime_t expires)
{
	if (!clockevent_state_oneshot(bc))
		clockevents_switch_state(bc, CLOCK_EVT_STATE_ONESHOT);

	clockevents_program_event(bc, expires, 1);
	tick_broadcast_set_affinity(bc, cpumask_of(cpu));
}

/*
 * Oneshot part of tick_resume_broadcast(): put @bc into oneshot state.
 * No event is programmed here.
 */
static void tick_resume_broadcast_oneshot(struct clock_event_device *bc)
{
	clockevents_switch_state(bc, CLOCK_EVT_STATE_ONESHOT);
}

/*
 * Called from irq_enter() when idle was interrupted to reenable the
 * per cpu device.
 *
 * Only acts when the current CPU is set in tick_broadcast_oneshot_mask.
 */
void tick_check_oneshot_broadcast_this_cpu(void)
{
	if (cpumask_test_cpu(smp_processor_id(), tick_broadcast_oneshot_mask)) {
		struct tick_device *td = this_cpu_ptr(&tick_cpu_device);

		/*
		 * We might be in the middle of switching over from
		 * periodic to oneshot. If the CPU has not yet
		 * switched over, leave the device alone.
		 */
		if (td->mode == TICKDEV_MODE_ONESHOT) {
			clockevents_switch_state(td->evtdev,
					      CLOCK_EVT_STATE_ONESHOT);
		}
	}
}

/*
 * Handle oneshot mode broadcasting
 *
 * Event handler of the broadcast device @dev in oneshot mode. Under
 * tick_broadcast_lock it collects all CPUs in the oneshot mask whose local
 * event has expired, adds the CPUs from the force mask, broadcasts to them
 * and reprograms @dev for the earliest event which has not expired yet.
 * The local handler, if required, is invoked after the lock was dropped.
 */
static void tick_handle_oneshot_broadcast(struct clock_event_device *dev)
{
	struct tick_device *td;
	ktime_t now, next_event;
	int cpu, next_cpu = 0;
	bool bc_local;

	raw_spin_lock(&tick_broadcast_lock);
	dev->next_event = KTIME_MAX;
	tick_broadcast_device.evtdev->next_event_forced = 0;
	next_event = KTIME_MAX;
	cpumask_clear(tmpmask);
	now = ktime_get();
	/* Find all expired events */
	for_each_cpu(cpu, tick_broadcast_oneshot_mask) {
		/*
		 * Required for !SMP because for_each_cpu() reports
		 * unconditionally CPU0 as set on UP kernels.
		 */
		if (!IS_ENABLED(CONFIG_SMP) &&
		    cpumask_empty(tick_broadcast_oneshot_mask))
			break;

		td = &per_cpu(tick_cpu_device, cpu);
		if (td->evtdev->next_event <= now) {
			cpumask_set_cpu(cpu, tmpmask);
			/*
			 * Mark the remote cpu in the pending mask, so
			 * it can avoid reprogramming the cpu local
			 * timer in tick_broadcast_oneshot_control().
			 */
			cpumask_set_cpu(cpu, tick_broadcast_pending_mask);
		} else if (td->evtdev->next_event < next_event) {
			/* Track the earliest not yet expired event and its CPU */
			next_event = td->evtdev->next_event;
			next_cpu = cpu;
		}
	}

	/*
	 * Remove the current cpu from the pending mask. The event is
	 * delivered immediately in tick_do_broadcast() !
	 */
	cpumask_clear_cpu(smp_processor_id(), tick_broadcast_pending_mask);

	/* Take care of enforced broadcast requests */
	cpumask_or(tmpmask, tmpmask, tick_broadcast_force_mask);
	cpumask_clear(tick_broadcast_force_mask);

	/*
	 * Sanity check. Catch the case where we try to broadcast to
	 * offline cpus.
	 */
	if (WARN_ON_ONCE(!cpumask_subset(tmpmask, cpu_online_mask)))
		cpumask_and(tmpmask, tmpmask, cpu_online_mask);

	/*
	 * Wakeup the cpus which have an expired event.
	 */
	bc_local = tick_do_broadcast(tmpmask);

	/*
	 * Two reasons for reprogram:
	 *
	 * - The global event did not expire any CPU local
	 * events. This happens in dyntick mode, as the maximum PIT
	 * delta is quite small.
	 *
	 * - There are pending events on sleeping CPUs which were not
	 * in the event mask
	 */
	if (next_event != KTIME_MAX)
		tick_broadcast_set_event(dev, next_cpu, next_event);

	raw_spin_unlock(&tick_broadcast_lock);

	/* The local event handler runs without tick_broadcast_lock held */
	if (bc_local) {
		td = this_cpu_ptr(&tick_cpu_device);
		td->evtdev->event_handler(td->evtdev);
	}
}

/*
 * Check whether the broadcast device @bc depends on @cpu.
 *
 * Returns -EBUSY when @bc is hrtimer based, has an event armed
 * (next_event != KTIME_MAX) and is bound to @cpu. Returns 0 otherwise.
 */
static int broadcast_needs_cpu(struct clock_event_device *bc, int cpu)
{
	if (!(bc->features & CLOCK_EVT_FEAT_HRTIMER))
		return 0;
	if (bc->next_event == KTIME_MAX)
		return 0;
	return bc->bound_on == cpu ? -EBUSY : 0;
}

/*
 * Switch the cpu local device @dev to shutdown state, unless the hrtimer
 * based broadcast device @bc requires it to keep running.
 */
static void broadcast_shutdown_local(struct clock_event_device *bc,
				     struct clock_event_device *dev)
{
	/*
	 * For hrtimer based broadcasting we cannot shutdown the cpu
	 * local device if our own event is the first one to expire or
	 * if we own the broadcast timer.
	 */
	if (bc->features & CLOCK_EVT_FEAT_HRTIMER) {
		if (broadcast_needs_cpu(bc, smp_processor_id()))
			return;
		if (dev->next_event < bc->next_event)
			return;
	}
	clockevents_switch_state(dev, CLOCK_EVT_STATE_SHUTDOWN);
}

/*
 * Enter or leave the oneshot broadcast state for @cpu.
 *
 * @state:	TICK_BROADCAST_ENTER takes the enter path, every other value
 *		takes the exit path
 * @td:		tick device of @cpu; its evtdev is the cpu local device
 * @cpu:	the CPU to operate on
 *
 * Runs under tick_broadcast_lock and dereferences the broadcast device
 * without a NULL check; the caller visible in this file checks
 * tick_broadcast_device.evtdev before calling.
 *
 * Returns 0 on success or -EBUSY when the CPU must not rely on the
 * broadcast device (see the comments in the enter path).
 */
static int ___tick_broadcast_oneshot_control(enum tick_broadcast_state state,
					     struct tick_device *td,
					     int cpu)
{
	struct clock_event_device *bc, *dev = td->evtdev;
	int ret = 0;
	ktime_t now;

	raw_spin_lock(&tick_broadcast_lock);
	bc = tick_broadcast_device.evtdev;

	if (state == TICK_BROADCAST_ENTER) {
		/*
		 * If the current CPU owns the hrtimer broadcast
		 * mechanism, it cannot go deep idle and we do not add
		 * the CPU to the broadcast mask. We don't have to go
		 * through the EXIT path as the local timer is not
		 * shutdown.
		 */
		ret = broadcast_needs_cpu(bc, cpu);
		if (ret)
			goto out;

		/*
		 * If the broadcast device is in periodic mode, we
		 * return.
		 */
		if (tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC) {
			/* If it is a hrtimer based broadcast, return busy */
			if (bc->features & CLOCK_EVT_FEAT_HRTIMER)
				ret = -EBUSY;
			goto out;
		}

		if (!cpumask_test_and_set_cpu(cpu, tick_broadcast_oneshot_mask)) {
			WARN_ON_ONCE(cpumask_test_cpu(cpu, tick_broadcast_pending_mask));

			/* Conditionally shut down the local timer. */
			broadcast_shutdown_local(bc, dev);

			/*
			 * We only reprogram the broadcast timer if we
			 * did not mark ourself in the force mask and
			 * if the cpu local event is earlier than the
			 * broadcast event. If the current CPU is in
			 * the force mask, then we are going to be
			 * woken by the IPI right away; we return
			 * busy, so the CPU does not try to go deep
			 * idle.
			 */
			if (cpumask_test_cpu(cpu, tick_broadcast_force_mask)) {
				ret = -EBUSY;
			} else if (dev->next_event < bc->next_event) {
				tick_broadcast_set_event(bc, cpu, dev->next_event);
				/*
				 * In case of hrtimer broadcasts the
				 * programming might have moved the
				 * timer to this cpu. If yes, remove
				 * us from the broadcast mask and
				 * return busy.
				 */
				ret = broadcast_needs_cpu(bc, cpu);
				if (ret) {
					cpumask_clear_cpu(cpu,
						tick_broadcast_oneshot_mask);
				}
			}
		}
	} else {
		if (cpumask_test_and_clear_cpu(cpu, tick_broadcast_oneshot_mask)) {
			clockevents_switch_state(dev, CLOCK_EVT_STATE_ONESHOT);
			/*
			 * The cpu which was handling the broadcast
			 * timer marked this cpu in the broadcast
			 * pending mask and fired the broadcast
			 * IPI. So we are going to handle the expired
			 * event anyway via the broadcast IPI
			 * handler. No need to reprogram the timer
			 * with an already expired event.
			 */
			if (cpumask_test_and_clear_cpu(cpu,
				       tick_broadcast_pending_mask))
				goto out;

			/*
			 * Bail out if there is no next event.
			 */
			if (dev->next_event == KTIME_MAX)
				goto out;
			/*
			 * If the pending bit is not set, then we are
			 * either the CPU handling the broadcast
			 * interrupt or we got woken by something else.
			 *
			 * We are no longer in the broadcast mask, so
			 * if the cpu local expiry time is already
			 * reached, we would reprogram the cpu local
			 * timer with an already expired event.
			 *
			 * This can lead to a ping-pong when we return
			 * to idle and therefore rearm the broadcast
			 * timer before the cpu local timer was able
			 * to fire. This happens because the forced
			 * reprogramming makes sure that the event
			 * will happen in the future and depending on
			 * the min_delta setting this might be far
			 * enough out that the ping-pong starts.
			 *
			 * If the cpu local next_event has expired
			 * then we know that the broadcast timer
			 * next_event has expired as well and
			 * broadcast is about to be handled. So we
			 * avoid reprogramming and enforce that the
			 * broadcast handler, which did not run yet,
			 * will invoke the cpu local handler.
			 *
			 * We cannot call the handler directly from
			 * here, because we might be in a NOHZ phase
			 * and we did not go through the irq_enter()
			 * nohz fixups.
			 */
			now = ktime_get();
			if (dev->next_event <= now) {
				cpumask_set_cpu(cpu, tick_broadcast_force_mask);
				goto out;
			}
			/*
			 * We got woken by something else. Reprogram
			 * the cpu local timer device.
			 */
			tick_program_event(dev->next_event, 1);
		}
	}
out:
	raw_spin_unlock(&tick_broadcast_lock);
	return ret;
}

/*
 * Try to use the per CPU oneshot wakeup device instead of the broadcast
 * device.
 *
 * Returns -EINVAL when @td is not in oneshot mode, -ENODEV when @cpu has no
 * wakeup device or when, on exit, the wakeup device is not in oneshot state,
 * and 0 when the wakeup device took care of the request.
 */
static int tick_oneshot_wakeup_control(enum tick_broadcast_state state,
				       struct tick_device *td,
				       int cpu)
{
	struct clock_event_device *dev, *wd;

	dev = td->evtdev;
	if (td->mode != TICKDEV_MODE_ONESHOT)
		return -EINVAL;

	wd = tick_get_oneshot_wakeup_device(cpu);
	if (!wd)
		return -ENODEV;

	switch (state) {
	case TICK_BROADCAST_ENTER:
		/* Stop the local device and arm the wakeup device with its event */
		clockevents_switch_state(dev, CLOCK_EVT_STATE_ONESHOT_STOPPED);
		clockevents_switch_state(wd, CLOCK_EVT_STATE_ONESHOT);
		clockevents_program_event(wd, dev->next_event, 1);
		break;
	case TICK_BROADCAST_EXIT:
		/* We may have transitioned to oneshot mode while idle */
		if (clockevent_get_state(wd) != CLOCK_EVT_STATE_ONESHOT)
			return -ENODEV;
	}

	return 0;
}

/*
 * Enter or leave the oneshot broadcast state on the current CPU.
 *
 * The per CPU wakeup device is tried first. If it did not handle the
 * request, the broadcast device is used when one is installed.
 *
 * Returns 0 on success, otherwise the result of
 * ___tick_broadcast_oneshot_control(), or -EBUSY when neither device is
 * available.
 */
int __tick_broadcast_oneshot_control(enum tick_broadcast_state state)
{
	struct tick_device *td = this_cpu_ptr(&tick_cpu_device);
	int cpu = smp_processor_id();

	if (!tick_oneshot_wakeup_control(state, td, cpu))
		return 0;

	if (tick_broadcast_device.evtdev)
		return ___tick_broadcast_oneshot_control(state, td, cpu);

	/*
	 * If there is no broadcast or wakeup device, tell the caller not
	 * to go into deep idle.
	 */
	return -EBUSY;
}

/*
 * Reset the one shot broadcast for a cpu
 *
 * Clears @cpu in the oneshot mask and in the pending mask.
 *
 * Called with tick_broadcast_lock held
 */
static void tick_broadcast_clear_oneshot(int cpu)
{
	cpumask_clear_cpu(cpu, tick_broadcast_oneshot_mask);
	cpumask_clear_cpu(cpu, tick_broadcast_pending_mask);
}

/*
 * Set next_event of the tick device of every CPU in @mask to @expires.
 * CPUs whose tick device has no event device installed are skipped.
 */
static void tick_broadcast_init_next_event(struct cpumask *mask,
					   ktime_t expires)
{
	struct tick_device *td;
	int cpu;

	for_each_cpu(cpu, mask) {
		td = &per_cpu(tick_cpu_device, cpu);
		if (td->evtdev)
			td->evtdev->next_event = expires;
	}
}

/*
 * Return a consistent snapshot of tick_next_period, read under
 * jiffies_lock.
 */
static inline ktime_t tick_get_next_period(void)
{
	ktime_t next;

	/*
	 * Protect against concurrent updates (store/load tearing on
	 * 32bit). It does not matter if the time is already in the
	 * past. The broadcast device which is about to be programmed will
	 * fire in any case.
	 */
	raw_spin_lock(&jiffies_lock);
	next = tick_next_period;
	raw_spin_unlock(&jiffies_lock);
	return next;
}

/**
 * tick_broadcast_setup_oneshot - setup the broadcast device
 * @bc: the broadcast device
 * @from_periodic: true if called from periodic mode
 *
 * Does nothing when @bc is NULL. When the oneshot broadcast handler is
 * already installed on @bc, only the oneshot state of the current CPU is
 * cleared.
 */
static void tick_broadcast_setup_oneshot(struct clock_event_device *bc,
					 bool from_periodic)
{
	int cpu = smp_processor_id();
	ktime_t nexttick = 0;

	if (!bc)
		return;

	/*
	 * When the broadcast device was switched to oneshot by the first
	 * CPU handling the NOHZ change, the other CPUs will reach this
	 * code via hrtimer_run_queues() -> tick_check_oneshot_change()
	 * too. Set up the broadcast device only once!
	 */
	if (bc->event_handler == tick_handle_oneshot_broadcast) {
		/*
		 * The CPU which switched from periodic to oneshot mode
		 * set the broadcast oneshot bit for all other CPUs which
		 * are in the general (periodic) broadcast mask to ensure
		 * that CPUs which wait for the periodic broadcast are
		 * woken up.
		 *
		 * Clear the bit for the local CPU as the set bit would
		 * prevent the first tick_broadcast_enter() after this CPU
		 * switched to oneshot state to program the broadcast
		 * device.
		 *
		 * This code can also be reached via tick_broadcast_control(),
		 * but this cannot avoid the tick_broadcast_clear_oneshot()
		 * as that would break the periodic to oneshot transition of
		 * secondary CPUs. But that's harmless as the below only
		 * clears already cleared bits.
		 */
		tick_broadcast_clear_oneshot(cpu);
		return;
	}

	bc->event_handler = tick_handle_oneshot_broadcast;
	bc->next_event_forced = 0;
	bc->next_event = KTIME_MAX;

	/*
	 * When the tick mode is switched from periodic to oneshot it must
	 * be ensured that CPUs which are waiting for periodic broadcast
	 * get their wake-up at the next tick. This is achieved by ORing
	 * tick_broadcast_mask into tick_broadcast_oneshot_mask.
	 *
	 * For other callers, e.g. broadcast device replacement,
	 * tick_broadcast_oneshot_mask must not be touched as this would
	 * set bits for CPUs which are already NOHZ, but not idle. Their
	 * next tick_broadcast_enter() would observe the bit set and fail
	 * to update the expiry time and the broadcast event device.
	 */
	if (from_periodic) {
		cpumask_copy(tmpmask, tick_broadcast_mask);
		/* Remove the local CPU as it is obviously not idle */
		cpumask_clear_cpu(cpu, tmpmask);
		cpumask_or(tick_broadcast_oneshot_mask, tick_broadcast_oneshot_mask, tmpmask);

		/*
		 * Ensure that the oneshot broadcast handler will wake the
		 * CPUs which are still waiting for periodic broadcast.
		 */
		nexttick = tick_get_next_period();
		tick_broadcast_init_next_event(tmpmask, nexttick);

		/*
		 * If the underlying broadcast clock event device is
		 * already in oneshot state, then there is nothing to do.
		 * The device was already armed for the next tick
		 * in tick_handle_periodic_broadcast()
		 */
		if (clockevent_state_oneshot(bc))
			return;
	}

	/*
	 * When switching from periodic to oneshot mode arm the broadcast
	 * device for the next tick.
	 *
	 * If the broadcast device has been replaced in oneshot mode and
	 * the oneshot broadcast mask is not empty, then arm it to expire
	 * immediately in order to reevaluate the next expiring timer.
	 * @nexttick is 0 and therefore in the past which will cause the
	 * clockevent code to force an event.
	 *
	 * For both cases the programming can be avoided when the oneshot
	 * broadcast mask is empty.
	 *
	 * tick_broadcast_set_event() implicitly switches the broadcast
	 * device to oneshot state.
	 */
	if (!cpumask_empty(tick_broadcast_oneshot_mask))
		tick_broadcast_set_event(bc, cpu, nexttick);
}

/*
 * Select oneshot operating mode for the broadcast device
 *
 * The mode is set to TICKDEV_MODE_ONESHOT even when no broadcast device is
 * installed. The device setup is told whether the previous mode was
 * periodic.
 */
void tick_broadcast_switch_to_oneshot(void)
{
	struct clock_event_device *bc;
	enum tick_device_mode oldmode;
	unsigned long flags;

	raw_spin_lock_irqsave(&tick_broadcast_lock, flags);

	oldmode = tick_broadcast_device.mode;
	tick_broadcast_device.mode = TICKDEV_MODE_ONESHOT;
	bc = tick_broadcast_device.evtdev;
	if (bc)
		tick_broadcast_setup_oneshot(bc, oldmode == TICKDEV_MODE_PERIODIC);

	raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
}

#ifdef CONFIG_HOTPLUG_CPU
/*
 * Take over the broadcast duty from @deadcpu on the current CPU.
 *
 * Acts only when a broadcast device exists and broadcast_needs_cpu()
 * reports that it depends on @deadcpu.
 */
void hotplug_cpu__broadcast_tick_pull(int deadcpu)
{
	struct clock_event_device *bc;
	unsigned long flags;

	raw_spin_lock_irqsave(&tick_broadcast_lock, flags);
	bc = tick_broadcast_device.evtdev;

	if (bc && broadcast_needs_cpu(bc, deadcpu)) {
		/*
		 * If the broadcast force bit of the current CPU is set,
		 * then the current CPU has not yet reprogrammed the local
		 * timer device to avoid a ping-pong race. See
		 * ___tick_broadcast_oneshot_control().
		 *
		 * If the broadcast device is hrtimer based then
		 * programming the broadcast event below does not have any
		 * effect because the local clockevent device is not
		 * running and not programmed because the broadcast event
		 * is not earlier than the pending event of the local clock
		 * event device. As a consequence all CPUs waiting for a
		 * broadcast event are stuck forever.
		 *
		 * Detect this condition and reprogram the cpu local timer
		 * device to avoid the starvation.
		 */
		if (tick_check_broadcast_expired()) {
			struct tick_device *td = this_cpu_ptr(&tick_cpu_device);

			cpumask_clear_cpu(smp_processor_id(), tick_broadcast_force_mask);
			tick_program_event(td->evtdev->next_event, 1);
		}

		/* This moves the broadcast assignment to this CPU: */
		bc->next_event_forced = 0;
		clockevents_program_event(bc, bc->next_event, 1);
	}
	raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
}

/*
 * Remove a dying CPU from broadcasting
 *
 * Also drops the oneshot wakeup device of @cpu, if it has one. Takes no
 * lock itself; the caller visible in this file, tick_broadcast_offline(),
 * holds tick_broadcast_lock.
 */
static void tick_broadcast_oneshot_offline(unsigned int cpu)
{
	if (tick_get_oneshot_wakeup_device(cpu))
		tick_set_oneshot_wakeup_device(NULL, cpu);

	/*
	 * Clear the broadcast masks for the dead cpu, but do not stop
	 * the broadcast device!
	 */
	cpumask_clear_cpu(cpu, tick_broadcast_oneshot_mask);
	cpumask_clear_cpu(cpu, tick_broadcast_pending_mask);
	cpumask_clear_cpu(cpu, tick_broadcast_force_mask);
}
#endif

/*
 * Check, whether the broadcast device is in one shot mode
 *
 * Returns non-zero when the mode is TICKDEV_MODE_ONESHOT, 0 otherwise.
 */
int tick_broadcast_oneshot_active(void)
{
	return tick_broadcast_device.mode == TICKDEV_MODE_ONESHOT;
}

/*
 * Check whether the broadcast device supports oneshot.
 *
 * Returns false when no broadcast device is installed.
 */
bool tick_broadcast_oneshot_available(void)
{
	struct clock_event_device *bc = tick_broadcast_device.evtdev;

	return bc ? bc->features & CLOCK_EVT_FEAT_ONESHOT : false;
}

#else
/*
 * Variant for kernels built without CONFIG_TICK_ONESHOT.
 *
 * Returns -EBUSY when there is no broadcast device or when it is hrtimer
 * based, 0 otherwise. @state is not evaluated.
 */
int __tick_broadcast_oneshot_control(enum tick_broadcast_state state)
{
	struct clock_event_device *bc = tick_broadcast_device.evtdev;

	if (!bc || (bc->features & CLOCK_EVT_FEAT_HRTIMER))
		return -EBUSY;

	return 0;
}
#endif

/*
 * Allocate the zero initialized cpumasks used by the broadcast code with
 * GFP_NOWAIT. The oneshot specific masks are only allocated when
 * CONFIG_TICK_ONESHOT is enabled.
 *
 * The return values of zalloc_cpumask_var() are not checked.
 */
void __init tick_broadcast_init(void)
{
	zalloc_cpumask_var(&tick_broadcast_mask, GFP_NOWAIT);
	zalloc_cpumask_var(&tick_broadcast_on, GFP_NOWAIT);
	zalloc_cpumask_var(&tmpmask, GFP_NOWAIT);
#ifdef CONFIG_TICK_ONESHOT
	zalloc_cpumask_var(&tick_broadcast_oneshot_mask, GFP_NOWAIT);
	zalloc_cpumask_var(&tick_broadcast_pending_mask, GFP_NOWAIT);
	zalloc_cpumask_var(&tick_broadcast_force_mask, GFP_NOWAIT);
#endif
}
17 17 17 17 17 17 17 17 17 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 
521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 
1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 
1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 
1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 
2216 2217 2218 2219 2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282 2283 2284 2285 2286 2287 2288 2289 2290 2291 2292 2293 2294 2295 2296 2297 2298 2299 2300 2301 2302 2303 2304 2305 2306 2307 2308 2309 2310 2311 2312 2313 2314 2315 2316 2317 2318 2319 2320 2321 2322 2323 2324 2325 2326 2327 2328 2329 2330 2331 2332 2333 2334 2335 2336 2337 2338 2339 2340 2341 2342 2343 2344 2345 2346 2347 2348 2349 2350 2351 2352 2353 2354 2355 2356 2357 2358 2359 2360 2361 2362 2363 2364 2365 2366 2367 2368 2369 2370 2371 2372 2373 2374 2375 2376 2377 2378 2379 2380 2381 2382 2383 2384 2385 2386 2387 2388 2389 2390 2391 2392 2393 2394 2395 2396 2397 2398 2399 2400 2401 2402 2403 2404 2405 2406 2407 2408 2409 2410 2411 2412 2413 2414 2415 2416 2417 2418 2419 2420 2421 2422 2423 2424 2425 2426 2427 2428 2429 2430 2431 2432 2433 2434 2435 2436 2437 2438 2439 2440 2441 2442 2443 2444 2445 2446 2447 2448 2449 2450 2451 2452 2453 2454 2455 2456 2457 2458 2459 2460 2461 2462 2463 2464 2465 2466 2467 2468 2469 2470 2471 2472 2473 2474 2475 2476 2477 2478 2479 2480 2481 2482 2483 2484 2485 2486 2487 2488 2489 2490 2491 2492 2493 2494 2495 2496 2497 2498 2499 2500 2501 2502 2503 2504 2505 2506 2507 2508 2509 2510 2511 2512 2513 2514 2515 2516 2517 2518 2519 2520 2521 2522 2523 2524 2525 2526 2527 2528 2529 2530 2531 2532 2533 2534 2535 2536 2537 2538 2539 2540 2541 2542 2543 2544 2545 2546 2547 2548 2549 2550 2551 2552 /* * Copyright (c) 2004 Topspin Communications. All rights reserved. * Copyright (c) 2005 Voltaire, Inc. All rights reserved. * Copyright (c) 2006 Intel Corporation. All rights reserved. * * This software is available to you under a choice of one of two * licenses. 
You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenIB.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ #include <linux/init.h> #include <linux/err.h> #include <linux/random.h> #include <linux/spinlock.h> #include <linux/slab.h> #include <linux/dma-mapping.h> #include <linux/kref.h> #include <linux/xarray.h> #include <linux/workqueue.h> #include <uapi/linux/if_ether.h> #include <rdma/ib_pack.h> #include <rdma/ib_cache.h> #include <rdma/rdma_netlink.h> #include <net/netlink.h> #include <uapi/rdma/ib_user_sa.h> #include <rdma/ib_marshall.h> #include <rdma/ib_addr.h> #include <rdma/opa_addr.h> #include <rdma/rdma_cm.h> #include "sa.h" #include "core_priv.h" #define IB_SA_LOCAL_SVC_TIMEOUT_MIN 100 #define IB_SA_LOCAL_SVC_TIMEOUT_DEFAULT 2000 #define IB_SA_LOCAL_SVC_TIMEOUT_MAX 200000 #define IB_SA_CPI_MAX_RETRY_CNT 3 #define IB_SA_CPI_RETRY_WAIT 1000 /*msecs */ static int sa_local_svc_timeout_ms = IB_SA_LOCAL_SVC_TIMEOUT_DEFAULT; struct ib_sa_sm_ah { struct ib_ah *ah; struct kref ref; u16 pkey_index; u8 src_path_mask; }; enum rdma_class_port_info_type { RDMA_CLASS_PORT_INFO_IB, RDMA_CLASS_PORT_INFO_OPA }; struct rdma_class_port_info { enum rdma_class_port_info_type type; union { struct ib_class_port_info ib; struct opa_class_port_info opa; }; }; struct ib_sa_classport_cache { bool valid; int retry_cnt; struct rdma_class_port_info data; }; struct ib_sa_port { struct ib_mad_agent *agent; struct ib_sa_sm_ah *sm_ah; struct work_struct update_task; struct ib_sa_classport_cache classport_info; struct delayed_work ib_cpi_work; spinlock_t classport_lock; /* protects class port info set */ spinlock_t ah_lock; u32 port_num; }; struct ib_sa_device { int start_port, end_port; struct ib_event_handler event_handler; struct ib_sa_port port[]; }; struct ib_sa_query { void (*callback)(struct ib_sa_query *sa_query, int status, struct ib_sa_mad *mad); void (*rmpp_callback)(struct ib_sa_query *sa_query, int status, struct ib_mad_recv_wc *mad); void (*release)(struct ib_sa_query *); struct ib_sa_client *client; struct ib_sa_port *port; struct ib_mad_send_buf *mad_buf; struct ib_sa_sm_ah 
*sm_ah; int id; u32 flags; struct list_head list; /* Local svc request list */ u32 seq; /* Local svc request sequence number */ unsigned long timeout; /* Local svc timeout */ u8 path_use; /* How will the pathrecord be used */ }; #define IB_SA_ENABLE_LOCAL_SERVICE 0x00000001 #define IB_SA_CANCEL 0x00000002 #define IB_SA_QUERY_OPA 0x00000004 struct ib_sa_path_query { void (*callback)(int status, struct sa_path_rec *rec, unsigned int num_paths, void *context); void *context; struct ib_sa_query sa_query; struct sa_path_rec *conv_pr; }; struct ib_sa_guidinfo_query { void (*callback)(int, struct ib_sa_guidinfo_rec *, void *); void *context; struct ib_sa_query sa_query; }; struct ib_sa_classport_info_query { void (*callback)(void *); void *context; struct ib_sa_query sa_query; }; struct ib_sa_mcmember_query { void (*callback)(int, struct ib_sa_mcmember_rec *, void *); void *context; struct ib_sa_query sa_query; }; struct ib_sa_service_query { void (*callback)(int status, struct sa_service_rec *rec, unsigned int num_services, void *context); void *context; struct ib_sa_query sa_query; }; static LIST_HEAD(ib_nl_request_list); static DEFINE_SPINLOCK(ib_nl_request_lock); static atomic_t ib_nl_sa_request_seq; static struct workqueue_struct *ib_nl_wq; static struct delayed_work ib_nl_timed_work; static const struct nla_policy ib_nl_policy[LS_NLA_TYPE_MAX] = { [LS_NLA_TYPE_PATH_RECORD] = {.type = NLA_BINARY, .len = sizeof(struct ib_path_rec_data)}, [LS_NLA_TYPE_TIMEOUT] = {.type = NLA_U32}, [LS_NLA_TYPE_SERVICE_ID] = {.type = NLA_U64}, [LS_NLA_TYPE_DGID] = {.type = NLA_BINARY, .len = sizeof(struct rdma_nla_ls_gid)}, [LS_NLA_TYPE_SGID] = {.type = NLA_BINARY, .len = sizeof(struct rdma_nla_ls_gid)}, [LS_NLA_TYPE_TCLASS] = {.type = NLA_U8}, [LS_NLA_TYPE_PKEY] = {.type = NLA_U16}, [LS_NLA_TYPE_QOS_CLASS] = {.type = NLA_U16}, }; static int ib_sa_add_one(struct ib_device *device); static void ib_sa_remove_one(struct ib_device *device, void *client_data); static struct ib_client 
sa_client = { .name = "sa", .add = ib_sa_add_one, .remove = ib_sa_remove_one }; static DEFINE_XARRAY_FLAGS(queries, XA_FLAGS_ALLOC | XA_FLAGS_LOCK_IRQ); static DEFINE_SPINLOCK(tid_lock); static u32 tid; #define PATH_REC_FIELD(field) \ .struct_offset_bytes = offsetof(struct sa_path_rec, field), \ .struct_size_bytes = sizeof_field(struct sa_path_rec, field), \ .field_name = "sa_path_rec:" #field static const struct ib_field path_rec_table[] = { { PATH_REC_FIELD(service_id), .offset_words = 0, .offset_bits = 0, .size_bits = 64 }, { PATH_REC_FIELD(dgid), .offset_words = 2, .offset_bits = 0, .size_bits = 128 }, { PATH_REC_FIELD(sgid), .offset_words = 6, .offset_bits = 0, .size_bits = 128 }, { PATH_REC_FIELD(ib.dlid), .offset_words = 10, .offset_bits = 0, .size_bits = 16 }, { PATH_REC_FIELD(ib.slid), .offset_words = 10, .offset_bits = 16, .size_bits = 16 }, { PATH_REC_FIELD(ib.raw_traffic), .offset_words = 11, .offset_bits = 0, .size_bits = 1 }, { RESERVED, .offset_words = 11, .offset_bits = 1, .size_bits = 3 }, { PATH_REC_FIELD(flow_label), .offset_words = 11, .offset_bits = 4, .size_bits = 20 }, { PATH_REC_FIELD(hop_limit), .offset_words = 11, .offset_bits = 24, .size_bits = 8 }, { PATH_REC_FIELD(traffic_class), .offset_words = 12, .offset_bits = 0, .size_bits = 8 }, { PATH_REC_FIELD(reversible), .offset_words = 12, .offset_bits = 8, .size_bits = 1 }, { PATH_REC_FIELD(numb_path), .offset_words = 12, .offset_bits = 9, .size_bits = 7 }, { PATH_REC_FIELD(pkey), .offset_words = 12, .offset_bits = 16, .size_bits = 16 }, { PATH_REC_FIELD(qos_class), .offset_words = 13, .offset_bits = 0, .size_bits = 12 }, { PATH_REC_FIELD(sl), .offset_words = 13, .offset_bits = 12, .size_bits = 4 }, { PATH_REC_FIELD(mtu_selector), .offset_words = 13, .offset_bits = 16, .size_bits = 2 }, { PATH_REC_FIELD(mtu), .offset_words = 13, .offset_bits = 18, .size_bits = 6 }, { PATH_REC_FIELD(rate_selector), .offset_words = 13, .offset_bits = 24, .size_bits = 2 }, { PATH_REC_FIELD(rate), .offset_words 
= 13, .offset_bits = 26, .size_bits = 6 }, { PATH_REC_FIELD(packet_life_time_selector), .offset_words = 14, .offset_bits = 0, .size_bits = 2 }, { PATH_REC_FIELD(packet_life_time), .offset_words = 14, .offset_bits = 2, .size_bits = 6 }, { PATH_REC_FIELD(preference), .offset_words = 14, .offset_bits = 8, .size_bits = 8 }, { RESERVED, .offset_words = 14, .offset_bits = 16, .size_bits = 48 }, }; #define OPA_PATH_REC_FIELD(field) \ .struct_offset_bytes = \ offsetof(struct sa_path_rec, field), \ .struct_size_bytes = \ sizeof_field(struct sa_path_rec, field), \ .field_name = "sa_path_rec:" #field static const struct ib_field opa_path_rec_table[] = { { OPA_PATH_REC_FIELD(service_id), .offset_words = 0, .offset_bits = 0, .size_bits = 64 }, { OPA_PATH_REC_FIELD(dgid), .offset_words = 2, .offset_bits = 0, .size_bits = 128 }, { OPA_PATH_REC_FIELD(sgid), .offset_words = 6, .offset_bits = 0, .size_bits = 128 }, { OPA_PATH_REC_FIELD(opa.dlid), .offset_words = 10, .offset_bits = 0, .size_bits = 32 }, { OPA_PATH_REC_FIELD(opa.slid), .offset_words = 11, .offset_bits = 0, .size_bits = 32 }, { OPA_PATH_REC_FIELD(opa.raw_traffic), .offset_words = 12, .offset_bits = 0, .size_bits = 1 }, { RESERVED, .offset_words = 12, .offset_bits = 1, .size_bits = 3 }, { OPA_PATH_REC_FIELD(flow_label), .offset_words = 12, .offset_bits = 4, .size_bits = 20 }, { OPA_PATH_REC_FIELD(hop_limit), .offset_words = 12, .offset_bits = 24, .size_bits = 8 }, { OPA_PATH_REC_FIELD(traffic_class), .offset_words = 13, .offset_bits = 0, .size_bits = 8 }, { OPA_PATH_REC_FIELD(reversible), .offset_words = 13, .offset_bits = 8, .size_bits = 1 }, { OPA_PATH_REC_FIELD(numb_path), .offset_words = 13, .offset_bits = 9, .size_bits = 7 }, { OPA_PATH_REC_FIELD(pkey), .offset_words = 13, .offset_bits = 16, .size_bits = 16 }, { OPA_PATH_REC_FIELD(opa.l2_8B), .offset_words = 14, .offset_bits = 0, .size_bits = 1 }, { OPA_PATH_REC_FIELD(opa.l2_10B), .offset_words = 14, .offset_bits = 1, .size_bits = 1 }, { 
OPA_PATH_REC_FIELD(opa.l2_9B), .offset_words = 14, .offset_bits = 2, .size_bits = 1 }, { OPA_PATH_REC_FIELD(opa.l2_16B), .offset_words = 14, .offset_bits = 3, .size_bits = 1 }, { RESERVED, .offset_words = 14, .offset_bits = 4, .size_bits = 2 }, { OPA_PATH_REC_FIELD(opa.qos_type), .offset_words = 14, .offset_bits = 6, .size_bits = 2 }, { OPA_PATH_REC_FIELD(opa.qos_priority), .offset_words = 14, .offset_bits = 8, .size_bits = 8 }, { RESERVED, .offset_words = 14, .offset_bits = 16, .size_bits = 3 }, { OPA_PATH_REC_FIELD(sl), .offset_words = 14, .offset_bits = 19, .size_bits = 5 }, { RESERVED, .offset_words = 14, .offset_bits = 24, .size_bits = 8 }, { OPA_PATH_REC_FIELD(mtu_selector), .offset_words = 15, .offset_bits = 0, .size_bits = 2 }, { OPA_PATH_REC_FIELD(mtu), .offset_words = 15, .offset_bits = 2, .size_bits = 6 }, { OPA_PATH_REC_FIELD(rate_selector), .offset_words = 15, .offset_bits = 8, .size_bits = 2 }, { OPA_PATH_REC_FIELD(rate), .offset_words = 15, .offset_bits = 10, .size_bits = 6 }, { OPA_PATH_REC_FIELD(packet_life_time_selector), .offset_words = 15, .offset_bits = 16, .size_bits = 2 }, { OPA_PATH_REC_FIELD(packet_life_time), .offset_words = 15, .offset_bits = 18, .size_bits = 6 }, { OPA_PATH_REC_FIELD(preference), .offset_words = 15, .offset_bits = 24, .size_bits = 8 }, }; #define MCMEMBER_REC_FIELD(field) \ .struct_offset_bytes = offsetof(struct ib_sa_mcmember_rec, field), \ .struct_size_bytes = sizeof_field(struct ib_sa_mcmember_rec, field), \ .field_name = "sa_mcmember_rec:" #field static const struct ib_field mcmember_rec_table[] = { { MCMEMBER_REC_FIELD(mgid), .offset_words = 0, .offset_bits = 0, .size_bits = 128 }, { MCMEMBER_REC_FIELD(port_gid), .offset_words = 4, .offset_bits = 0, .size_bits = 128 }, { MCMEMBER_REC_FIELD(qkey), .offset_words = 8, .offset_bits = 0, .size_bits = 32 }, { MCMEMBER_REC_FIELD(mlid), .offset_words = 9, .offset_bits = 0, .size_bits = 16 }, { MCMEMBER_REC_FIELD(mtu_selector), .offset_words = 9, .offset_bits = 16, 
.size_bits = 2 }, { MCMEMBER_REC_FIELD(mtu), .offset_words = 9, .offset_bits = 18, .size_bits = 6 }, { MCMEMBER_REC_FIELD(traffic_class), .offset_words = 9, .offset_bits = 24, .size_bits = 8 }, { MCMEMBER_REC_FIELD(pkey), .offset_words = 10, .offset_bits = 0, .size_bits = 16 }, { MCMEMBER_REC_FIELD(rate_selector), .offset_words = 10, .offset_bits = 16, .size_bits = 2 }, { MCMEMBER_REC_FIELD(rate), .offset_words = 10, .offset_bits = 18, .size_bits = 6 }, { MCMEMBER_REC_FIELD(packet_life_time_selector), .offset_words = 10, .offset_bits = 24, .size_bits = 2 }, { MCMEMBER_REC_FIELD(packet_life_time), .offset_words = 10, .offset_bits = 26, .size_bits = 6 }, { MCMEMBER_REC_FIELD(sl), .offset_words = 11, .offset_bits = 0, .size_bits = 4 }, { MCMEMBER_REC_FIELD(flow_label), .offset_words = 11, .offset_bits = 4, .size_bits = 20 }, { MCMEMBER_REC_FIELD(hop_limit), .offset_words = 11, .offset_bits = 24, .size_bits = 8 }, { MCMEMBER_REC_FIELD(scope), .offset_words = 12, .offset_bits = 0, .size_bits = 4 }, { MCMEMBER_REC_FIELD(join_state), .offset_words = 12, .offset_bits = 4, .size_bits = 4 }, { MCMEMBER_REC_FIELD(proxy_join), .offset_words = 12, .offset_bits = 8, .size_bits = 1 }, { RESERVED, .offset_words = 12, .offset_bits = 9, .size_bits = 23 }, }; #define CLASSPORTINFO_REC_FIELD(field) \ .struct_offset_bytes = offsetof(struct ib_class_port_info, field), \ .struct_size_bytes = sizeof_field(struct ib_class_port_info, field), \ .field_name = "ib_class_port_info:" #field static const struct ib_field ib_classport_info_rec_table[] = { { CLASSPORTINFO_REC_FIELD(base_version), .offset_words = 0, .offset_bits = 0, .size_bits = 8 }, { CLASSPORTINFO_REC_FIELD(class_version), .offset_words = 0, .offset_bits = 8, .size_bits = 8 }, { CLASSPORTINFO_REC_FIELD(capability_mask), .offset_words = 0, .offset_bits = 16, .size_bits = 16 }, { CLASSPORTINFO_REC_FIELD(cap_mask2_resp_time), .offset_words = 1, .offset_bits = 0, .size_bits = 32 }, { CLASSPORTINFO_REC_FIELD(redirect_gid), 
.offset_words = 2, .offset_bits = 0, .size_bits = 128 }, { CLASSPORTINFO_REC_FIELD(redirect_tcslfl), .offset_words = 6, .offset_bits = 0, .size_bits = 32 }, { CLASSPORTINFO_REC_FIELD(redirect_lid), .offset_words = 7, .offset_bits = 0, .size_bits = 16 }, { CLASSPORTINFO_REC_FIELD(redirect_pkey), .offset_words = 7, .offset_bits = 16, .size_bits = 16 }, { CLASSPORTINFO_REC_FIELD(redirect_qp), .offset_words = 8, .offset_bits = 0, .size_bits = 32 }, { CLASSPORTINFO_REC_FIELD(redirect_qkey), .offset_words = 9, .offset_bits = 0, .size_bits = 32 }, { CLASSPORTINFO_REC_FIELD(trap_gid), .offset_words = 10, .offset_bits = 0, .size_bits = 128 }, { CLASSPORTINFO_REC_FIELD(trap_tcslfl), .offset_words = 14, .offset_bits = 0, .size_bits = 32 }, { CLASSPORTINFO_REC_FIELD(trap_lid), .offset_words = 15, .offset_bits = 0, .size_bits = 16 }, { CLASSPORTINFO_REC_FIELD(trap_pkey), .offset_words = 15, .offset_bits = 16, .size_bits = 16 }, { CLASSPORTINFO_REC_FIELD(trap_hlqp), .offset_words = 16, .offset_bits = 0, .size_bits = 32 }, { CLASSPORTINFO_REC_FIELD(trap_qkey), .offset_words = 17, .offset_bits = 0, .size_bits = 32 }, }; #define OPA_CLASSPORTINFO_REC_FIELD(field) \ .struct_offset_bytes =\ offsetof(struct opa_class_port_info, field), \ .struct_size_bytes = \ sizeof_field(struct opa_class_port_info, field), \ .field_name = "opa_class_port_info:" #field static const struct ib_field opa_classport_info_rec_table[] = { { OPA_CLASSPORTINFO_REC_FIELD(base_version), .offset_words = 0, .offset_bits = 0, .size_bits = 8 }, { OPA_CLASSPORTINFO_REC_FIELD(class_version), .offset_words = 0, .offset_bits = 8, .size_bits = 8 }, { OPA_CLASSPORTINFO_REC_FIELD(cap_mask), .offset_words = 0, .offset_bits = 16, .size_bits = 16 }, { OPA_CLASSPORTINFO_REC_FIELD(cap_mask2_resp_time), .offset_words = 1, .offset_bits = 0, .size_bits = 32 }, { OPA_CLASSPORTINFO_REC_FIELD(redirect_gid), .offset_words = 2, .offset_bits = 0, .size_bits = 128 }, { OPA_CLASSPORTINFO_REC_FIELD(redirect_tc_fl), .offset_words = 6, 
.offset_bits = 0, .size_bits = 32 }, { OPA_CLASSPORTINFO_REC_FIELD(redirect_lid), .offset_words = 7, .offset_bits = 0, .size_bits = 32 }, { OPA_CLASSPORTINFO_REC_FIELD(redirect_sl_qp), .offset_words = 8, .offset_bits = 0, .size_bits = 32 }, { OPA_CLASSPORTINFO_REC_FIELD(redirect_qkey), .offset_words = 9, .offset_bits = 0, .size_bits = 32 }, { OPA_CLASSPORTINFO_REC_FIELD(trap_gid), .offset_words = 10, .offset_bits = 0, .size_bits = 128 }, { OPA_CLASSPORTINFO_REC_FIELD(trap_tc_fl), .offset_words = 14, .offset_bits = 0, .size_bits = 32 }, { OPA_CLASSPORTINFO_REC_FIELD(trap_lid), .offset_words = 15, .offset_bits = 0, .size_bits = 32 }, { OPA_CLASSPORTINFO_REC_FIELD(trap_hl_qp), .offset_words = 16, .offset_bits = 0, .size_bits = 32 }, { OPA_CLASSPORTINFO_REC_FIELD(trap_qkey), .offset_words = 17, .offset_bits = 0, .size_bits = 32 }, { OPA_CLASSPORTINFO_REC_FIELD(trap_pkey), .offset_words = 18, .offset_bits = 0, .size_bits = 16 }, { OPA_CLASSPORTINFO_REC_FIELD(redirect_pkey), .offset_words = 18, .offset_bits = 16, .size_bits = 16 }, { OPA_CLASSPORTINFO_REC_FIELD(trap_sl_rsvd), .offset_words = 19, .offset_bits = 0, .size_bits = 8 }, { RESERVED, .offset_words = 19, .offset_bits = 8, .size_bits = 24 }, }; #define GUIDINFO_REC_FIELD(field) \ .struct_offset_bytes = offsetof(struct ib_sa_guidinfo_rec, field), \ .struct_size_bytes = sizeof_field(struct ib_sa_guidinfo_rec, field), \ .field_name = "sa_guidinfo_rec:" #field static const struct ib_field guidinfo_rec_table[] = { { GUIDINFO_REC_FIELD(lid), .offset_words = 0, .offset_bits = 0, .size_bits = 16 }, { GUIDINFO_REC_FIELD(block_num), .offset_words = 0, .offset_bits = 16, .size_bits = 8 }, { GUIDINFO_REC_FIELD(res1), .offset_words = 0, .offset_bits = 24, .size_bits = 8 }, { GUIDINFO_REC_FIELD(res2), .offset_words = 1, .offset_bits = 0, .size_bits = 32 }, { GUIDINFO_REC_FIELD(guid_info_list), .offset_words = 2, .offset_bits = 0, .size_bits = 512 }, }; #define SERVICE_REC_FIELD(field) \ .struct_offset_bytes = offsetof(struct 
sa_service_rec, field),	\
	.struct_size_bytes   = sizeof_field(struct sa_service_rec, field),	\
	.field_name          = "sa_service_rec:" #field

/*
 * Layout table for struct sa_service_rec: each entry gives a field's
 * position (offset_words/offset_bits) and width (size_bits).
 */
static const struct ib_field service_rec_table[] = {
	{ SERVICE_REC_FIELD(id),
	  .offset_words = 0,
	  .offset_bits  = 0,
	  .size_bits    = 64 },
	{ SERVICE_REC_FIELD(gid),
	  .offset_words = 2,
	  .offset_bits  = 0,
	  .size_bits    = 128 },
	{ SERVICE_REC_FIELD(pkey),
	  .offset_words = 6,
	  .offset_bits  = 0,
	  .size_bits    = 16 },
	{ RESERVED,
	  .offset_words = 6,
	  .offset_bits  = 16,
	  .size_bits    = 16 },
	{ SERVICE_REC_FIELD(lease),
	  .offset_words = 7,
	  .offset_bits  = 0,
	  .size_bits    = 32 },
	{ SERVICE_REC_FIELD(key),
	  .offset_words = 8,
	  .offset_bits  = 0,
	  .size_bits    = 128 },
	{ SERVICE_REC_FIELD(name),
	  .offset_words = 12,
	  .offset_bits  = 0,
	  .size_bits    = 512 },
	{ SERVICE_REC_FIELD(data_8),
	  .offset_words = 28,
	  .offset_bits  = 0,
	  .size_bits    = 128 },
	{ SERVICE_REC_FIELD(data_16),
	  .offset_words = 32,
	  .offset_bits  = 0,
	  .size_bits    = 128 },
	{ SERVICE_REC_FIELD(data_32),
	  .offset_words = 36,
	  .offset_bits  = 0,
	  .size_bits    = 128 },
	{ SERVICE_REC_FIELD(data_64),
	  .offset_words = 40,
	  .offset_bits  = 0,
	  .size_bits    = 128 },
};

/* Capacity of the on-stack path record array used for netlink responses */
#define RDMA_PRIMARY_PATH_MAX_REC_NUM 3

/* Clear IB_SA_ENABLE_LOCAL_SERVICE in the query's flags. */
static inline void ib_sa_disable_local_svc(struct ib_sa_query *query)
{
	query->flags &= ~IB_SA_ENABLE_LOCAL_SERVICE;
}

/* Return non-zero when IB_SA_CANCEL is set in the query's flags. */
static inline int ib_sa_query_cancelled(struct ib_sa_query *query)
{
	return (query->flags & IB_SA_CANCEL);
}

/*
 * Append the rdma_ls_resolve_header and one netlink attribute per
 * comp_mask bit set in the query's MAD to @skb.
 *
 * The path record is taken from query->mad_buf->context[1], which is
 * reset to NULL here.  query->path_use is also chosen here and copied
 * into the header.  The nla_put() return values are not checked.
 */
static void ib_nl_set_path_rec_attrs(struct sk_buff *skb,
				     struct ib_sa_query *query)
{
	struct sa_path_rec *sa_rec = query->mad_buf->context[1];
	struct ib_sa_mad *mad = query->mad_buf->mad;
	ib_sa_comp_mask comp_mask = mad->sa_hdr.comp_mask;
	u16 val16;
	u64 val64;
	struct rdma_ls_resolve_header *header;

	query->mad_buf->context[1] = NULL;

	/* Construct the family header first */
	header = skb_put(skb, NLMSG_ALIGN(sizeof(*header)));
	strscpy_pad(header->device_name,
		    dev_name(&query->port->agent->device->dev),
		    LS_DEVICE_NAME_MAX);
	header->port_num = query->port->port_num;

	/* A reversible path was requested only if the bit is set and non-zero */
	if ((comp_mask & IB_SA_PATH_REC_REVERSIBLE) &&
	    sa_rec->reversible != 0)
		query->path_use = LS_RESOLVE_PATH_USE_ALL;
	else
		query->path_use = LS_RESOLVE_PATH_USE_UNIDIRECTIONAL;
	header->path_use = query->path_use;

	/* Now build the attributes */
	if (comp_mask & IB_SA_PATH_REC_SERVICE_ID) {
		/* Multi-byte scalars are converted to CPU byte order first */
		val64 = be64_to_cpu(sa_rec->service_id);
		nla_put(skb, RDMA_NLA_F_MANDATORY | LS_NLA_TYPE_SERVICE_ID,
			sizeof(val64), &val64);
	}
	if (comp_mask & IB_SA_PATH_REC_DGID)
		nla_put(skb, RDMA_NLA_F_MANDATORY | LS_NLA_TYPE_DGID,
			sizeof(sa_rec->dgid), &sa_rec->dgid);
	if (comp_mask & IB_SA_PATH_REC_SGID)
		nla_put(skb, RDMA_NLA_F_MANDATORY | LS_NLA_TYPE_SGID,
			sizeof(sa_rec->sgid), &sa_rec->sgid);
	if (comp_mask & IB_SA_PATH_REC_TRAFFIC_CLASS)
		nla_put(skb, RDMA_NLA_F_MANDATORY | LS_NLA_TYPE_TCLASS,
			sizeof(sa_rec->traffic_class), &sa_rec->traffic_class);

	if (comp_mask & IB_SA_PATH_REC_PKEY) {
		val16 = be16_to_cpu(sa_rec->pkey);
		nla_put(skb, RDMA_NLA_F_MANDATORY | LS_NLA_TYPE_PKEY,
			sizeof(val16), &val16);
	}
	if (comp_mask & IB_SA_PATH_REC_QOS_CLASS) {
		val16 = be16_to_cpu(sa_rec->qos_class);
		nla_put(skb, RDMA_NLA_F_MANDATORY | LS_NLA_TYPE_QOS_CLASS,
			sizeof(val16), &val16);
	}
}

/*
 * Compute the netlink payload length needed for the attributes selected
 * by @comp_mask plus the aligned family header.
 *
 * Returns 0 (after a WARN) when none of the handled comp_mask bits is set.
 */
static int ib_nl_get_path_rec_attrs_len(ib_sa_comp_mask comp_mask)
{
	int len = 0;

	if (comp_mask & IB_SA_PATH_REC_SERVICE_ID)
		len += nla_total_size(sizeof(u64));
	if (comp_mask & IB_SA_PATH_REC_DGID)
		len += nla_total_size(sizeof(struct rdma_nla_ls_gid));
	if (comp_mask & IB_SA_PATH_REC_SGID)
		len += nla_total_size(sizeof(struct rdma_nla_ls_gid));
	if (comp_mask & IB_SA_PATH_REC_TRAFFIC_CLASS)
		len += nla_total_size(sizeof(u8));
	if (comp_mask & IB_SA_PATH_REC_PKEY)
		len += nla_total_size(sizeof(u16));
	if (comp_mask & IB_SA_PATH_REC_QOS_CLASS)
		len += nla_total_size(sizeof(u16));

	/*
	 * Make sure that at least some of the required comp_mask bits are
	 * set.
	 */
	if (WARN_ON(len == 0))
		return len;

	/* Add the family header */
	len += NLMSG_ALIGN(sizeof(struct rdma_ls_resolve_header));

	return len;
}

/*
 * Build an RDMA_NL_LS_OP_RESOLVE netlink message for @query, multicast it
 * to RDMA_NL_GROUP_LS and, on success, queue the query on
 * ib_nl_request_list with a deadline of sa_local_svc_timeout_ms from now.
 *
 * Returns 0 on success, -EMSGSIZE when no attribute length could be
 * computed or the message header does not fit, -ENOMEM when the skb
 * cannot be allocated, or the non-zero result of rdma_nl_multicast().
 */
static int ib_nl_make_request(struct ib_sa_query *query, gfp_t gfp_mask)
{
	struct sk_buff *skb = NULL;
	struct nlmsghdr *nlh;
	void *data;
	struct ib_sa_mad *mad;
	int len;
	unsigned long flags;
	unsigned long delay;
	gfp_t gfp_flag;
	int ret;

	INIT_LIST_HEAD(&query->list);
	/* The sequence number is what a response is later matched against */
	query->seq = (u32)atomic_inc_return(&ib_nl_sa_request_seq);

	mad = query->mad_buf->mad;
	len = ib_nl_get_path_rec_attrs_len(mad->sa_hdr.comp_mask);
	if (len <= 0)
		return -EMSGSIZE;

	skb = nlmsg_new(len, gfp_mask);
	if (!skb)
		return -ENOMEM;

	/* Put nlmsg header only for now */
	data = ibnl_put_msg(skb, &nlh, query->seq, 0, RDMA_NL_LS,
			    RDMA_NL_LS_OP_RESOLVE, NLM_F_REQUEST);
	if (!data) {
		nlmsg_free(skb);
		return -EMSGSIZE;
	}

	/* Add attributes */
	ib_nl_set_path_rec_attrs(skb, query);

	/* Repair the nlmsg header length */
	nlmsg_end(skb, nlh);

	/* The multicast below runs under a spinlock, so never pass a sleeping mask */
	gfp_flag = ((gfp_mask & GFP_ATOMIC) == GFP_ATOMIC) ? GFP_ATOMIC :
		GFP_NOWAIT;

	spin_lock_irqsave(&ib_nl_request_lock, flags);
	ret = rdma_nl_multicast(&init_net, skb, RDMA_NL_GROUP_LS, gfp_flag);

	if (ret)
		goto out;

	/* Put the request on the list. */
	delay = msecs_to_jiffies(sa_local_svc_timeout_ms);
	query->timeout = delay + jiffies;
	list_add_tail(&query->list, &ib_nl_request_list);
	/* Start the timeout if this is the only request */
	if (ib_nl_request_list.next == &query->list)
		queue_delayed_work(ib_nl_wq, &ib_nl_timed_work, delay);

out:
	spin_unlock_irqrestore(&ib_nl_request_lock, flags);

	return ret;
}

static int ib_nl_cancel_request(struct ib_sa_query *query)
{
	unsigned long flags;
	struct ib_sa_query *wait_query;
	int found = 0;

	spin_lock_irqsave(&ib_nl_request_lock, flags);
	list_for_each_entry(wait_query, &ib_nl_request_list, list) {
		/* Let the timeout to take care of the callback */
		if (query == wait_query) {
			query->flags |= IB_SA_CANCEL;
			query->timeout = jiffies;
			list_move(&query->list, &ib_nl_request_list);
			found = 1;
mod_delayed_work(ib_nl_wq, &ib_nl_timed_work, 1); break; } } spin_unlock_irqrestore(&ib_nl_request_lock, flags); return found; } static void send_handler(struct ib_mad_agent *agent, struct ib_mad_send_wc *mad_send_wc); static void ib_nl_process_good_resolve_rsp(struct ib_sa_query *query, const struct nlmsghdr *nlh) { struct sa_path_rec recs[RDMA_PRIMARY_PATH_MAX_REC_NUM]; struct ib_sa_path_query *path_query; struct ib_path_rec_data *rec_data; struct ib_mad_send_wc mad_send_wc; const struct nlattr *head, *curr; struct ib_sa_mad *mad = NULL; int len, rem, status = -EIO; unsigned int num_prs = 0; u32 mask = 0; if (!query->callback) goto out; path_query = container_of(query, struct ib_sa_path_query, sa_query); mad = query->mad_buf->mad; head = (const struct nlattr *) nlmsg_data(nlh); len = nlmsg_len(nlh); switch (query->path_use) { case LS_RESOLVE_PATH_USE_UNIDIRECTIONAL: mask = IB_PATH_PRIMARY | IB_PATH_OUTBOUND; break; case LS_RESOLVE_PATH_USE_ALL: mask = IB_PATH_PRIMARY; break; case LS_RESOLVE_PATH_USE_GMP: default: mask = IB_PATH_PRIMARY | IB_PATH_GMP | IB_PATH_BIDIRECTIONAL; break; } nla_for_each_attr(curr, head, len, rem) { if (curr->nla_type != LS_NLA_TYPE_PATH_RECORD) continue; rec_data = nla_data(curr); if ((rec_data->flags & mask) != mask) continue; if ((query->flags & IB_SA_QUERY_OPA) || path_query->conv_pr) { mad->mad_hdr.method |= IB_MGMT_METHOD_RESP; memcpy(mad->data, rec_data->path_rec, sizeof(rec_data->path_rec)); query->callback(query, 0, mad); goto out; } status = 0; ib_unpack(path_rec_table, ARRAY_SIZE(path_rec_table), rec_data->path_rec, &recs[num_prs]); recs[num_prs].flags = rec_data->flags; recs[num_prs].rec_type = SA_PATH_REC_TYPE_IB; sa_path_set_dmac_zero(&recs[num_prs]); num_prs++; if (num_prs >= RDMA_PRIMARY_PATH_MAX_REC_NUM) break; } if (!status) { mad->mad_hdr.method |= IB_MGMT_METHOD_RESP; path_query->callback(status, recs, num_prs, path_query->context); } else query->callback(query, status, mad); out: mad_send_wc.send_buf = query->mad_buf; 
mad_send_wc.status = IB_WC_SUCCESS; send_handler(query->mad_buf->mad_agent, &mad_send_wc); } static void ib_nl_request_timeout(struct work_struct *work) { unsigned long flags; struct ib_sa_query *query; unsigned long delay; struct ib_mad_send_wc mad_send_wc; int ret; spin_lock_irqsave(&ib_nl_request_lock, flags); while (!list_empty(&ib_nl_request_list)) { query = list_entry(ib_nl_request_list.next, struct ib_sa_query, list); if (time_after(query->timeout, jiffies)) { delay = query->timeout - jiffies; if ((long)delay <= 0) delay = 1; queue_delayed_work(ib_nl_wq, &ib_nl_timed_work, delay); break; } list_del(&query->list); ib_sa_disable_local_svc(query); /* Hold the lock to protect against query cancellation */ if (ib_sa_query_cancelled(query)) ret = -1; else ret = ib_post_send_mad(query->mad_buf, NULL); if (ret) { mad_send_wc.send_buf = query->mad_buf; mad_send_wc.status = IB_WC_WR_FLUSH_ERR; spin_unlock_irqrestore(&ib_nl_request_lock, flags); send_handler(query->port->agent, &mad_send_wc); spin_lock_irqsave(&ib_nl_request_lock, flags); } } spin_unlock_irqrestore(&ib_nl_request_lock, flags); } int ib_nl_handle_set_timeout(struct sk_buff *skb, struct nlmsghdr *nlh, struct netlink_ext_ack *extack) { int timeout, delta, abs_delta; const struct nlattr *attr; unsigned long flags; struct ib_sa_query *query; long delay = 0; struct nlattr *tb[LS_NLA_TYPE_MAX]; int ret; if (!(nlh->nlmsg_flags & NLM_F_REQUEST) || !(NETLINK_CB(skb).sk)) return -EPERM; ret = nla_parse_deprecated(tb, LS_NLA_TYPE_MAX - 1, nlmsg_data(nlh), nlmsg_len(nlh), ib_nl_policy, NULL); attr = (const struct nlattr *)tb[LS_NLA_TYPE_TIMEOUT]; if (ret || !attr) goto settimeout_out; timeout = *(int *) nla_data(attr); if (timeout < IB_SA_LOCAL_SVC_TIMEOUT_MIN) timeout = IB_SA_LOCAL_SVC_TIMEOUT_MIN; if (timeout > IB_SA_LOCAL_SVC_TIMEOUT_MAX) timeout = IB_SA_LOCAL_SVC_TIMEOUT_MAX; spin_lock_irqsave(&ib_nl_request_lock, flags); delta = timeout - sa_local_svc_timeout_ms; if (delta < 0) abs_delta = -delta; else 
abs_delta = delta; if (delta != 0) { sa_local_svc_timeout_ms = timeout; list_for_each_entry(query, &ib_nl_request_list, list) { if (delta < 0 && abs_delta > query->timeout) query->timeout = 0; else query->timeout += delta; /* Get the new delay from the first entry */ if (!delay) { delay = query->timeout - jiffies; if (delay <= 0) delay = 1; } } if (delay) mod_delayed_work(ib_nl_wq, &ib_nl_timed_work, (unsigned long)delay); } spin_unlock_irqrestore(&ib_nl_request_lock, flags); settimeout_out: return 0; } static inline int ib_nl_is_good_resolve_resp(const struct nlmsghdr *nlh) { struct nlattr *tb[LS_NLA_TYPE_MAX]; int ret; if (nlh->nlmsg_flags & RDMA_NL_LS_F_ERR) return 0; ret = nla_parse_deprecated(tb, LS_NLA_TYPE_MAX - 1, nlmsg_data(nlh), nlmsg_len(nlh), ib_nl_policy, NULL); if (ret) return 0; return 1; } int ib_nl_handle_resolve_resp(struct sk_buff *skb, struct nlmsghdr *nlh, struct netlink_ext_ack *extack) { unsigned long flags; struct ib_sa_query *query = NULL, *iter; struct ib_mad_send_buf *send_buf; struct ib_mad_send_wc mad_send_wc; int ret; if ((nlh->nlmsg_flags & NLM_F_REQUEST) || !(NETLINK_CB(skb).sk)) return -EPERM; spin_lock_irqsave(&ib_nl_request_lock, flags); list_for_each_entry(iter, &ib_nl_request_list, list) { /* * If the query is cancelled, let the timeout routine * take care of it. 
*/
		if (nlh->nlmsg_seq == iter->seq) {
			if (!ib_sa_query_cancelled(iter)) {
				list_del(&iter->list);
				query = iter;
			}
			break;
		}
	}

	if (!query) {
		spin_unlock_irqrestore(&ib_nl_request_lock, flags);
		goto resp_out;
	}

	send_buf = query->mad_buf;

	if (!ib_nl_is_good_resolve_resp(nlh)) {
		/* if the result is a failure, send out the packet via IB */
		ib_sa_disable_local_svc(query);
		ret = ib_post_send_mad(query->mad_buf, NULL);
		spin_unlock_irqrestore(&ib_nl_request_lock, flags);
		if (ret) {
			mad_send_wc.send_buf = send_buf;
			mad_send_wc.status = IB_WC_GENERAL_ERR;
			send_handler(query->port->agent, &mad_send_wc);
		}
	} else {
		spin_unlock_irqrestore(&ib_nl_request_lock, flags);
		ib_nl_process_good_resolve_rsp(query, nlh);
	}

resp_out:
	return 0;
}

/* kref release callback: destroy the address handle and free the wrapper. */
static void free_sm_ah(struct kref *kref)
{
	struct ib_sa_sm_ah *sm_ah = container_of(kref, struct ib_sa_sm_ah, ref);

	rdma_destroy_ah(sm_ah->ah, 0);
	kfree(sm_ah);
}

/* Initialise @client with a user count of one and a fresh completion. */
void ib_sa_register_client(struct ib_sa_client *client)
{
	atomic_set(&client->users, 1);
	init_completion(&client->comp);
}
EXPORT_SYMBOL(ib_sa_register_client);

/* Drop a reference on @client and block until its completion fires. */
void ib_sa_unregister_client(struct ib_sa_client *client)
{
	ib_sa_client_put(client);
	wait_for_completion(&client->comp);
}
EXPORT_SYMBOL(ib_sa_unregister_client);

/**
 * ib_sa_cancel_query - try to cancel an SA query
 * @id: ID of query to cancel
 * @query: query pointer to cancel
 *
 * Try to cancel an SA query.  If the id and query don't match up or
 * the query has already completed, nothing is done.  Otherwise the
 * query is canceled and will complete with a status of -EINTR.
 */
void ib_sa_cancel_query(int id, struct ib_sa_query *query)
{
	unsigned long flags;
	struct ib_mad_send_buf *mad_buf;

	xa_lock_irqsave(&queries, flags);
	/* Only act when @id still maps to this exact query */
	if (xa_load(&queries, id) != query) {
		xa_unlock_irqrestore(&queries, flags);
		return;
	}
	mad_buf = query->mad_buf;
	xa_unlock_irqrestore(&queries, flags);

	/*
	 * If the query is still on the netlink request list, schedule
	 * it to be cancelled by the timeout routine. Otherwise, it has been
	 * sent to the MAD layer and has to be cancelled from there.
	 */
	if (!ib_nl_cancel_request(query))
		ib_cancel_mad(mad_buf);
}
EXPORT_SYMBOL(ib_sa_cancel_query);

/*
 * Return the source path mask of the SM address handle cached for
 * @port_num, or 0x7f when the device has no SA client data or the port
 * has no cached SM address handle.
 */
static u8 get_src_path_mask(struct ib_device *device, u32 port_num)
{
	struct ib_sa_device *sa_dev;
	struct ib_sa_port   *port;
	unsigned long flags;
	u8 src_path_mask;

	sa_dev = ib_get_client_data(device, &sa_client);
	if (!sa_dev)
		return 0x7f;

	port  = &sa_dev->port[port_num - sa_dev->start_port];
	/* sm_ah is read under ah_lock */
	spin_lock_irqsave(&port->ah_lock, flags);
	src_path_mask = port->sm_ah ? port->sm_ah->src_path_mask : 0x7f;
	spin_unlock_irqrestore(&port->ah_lock, flags);

	return src_path_mask;
}

/*
 * Fill the GRH fields of @ah_attr from @rec.
 *
 * When @gid_attr is NULL the SGID attribute is looked up on the port
 * from rec->sgid; otherwise rdma_hold_gid_attr() is called on the
 * caller's attribute.  Either way it is then passed to
 * rdma_move_grh_sgid_attr().
 *
 * Returns 0 on success or the error from rdma_find_gid_by_port().
 */
static int init_ah_attr_grh_fields(struct ib_device *device, u32 port_num,
				   struct sa_path_rec *rec,
				   struct rdma_ah_attr *ah_attr,
				   const struct ib_gid_attr *gid_attr)
{
	enum ib_gid_type type = sa_conv_pathrec_to_gid_type(rec);

	if (!gid_attr) {
		gid_attr = rdma_find_gid_by_port(device, &rec->sgid, type,
						 port_num, NULL);
		if (IS_ERR(gid_attr))
			return PTR_ERR(gid_attr);
	} else
		rdma_hold_gid_attr(gid_attr);

	rdma_move_grh_sgid_attr(ah_attr, &rec->dgid,
				be32_to_cpu(rec->flow_label),
				rec->hop_limit,	rec->traffic_class,
				gid_attr);
	return 0;
}

/**
 * ib_init_ah_attr_from_path - Initialize address handle attributes based on
 *   an SA path record.
 * @device: Device associated ah attributes initialization.
 * @port_num: Port on the specified device.
 * @rec: path record entry to use for ah attributes initialization.
 * @ah_attr: address handle attributes to initialize from the path record.
 * @gid_attr: SGID attribute to consider during initialization.
 *
 * When ib_init_ah_attr_from_path() returns success,
 * (a) for IB link layer it optionally contains a reference to SGID attribute
 * when GRH is present for IB link layer.
 * (b) for RoCE link layer it contains a reference to SGID attribute.
 * User must invoke rdma_destroy_ah_attr() to release reference to SGID
 * attributes which are initialized using ib_init_ah_attr_from_path().
 *
 * Return: 0 on success, or the error returned by
 * roce_resolve_route_from_path() or by the GRH field initialization.
 */
int ib_init_ah_attr_from_path(struct ib_device *device, u32 port_num,
			      struct sa_path_rec *rec,
			      struct rdma_ah_attr *ah_attr,
			      const struct ib_gid_attr *gid_attr)
{
	int ret = 0;

	memset(ah_attr, 0, sizeof(*ah_attr));
	ah_attr->type = rdma_ah_find_type(device, port_num);
	rdma_ah_set_sl(ah_attr, rec->sl);
	rdma_ah_set_port_num(ah_attr, port_num);
	rdma_ah_set_static_rate(ah_attr, rec->rate);

	if (sa_path_is_roce(rec)) {
		ret = roce_resolve_route_from_path(rec, gid_attr);
		if (ret)
			return ret;

		memcpy(ah_attr->roce.dmac, sa_path_get_dmac(rec), ETH_ALEN);
	} else {
		rdma_ah_set_dlid(ah_attr, be32_to_cpu(sa_path_get_dlid(rec)));
		if (sa_path_is_opa(rec) &&
		    rdma_ah_get_dlid(ah_attr) == be16_to_cpu(IB_LID_PERMISSIVE))
			rdma_ah_set_make_grd(ah_attr, true);

		rdma_ah_set_path_bits(ah_attr,
				      be32_to_cpu(sa_path_get_slid(rec)) &
				      get_src_path_mask(device, port_num));
	}

	/* GRH fields are filled for RoCE and whenever a hop limit is set */
	if (rec->hop_limit > 0 || sa_path_is_roce(rec))
		ret = init_ah_attr_grh_fields(device, port_num,
					      rec, ah_attr, gid_attr);
	return ret;
}
EXPORT_SYMBOL(ib_init_ah_attr_from_path);

/*
 * Take a reference on the port's SM address handle and allocate the send
 * MAD buffer for @query.
 *
 * Returns 0 on success, -EAGAIN when the port has no SM address handle
 * or the handle has no valid unicast LID, and -ENOMEM when the send
 * buffer cannot be created.  The reference is dropped again on the two
 * failure paths that follow the kref_get().
 */
static int alloc_mad(struct ib_sa_query *query, gfp_t gfp_mask)
{
	struct rdma_ah_attr ah_attr;
	unsigned long flags;

	spin_lock_irqsave(&query->port->ah_lock, flags);
	if (!query->port->sm_ah) {
		spin_unlock_irqrestore(&query->port->ah_lock, flags);
		return -EAGAIN;
	}
	kref_get(&query->port->sm_ah->ref);
	query->sm_ah = query->port->sm_ah;
	spin_unlock_irqrestore(&query->port->ah_lock, flags);

	/*
	 * Always check if sm_ah has valid dlid assigned,
	 * before querying for class port info
	 */
	if ((rdma_query_ah(query->sm_ah->ah, &ah_attr) < 0) ||
	    !rdma_is_valid_unicast_lid(&ah_attr)) {
		kref_put(&query->sm_ah->ref, free_sm_ah);
		return -EAGAIN;
	}
	query->mad_buf = ib_create_send_mad(query->port->agent, 1,
					    query->sm_ah->pkey_index,
					    0, IB_MGMT_SA_HDR, IB_MGMT_SA_DATA,
					    gfp_mask,
					    ((query->flags & IB_SA_QUERY_OPA) ?
					     OPA_MGMT_BASE_VERSION :
					     IB_MGMT_BASE_VERSION));
	if (IS_ERR(query->mad_buf)) {
		kref_put(&query->sm_ah->ref, free_sm_ah);
		return -ENOMEM;
	}

	query->mad_buf->ah = query->sm_ah->ah;

	return 0;
}

/* Free the query's send MAD buffer and drop its SM address handle reference. */
static void free_mad(struct ib_sa_query *query)
{
	ib_free_send_mad(query->mad_buf);
	kref_put(&query->sm_ah->ref, free_sm_ah);
}

/*
 * Zero the query's SA MAD and fill in its header: base and class version
 * (OPA or IB, depending on IB_SA_QUERY_OPA), the SubnAdm management
 * class, and a transaction ID built from the agent's hi_tid and the
 * file-wide tid counter.
 */
static void init_mad(struct ib_sa_query *query, struct ib_mad_agent *agent)
{
	struct ib_sa_mad *mad = query->mad_buf->mad;
	unsigned long flags;

	memset(mad, 0, sizeof *mad);

	if (query->flags & IB_SA_QUERY_OPA) {
		mad->mad_hdr.base_version  = OPA_MGMT_BASE_VERSION;
		mad->mad_hdr.class_version = OPA_SA_CLASS_VERSION;
	} else {
		mad->mad_hdr.base_version  = IB_MGMT_BASE_VERSION;
		mad->mad_hdr.class_version = IB_SA_CLASS_VERSION;
	}
	mad->mad_hdr.mgmt_class    = IB_MGMT_CLASS_SUBN_ADM;
	/* tid is incremented under tid_lock */
	spin_lock_irqsave(&tid_lock, flags);
	mad->mad_hdr.tid           =
		cpu_to_be64(((u64) agent->hi_tid) << 32 | tid++);
	spin_unlock_irqrestore(&tid_lock, flags);
}

static int send_mad(struct ib_sa_query *query, unsigned long timeout_ms,
		    gfp_t gfp_mask)
{
	unsigned long flags;
	int ret, id;
	const int nmbr_sa_query_retries = 10;

	xa_lock_irqsave(&queries, flags);
	ret = __xa_alloc(&queries, &id, query, xa_limit_32b, gfp_mask);
	xa_unlock_irqrestore(&queries, flags);
	if (ret < 0)
		return ret;

	query->mad_buf->timeout_ms  = timeout_ms / nmbr_sa_query_retries;
	query->mad_buf->retries = nmbr_sa_query_retries;
	if (!query->mad_buf->timeout_ms) {
		/* Special case, very small timeout_ms */
		query->mad_buf->timeout_ms = 1;
		query->mad_buf->retries = timeout_ms;
	}
	query->mad_buf->context[0] = query;
	query->id = id;

	if ((query->flags & IB_SA_ENABLE_LOCAL_SERVICE) &&
	    (!(query->flags & IB_SA_QUERY_OPA))) {
		if (rdma_nl_chk_listeners(RDMA_NL_GROUP_LS)) {
			if (!ib_nl_make_request(query, gfp_mask))
				return id;
		}
		ib_sa_disable_local_svc(query);
	}

	ret = ib_post_send_mad(query->mad_buf, NULL);
	if (ret) {
		xa_lock_irqsave(&queries, flags);
		__xa_erase(&queries, id);
		xa_unlock_irqrestore(&queries, flags);
	}

	/*
	 * It's not safe to dereference
query any more, because the * send may already have completed and freed the query in * another context. */ return ret ? ret : id; } void ib_sa_unpack_path(void *attribute, struct sa_path_rec *rec) { ib_unpack(path_rec_table, ARRAY_SIZE(path_rec_table), attribute, rec); } EXPORT_SYMBOL(ib_sa_unpack_path); void ib_sa_pack_path(struct sa_path_rec *rec, void *attribute) { ib_pack(path_rec_table, ARRAY_SIZE(path_rec_table), rec, attribute); } EXPORT_SYMBOL(ib_sa_pack_path); void ib_sa_pack_service(struct sa_service_rec *rec, void *attribute) { ib_pack(service_rec_table, ARRAY_SIZE(service_rec_table), rec, attribute); } EXPORT_SYMBOL(ib_sa_pack_service); void ib_sa_unpack_service(void *attribute, struct sa_service_rec *rec) { ib_unpack(service_rec_table, ARRAY_SIZE(service_rec_table), attribute, rec); } EXPORT_SYMBOL(ib_sa_unpack_service); static bool ib_sa_opa_pathrecord_support(struct ib_sa_client *client, struct ib_sa_device *sa_dev, u32 port_num) { struct ib_sa_port *port; unsigned long flags; bool ret = false; port = &sa_dev->port[port_num - sa_dev->start_port]; spin_lock_irqsave(&port->classport_lock, flags); if (!port->classport_info.valid) goto ret; if (port->classport_info.data.type == RDMA_CLASS_PORT_INFO_OPA) ret = opa_get_cpi_capmask2(&port->classport_info.data.opa) & OPA_CLASS_PORT_INFO_PR_SUPPORT; ret: spin_unlock_irqrestore(&port->classport_lock, flags); return ret; } enum opa_pr_supported { PR_NOT_SUPPORTED, PR_OPA_SUPPORTED, PR_IB_SUPPORTED }; /* * opa_pr_query_possible - Check if current PR query can be an OPA query. * * Returns PR_NOT_SUPPORTED if a path record query is not * possible, PR_OPA_SUPPORTED if an OPA path record query * is possible and PR_IB_SUPPORTED if an IB path record * query is possible. 
*/ static int opa_pr_query_possible(struct ib_sa_client *client, struct ib_sa_device *sa_dev, struct ib_device *device, u32 port_num) { struct ib_port_attr port_attr; if (ib_query_port(device, port_num, &port_attr)) return PR_NOT_SUPPORTED; if (ib_sa_opa_pathrecord_support(client, sa_dev, port_num)) return PR_OPA_SUPPORTED; if (port_attr.lid >= be16_to_cpu(IB_MULTICAST_LID_BASE)) return PR_NOT_SUPPORTED; else return PR_IB_SUPPORTED; } static void ib_sa_path_rec_callback(struct ib_sa_query *sa_query, int status, struct ib_sa_mad *mad) { struct ib_sa_path_query *query = container_of(sa_query, struct ib_sa_path_query, sa_query); struct sa_path_rec rec = {}; if (!mad) { query->callback(status, NULL, 0, query->context); return; } if (sa_query->flags & IB_SA_QUERY_OPA) { ib_unpack(opa_path_rec_table, ARRAY_SIZE(opa_path_rec_table), mad->data, &rec); rec.rec_type = SA_PATH_REC_TYPE_OPA; query->callback(status, &rec, 1, query->context); return; } ib_unpack(path_rec_table, ARRAY_SIZE(path_rec_table), mad->data, &rec); rec.rec_type = SA_PATH_REC_TYPE_IB; sa_path_set_dmac_zero(&rec); if (query->conv_pr) { struct sa_path_rec opa; memset(&opa, 0, sizeof(struct sa_path_rec)); sa_convert_path_ib_to_opa(&opa, &rec); query->callback(status, &opa, 1, query->context); } else { query->callback(status, &rec, 1, query->context); } } #define IB_SA_DATA_OFFS 56 #define IB_SERVICE_REC_SZ 176 static void ib_unpack_service_rmpp(struct sa_service_rec *rec, struct ib_mad_recv_wc *mad_wc, int num_services) { unsigned int cp_sz, data_i, data_size, rec_i = 0, buf_i = 0; struct ib_mad_recv_buf *mad_buf; u8 buf[IB_SERVICE_REC_SZ]; u8 *data; data_size = sizeof(((struct ib_sa_mad *) mad_buf->mad)->data); list_for_each_entry(mad_buf, &mad_wc->rmpp_list, list) { data = ((struct ib_sa_mad *) mad_buf->mad)->data; data_i = 0; while (data_i < data_size && rec_i < num_services) { cp_sz = min(IB_SERVICE_REC_SZ - buf_i, data_size - data_i); memcpy(buf + buf_i, data + data_i, cp_sz); data_i += cp_sz; buf_i += 
cp_sz; if (buf_i == IB_SERVICE_REC_SZ) { ib_sa_unpack_service(buf, rec + rec_i); buf_i = 0; rec_i++; } } } } static void ib_sa_service_rec_callback(struct ib_sa_query *sa_query, int status, struct ib_mad_recv_wc *mad_wc) { struct ib_sa_service_query *query = container_of(sa_query, struct ib_sa_service_query, sa_query); struct sa_service_rec *rec; int num_services; if (!mad_wc || !mad_wc->recv_buf.mad) { query->callback(status, NULL, 0, query->context); return; } num_services = (mad_wc->mad_len - IB_SA_DATA_OFFS) / IB_SERVICE_REC_SZ; if (!num_services) { query->callback(-ENODATA, NULL, 0, query->context); return; } rec = kmalloc_objs(*rec, num_services); if (!rec) { query->callback(-ENOMEM, NULL, 0, query->context); return; } ib_unpack_service_rmpp(rec, mad_wc, num_services); query->callback(status, rec, num_services, query->context); kfree(rec); } static void ib_sa_path_rec_release(struct ib_sa_query *sa_query) { struct ib_sa_path_query *query = container_of(sa_query, struct ib_sa_path_query, sa_query); kfree(query->conv_pr); kfree(query); } static void ib_sa_service_rec_release(struct ib_sa_query *sa_query) { struct ib_sa_service_query *query = container_of(sa_query, struct ib_sa_service_query, sa_query); kfree(query); } /** * ib_sa_path_rec_get - Start a Path get query * @client:SA client * @device:device to send query on * @port_num: port number to send query on * @rec:Path Record to send in query * @comp_mask:component mask to send in query * @timeout_ms:time to wait for response * @gfp_mask:GFP mask to use for internal allocations * @callback:function called when query completes, times out or is * canceled * @context:opaque user context passed to callback * @sa_query:query context, used to cancel query * * Send a Path Record Get query to the SA to look up a path. 
The * callback function will be called when the query completes (or * fails); status is 0 for a successful response, -EINTR if the query * is canceled, -ETIMEDOUT is the query timed out, or -EIO if an error * occurred sending the query. The resp parameter of the callback is * only valid if status is 0. * * If the return value of ib_sa_path_rec_get() is negative, it is an * error code. Otherwise it is a query ID that can be used to cancel * the query. */ int ib_sa_path_rec_get(struct ib_sa_client *client, struct ib_device *device, u32 port_num, struct sa_path_rec *rec, ib_sa_comp_mask comp_mask, unsigned long timeout_ms, gfp_t gfp_mask, void (*callback)(int status, struct sa_path_rec *resp, unsigned int num_paths, void *context), void *context, struct ib_sa_query **sa_query) { struct ib_sa_path_query *query; struct ib_sa_device *sa_dev = ib_get_client_data(device, &sa_client); struct ib_sa_port *port; struct ib_mad_agent *agent; struct ib_sa_mad *mad; enum opa_pr_supported status; int ret; if (!sa_dev) return -ENODEV; if ((rec->rec_type != SA_PATH_REC_TYPE_IB) && (rec->rec_type != SA_PATH_REC_TYPE_OPA)) return -EINVAL; port = &sa_dev->port[port_num - sa_dev->start_port]; agent = port->agent; query = kzalloc_obj(*query, gfp_mask); if (!query) return -ENOMEM; query->sa_query.port = port; if (rec->rec_type == SA_PATH_REC_TYPE_OPA) { status = opa_pr_query_possible(client, sa_dev, device, port_num); if (status == PR_NOT_SUPPORTED) { ret = -EINVAL; goto err1; } else if (status == PR_OPA_SUPPORTED) { query->sa_query.flags |= IB_SA_QUERY_OPA; } else { query->conv_pr = kmalloc_obj(*query->conv_pr, gfp_mask); if (!query->conv_pr) { ret = -ENOMEM; goto err1; } } } ret = alloc_mad(&query->sa_query, gfp_mask); if (ret) goto err2; ib_sa_client_get(client); query->sa_query.client = client; query->callback = callback; query->context = context; mad = query->sa_query.mad_buf->mad; init_mad(&query->sa_query, agent); query->sa_query.callback = callback ? 
ib_sa_path_rec_callback : NULL; query->sa_query.release = ib_sa_path_rec_release; mad->mad_hdr.method = IB_MGMT_METHOD_GET; mad->mad_hdr.attr_id = cpu_to_be16(IB_SA_ATTR_PATH_REC); mad->sa_hdr.comp_mask = comp_mask; if (query->sa_query.flags & IB_SA_QUERY_OPA) { ib_pack(opa_path_rec_table, ARRAY_SIZE(opa_path_rec_table), rec, mad->data); } else if (query->conv_pr) { sa_convert_path_opa_to_ib(query->conv_pr, rec); ib_pack(path_rec_table, ARRAY_SIZE(path_rec_table), query->conv_pr, mad->data); } else { ib_pack(path_rec_table, ARRAY_SIZE(path_rec_table), rec, mad->data); } *sa_query = &query->sa_query; query->sa_query.flags |= IB_SA_ENABLE_LOCAL_SERVICE; query->sa_query.mad_buf->context[1] = (query->conv_pr) ? query->conv_pr : rec; ret = send_mad(&query->sa_query, timeout_ms, gfp_mask); if (ret < 0) goto err3; return ret; err3: *sa_query = NULL; ib_sa_client_put(query->sa_query.client); free_mad(&query->sa_query); err2: kfree(query->conv_pr); err1: kfree(query); return ret; } EXPORT_SYMBOL(ib_sa_path_rec_get); /** * ib_sa_service_rec_get - Start a Service get query * @client: SA client * @device: device to send query on * @port_num: port number to send query on * @rec: Service Record to send in query * @comp_mask: component mask to send in query * @timeout_ms: time to wait for response * @gfp_mask: GFP mask to use for internal allocations * @callback: function called when query completes, times out or is * canceled * @context: opaque user context passed to callback * @sa_query: query context, used to cancel query * * Send a Service Record Get query to the SA to look up a path. The * callback function will be called when the query completes (or * fails); status is 0 for a successful response, -EINTR if the query * is canceled, -ETIMEDOUT is the query timed out, or -EIO if an error * occurred sending the query. The resp parameter of the callback is * only valid if status is 0. * * If the return value of ib_sa_service_rec_get() is negative, it is an * error code. 
Otherwise it is a query ID that can be used to cancel * the query. */ int ib_sa_service_rec_get(struct ib_sa_client *client, struct ib_device *device, u32 port_num, struct sa_service_rec *rec, ib_sa_comp_mask comp_mask, unsigned long timeout_ms, gfp_t gfp_mask, void (*callback)(int status, struct sa_service_rec *resp, unsigned int num_services, void *context), void *context, struct ib_sa_query **sa_query) { struct ib_sa_device *sa_dev = ib_get_client_data(device, &sa_client); struct ib_sa_service_query *query; struct ib_mad_agent *agent; struct ib_sa_port *port; struct ib_sa_mad *mad; int ret; if (!sa_dev) return -ENODEV; port = &sa_dev->port[port_num - sa_dev->start_port]; agent = port->agent; query = kzalloc_obj(*query, gfp_mask); if (!query) return -ENOMEM; query->sa_query.port = port; ret = alloc_mad(&query->sa_query, gfp_mask); if (ret) goto err1; ib_sa_client_get(client); query->sa_query.client = client; query->callback = callback; query->context = context; mad = query->sa_query.mad_buf->mad; init_mad(&query->sa_query, agent); query->sa_query.rmpp_callback = callback ? 
ib_sa_service_rec_callback : NULL; query->sa_query.release = ib_sa_service_rec_release; mad->mad_hdr.method = IB_MGMT_METHOD_GET_TABLE; mad->mad_hdr.attr_id = cpu_to_be16(IB_SA_ATTR_SERVICE_REC); mad->sa_hdr.comp_mask = comp_mask; ib_sa_pack_service(rec, mad->data); *sa_query = &query->sa_query; query->sa_query.mad_buf->context[1] = rec; ret = send_mad(&query->sa_query, timeout_ms, gfp_mask); if (ret < 0) goto err2; return ret; err2: *sa_query = NULL; ib_sa_client_put(query->sa_query.client); free_mad(&query->sa_query); err1: kfree(query); return ret; } EXPORT_SYMBOL(ib_sa_service_rec_get); static void ib_sa_mcmember_rec_callback(struct ib_sa_query *sa_query, int status, struct ib_sa_mad *mad) { struct ib_sa_mcmember_query *query = container_of(sa_query, struct ib_sa_mcmember_query, sa_query); if (mad) { struct ib_sa_mcmember_rec rec; ib_unpack(mcmember_rec_table, ARRAY_SIZE(mcmember_rec_table), mad->data, &rec); query->callback(status, &rec, query->context); } else query->callback(status, NULL, query->context); } static void ib_sa_mcmember_rec_release(struct ib_sa_query *sa_query) { kfree(container_of(sa_query, struct ib_sa_mcmember_query, sa_query)); } int ib_sa_mcmember_rec_query(struct ib_sa_client *client, struct ib_device *device, u32 port_num, u8 method, struct ib_sa_mcmember_rec *rec, ib_sa_comp_mask comp_mask, unsigned long timeout_ms, gfp_t gfp_mask, void (*callback)(int status, struct ib_sa_mcmember_rec *resp, void *context), void *context, struct ib_sa_query **sa_query) { struct ib_sa_mcmember_query *query; struct ib_sa_device *sa_dev = ib_get_client_data(device, &sa_client); struct ib_sa_port *port; struct ib_mad_agent *agent; struct ib_sa_mad *mad; int ret; if (!sa_dev) return -ENODEV; port = &sa_dev->port[port_num - sa_dev->start_port]; agent = port->agent; query = kzalloc_obj(*query, gfp_mask); if (!query) return -ENOMEM; query->sa_query.port = port; ret = alloc_mad(&query->sa_query, gfp_mask); if (ret) goto err1; ib_sa_client_get(client); 
query->sa_query.client = client; query->callback = callback; query->context = context; mad = query->sa_query.mad_buf->mad; init_mad(&query->sa_query, agent); query->sa_query.callback = callback ? ib_sa_mcmember_rec_callback : NULL; query->sa_query.release = ib_sa_mcmember_rec_release; mad->mad_hdr.method = method; mad->mad_hdr.attr_id = cpu_to_be16(IB_SA_ATTR_MC_MEMBER_REC); mad->sa_hdr.comp_mask = comp_mask; ib_pack(mcmember_rec_table, ARRAY_SIZE(mcmember_rec_table), rec, mad->data); *sa_query = &query->sa_query; ret = send_mad(&query->sa_query, timeout_ms, gfp_mask); if (ret < 0) goto err2; return ret; err2: *sa_query = NULL; ib_sa_client_put(query->sa_query.client); free_mad(&query->sa_query); err1: kfree(query); return ret; } /* Support GuidInfoRecord */ static void ib_sa_guidinfo_rec_callback(struct ib_sa_query *sa_query, int status, struct ib_sa_mad *mad) { struct ib_sa_guidinfo_query *query = container_of(sa_query, struct ib_sa_guidinfo_query, sa_query); if (mad) { struct ib_sa_guidinfo_rec rec; ib_unpack(guidinfo_rec_table, ARRAY_SIZE(guidinfo_rec_table), mad->data, &rec); query->callback(status, &rec, query->context); } else query->callback(status, NULL, query->context); } static void ib_sa_guidinfo_rec_release(struct ib_sa_query *sa_query) { kfree(container_of(sa_query, struct ib_sa_guidinfo_query, sa_query)); } int ib_sa_guid_info_rec_query(struct ib_sa_client *client, struct ib_device *device, u32 port_num, struct ib_sa_guidinfo_rec *rec, ib_sa_comp_mask comp_mask, u8 method, unsigned long timeout_ms, gfp_t gfp_mask, void (*callback)(int status, struct ib_sa_guidinfo_rec *resp, void *context), void *context, struct ib_sa_query **sa_query) { struct ib_sa_guidinfo_query *query; struct ib_sa_device *sa_dev = ib_get_client_data(device, &sa_client); struct ib_sa_port *port; struct ib_mad_agent *agent; struct ib_sa_mad *mad; int ret; if (!sa_dev) return -ENODEV; if (method != IB_MGMT_METHOD_GET && method != IB_MGMT_METHOD_SET && method != IB_SA_METHOD_DELETE) 
{ return -EINVAL; } port = &sa_dev->port[port_num - sa_dev->start_port]; agent = port->agent; query = kzalloc_obj(*query, gfp_mask); if (!query) return -ENOMEM; query->sa_query.port = port; ret = alloc_mad(&query->sa_query, gfp_mask); if (ret) goto err1; ib_sa_client_get(client); query->sa_query.client = client; query->callback = callback; query->context = context; mad = query->sa_query.mad_buf->mad; init_mad(&query->sa_query, agent); query->sa_query.callback = callback ? ib_sa_guidinfo_rec_callback : NULL; query->sa_query.release = ib_sa_guidinfo_rec_release; mad->mad_hdr.method = method; mad->mad_hdr.attr_id = cpu_to_be16(IB_SA_ATTR_GUID_INFO_REC); mad->sa_hdr.comp_mask = comp_mask; ib_pack(guidinfo_rec_table, ARRAY_SIZE(guidinfo_rec_table), rec, mad->data); *sa_query = &query->sa_query; ret = send_mad(&query->sa_query, timeout_ms, gfp_mask); if (ret < 0) goto err2; return ret; err2: *sa_query = NULL; ib_sa_client_put(query->sa_query.client); free_mad(&query->sa_query); err1: kfree(query); return ret; } EXPORT_SYMBOL(ib_sa_guid_info_rec_query); struct ib_classport_info_context { struct completion done; struct ib_sa_query *sa_query; }; static void ib_classportinfo_cb(void *context) { struct ib_classport_info_context *cb_ctx = context; complete(&cb_ctx->done); } static void ib_sa_classport_info_rec_callback(struct ib_sa_query *sa_query, int status, struct ib_sa_mad *mad) { unsigned long flags; struct ib_sa_classport_info_query *query = container_of(sa_query, struct ib_sa_classport_info_query, sa_query); struct ib_sa_classport_cache *info = &sa_query->port->classport_info; if (mad) { if (sa_query->flags & IB_SA_QUERY_OPA) { struct opa_class_port_info rec; ib_unpack(opa_classport_info_rec_table, ARRAY_SIZE(opa_classport_info_rec_table), mad->data, &rec); spin_lock_irqsave(&sa_query->port->classport_lock, flags); if (!status && !info->valid) { memcpy(&info->data.opa, &rec, sizeof(info->data.opa)); info->valid = true; info->data.type = RDMA_CLASS_PORT_INFO_OPA; } 
spin_unlock_irqrestore(&sa_query->port->classport_lock, flags); } else { struct ib_class_port_info rec; ib_unpack(ib_classport_info_rec_table, ARRAY_SIZE(ib_classport_info_rec_table), mad->data, &rec); spin_lock_irqsave(&sa_query->port->classport_lock, flags); if (!status && !info->valid) { memcpy(&info->data.ib, &rec, sizeof(info->data.ib)); info->valid = true; info->data.type = RDMA_CLASS_PORT_INFO_IB; } spin_unlock_irqrestore(&sa_query->port->classport_lock, flags); } } query->callback(query->context); } static void ib_sa_classport_info_rec_release(struct ib_sa_query *sa_query) { kfree(container_of(sa_query, struct ib_sa_classport_info_query, sa_query)); } static int ib_sa_classport_info_rec_query(struct ib_sa_port *port, unsigned long timeout_ms, void (*callback)(void *context), void *context, struct ib_sa_query **sa_query) { struct ib_mad_agent *agent; struct ib_sa_classport_info_query *query; struct ib_sa_mad *mad; gfp_t gfp_mask = GFP_KERNEL; int ret; agent = port->agent; query = kzalloc_obj(*query, gfp_mask); if (!query) return -ENOMEM; query->sa_query.port = port; query->sa_query.flags |= rdma_cap_opa_ah(port->agent->device, port->port_num) ? 
IB_SA_QUERY_OPA : 0; ret = alloc_mad(&query->sa_query, gfp_mask); if (ret) goto err_free; query->callback = callback; query->context = context; mad = query->sa_query.mad_buf->mad; init_mad(&query->sa_query, agent); query->sa_query.callback = ib_sa_classport_info_rec_callback; query->sa_query.release = ib_sa_classport_info_rec_release; mad->mad_hdr.method = IB_MGMT_METHOD_GET; mad->mad_hdr.attr_id = cpu_to_be16(IB_SA_ATTR_CLASS_PORTINFO); mad->sa_hdr.comp_mask = 0; *sa_query = &query->sa_query; ret = send_mad(&query->sa_query, timeout_ms, gfp_mask); if (ret < 0) goto err_free_mad; return ret; err_free_mad: *sa_query = NULL; free_mad(&query->sa_query); err_free: kfree(query); return ret; } static void update_ib_cpi(struct work_struct *work) { struct ib_sa_port *port = container_of(work, struct ib_sa_port, ib_cpi_work.work); struct ib_classport_info_context *cb_context; unsigned long flags; int ret; /* If the classport info is valid, nothing * to do here. */ spin_lock_irqsave(&port->classport_lock, flags); if (port->classport_info.valid) { spin_unlock_irqrestore(&port->classport_lock, flags); return; } spin_unlock_irqrestore(&port->classport_lock, flags); cb_context = kmalloc_obj(*cb_context); if (!cb_context) goto err_nomem; init_completion(&cb_context->done); ret = ib_sa_classport_info_rec_query(port, 3000, ib_classportinfo_cb, cb_context, &cb_context->sa_query); if (ret < 0) goto free_cb_err; wait_for_completion(&cb_context->done); free_cb_err: kfree(cb_context); spin_lock_irqsave(&port->classport_lock, flags); /* If the classport info is still not valid, the query should have * failed for some reason. 
Retry issuing the query */ if (!port->classport_info.valid) { port->classport_info.retry_cnt++; if (port->classport_info.retry_cnt <= IB_SA_CPI_MAX_RETRY_CNT) { unsigned long delay = msecs_to_jiffies(IB_SA_CPI_RETRY_WAIT); queue_delayed_work(ib_wq, &port->ib_cpi_work, delay); } } spin_unlock_irqrestore(&port->classport_lock, flags); err_nomem: return; } static void send_handler(struct ib_mad_agent *agent, struct ib_mad_send_wc *mad_send_wc) { struct ib_sa_query *query = mad_send_wc->send_buf->context[0]; unsigned long flags; int status = 0; if (query->callback || query->rmpp_callback) { switch (mad_send_wc->status) { case IB_WC_SUCCESS: /* No callback -- already got recv */ break; case IB_WC_RESP_TIMEOUT_ERR: status = -ETIMEDOUT; break; case IB_WC_WR_FLUSH_ERR: status = -EINTR; break; default: status = -EIO; break; } if (status) query->callback ? query->callback(query, status, NULL) : query->rmpp_callback(query, status, NULL); } xa_lock_irqsave(&queries, flags); __xa_erase(&queries, query->id); xa_unlock_irqrestore(&queries, flags); free_mad(query); if (query->client) ib_sa_client_put(query->client); query->release(query); } static void recv_handler(struct ib_mad_agent *mad_agent, struct ib_mad_send_buf *send_buf, struct ib_mad_recv_wc *mad_recv_wc) { struct ib_sa_query *query; struct ib_mad *mad; if (!send_buf) return; query = send_buf->context[0]; mad = mad_recv_wc->recv_buf.mad; if (query->rmpp_callback) { if (mad_recv_wc->wc->status == IB_WC_SUCCESS) query->rmpp_callback(query, mad->mad_hdr.status ? -EINVAL : 0, mad_recv_wc); else query->rmpp_callback(query, -EIO, NULL); } else if (query->callback) { if (mad_recv_wc->wc->status == IB_WC_SUCCESS) query->callback(query, mad->mad_hdr.status ? 
-EINVAL : 0, (struct ib_sa_mad *)mad); else query->callback(query, -EIO, NULL); } ib_free_recv_mad(mad_recv_wc); } static void update_sm_ah(struct work_struct *work) { struct ib_sa_port *port = container_of(work, struct ib_sa_port, update_task); struct ib_sa_sm_ah *new_ah; struct ib_port_attr port_attr; struct rdma_ah_attr ah_attr; bool grh_required; if (ib_query_port(port->agent->device, port->port_num, &port_attr)) { pr_warn("Couldn't query port\n"); return; } new_ah = kmalloc_obj(*new_ah); if (!new_ah) return; kref_init(&new_ah->ref); new_ah->src_path_mask = (1 << port_attr.lmc) - 1; new_ah->pkey_index = 0; if (ib_find_pkey(port->agent->device, port->port_num, IB_DEFAULT_PKEY_FULL, &new_ah->pkey_index)) pr_err("Couldn't find index for default PKey\n"); memset(&ah_attr, 0, sizeof(ah_attr)); ah_attr.type = rdma_ah_find_type(port->agent->device, port->port_num); rdma_ah_set_dlid(&ah_attr, port_attr.sm_lid); rdma_ah_set_sl(&ah_attr, port_attr.sm_sl); rdma_ah_set_port_num(&ah_attr, port->port_num); grh_required = rdma_is_grh_required(port->agent->device, port->port_num); /* * The OPA sm_lid of 0xFFFF needs special handling so that it can be * differentiated from a permissive LID of 0xFFFF. 
We set the * grh_required flag here so the SA can program the DGID in the * address handle appropriately */ if (ah_attr.type == RDMA_AH_ATTR_TYPE_OPA && (grh_required || port_attr.sm_lid == be16_to_cpu(IB_LID_PERMISSIVE))) rdma_ah_set_make_grd(&ah_attr, true); if (ah_attr.type == RDMA_AH_ATTR_TYPE_IB && grh_required) { rdma_ah_set_ah_flags(&ah_attr, IB_AH_GRH); rdma_ah_set_subnet_prefix(&ah_attr, cpu_to_be64(port_attr.subnet_prefix)); rdma_ah_set_interface_id(&ah_attr, cpu_to_be64(IB_SA_WELL_KNOWN_GUID)); } new_ah->ah = rdma_create_ah(port->agent->qp->pd, &ah_attr, RDMA_CREATE_AH_SLEEPABLE); if (IS_ERR(new_ah->ah)) { pr_warn("Couldn't create new SM AH\n"); kfree(new_ah); return; } spin_lock_irq(&port->ah_lock); if (port->sm_ah) kref_put(&port->sm_ah->ref, free_sm_ah); port->sm_ah = new_ah; spin_unlock_irq(&port->ah_lock); } static void ib_sa_event(struct ib_event_handler *handler, struct ib_event *event) { if (event->event == IB_EVENT_PORT_ERR || event->event == IB_EVENT_PORT_ACTIVE || event->event == IB_EVENT_LID_CHANGE || event->event == IB_EVENT_PKEY_CHANGE || event->event == IB_EVENT_SM_CHANGE || event->event == IB_EVENT_CLIENT_REREGISTER) { unsigned long flags; struct ib_sa_device *sa_dev = container_of(handler, typeof(*sa_dev), event_handler); u32 port_num = event->element.port_num - sa_dev->start_port; struct ib_sa_port *port = &sa_dev->port[port_num]; if (!rdma_cap_ib_sa(handler->device, port->port_num)) return; spin_lock_irqsave(&port->ah_lock, flags); if (port->sm_ah) kref_put(&port->sm_ah->ref, free_sm_ah); port->sm_ah = NULL; spin_unlock_irqrestore(&port->ah_lock, flags); if (event->event == IB_EVENT_SM_CHANGE || event->event == IB_EVENT_CLIENT_REREGISTER || event->event == IB_EVENT_LID_CHANGE || event->event == IB_EVENT_PORT_ACTIVE) { unsigned long delay = msecs_to_jiffies(IB_SA_CPI_RETRY_WAIT); spin_lock_irqsave(&port->classport_lock, flags); port->classport_info.valid = false; port->classport_info.retry_cnt = 0; 
spin_unlock_irqrestore(&port->classport_lock, flags); queue_delayed_work(ib_wq, &port->ib_cpi_work, delay); } queue_work(ib_wq, &sa_dev->port[port_num].update_task); } } static int ib_sa_add_one(struct ib_device *device) { struct ib_sa_device *sa_dev; int s, e, i; int count = 0; int ret; s = rdma_start_port(device); e = rdma_end_port(device); sa_dev = kzalloc_flex(*sa_dev, port, size_add(size_sub(e, s), 1)); if (!sa_dev) return -ENOMEM; sa_dev->start_port = s; sa_dev->end_port = e; for (i = 0; i <= e - s; ++i) { spin_lock_init(&sa_dev->port[i].ah_lock); if (!rdma_cap_ib_sa(device, i + 1)) continue; sa_dev->port[i].sm_ah = NULL; sa_dev->port[i].port_num = i + s; spin_lock_init(&sa_dev->port[i].classport_lock); sa_dev->port[i].classport_info.valid = false; sa_dev->port[i].agent = ib_register_mad_agent(device, i + s, IB_QPT_GSI, NULL, IB_MGMT_RMPP_VERSION, send_handler, recv_handler, sa_dev, 0); if (IS_ERR(sa_dev->port[i].agent)) { ret = PTR_ERR(sa_dev->port[i].agent); goto err; } INIT_WORK(&sa_dev->port[i].update_task, update_sm_ah); INIT_DELAYED_WORK(&sa_dev->port[i].ib_cpi_work, update_ib_cpi); count++; } if (!count) { ret = -EOPNOTSUPP; goto free; } ib_set_client_data(device, &sa_client, sa_dev); /* * We register our event handler after everything is set up, * and then update our cached info after the event handler is * registered to avoid any problems if a port changes state * during our initialization. 
*/ INIT_IB_EVENT_HANDLER(&sa_dev->event_handler, device, ib_sa_event); ib_register_event_handler(&sa_dev->event_handler); for (i = 0; i <= e - s; ++i) { if (rdma_cap_ib_sa(device, i + 1)) update_sm_ah(&sa_dev->port[i].update_task); } return 0; err: while (--i >= 0) { if (rdma_cap_ib_sa(device, i + 1)) ib_unregister_mad_agent(sa_dev->port[i].agent); } free: kfree(sa_dev); return ret; } static void ib_sa_remove_one(struct ib_device *device, void *client_data) { struct ib_sa_device *sa_dev = client_data; int i; ib_unregister_event_handler(&sa_dev->event_handler); flush_workqueue(ib_wq); for (i = 0; i <= sa_dev->end_port - sa_dev->start_port; ++i) { if (rdma_cap_ib_sa(device, i + 1)) { cancel_delayed_work_sync(&sa_dev->port[i].ib_cpi_work); ib_unregister_mad_agent(sa_dev->port[i].agent); if (sa_dev->port[i].sm_ah) kref_put(&sa_dev->port[i].sm_ah->ref, free_sm_ah); } } kfree(sa_dev); } int ib_sa_init(void) { int ret; get_random_bytes(&tid, sizeof tid); atomic_set(&ib_nl_sa_request_seq, 0); ret = ib_register_client(&sa_client); if (ret) { pr_err("Couldn't register ib_sa client\n"); goto err1; } ret = mcast_init(); if (ret) { pr_err("Couldn't initialize multicast handling\n"); goto err2; } ib_nl_wq = alloc_ordered_workqueue("ib_nl_sa_wq", WQ_MEM_RECLAIM); if (!ib_nl_wq) { ret = -ENOMEM; goto err3; } INIT_DELAYED_WORK(&ib_nl_timed_work, ib_nl_request_timeout); return 0; err3: mcast_cleanup(); err2: ib_unregister_client(&sa_client); err1: return ret; } void ib_sa_cleanup(void) { cancel_delayed_work(&ib_nl_timed_work); destroy_workqueue(ib_nl_wq); mcast_cleanup(); ib_unregister_client(&sa_client); WARN_ON(!xa_empty(&queries)); }
36 85 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _NF_CONNTRACK_SYNPROXY_H #define _NF_CONNTRACK_SYNPROXY_H #include <net/netfilter/nf_conntrack_seqadj.h> #include <net/netns/generic.h> struct nf_conn_synproxy { u32 isn; u32 its; u32 tsoff; }; static inline struct nf_conn_synproxy *nfct_synproxy(const struct nf_conn *ct) { #if IS_ENABLED(CONFIG_NETFILTER_SYNPROXY) return nf_ct_ext_find(ct, NF_CT_EXT_SYNPROXY); #else return NULL; #endif } static inline struct nf_conn_synproxy *nfct_synproxy_ext_add(struct nf_conn *ct) { #if IS_ENABLED(CONFIG_NETFILTER_SYNPROXY) return nf_ct_ext_add(ct, NF_CT_EXT_SYNPROXY, GFP_ATOMIC); #else return NULL; #endif } static inline bool nf_ct_add_synproxy(struct nf_conn *ct, const struct nf_conn *tmpl) { #if IS_ENABLED(CONFIG_NETFILTER_SYNPROXY) if (tmpl && nfct_synproxy(tmpl)) { if (!nfct_seqadj_ext_add(ct)) return false; if (!nfct_synproxy_ext_add(ct)) return false; } #endif return true; } #endif /* _NF_CONNTRACK_SYNPROXY_H */
2 2 2 2 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 
526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 
1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 
1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 
1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 
2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282 2283 2284 2285 2286 2287 2288 2289 2290 2291 2292 2293 2294 2295 2296 2297 2298 2299 2300 2301 2302 2303 2304 2305 2306 2307 2308 2309 2310 2311 2312 2313 2314 2315 2316 2317 2318 2319 2320 2321 2322 2323 2324 2325 2326 2327 2328 2329 2330 2331 2332 2333 2334 2335 2336 2337 2338 2339 2340 2341 2342 2343 2344 2345 2346 2347 2348 2349 2350 2351 2352 2353 2354 2355 2356 2357 2358 2359 2360 2361 2362 2363 2364 2365 2366 2367 2368 2369 2370 2371 2372 2373 2374 2375 2376 2377 2378 2379 2380 2381 2382 2383 2384 2385 2386 2387 2388 2389 2390 2391 2392 2393 2394 2395 2396 2397 2398 2399 2400 2401 2402 2403 2404 2405 2406 2407 2408 2409 2410 2411 2412 2413 2414 2415 2416 2417 2418 2419 2420 2421 2422 2423 2424 2425 2426 2427 2428 2429 2430 2431 2432 2433 2434 2435 2436 2437 2438 2439 2440 2441 2442 2443 2444 2445 2446 2447 2448 2449 2450 2451 2452 2453 2454 2455 2456 2457 2458 2459 2460 2461 2462 2463 2464 2465 2466 2467 2468 2469 2470 2471 2472 2473 2474 2475 2476 2477 2478 2479 2480 2481 2482 2483 2484 2485 2486 2487 2488 2489 2490 2491 2492 2493 2494 2495 2496 2497 2498 2499 2500 2501 2502 2503 2504 2505 2506 2507 2508 2509 2510 2511 2512 2513 2514 2515 2516 2517 2518 2519 2520 2521 2522 2523 2524 2525 2526 2527 2528 2529 2530 2531 2532 2533 2534 2535 2536 2537 2538 2539 2540 2541 2542 2543 2544 2545 2546 2547 2548 2549 2550 2551 2552 2553 2554 2555 2556 2557 2558 2559 2560 2561 2562 2563 2564 2565 2566 2567 2568 2569 2570 2571 2572 2573 2574 2575 2576 2577 2578 2579 2580 2581 2582 2583 2584 2585 2586 2587 2588 2589 2590 2591 2592 2593 2594 2595 2596 2597 2598 2599 2600 2601 2602 2603 2604 2605 2606 2607 2608 2609 2610 2611 2612 2613 2614 2615 2616 2617 2618 2619 
2620 2621 2622 2623 2624 2625 2626 2627 2628 2629 2630 2631 2632 2633 2634 2635 2636 2637 2638 2639 2640 2641 2642 2643 2644 2645 2646 2647 2648 2649 2650 2651 2652 2653 2654 2655 2656 2657 2658 2659 2660 2661 2662 2663 2664 2665 2666 2667 2668 2669 2670 2671 2672 2673 2674 2675 2676 2677 2678 2679 2680 2681 2682 2683 2684 2685 2686 2687 2688 2689 2690 2691 2692 2693 2694 2695 2696 2697 2698 2699 2700 2701 2702 2703 2704 2705 2706 2707 2708 2709 2710 2711 2712 2713 2714 2715 2716 2717 2718 2719 2720 2721 2722 2723 2724 2725 2726 2727 2728 2729 2730 2731 2732 2733 2734 2735 2736 2737 2738 2739 2740 2741 2742 2743 2744 2745 2746 2747 2748 2749 2750 2751 2752 2753 2754 2755 2756 2757 2758 2759 2760 2761 2762 2763 2764 2765 2766 2767 2768 2769 2770 2771 2772 2773 2774 2775 2776 2777 2778 2779 2780 2781 2782 2783 2784 2785 2786 2787 2788 2789 2790 2791 2792 2793 2794 2795 2796 2797 2798 2799 2800 2801 2802 2803 2804 2805 2806 2807 2808 2809 2810 2811 2812 2813 2814 2815 2816 2817 2818 2819 2820 2821 2822 2823 2824 2825 2826 2827 2828 2829 2830 2831 2832 2833 2834 2835 2836 2837 2838 2839 2840 2841 2842 2843 2844 2845 2846 2847 2848 2849 2850 2851 2852 2853 2854 2855 2856 2857 2858 2859 2860 2861 2862 2863 2864 2865 2866 2867 2868 2869 2870 2871 2872 2873 2874 2875 2876 2877 2878 2879 2880 2881 2882 2883 2884 2885 2886 2887 2888 2889 2890 2891 2892 2893 2894 2895 2896 2897 2898 2899 2900 2901 2902 2903 2904 2905 2906 2907 2908 2909 2910 2911 2912 2913 2914 2915 2916 2917 2918 2919 2920 2921 2922 2923 2924 2925 2926 2927 2928 2929 2930 2931 2932 2933 2934 2935 2936 2937 2938 2939 2940 2941 2942 2943 2944 2945 2946 2947 2948 2949 2950 2951 2952 2953 2954 2955 2956 2957 2958 2959 2960 2961 2962 2963 2964 2965 2966 2967 2968 2969 2970 2971 2972 2973 2974 2975 2976 2977 2978 2979 2980 2981 2982 2983 2984 2985 2986 2987 2988 2989 2990 2991 2992 2993 2994 2995 2996 2997 2998 2999 3000 3001 3002 3003 3004 3005 3006 3007 3008 3009 3010 3011 3012 3013 3014 3015 3016 3017 3018 3019 
3020 3021 3022 3023 3024 3025 3026 3027 3028 3029 3030 3031 3032 3033 3034 3035 3036 3037 3038 3039 3040 3041 3042 3043 3044 3045 3046 3047 3048 3049 3050 3051 3052 3053 3054 3055 3056 3057 3058 3059 3060 3061 3062 3063 3064 3065 3066 3067 3068 3069 3070 3071 3072 3073 3074 3075 3076 3077 3078 3079 3080 3081 3082 3083 3084 3085 3086 3087 3088 3089 3090 3091 3092 3093 3094 3095 3096 3097 3098 3099 3100 3101 3102 3103 3104 3105 3106 3107 3108 3109 3110 3111 3112 3113 3114 3115 3116 3117 3118 3119 3120 3121 3122 3123 3124 3125 3126 3127 3128 3129 3130 3131 3132 3133 3134 3135 3136 3137 3138 3139 3140 3141 3142 3143 3144 3145 3146 3147 3148 3149 3150 3151 3152 3153 3154 3155 3156 3157 3158 3159 3160 3161 3162 3163 3164 3165 3166 3167 3168 3169 3170 3171 3172 3173 3174 3175 3176 3177 3178 3179 3180 3181 3182 3183 3184 3185 3186 3187 3188 3189 3190 3191 3192 3193 3194 3195 3196 3197 3198 3199 3200 3201 3202 3203 3204 3205 3206 3207 3208 3209 3210 3211 3212 3213 3214 3215 3216 3217 3218 3219 3220 3221 3222 3223 3224 3225 3226 3227 3228 3229 3230 3231 3232 3233 3234 3235 3236 3237 3238 3239 3240 3241 3242 3243 3244 3245 3246 3247 3248 3249 3250 3251 3252 3253 3254 3255 3256 3257 3258 3259 3260 3261 3262 3263 3264 3265 3266 3267 3268 3269 3270 3271 3272 3273 3274 3275 3276 3277 3278 3279 3280 3281 3282 3283 3284 3285 3286 3287 3288 3289 3290 3291 3292 3293 3294 3295 3296 3297 3298 3299 3300 3301 3302 3303 3304 3305 3306 3307 3308 3309 3310 3311 3312 3313 3314 3315 3316 3317 3318 3319 3320 3321 3322 3323 3324 3325 3326 3327 3328 3329 3330 3331 3332 3333 3334 3335 3336 3337 3338 3339 3340 3341 3342 3343 3344 3345 3346 3347 3348 3349 3350 3351 3352 3353 3354 3355 3356 3357 3358 3359 3360 3361 3362 3363 3364 3365 3366 3367 3368 3369 3370 3371 3372 3373 3374 3375 3376 3377 3378 3379 3380 3381 3382 3383 3384 3385 3386 3387 3388 3389 3390 3391 3392 3393 3394 3395 3396 3397 3398 3399 3400 3401 3402 3403 3404 3405 3406 3407 3408 3409 3410 3411 3412 3413 3414 3415 3416 3417 3418 3419 
3420 3421 3422 3423 3424 3425 3426 3427 3428 3429 3430 3431 3432 3433 3434 3435 3436 3437 3438 3439 3440 3441 3442 3443 3444 3445 3446 3447 3448 3449 3450 3451 3452 3453 3454 3455 3456 3457 3458 3459 3460 3461 3462 3463 3464 3465 3466 3467 3468 3469 3470 3471 3472 3473 3474 3475 3476 3477 3478 3479 3480 3481 3482 3483 3484 3485 3486 3487 3488 3489 3490 3491 3492 3493 3494 3495 3496 3497 3498 3499 3500 3501 3502 3503 3504 3505 3506 3507 3508 3509 3510 3511 3512 3513 3514 3515 3516 3517 3518 3519 3520 3521 3522 3523 3524 3525 3526 3527 3528 3529 3530 3531 3532 3533 3534 3535 3536 3537 3538 3539 3540 3541 3542 3543 3544 3545 3546 3547 3548 3549 3550 3551 3552 3553 3554 3555 3556 3557 3558 3559 3560 3561 3562 3563 3564 3565 3566 3567 3568 3569 3570 3571 3572 3573 3574 3575 3576 3577 3578 3579 3580 3581 3582 3583 3584 3585 3586 3587 3588 3589 3590 3591 3592 3593 3594 3595 3596 3597 3598 3599 3600 3601 3602 3603 3604 3605 3606 3607 3608 3609 3610 3611 3612 3613 3614 3615 3616 3617 3618 3619 3620 3621 3622 3623 3624 3625 3626 3627 3628 3629 3630 3631 3632 3633 3634 3635 3636 3637 3638 3639 3640 3641 3642 3643 3644 3645 3646 3647 3648 3649 3650 3651 3652 3653 3654 3655 3656 3657 3658 3659 3660 3661 3662 3663 3664 3665 3666 3667 3668 3669 3670 3671 3672 3673 3674 3675 3676 3677 3678 3679 3680 3681 3682 3683 3684 3685 3686 3687 3688 3689 3690 3691 3692 3693 3694 3695 3696 3697 3698 3699 3700 3701 3702 3703 3704 3705 3706 3707 3708 3709 3710 3711 3712 3713 3714 3715 3716 3717 3718 3719 3720 3721 3722 3723 3724 3725 3726 3727 3728 3729 3730 3731 3732 3733 3734 3735 3736 3737 3738 3739 3740 3741 3742 3743 3744 3745 3746 3747 3748 3749 3750 3751 3752 3753 3754 3755 3756 3757 3758 3759 3760 3761 3762 3763 3764 3765 3766 3767 3768 3769 3770 3771 3772 3773 3774 3775 3776 3777 3778 3779 3780 3781 3782 3783 3784 3785 3786 3787 3788 3789 3790 3791 3792 3793 3794 3795 3796 3797 3798 3799 3800 3801 3802 3803 3804 3805 3806 3807 3808 3809 3810 3811 3812 3813 3814 3815 3816 3817 3818 3819 
3820 3821 3822 3823 3824 3825 3826 3827 3828 3829 3830 3831 3832 3833 3834 3835 3836 3837 3838 3839 3840 3841 3842 3843 3844 3845 3846 3847 3848 3849 3850 3851 3852 3853 3854 3855 3856 3857 3858 3859 3860 3861 3862 3863 3864 3865 3866 3867 3868 3869 3870 3871 3872 3873 3874 3875 3876 3877 3878 3879 3880 3881 3882 3883 3884 3885 3886 3887 3888 3889 3890 3891 3892 3893 3894 3895 3896 3897 3898 3899 3900 3901 3902 3903 3904 3905 3906 3907 3908 3909 3910 3911 3912 3913 3914 3915 3916 3917 3918 3919 3920 3921 3922 3923 3924 3925 3926 3927 3928 3929 3930 3931 3932 3933 3934 3935 3936 3937 3938 3939 3940 3941 3942 3943 3944 3945 3946 3947 3948 3949 3950 3951 3952 3953 3954 3955 3956 3957 3958 3959 3960 3961 3962 3963 3964 3965 3966 3967 3968 3969 3970 3971 3972 3973 3974 3975 3976 3977 3978 3979 3980 3981 3982 3983 3984 3985 3986 3987 3988 3989 3990 3991 3992 3993 3994 3995 3996 3997 3998 3999 4000 4001 4002 4003 4004 4005 4006 4007 4008 4009 4010 4011 4012 4013 4014 4015 4016 4017 4018 4019 4020 4021 4022 4023 4024 4025 4026 4027 4028 4029 4030 4031 4032 4033 4034 4035 4036 4037 4038 4039 4040 4041 4042 4043 4044 4045 4046 4047 4048 4049 4050 4051 4052 4053 4054 4055 4056 4057 4058 4059 4060 4061 4062 4063 4064 4065 4066 4067 4068 4069 4070 4071 4072 4073 4074 4075 4076 4077 4078 4079 4080 4081 4082 4083 4084 4085 4086 4087 4088 4089 4090 4091 4092 4093 4094 4095 4096 4097 4098 4099 4100 4101 4102 4103 4104 4105 4106 4107 4108 4109 4110 4111 4112 4113 4114 4115 4116 4117 4118 4119 4120 4121 4122 4123 4124 4125 4126 4127 4128 4129 4130 4131 4132 4133 4134 4135 4136 4137 4138 4139 4140 4141 4142 4143 4144 4145 4146 4147 4148 4149 4150 4151 4152 4153 4154 4155 4156 4157 4158 4159 4160 4161 4162 4163 4164 4165 4166 4167 4168 4169 4170 4171 4172 4173 4174 4175 4176 4177 4178 4179 4180 4181 4182 4183 4184 4185 4186 4187 4188 4189 4190 4191 4192 4193 4194 4195 4196 4197 4198 4199 4200 4201 4202 4203 4204 4205 4206 4207 4208 4209 4210 4211 4212 4213 4214 4215 4216 4217 4218 4219 
4220 4221 4222 4223 4224 4225 4226 4227 4228 4229 4230 4231 4232 4233 4234 4235 4236 4237 4238 4239 4240 4241 4242 4243 4244 4245 4246 4247 4248 4249 4250 4251 4252 4253 4254 4255 4256 4257 4258 4259 4260 4261 4262 4263 4264 4265 4266 4267 4268 4269 4270 4271 4272 4273 4274 4275 4276 4277 4278 4279 4280 4281 4282 4283 4284 4285 4286 4287 4288 4289 4290 4291 4292 4293 4294 4295 4296 4297 4298 4299 4300 4301 4302 4303 4304 4305 4306 4307 4308 4309 4310 4311 4312 4313 4314 4315 4316 4317 4318 4319 4320 4321 4322 4323 4324 4325 4326 4327 4328 4329 4330 4331 4332 4333 4334 4335 4336 4337 4338 4339 4340 4341 4342 4343 4344 4345 4346 4347 4348 4349 4350 4351 4352 4353 4354 4355 4356 4357 4358 4359 4360 4361 4362 4363 4364 4365 4366 4367 4368 4369 4370 4371 4372 4373 4374 4375 4376 4377 4378 4379 4380 4381 4382 4383 4384 4385 4386 4387 4388 4389 4390 4391 4392 4393 4394 4395 4396 4397 4398 4399 4400 4401 4402 4403 4404 4405 4406 4407 4408 4409 4410 4411 4412 4413 4414 4415 4416 4417 4418 4419 4420 4421 4422 4423 4424 4425 4426 4427 4428 4429 4430 4431 4432 4433 4434 4435 4436 4437 4438 4439 4440 4441 4442 4443 4444 4445 4446 4447 4448 4449 4450 4451 4452 4453 4454 4455 4456 4457 4458 4459 4460 4461 4462 4463 4464 4465 4466 4467 4468 4469 4470 4471 4472 4473 4474 4475 4476 4477 4478 4479 4480 4481 4482 4483 4484 4485 4486 4487 4488 4489 4490 4491 4492 4493 4494 4495 4496 4497 4498 4499 4500 4501 4502 4503 4504 4505 4506 4507 4508 4509 4510 4511 4512 4513 4514 4515 4516 4517 4518 4519 4520 4521 4522 4523 4524 4525 4526 4527 4528 4529 4530 4531 4532 4533 4534 4535 4536 4537 4538 4539 4540 4541 4542 4543 4544 4545 4546 4547 4548 4549 4550 4551 4552 4553 4554 4555 4556 4557 4558 4559 4560 4561 4562 4563 4564 4565 4566 4567 4568 4569 4570 4571 4572 4573 4574 4575 4576 4577 4578 4579 4580 4581 4582 4583 4584 4585 4586 4587 4588 4589 4590 4591 4592 4593 4594 4595 4596 4597 4598 4599 4600 4601 4602 4603 4604 4605 4606 4607 4608 4609 4610 4611 4612 4613 4614 4615 4616 4617 4618 4619 
4620 4621 4622 4623 4624 4625 4626 4627 4628 4629 4630 4631 4632 4633 4634 4635 4636 4637 4638 4639 4640 4641 4642 4643 4644 4645 4646 4647 4648 4649 4650 4651 4652 4653 4654 4655 4656 4657 4658 4659 4660 4661 4662 4663 4664 4665 4666 4667 4668 4669 4670 4671 4672 4673 4674 4675 4676 4677 4678 4679 4680 4681 4682 4683 4684 4685 4686 4687 4688 4689 4690 4691 4692 4693 4694 4695 4696 4697 4698 4699 4700 4701 4702 4703 4704 4705 4706 4707 4708 4709 4710 4711 4712 4713 4714 4715 4716 4717 4718 4719 4720 4721 4722 4723 4724 4725 4726 4727 4728 4729 4730 4731 4732 4733 4734 4735 4736 4737 4738 4739 4740 4741 4742 4743 4744 4745 4746 4747 4748 4749 4750 4751 4752 4753 4754 4755 4756 4757 4758 4759 4760 4761 4762 4763 4764 4765 4766 4767 4768 4769 4770 4771 4772 4773 4774 4775 4776 4777 4778 4779 4780 4781 4782 4783 4784 4785 4786 4787 4788 4789 4790 4791 4792 4793 4794 4795 4796 4797 4798 4799 4800 4801 4802 4803 4804 4805 4806 4807 4808 4809 4810 4811 4812 4813 4814 4815 4816 4817 4818 4819 4820 4821 4822 4823 4824 4825 4826 4827 4828 4829 4830 4831 4832 4833 4834 4835 4836 4837 4838 4839 4840 4841 4842 4843 4844 4845 4846 4847 4848 4849 4850 4851 4852 4853 4854 4855 4856 4857 4858 4859 4860 4861 4862 4863 4864 4865 4866 4867 4868 4869 4870 4871 4872 4873 4874 4875 4876 4877 4878 4879 4880 4881 4882 4883 4884 4885 4886 4887 4888 4889 4890 4891 4892 4893 4894 4895 4896 4897 4898 4899 4900 4901 4902 4903 4904 4905 4906 4907 4908 4909 4910 4911 4912 4913 4914 4915 4916 4917 4918 4919 4920 4921 4922 4923 4924 4925 4926 4927 4928 4929 4930 4931 4932 4933 4934 4935 4936 4937 4938 4939 4940 4941 4942 4943 4944 4945 4946 4947 4948 4949 4950 4951 4952 4953 4954 4955 4956 4957 4958 4959 4960 4961 4962 4963 4964 4965 4966 4967 4968 4969 4970 4971 4972 4973 4974 4975 4976 4977 4978 4979 4980 4981 4982 4983 4984 4985 4986 4987 4988 4989 4990 4991 4992 4993 4994 4995 4996 4997 4998 4999 5000 5001 5002 5003 5004 5005 5006 5007 5008 5009 5010 5011 5012 5013 5014 5015 5016 5017 5018 5019 
5020 5021 5022 5023 5024 5025 5026 5027 5028 5029 5030 5031 5032 5033 5034 5035 5036 5037 5038 5039 5040 5041 5042 5043 5044 5045 5046 5047 5048 5049 5050 5051 5052 5053 5054 5055 5056 5057 5058 5059 5060 5061 5062 5063 5064 5065 5066 5067 5068 5069 5070 5071 5072 5073 5074 5075 5076 5077 5078 5079 5080 5081 5082 5083 5084 5085 5086 5087 5088 5089 5090 5091 5092 5093 5094 5095 5096 5097 5098 5099 5100 5101 5102 5103 5104 5105 5106 5107 5108 5109 5110 5111 5112 5113 5114 5115 5116 5117 5118 5119 5120 5121 5122 5123 5124 5125 5126 5127 5128 5129 5130 5131 5132 5133 5134 5135 5136 5137 5138 5139 5140 5141 5142 5143 5144 5145 5146 5147 5148 5149 5150 5151 5152 5153 5154 5155 5156 5157 5158 5159 5160 5161 5162 5163 5164 5165 5166 5167 5168 5169 5170 5171 5172 5173 5174 5175 5176 5177 5178 5179 5180 5181 5182 5183 5184 5185 5186 5187 5188 5189 5190 5191 5192 5193 5194 5195 5196 5197 5198 5199 5200 5201 5202 5203 5204 5205 5206 5207 5208 5209 5210 5211 5212 5213 5214 5215 5216 5217 5218 5219 5220 5221 5222 5223 5224 5225 5226 5227 5228 5229 5230 5231 5232 5233 5234 5235 5236 5237 5238 5239 5240 5241 5242 5243 5244 5245 5246 5247 5248 5249 5250 5251 5252 5253 5254 5255 5256 5257 5258 5259 5260 5261 5262 5263 5264 5265 5266 5267 5268 5269 5270 5271 5272 5273 5274 5275 5276 5277 5278 5279 5280 5281 5282 5283 5284 5285 5286 5287 5288 5289 5290 5291 5292 5293 5294 5295 5296 5297 5298 5299 5300 5301 5302 5303 5304 5305 5306 5307 5308 5309 5310 5311 5312 5313 5314 5315 5316 5317 5318 5319 5320 5321 5322 5323 5324 5325 5326 5327 5328 5329 5330 5331 5332 5333 5334 5335 5336 5337 5338 5339 5340 5341 5342 5343 5344 5345 5346 5347 5348 5349 5350 5351 5352 5353 5354 5355 5356 5357 5358 5359 5360 5361 5362 5363 5364 5365 5366 5367 5368 5369 5370 5371 5372 5373 5374 5375 5376 5377 5378 5379 5380 5381 5382 5383 5384 5385 5386 5387 5388 5389 5390 5391 5392 5393 5394 5395 5396 5397 5398 5399 5400 5401 5402 5403 5404 5405 5406 5407 5408 5409 5410 5411 5412 5413 5414 5415 5416 5417 5418 5419 
5420 5421 5422 5423 5424 5425 5426 5427 5428 5429 5430 5431 5432 5433 5434 5435 5436 5437 5438 5439 5440 5441 5442 5443 5444 5445 5446 5447 5448 5449 5450 5451 5452 5453 5454 5455 5456 5457 5458 5459 5460 5461 5462 5463 5464 5465 5466 5467 5468 5469 5470 5471 5472 5473 5474 5475 5476 5477 5478 5479 5480 5481 5482 5483 5484 5485 5486 5487 5488 5489 5490 5491 5492 5493 5494 5495 5496 5497 5498 5499 5500 5501 5502 5503 5504 5505 5506 5507 5508 5509 5510 5511 5512 5513 5514 5515 5516 5517 5518 5519 5520 5521 5522 5523 5524 5525 5526 5527 5528 5529 5530 5531 5532 5533 5534 5535 5536 5537 5538 5539 5540 5541 5542 5543 5544 5545 5546 5547 5548 5549 5550 5551 5552 5553 5554 5555 5556 5557 5558 5559 5560 5561 5562 5563 5564 5565 5566 5567 5568 5569 5570 5571 5572 5573 5574 5575 5576 5577 5578 5579 5580 5581 5582 5583 5584 5585 5586 5587 5588 5589 5590 5591 5592 5593 5594 5595 5596 5597 5598 5599 5600 5601 5602 5603 5604 5605 5606 5607 5608 5609 5610 5611 5612 5613 5614 5615 5616 5617 5618 5619 5620 5621 5622 5623 5624 5625 5626 5627 5628 5629 5630 5631 5632 5633 5634 5635 5636 5637 5638 5639 5640 5641 5642 5643 5644 5645 5646 5647 5648 5649 5650 5651 5652 5653 5654 5655 5656 5657 5658 5659 5660 5661 5662 5663 5664 5665 5666 5667 5668 5669 5670 5671 5672 5673 5674 5675 5676 5677 5678 5679 5680 5681 5682 5683 5684 5685 5686 5687 5688 5689 5690 5691 5692 5693 5694 5695 5696 5697 5698 5699 5700 5701 5702 5703 5704 5705 5706 5707 5708 5709 5710 5711 5712 5713 5714 5715 5716 5717 5718 5719 5720 5721 5722 5723 5724 5725 5726 5727 5728 5729 5730 5731 5732 5733 5734 5735 5736 5737 5738 5739 5740 5741 5742 5743 5744 5745 5746 5747 5748 5749 5750 5751 5752 5753 5754 5755 5756 5757 5758 5759 5760 5761 5762 5763 5764 5765 5766 5767 5768 5769 5770 5771 5772 5773 5774 5775 5776 5777 5778 5779 5780 5781 5782 5783 5784 5785 5786 5787 5788 5789 5790 5791 5792 5793 5794 5795 5796 5797 5798 5799 5800 5801 5802 5803 5804 5805 5806 5807 5808 5809 5810 5811 5812 5813 5814 5815 5816 5817 5818 5819 
5820 5821 5822 5823 5824 5825 5826 5827 5828 5829 5830 5831 5832 5833 5834 5835 5836 5837 5838 5839 5840 5841 5842 5843 5844 5845 5846 5847 5848 5849 5850 5851 5852 5853 5854 5855 5856 5857 5858 5859 5860 5861 5862 5863 5864 5865 5866 5867 5868 5869 5870 5871 5872 5873 5874 5875 5876 5877 5878 5879 5880 5881 5882 5883 5884 5885 5886 5887 5888 5889 5890 5891 5892 5893 5894 5895 5896 5897 5898 5899 5900 5901 5902 5903 5904 5905 5906 5907 5908 5909 5910 5911 5912 5913 5914 5915 5916 5917 5918 5919 5920 5921 5922 5923 5924 5925 5926 5927 5928 5929 5930 5931 5932 5933 5934 5935 5936 5937 5938 5939 5940 5941 5942 5943 5944 5945 5946 5947 5948 5949 5950 5951 5952 5953 5954 5955 5956 5957 5958 5959 5960 5961 5962 5963 5964 5965 5966 5967 5968 5969 5970 5971 5972 5973 5974 5975 5976 5977 5978 5979 5980 5981 5982 5983 5984 5985 5986 5987 5988 5989 5990 5991 5992 5993 5994 5995 5996 5997 5998 5999 6000 6001 6002 6003 6004 6005 6006 6007 6008 6009 6010 6011 6012 6013 6014 6015 6016 6017 6018 6019 6020 6021 6022 6023 6024 6025 6026 6027 6028 6029 6030 6031 6032 6033 6034 6035 6036 6037 6038 6039 6040 6041 6042 6043 6044 6045 6046 6047 6048 6049 6050 6051 6052 6053 6054 6055 6056 6057 6058 6059 6060 6061 6062 6063 6064 6065 6066 6067 6068 6069 6070 6071 6072 6073 6074 6075 6076 6077 6078 6079 6080 6081 6082 6083 6084 6085 6086 6087 6088 6089 6090 6091 6092 6093 6094 6095 6096 6097 6098 6099 6100 6101 6102 6103 6104 6105 6106 6107 6108 6109 6110 6111 6112 6113 6114 6115 6116 6117 6118 6119 6120 6121 6122 6123 6124 6125 6126 6127 6128 6129 6130 6131 6132 6133 6134 6135 6136 6137 6138 6139 6140 6141 6142 6143 6144 6145 6146 6147 6148 6149 6150 6151 6152 6153 6154 6155 6156 6157 6158 6159 6160 6161 6162 6163 6164 6165 6166 6167 6168 6169 6170 6171 6172 6173 6174 6175 6176 6177 6178 6179 6180 6181 6182 6183 6184 6185 6186 6187 6188 6189 6190 6191 6192 6193 6194 6195 6196 6197 6198 6199 6200 6201 6202 6203 6204 6205 6206 6207 6208 6209 6210 6211 6212 6213 6214 6215 6216 6217 6218 6219 
6220 6221 6222 6223 6224 6225 6226 6227 6228 6229 6230 6231 6232 6233 6234 6235 6236 6237 6238 6239 6240 6241 6242 6243 6244 6245 6246 6247 6248 6249 6250 6251 6252 6253 6254 6255 6256 6257 6258 6259 6260 6261 6262 6263 6264 6265 6266 6267 6268 6269 6270 6271 6272 6273 6274 6275 6276 6277 6278 6279 6280 6281 6282 6283 6284 6285 6286 6287 6288 6289 6290 6291 6292 6293 6294 6295 6296 6297 6298 6299 6300 6301 6302 6303 6304 6305 6306 6307 6308 6309 6310 6311 6312 6313 6314 6315 6316 6317 6318 6319 6320 6321 6322 6323 6324 6325 6326 6327 6328 6329 6330 6331 6332 6333 6334 6335 6336 6337 6338 6339 6340 6341 6342 6343 6344 6345 6346 6347 6348 6349 6350 6351 6352 6353 6354 6355 6356 6357 6358 6359 6360 6361 6362 6363 6364 6365 6366 6367 6368 6369 6370 6371 6372 6373 6374 6375 6376 6377 6378 6379 6380 6381 6382 6383 6384 6385 6386 6387 6388 6389 6390 6391 6392 6393 6394 6395 6396 6397 6398 6399 6400 6401 6402 6403 6404 6405 6406 6407 6408 6409 6410 6411 6412 6413 6414 6415 6416 6417 6418 6419 6420 6421 6422 6423 6424 6425 6426 6427 6428 6429 6430 6431 6432 6433 6434 6435 6436 6437 6438 6439 6440 6441 6442 6443 6444 6445 6446 6447 6448 6449 6450 6451 6452 6453 6454 6455 6456 6457 6458 6459 6460 6461 6462 6463 6464 6465 6466 6467 6468 6469 6470 6471 6472 6473 6474 6475 6476 6477 6478 6479 6480 6481 6482 6483 6484 6485 6486 6487 6488 6489 6490 6491 6492 6493 6494 6495 6496 6497 6498 6499 6500 6501 6502 6503 6504 6505 6506 6507 6508 6509 6510 6511 6512 6513 6514 6515 6516 6517 6518 6519 6520 6521 6522 6523 6524 6525 6526 6527 6528 6529 6530 6531 6532 6533 6534 6535 6536 6537 6538 6539 6540 6541 6542 6543 6544 6545 6546 6547 6548 6549 6550 6551 6552 6553 6554 6555 6556 6557 6558 6559 6560 6561 6562 6563 6564 6565 6566 6567 6568 6569 6570 6571 6572 6573 6574 6575 6576 6577 6578 6579 6580 6581 6582 6583 6584 6585 6586 6587 6588 6589 6590 6591 6592 6593 6594 6595 6596 6597 6598 6599 6600 6601 6602 6603 6604 6605 6606 6607 6608 6609 6610 6611 6612 6613 6614 6615 6616 6617 6618 6619 
6620 6621 6622 6623 6624 6625 6626 6627 6628 6629 6630 6631 6632 6633 6634 6635 6636 6637 6638 6639 6640 6641 6642 6643 6644 6645 6646 6647 6648 6649 6650 6651 6652 6653 6654 6655 6656 6657 6658 6659 6660 6661 6662 6663 6664 6665 6666 6667 6668 6669 6670 6671 6672 6673 6674 6675 6676 6677 6678 6679 6680 6681 6682 6683 6684 6685 6686 6687 6688 6689 6690 6691 6692 6693 6694 6695 6696 6697 6698 6699 6700 6701 6702 6703 6704 6705 6706 6707 6708 6709 6710 6711 6712 6713 6714 6715 6716 6717 6718 6719 6720 6721 6722 6723 6724 6725 6726 6727 6728 6729 6730 6731 6732 6733 6734 6735 6736 6737 6738 6739 6740 6741 6742 6743 6744 6745 6746 6747 6748 6749 6750 6751 6752 6753 6754 6755 6756 6757 6758 6759 6760 6761 6762 6763 6764 6765 6766 6767 6768 6769 6770 6771 6772 6773 6774 6775 6776 6777 6778 6779 6780 6781 6782 6783 6784 6785 6786 6787 6788 6789 6790 6791 6792 6793 6794 6795 6796 6797 6798 6799 6800 6801 6802 6803 6804 6805 6806 6807 6808 6809 6810 6811 6812 6813 6814 6815 6816 6817 6818 6819 6820 6821 6822 6823 6824 6825 6826 6827 6828 6829 6830 6831 6832 6833 6834 6835 6836 6837 6838 6839 6840 6841 6842 6843 6844 6845 6846 6847 6848 6849 6850 6851 6852 6853 6854 6855 6856 6857 6858 6859 6860 6861 6862 6863 6864 6865 6866 6867 6868 6869 6870 6871 6872 6873 6874 6875 6876 6877 6878 6879 6880 6881 6882 6883 6884 6885 6886 6887 6888 6889 6890 // SPDX-License-Identifier: GPL-2.0-or-later // // core.c -- Voltage/Current Regulator framework. // // Copyright 2007, 2008 Wolfson Microelectronics PLC. // Copyright 2008 SlimLogic Ltd. 
// // Author: Liam Girdwood <lrg@slimlogic.co.uk> #include <linux/kernel.h> #include <linux/init.h> #include <linux/debugfs.h> #include <linux/device.h> #include <linux/slab.h> #include <linux/async.h> #include <linux/err.h> #include <linux/mutex.h> #include <linux/suspend.h> #include <linux/delay.h> #include <linux/gpio/consumer.h> #include <linux/of.h> #include <linux/reboot.h> #include <linux/regmap.h> #include <linux/regulator/of_regulator.h> #include <linux/regulator/consumer.h> #include <linux/regulator/coupler.h> #include <linux/regulator/driver.h> #include <linux/regulator/machine.h> #include <linux/module.h> #define CREATE_TRACE_POINTS #include <trace/events/regulator.h> #include "dummy.h" #include "internal.h" #include "regnl.h" static DEFINE_WW_CLASS(regulator_ww_class); static DEFINE_MUTEX(regulator_nesting_mutex); static DEFINE_MUTEX(regulator_list_mutex); static LIST_HEAD(regulator_map_list); static LIST_HEAD(regulator_ena_gpio_list); static LIST_HEAD(regulator_supply_alias_list); static LIST_HEAD(regulator_coupler_list); static bool has_full_constraints; static const struct bus_type regulator_bus; static struct dentry *debugfs_root; /* * struct regulator_map * * Used to provide symbolic supply names to devices. */ struct regulator_map { struct list_head list; const char *dev_name; /* The dev_name() for the consumer */ const char *supply; struct regulator_dev *regulator; }; /* * struct regulator_enable_gpio * * Management for shared enable GPIO pin */ struct regulator_enable_gpio { struct list_head list; struct gpio_desc *gpiod; u32 enable_count; /* a number of enabled shared GPIO */ u32 request_count; /* a number of requested shared GPIO */ }; /* * struct regulator_supply_alias * * Used to map lookups for a supply onto an alternative device. */ struct regulator_supply_alias { struct list_head list; struct device *src_dev; const char *src_supply; struct device *alias_dev; const char *alias_supply; }; /* * Work item used to forward regulator events. 
* * @work: workqueue entry * @rdev: regulator device to notify (consumer receiving the forwarded event) * @event: event code to be forwarded */ struct regulator_event_work { struct work_struct work; struct regulator_dev *rdev; unsigned long event; }; static int _regulator_enable(struct regulator *regulator); static int _regulator_is_enabled(struct regulator_dev *rdev); static int _regulator_disable(struct regulator *regulator); static int _regulator_get_error_flags(struct regulator_dev *rdev, unsigned int *flags); static int _regulator_get_current_limit(struct regulator_dev *rdev); static unsigned int _regulator_get_mode(struct regulator_dev *rdev); static int _notifier_call_chain(struct regulator_dev *rdev, unsigned long event, void *data); static int _regulator_do_set_voltage(struct regulator_dev *rdev, int min_uV, int max_uV); static int regulator_balance_voltage(struct regulator_dev *rdev, suspend_state_t state); static struct regulator *create_regulator(struct regulator_dev *rdev, struct device *dev, const char *supply_name); static void destroy_regulator(struct regulator *regulator); static void _regulator_put(struct regulator *regulator); const char *rdev_get_name(struct regulator_dev *rdev) { if (rdev->constraints && rdev->constraints->name) return rdev->constraints->name; else if (rdev->desc->name) return rdev->desc->name; else return ""; } EXPORT_SYMBOL_GPL(rdev_get_name); static bool have_full_constraints(void) { return has_full_constraints || of_have_populated_dt(); } static bool regulator_ops_is_valid(struct regulator_dev *rdev, int ops) { if (!rdev->constraints) { rdev_err(rdev, "no constraints\n"); return false; } if (rdev->constraints->valid_ops_mask & ops) return true; return false; } /** * regulator_lock_nested - lock a single regulator * @rdev: regulator source * @ww_ctx: w/w mutex acquire context * * This function can be called many times by one task on * a single regulator and its mutex will be locked only * once. 
If a task, which is calling this function is other
 * than the one, which initially locked the mutex, it will
 * wait on mutex.
 *
 * Return: 0 on success or a negative error number on failure.
 */
static inline int regulator_lock_nested(struct regulator_dev *rdev,
					struct ww_acquire_ctx *ww_ctx)
{
	bool lock = false;
	int ret = 0;

	/* ref_cnt and mutex_owner are only modified under regulator_nesting_mutex */
	mutex_lock(&regulator_nesting_mutex);

	if (!ww_mutex_trylock(&rdev->mutex, ww_ctx)) {
		/*
		 * trylock failed: if the current task is the recorded owner
		 * this is a nested call and only the count is bumped,
		 * otherwise the mutex has to be taken the blocking way.
		 */
		if (rdev->mutex_owner == current)
			rdev->ref_cnt++;
		else
			lock = true;

		if (lock) {
			/* release the nesting mutex while waiting on the ww mutex */
			mutex_unlock(&regulator_nesting_mutex);
			ret = ww_mutex_lock(&rdev->mutex, ww_ctx);
			mutex_lock(&regulator_nesting_mutex);
		}
	} else {
		lock = true;
	}

	/* record ownership unless the blocking lock returned -EDEADLK */
	if (lock && ret != -EDEADLK) {
		rdev->ref_cnt++;
		rdev->mutex_owner = current;
	}

	mutex_unlock(&regulator_nesting_mutex);

	return ret;
}

/**
 * regulator_lock - lock a single regulator
 * @rdev:		regulator source
 *
 * This function can be called many times by one task on
 * a single regulator and its mutex will be locked only
 * once. If a task, which is calling this function is other
 * than the one, which initially locked the mutex, it will
 * wait on mutex.
 *
 * Calls regulator_lock_nested() without an acquire context and
 * discards its return value.
 */
static void regulator_lock(struct regulator_dev *rdev)
{
	regulator_lock_nested(rdev, NULL);
}

/**
 * regulator_unlock - unlock a single regulator
 * @rdev:		regulator_source
 *
 * This function unlocks the mutex when the
 * reference counter reaches 0.
 */
static void regulator_unlock(struct regulator_dev *rdev)
{
	mutex_lock(&regulator_nesting_mutex);

	/* last reference gone: clear the owner and release the ww mutex */
	if (--rdev->ref_cnt == 0) {
		rdev->mutex_owner = NULL;
		ww_mutex_unlock(&rdev->mutex);
	}

	/* a negative count means more unlocks than locks were issued */
	WARN_ON_ONCE(rdev->ref_cnt < 0);

	mutex_unlock(&regulator_nesting_mutex);
}

/**
 * regulator_lock_two - lock two regulators
 * @rdev1:		first regulator
 * @rdev2:		second regulator
 * @ww_ctx:		w/w mutex acquire context
 *
 * Locks both rdevs using the regulator_ww_class.
*/ static void regulator_lock_two(struct regulator_dev *rdev1, struct regulator_dev *rdev2, struct ww_acquire_ctx *ww_ctx) { struct regulator_dev *held, *contended; int ret; ww_acquire_init(ww_ctx, &regulator_ww_class); /* Try to just grab both of them */ ret = regulator_lock_nested(rdev1, ww_ctx); WARN_ON(ret); ret = regulator_lock_nested(rdev2, ww_ctx); if (ret != -EDEADLOCK) { WARN_ON(ret); goto exit; } held = rdev1; contended = rdev2; while (true) { regulator_unlock(held); ww_mutex_lock_slow(&contended->mutex, ww_ctx); contended->ref_cnt++; contended->mutex_owner = current; swap(held, contended); ret = regulator_lock_nested(contended, ww_ctx); if (ret != -EDEADLOCK) { WARN_ON(ret); break; } } exit: ww_acquire_done(ww_ctx); } /** * regulator_unlock_two - unlock two regulators * @rdev1: first regulator * @rdev2: second regulator * @ww_ctx: w/w mutex acquire context * * The inverse of regulator_lock_two(). */ static void regulator_unlock_two(struct regulator_dev *rdev1, struct regulator_dev *rdev2, struct ww_acquire_ctx *ww_ctx) { regulator_unlock(rdev2); regulator_unlock(rdev1); ww_acquire_fini(ww_ctx); } static bool regulator_supply_is_couple(struct regulator_dev *rdev) { struct regulator_dev *c_rdev; int i; for (i = 1; i < rdev->coupling_desc.n_coupled; i++) { c_rdev = rdev->coupling_desc.coupled_rdevs[i]; if (rdev->supply->rdev == c_rdev) return true; } return false; } static void regulator_unlock_recursive(struct regulator_dev *rdev, unsigned int n_coupled) { struct regulator_dev *c_rdev, *supply_rdev; int i, supply_n_coupled; for (i = n_coupled; i > 0; i--) { c_rdev = rdev->coupling_desc.coupled_rdevs[i - 1]; if (!c_rdev) continue; if (c_rdev->supply && !regulator_supply_is_couple(c_rdev)) { supply_rdev = c_rdev->supply->rdev; supply_n_coupled = supply_rdev->coupling_desc.n_coupled; regulator_unlock_recursive(supply_rdev, supply_n_coupled); } regulator_unlock(c_rdev); } } static int regulator_lock_recursive(struct regulator_dev *rdev, struct regulator_dev 
**new_contended_rdev, struct regulator_dev **old_contended_rdev, struct ww_acquire_ctx *ww_ctx) { struct regulator_dev *c_rdev; int i, err; for (i = 0; i < rdev->coupling_desc.n_coupled; i++) { c_rdev = rdev->coupling_desc.coupled_rdevs[i]; if (!c_rdev) continue; if (c_rdev != *old_contended_rdev) { err = regulator_lock_nested(c_rdev, ww_ctx); if (err) { if (err == -EDEADLK) { *new_contended_rdev = c_rdev; goto err_unlock; } /* shouldn't happen */ WARN_ON_ONCE(err != -EALREADY); } } else { *old_contended_rdev = NULL; } if (c_rdev->supply && !regulator_supply_is_couple(c_rdev)) { err = regulator_lock_recursive(c_rdev->supply->rdev, new_contended_rdev, old_contended_rdev, ww_ctx); if (err) { regulator_unlock(c_rdev); goto err_unlock; } } } return 0; err_unlock: regulator_unlock_recursive(rdev, i); return err; } /** * regulator_unlock_dependent - unlock regulator's suppliers and coupled * regulators * @rdev: regulator source * @ww_ctx: w/w mutex acquire context * * Unlock all regulators related with rdev by coupling or supplying. */ static void regulator_unlock_dependent(struct regulator_dev *rdev, struct ww_acquire_ctx *ww_ctx) { regulator_unlock_recursive(rdev, rdev->coupling_desc.n_coupled); ww_acquire_fini(ww_ctx); } /** * regulator_lock_dependent - lock regulator's suppliers and coupled regulators * @rdev: regulator source * @ww_ctx: w/w mutex acquire context * * This function as a wrapper on regulator_lock_recursive(), which locks * all regulators related with rdev by coupling or supplying. 
*/ static void regulator_lock_dependent(struct regulator_dev *rdev, struct ww_acquire_ctx *ww_ctx) { struct regulator_dev *new_contended_rdev = NULL; struct regulator_dev *old_contended_rdev = NULL; int err; mutex_lock(&regulator_list_mutex); ww_acquire_init(ww_ctx, &regulator_ww_class); do { if (new_contended_rdev) { ww_mutex_lock_slow(&new_contended_rdev->mutex, ww_ctx); old_contended_rdev = new_contended_rdev; old_contended_rdev->ref_cnt++; old_contended_rdev->mutex_owner = current; } err = regulator_lock_recursive(rdev, &new_contended_rdev, &old_contended_rdev, ww_ctx); if (old_contended_rdev) regulator_unlock(old_contended_rdev); } while (err == -EDEADLK); ww_acquire_done(ww_ctx); mutex_unlock(&regulator_list_mutex); } /* Platform voltage constraint check */ int regulator_check_voltage(struct regulator_dev *rdev, int *min_uV, int *max_uV) { BUG_ON(*min_uV > *max_uV); if (!regulator_ops_is_valid(rdev, REGULATOR_CHANGE_VOLTAGE)) { rdev_err(rdev, "voltage operation not allowed\n"); return -EPERM; } if (*max_uV > rdev->constraints->max_uV) *max_uV = rdev->constraints->max_uV; if (*min_uV < rdev->constraints->min_uV) *min_uV = rdev->constraints->min_uV; if (*min_uV > *max_uV) { rdev_err(rdev, "unsupportable voltage range: %d-%duV\n", *min_uV, *max_uV); return -EINVAL; } return 0; } /* return 0 if the state is valid */ static int regulator_check_states(suspend_state_t state) { return (state > PM_SUSPEND_MAX || state == PM_SUSPEND_TO_IDLE); } /* Make sure we select a voltage that suits the needs of all * regulator consumers */ int regulator_check_consumers(struct regulator_dev *rdev, int *min_uV, int *max_uV, suspend_state_t state) { struct regulator *regulator; struct regulator_voltage *voltage; list_for_each_entry(regulator, &rdev->consumer_list, list) { voltage = &regulator->voltage[state]; /* * Assume consumers that didn't say anything are OK * with anything in the constraint range. 
*/ if (!voltage->min_uV && !voltage->max_uV) continue; if (*max_uV > voltage->max_uV) *max_uV = voltage->max_uV; if (*min_uV < voltage->min_uV) *min_uV = voltage->min_uV; } if (*min_uV > *max_uV) { rdev_err(rdev, "Restricting voltage, %u-%uuV\n", *min_uV, *max_uV); return -EINVAL; } return 0; } /* current constraint check */ static int regulator_check_current_limit(struct regulator_dev *rdev, int *min_uA, int *max_uA) { BUG_ON(*min_uA > *max_uA); if (!regulator_ops_is_valid(rdev, REGULATOR_CHANGE_CURRENT)) { rdev_err(rdev, "current operation not allowed\n"); return -EPERM; } if (*max_uA > rdev->constraints->max_uA && rdev->constraints->max_uA) *max_uA = rdev->constraints->max_uA; if (*min_uA < rdev->constraints->min_uA) *min_uA = rdev->constraints->min_uA; if (*min_uA > *max_uA) { rdev_err(rdev, "unsupportable current range: %d-%duA\n", *min_uA, *max_uA); return -EINVAL; } return 0; } /* operating mode constraint check */ static int regulator_mode_constrain(struct regulator_dev *rdev, unsigned int *mode) { switch (*mode) { case REGULATOR_MODE_FAST: case REGULATOR_MODE_NORMAL: case REGULATOR_MODE_IDLE: case REGULATOR_MODE_STANDBY: break; default: rdev_err(rdev, "invalid mode %x specified\n", *mode); return -EINVAL; } if (!regulator_ops_is_valid(rdev, REGULATOR_CHANGE_MODE)) { rdev_err(rdev, "mode operation not allowed\n"); return -EPERM; } /* The modes are bitmasks, the most power hungry modes having * the lowest values. If the requested mode isn't supported * try higher modes. 
*/ while (*mode) { if (rdev->constraints->valid_modes_mask & *mode) return 0; *mode /= 2; } return -EINVAL; } static inline struct regulator_state * regulator_get_suspend_state(struct regulator_dev *rdev, suspend_state_t state) { if (rdev->constraints == NULL) return NULL; switch (state) { case PM_SUSPEND_STANDBY: return &rdev->constraints->state_standby; case PM_SUSPEND_MEM: return &rdev->constraints->state_mem; case PM_SUSPEND_MAX: return &rdev->constraints->state_disk; default: return NULL; } } static const struct regulator_state * regulator_get_suspend_state_check(struct regulator_dev *rdev, suspend_state_t state) { const struct regulator_state *rstate; rstate = regulator_get_suspend_state(rdev, state); if (rstate == NULL) return NULL; /* If we have no suspend mode configuration don't set anything; * only warn if the driver implements set_suspend_voltage or * set_suspend_mode callback. */ if (rstate->enabled != ENABLE_IN_SUSPEND && rstate->enabled != DISABLE_IN_SUSPEND) { if (rdev->desc->ops->set_suspend_voltage || rdev->desc->ops->set_suspend_mode) rdev_warn(rdev, "No configuration\n"); return NULL; } return rstate; } static ssize_t microvolts_show(struct device *dev, struct device_attribute *attr, char *buf) { struct regulator_dev *rdev = dev_get_drvdata(dev); int uV; regulator_lock(rdev); uV = regulator_get_voltage_rdev(rdev); regulator_unlock(rdev); if (uV < 0) return uV; return sprintf(buf, "%d\n", uV); } static DEVICE_ATTR_RO(microvolts); static ssize_t microamps_show(struct device *dev, struct device_attribute *attr, char *buf) { struct regulator_dev *rdev = dev_get_drvdata(dev); return sprintf(buf, "%d\n", _regulator_get_current_limit(rdev)); } static DEVICE_ATTR_RO(microamps); static ssize_t name_show(struct device *dev, struct device_attribute *attr, char *buf) { struct regulator_dev *rdev = dev_get_drvdata(dev); return sprintf(buf, "%s\n", rdev_get_name(rdev)); } static DEVICE_ATTR_RO(name); static const char *regulator_opmode_to_str(int mode) { 
switch (mode) { case REGULATOR_MODE_FAST: return "fast"; case REGULATOR_MODE_NORMAL: return "normal"; case REGULATOR_MODE_IDLE: return "idle"; case REGULATOR_MODE_STANDBY: return "standby"; } return "unknown"; } static ssize_t regulator_print_opmode(char *buf, int mode) { return sprintf(buf, "%s\n", regulator_opmode_to_str(mode)); } static ssize_t opmode_show(struct device *dev, struct device_attribute *attr, char *buf) { struct regulator_dev *rdev = dev_get_drvdata(dev); return regulator_print_opmode(buf, _regulator_get_mode(rdev)); } static DEVICE_ATTR_RO(opmode); static ssize_t regulator_print_state(char *buf, int state) { if (state > 0) return sprintf(buf, "enabled\n"); else if (state == 0) return sprintf(buf, "disabled\n"); else return sprintf(buf, "unknown\n"); } static ssize_t state_show(struct device *dev, struct device_attribute *attr, char *buf) { struct regulator_dev *rdev = dev_get_drvdata(dev); ssize_t ret; regulator_lock(rdev); ret = regulator_print_state(buf, _regulator_is_enabled(rdev)); regulator_unlock(rdev); return ret; } static DEVICE_ATTR_RO(state); static ssize_t status_show(struct device *dev, struct device_attribute *attr, char *buf) { struct regulator_dev *rdev = dev_get_drvdata(dev); int status; char *label; status = rdev->desc->ops->get_status(rdev); if (status < 0) return status; switch (status) { case REGULATOR_STATUS_OFF: label = "off"; break; case REGULATOR_STATUS_ON: label = "on"; break; case REGULATOR_STATUS_ERROR: label = "error"; break; case REGULATOR_STATUS_FAST: label = "fast"; break; case REGULATOR_STATUS_NORMAL: label = "normal"; break; case REGULATOR_STATUS_IDLE: label = "idle"; break; case REGULATOR_STATUS_STANDBY: label = "standby"; break; case REGULATOR_STATUS_BYPASS: label = "bypass"; break; case REGULATOR_STATUS_UNDEFINED: label = "undefined"; break; default: return -ERANGE; } return sprintf(buf, "%s\n", label); } static DEVICE_ATTR_RO(status); static ssize_t min_microamps_show(struct device *dev, struct device_attribute 
*attr, char *buf) { struct regulator_dev *rdev = dev_get_drvdata(dev); if (!rdev->constraints) return sprintf(buf, "constraint not defined\n"); return sprintf(buf, "%d\n", rdev->constraints->min_uA); } static DEVICE_ATTR_RO(min_microamps); static ssize_t max_microamps_show(struct device *dev, struct device_attribute *attr, char *buf) { struct regulator_dev *rdev = dev_get_drvdata(dev); if (!rdev->constraints) return sprintf(buf, "constraint not defined\n"); return sprintf(buf, "%d\n", rdev->constraints->max_uA); } static DEVICE_ATTR_RO(max_microamps); static ssize_t min_microvolts_show(struct device *dev, struct device_attribute *attr, char *buf) { struct regulator_dev *rdev = dev_get_drvdata(dev); if (!rdev->constraints) return sprintf(buf, "constraint not defined\n"); return sprintf(buf, "%d\n", rdev->constraints->min_uV); } static DEVICE_ATTR_RO(min_microvolts); static ssize_t max_microvolts_show(struct device *dev, struct device_attribute *attr, char *buf) { struct regulator_dev *rdev = dev_get_drvdata(dev); if (!rdev->constraints) return sprintf(buf, "constraint not defined\n"); return sprintf(buf, "%d\n", rdev->constraints->max_uV); } static DEVICE_ATTR_RO(max_microvolts); static ssize_t requested_microamps_show(struct device *dev, struct device_attribute *attr, char *buf) { struct regulator_dev *rdev = dev_get_drvdata(dev); struct regulator *regulator; int uA = 0; regulator_lock(rdev); list_for_each_entry(regulator, &rdev->consumer_list, list) { if (regulator->enable_count) uA += regulator->uA_load; } regulator_unlock(rdev); return sprintf(buf, "%d\n", uA); } static DEVICE_ATTR_RO(requested_microamps); static ssize_t num_users_show(struct device *dev, struct device_attribute *attr, char *buf) { struct regulator_dev *rdev = dev_get_drvdata(dev); return sprintf(buf, "%d\n", rdev->use_count); } static DEVICE_ATTR_RO(num_users); static ssize_t type_show(struct device *dev, struct device_attribute *attr, char *buf) { struct regulator_dev *rdev = 
dev_get_drvdata(dev); switch (rdev->desc->type) { case REGULATOR_VOLTAGE: return sprintf(buf, "voltage\n"); case REGULATOR_CURRENT: return sprintf(buf, "current\n"); } return sprintf(buf, "unknown\n"); } static DEVICE_ATTR_RO(type); static ssize_t suspend_mem_microvolts_show(struct device *dev, struct device_attribute *attr, char *buf) { struct regulator_dev *rdev = dev_get_drvdata(dev); return sprintf(buf, "%d\n", rdev->constraints->state_mem.uV); } static DEVICE_ATTR_RO(suspend_mem_microvolts); static ssize_t suspend_disk_microvolts_show(struct device *dev, struct device_attribute *attr, char *buf) { struct regulator_dev *rdev = dev_get_drvdata(dev); return sprintf(buf, "%d\n", rdev->constraints->state_disk.uV); } static DEVICE_ATTR_RO(suspend_disk_microvolts); static ssize_t suspend_standby_microvolts_show(struct device *dev, struct device_attribute *attr, char *buf) { struct regulator_dev *rdev = dev_get_drvdata(dev); return sprintf(buf, "%d\n", rdev->constraints->state_standby.uV); } static DEVICE_ATTR_RO(suspend_standby_microvolts); static ssize_t suspend_mem_mode_show(struct device *dev, struct device_attribute *attr, char *buf) { struct regulator_dev *rdev = dev_get_drvdata(dev); return regulator_print_opmode(buf, rdev->constraints->state_mem.mode); } static DEVICE_ATTR_RO(suspend_mem_mode); static ssize_t suspend_disk_mode_show(struct device *dev, struct device_attribute *attr, char *buf) { struct regulator_dev *rdev = dev_get_drvdata(dev); return regulator_print_opmode(buf, rdev->constraints->state_disk.mode); } static DEVICE_ATTR_RO(suspend_disk_mode); static ssize_t suspend_standby_mode_show(struct device *dev, struct device_attribute *attr, char *buf) { struct regulator_dev *rdev = dev_get_drvdata(dev); return regulator_print_opmode(buf, rdev->constraints->state_standby.mode); } static DEVICE_ATTR_RO(suspend_standby_mode); static ssize_t suspend_mem_state_show(struct device *dev, struct device_attribute *attr, char *buf) { struct regulator_dev *rdev = 
dev_get_drvdata(dev); return regulator_print_state(buf, rdev->constraints->state_mem.enabled); } static DEVICE_ATTR_RO(suspend_mem_state); static ssize_t suspend_disk_state_show(struct device *dev, struct device_attribute *attr, char *buf) { struct regulator_dev *rdev = dev_get_drvdata(dev); return regulator_print_state(buf, rdev->constraints->state_disk.enabled); } static DEVICE_ATTR_RO(suspend_disk_state); static ssize_t suspend_standby_state_show(struct device *dev, struct device_attribute *attr, char *buf) { struct regulator_dev *rdev = dev_get_drvdata(dev); return regulator_print_state(buf, rdev->constraints->state_standby.enabled); } static DEVICE_ATTR_RO(suspend_standby_state); static ssize_t bypass_show(struct device *dev, struct device_attribute *attr, char *buf) { struct regulator_dev *rdev = dev_get_drvdata(dev); const char *report; bool bypass; int ret; ret = rdev->desc->ops->get_bypass(rdev, &bypass); if (ret != 0) report = "unknown"; else if (bypass) report = "enabled"; else report = "disabled"; return sprintf(buf, "%s\n", report); } static DEVICE_ATTR_RO(bypass); static ssize_t power_budget_milliwatt_show(struct device *dev, struct device_attribute *attr, char *buf) { struct regulator_dev *rdev = dev_get_drvdata(dev); return sprintf(buf, "%d\n", rdev->constraints->pw_budget_mW); } static DEVICE_ATTR_RO(power_budget_milliwatt); static ssize_t power_requested_milliwatt_show(struct device *dev, struct device_attribute *attr, char *buf) { struct regulator_dev *rdev = dev_get_drvdata(dev); return sprintf(buf, "%d\n", rdev->pw_requested_mW); } static DEVICE_ATTR_RO(power_requested_milliwatt); #define REGULATOR_ERROR_ATTR(name, bit) \ static ssize_t name##_show(struct device *dev, struct device_attribute *attr, \ char *buf) \ { \ int ret; \ unsigned int flags; \ struct regulator_dev *rdev = dev_get_drvdata(dev); \ ret = _regulator_get_error_flags(rdev, &flags); \ if (ret) \ return ret; \ return sysfs_emit(buf, "%d\n", !!(flags & (bit))); \ } \ static 
DEVICE_ATTR_RO(name)

/* One read-only sysfs attribute per REGULATOR_ERROR_* flag. */
REGULATOR_ERROR_ATTR(under_voltage, REGULATOR_ERROR_UNDER_VOLTAGE);
REGULATOR_ERROR_ATTR(over_current, REGULATOR_ERROR_OVER_CURRENT);
REGULATOR_ERROR_ATTR(regulation_out, REGULATOR_ERROR_REGULATION_OUT);
REGULATOR_ERROR_ATTR(fail, REGULATOR_ERROR_FAIL);
REGULATOR_ERROR_ATTR(over_temp, REGULATOR_ERROR_OVER_TEMP);
REGULATOR_ERROR_ATTR(under_voltage_warn, REGULATOR_ERROR_UNDER_VOLTAGE_WARN);
REGULATOR_ERROR_ATTR(over_current_warn, REGULATOR_ERROR_OVER_CURRENT_WARN);
REGULATOR_ERROR_ATTR(over_voltage_warn, REGULATOR_ERROR_OVER_VOLTAGE_WARN);
REGULATOR_ERROR_ATTR(over_temp_warn, REGULATOR_ERROR_OVER_TEMP_WARN);

/* Calculate the new optimum regulator operating mode based on the new total
 * consumer load. All locks held by caller
 *
 * Return: 0 when DRMS is not permitted, when the driver has neither
 * get_optimum_mode nor set_load, or when no valid mode is listed;
 * -EINVAL when the driver has neither set_mode nor set_load; otherwise
 * the result of set_load()/set_mode() or the error from
 * regulator_mode_constrain().
 */
static int drms_uA_update(struct regulator_dev *rdev)
{
	struct regulator *sibling;
	int current_uA = 0, output_uV, input_uV, err;
	unsigned int mode;

	/*
	 * first check to see if we can set modes at all, otherwise just
	 * tell the consumer everything is OK.
	 */
	if (!regulator_ops_is_valid(rdev, REGULATOR_CHANGE_DRMS)) {
		rdev_dbg(rdev, "DRMS operation not allowed\n");
		return 0;
	}

	if (!rdev->desc->ops->get_optimum_mode &&
	    !rdev->desc->ops->set_load)
		return 0;

	if (!rdev->desc->ops->set_mode &&
	    !rdev->desc->ops->set_load)
		return -EINVAL;

	/* calc total requested load */
	list_for_each_entry(sibling, &rdev->consumer_list, list) {
		if (sibling->enable_count)
			current_uA += sibling->uA_load;
	}

	current_uA += rdev->constraints->system_load;

	if (rdev->desc->ops->set_load) {
		/* set the optimum mode for our new total regulator load */
		err = rdev->desc->ops->set_load(rdev, current_uA);
		if (err < 0)
			rdev_err(rdev, "failed to set load %d: %pe\n",
				 current_uA, ERR_PTR(err));
	} else {
		/*
		 * Unfortunately in some cases the constraints->valid_ops has
		 * REGULATOR_CHANGE_DRMS but there are no valid modes listed.
		 * That's not really legit but we won't consider it a fatal
		 * error here. We'll treat it as if REGULATOR_CHANGE_DRMS
		 * wasn't set.
		 */
		if (!rdev->constraints->valid_modes_mask) {
			rdev_dbg(rdev, "Can change modes; but no valid mode\n");
			return 0;
		}

		/* get output voltage */
		output_uV = regulator_get_voltage_rdev(rdev);

		/*
		 * Don't return an error; if regulator driver cares about
		 * output_uV then it's up to the driver to validate.
		 */
		if (output_uV <= 0)
			rdev_dbg(rdev, "invalid output voltage found\n");

		/* get input voltage */
		input_uV = 0;
		if (rdev->supply)
			input_uV = regulator_get_voltage_rdev(rdev->supply->rdev);
		/* no usable reading from the supply: use the constraint value */
		if (input_uV <= 0)
			input_uV = rdev->constraints->input_uV;

		/*
		 * Don't return an error; if regulator driver cares about
		 * input_uV then it's up to the driver to validate.
		 */
		if (input_uV <= 0)
			rdev_dbg(rdev, "invalid input voltage found\n");

		/* now get the optimum mode for our new total regulator load */
		mode = rdev->desc->ops->get_optimum_mode(rdev, input_uV,
							 output_uV, current_uA);

		/* check the new mode is allowed */
		err = regulator_mode_constrain(rdev, &mode);
		if (err < 0) {
			rdev_err(rdev, "failed to get optimum mode @ %d uA %d -> %d uV: %pe\n",
				 current_uA, input_uV, output_uV, ERR_PTR(err));
			return err;
		}

		err = rdev->desc->ops->set_mode(rdev, mode);
		if (err < 0)
			rdev_err(rdev, "failed to set optimum mode %x: %pe\n",
				 mode, ERR_PTR(err));
	}

	return err;
}

/*
 * Program the suspend configuration in @rstate through the driver's
 * set_suspend_* ops: first enable/disable, then the voltage when
 * rstate->uV > 0, then the mode when rstate->mode > 0.
 *
 * Return: 0 on success, or the first negative value returned by an op.
 */
static int __suspend_set_state(struct regulator_dev *rdev,
			       const struct regulator_state *rstate)
{
	int ret = 0;

	if (rstate->enabled == ENABLE_IN_SUSPEND &&
		rdev->desc->ops->set_suspend_enable)
		ret = rdev->desc->ops->set_suspend_enable(rdev);
	else if (rstate->enabled == DISABLE_IN_SUSPEND &&
		rdev->desc->ops->set_suspend_disable)
		ret = rdev->desc->ops->set_suspend_disable(rdev);
	else /* OK if set_suspend_enable or set_suspend_disable is NULL */
		ret = 0;

	if (ret < 0) {
		rdev_err(rdev, "failed to enabled/disable: %pe\n", ERR_PTR(ret));
		return ret;
	}

	if (rdev->desc->ops->set_suspend_voltage && rstate->uV > 0) {
		ret = rdev->desc->ops->set_suspend_voltage(rdev, rstate->uV);
		if (ret < 0) {
			rdev_err(rdev, "failed to set voltage: %pe\n",
				 ERR_PTR(ret));
			return ret;
		}
	}

	if (rdev->desc->ops->set_suspend_mode && rstate->mode > 0) {
		ret = rdev->desc->ops->set_suspend_mode(rdev, rstate->mode);
		if (ret < 0) {
			rdev_err(rdev, "failed to set mode: %pe\n", ERR_PTR(ret));
			return ret;
		}
	}

	return ret;
}

/*
 * Apply the suspend configuration selected by constraints->initial_state.
 * Returns 0 without doing anything when
 * regulator_get_suspend_state_check() yields no state.
 */
static int suspend_set_initial_state(struct regulator_dev *rdev)
{
	const struct regulator_state *rstate;

	rstate = regulator_get_suspend_state_check(rdev,
			rdev->constraints->initial_state);
	if (!rstate)
		return 0;

	return __suspend_set_state(rdev, rstate);
}

#if defined(DEBUG) || defined(CONFIG_DYNAMIC_DEBUG)
/*
 * Build a one-line text summary of the constraints in a local buffer;
 * scnprintf() keeps every append within the remaining space.
 */
static void print_constraints_debug(struct regulator_dev *rdev)
{
	struct regulation_constraints *constraints = rdev->constraints;
	char buf[160] = "";
	size_t len = sizeof(buf) - 1;
	int count = 0;
	int ret;

	/* voltage constraints: a single value or a range, in mV */
	if (constraints->min_uV && constraints->max_uV) {
		if (constraints->min_uV == constraints->max_uV)
			count += scnprintf(buf + count, len - count, "%d mV ",
					   constraints->min_uV / 1000);
		else
			count += scnprintf(buf + count, len - count,
					   "%d <--> %d mV ",
					   constraints->min_uV / 1000,
					   constraints->max_uV / 1000);
	}

	/* not a fixed voltage: also report the present reading */
	if (!constraints->min_uV ||
	    constraints->min_uV != constraints->max_uV) {
		ret = regulator_get_voltage_rdev(rdev);
		if (ret > 0)
			count += scnprintf(buf + count, len - count,
					   "at %d mV ", ret / 1000);
	}

	if (constraints->uV_offset)
		count += scnprintf(buf + count, len - count, "%dmV offset ",
				   constraints->uV_offset / 1000);

	/* current constraints: a single value or a range, in mA */
	if (constraints->min_uA && constraints->max_uA) {
		if (constraints->min_uA == constraints->max_uA)
			count += scnprintf(buf + count, len - count, "%d mA ",
					   constraints->min_uA / 1000);
		else
			count += scnprintf(buf + count, len - count,
					   "%d <--> %d mA ",
					   constraints->min_uA / 1000,
					   constraints->max_uA / 1000);
	}

	/* not a fixed current limit: also report the present limit */
	if (!constraints->min_uA ||
	    constraints->min_uA != constraints->max_uA) {
		ret = _regulator_get_current_limit(rdev);
		if (ret > 0)
			count += scnprintf(buf + count, len - count,
					   "at %d mA ", ret / 1000);
	}

	if (constraints->valid_modes_mask & REGULATOR_MODE_FAST)
		count += scnprintf(buf + count,
len - count, "fast "); if (constraints->valid_modes_mask & REGULATOR_MODE_NORMAL) count += scnprintf(buf + count, len - count, "normal "); if (constraints->valid_modes_mask & REGULATOR_MODE_IDLE) count += scnprintf(buf + count, len - count, "idle "); if (constraints->valid_modes_mask & REGULATOR_MODE_STANDBY) count += scnprintf(buf + count, len - count, "standby "); if (constraints->pw_budget_mW) count += scnprintf(buf + count, len - count, "%d mW budget ", constraints->pw_budget_mW); if (!count) count = scnprintf(buf, len, "no parameters"); else --count; count += scnprintf(buf + count, len - count, ", %s", _regulator_is_enabled(rdev) ? "enabled" : "disabled"); rdev_dbg(rdev, "%s\n", buf); } #else /* !DEBUG && !CONFIG_DYNAMIC_DEBUG */ static inline void print_constraints_debug(struct regulator_dev *rdev) {} #endif /* !DEBUG && !CONFIG_DYNAMIC_DEBUG */ static void print_constraints(struct regulator_dev *rdev) { struct regulation_constraints *constraints = rdev->constraints; print_constraints_debug(rdev); if ((constraints->min_uV != constraints->max_uV) && !regulator_ops_is_valid(rdev, REGULATOR_CHANGE_VOLTAGE)) rdev_warn(rdev, "Voltage range but no REGULATOR_CHANGE_VOLTAGE\n"); } static int machine_constraints_voltage(struct regulator_dev *rdev, struct regulation_constraints *constraints) { const struct regulator_ops *ops = rdev->desc->ops; int ret; /* do we need to apply the constraint voltage */ if (rdev->constraints->apply_uV && rdev->constraints->min_uV && rdev->constraints->max_uV) { int target_min, target_max; int current_uV = regulator_get_voltage_rdev(rdev); if (current_uV == -ENOTRECOVERABLE) { /* This regulator can't be read and must be initialized */ rdev_info(rdev, "Setting %d-%duV\n", rdev->constraints->min_uV, rdev->constraints->max_uV); _regulator_do_set_voltage(rdev, rdev->constraints->min_uV, rdev->constraints->max_uV); current_uV = regulator_get_voltage_rdev(rdev); } if (current_uV < 0) { if (current_uV != -EPROBE_DEFER) rdev_err(rdev, "failed to 
get the current voltage: %pe\n", ERR_PTR(current_uV)); return current_uV; } /* * If we're below the minimum voltage move up to the * minimum voltage, if we're above the maximum voltage * then move down to the maximum. */ target_min = current_uV; target_max = current_uV; if (current_uV < rdev->constraints->min_uV) { target_min = rdev->constraints->min_uV; target_max = rdev->constraints->min_uV; } if (current_uV > rdev->constraints->max_uV) { target_min = rdev->constraints->max_uV; target_max = rdev->constraints->max_uV; } if (target_min != current_uV || target_max != current_uV) { rdev_info(rdev, "Bringing %duV into %d-%duV\n", current_uV, target_min, target_max); ret = _regulator_do_set_voltage( rdev, target_min, target_max); if (ret < 0) { rdev_err(rdev, "failed to apply %d-%duV constraint: %pe\n", target_min, target_max, ERR_PTR(ret)); return ret; } } } /* constrain machine-level voltage specs to fit * the actual range supported by this regulator. */ if (ops->list_voltage && rdev->desc->n_voltages) { int count = rdev->desc->n_voltages; int i; int min_uV = INT_MAX; int max_uV = INT_MIN; int cmin = constraints->min_uV; int cmax = constraints->max_uV; /* it's safe to autoconfigure fixed-voltage supplies * and the constraints are used by list_voltage. 
*/ if (count == 1 && !cmin) { cmin = 1; cmax = INT_MAX; constraints->min_uV = cmin; constraints->max_uV = cmax; } /* voltage constraints are optional */ if ((cmin == 0) && (cmax == 0)) return 0; /* else require explicit machine-level constraints */ if (cmin <= 0 || cmax <= 0 || cmax < cmin) { rdev_err(rdev, "invalid voltage constraints\n"); return -EINVAL; } /* no need to loop voltages if range is continuous */ if (rdev->desc->continuous_voltage_range) return 0; /* initial: [cmin..cmax] valid, [min_uV..max_uV] not */ for (i = 0; i < count; i++) { int value; value = ops->list_voltage(rdev, i); if (value <= 0) continue; /* maybe adjust [min_uV..max_uV] */ if (value >= cmin && value < min_uV) min_uV = value; if (value <= cmax && value > max_uV) max_uV = value; } /* final: [min_uV..max_uV] valid iff constraints valid */ if (max_uV < min_uV) { rdev_err(rdev, "unsupportable voltage constraints %u-%uuV\n", min_uV, max_uV); return -EINVAL; } /* use regulator's subset of machine constraints */ if (constraints->min_uV < min_uV) { rdev_dbg(rdev, "override min_uV, %d -> %d\n", constraints->min_uV, min_uV); constraints->min_uV = min_uV; } if (constraints->max_uV > max_uV) { rdev_dbg(rdev, "override max_uV, %d -> %d\n", constraints->max_uV, max_uV); constraints->max_uV = max_uV; } } return 0; } static int machine_constraints_current(struct regulator_dev *rdev, struct regulation_constraints *constraints) { const struct regulator_ops *ops = rdev->desc->ops; int ret; if (!constraints->min_uA && !constraints->max_uA) return 0; if (constraints->min_uA > constraints->max_uA) { rdev_err(rdev, "Invalid current constraints\n"); return -EINVAL; } if (!ops->set_current_limit || !ops->get_current_limit) { rdev_warn(rdev, "Operation of current configuration missing\n"); return 0; } /* Set regulator current in constraints range */ ret = ops->set_current_limit(rdev, constraints->min_uA, constraints->max_uA); if (ret < 0) { rdev_err(rdev, "Failed to set current constraint, %d\n", ret); return 
ret; } return 0; } static int _regulator_do_enable(struct regulator_dev *rdev); static int notif_set_limit(struct regulator_dev *rdev, int (*set)(struct regulator_dev *, int, int, bool), int limit, int severity) { bool enable; if (limit == REGULATOR_NOTIF_LIMIT_DISABLE) { enable = false; limit = 0; } else { enable = true; } if (limit == REGULATOR_NOTIF_LIMIT_ENABLE) limit = 0; return set(rdev, limit, severity, enable); } static int handle_notify_limits(struct regulator_dev *rdev, int (*set)(struct regulator_dev *, int, int, bool), struct notification_limit *limits) { int ret = 0; if (!set) return -EOPNOTSUPP; if (limits->prot) ret = notif_set_limit(rdev, set, limits->prot, REGULATOR_SEVERITY_PROT); if (ret) return ret; if (limits->err) ret = notif_set_limit(rdev, set, limits->err, REGULATOR_SEVERITY_ERR); if (ret) return ret; if (limits->warn) ret = notif_set_limit(rdev, set, limits->warn, REGULATOR_SEVERITY_WARN); return ret; } /** * set_machine_constraints - sets regulator constraints * @rdev: regulator source * @is_locked: whether or not this is called with locks held already * * Allows platform initialisation code to define and constrain * regulator circuits e.g. valid voltage/current ranges, etc. NOTE: * Constraints *must* be set by platform code in order for some * regulator operations to proceed i.e. set_voltage, set_current_limit, * set_mode. * * Return: 0 on success or a negative error number on failure. */ static int set_machine_constraints(struct regulator_dev *rdev, bool is_locked) { int ret = 0; const struct regulator_ops *ops = rdev->desc->ops; /* * If there is no mechanism for controlling the regulator then * flag it as always_on so we don't end up duplicating checks * for this so much. Note that we could control the state of * a supply to control the output on a regulator that has no * direct control. 
*/ if (!rdev->ena_pin && !ops->enable) { if (rdev->supply_name && !rdev->supply) return -EPROBE_DEFER; if (rdev->supply) rdev->constraints->always_on = rdev->supply->rdev->constraints->always_on; else rdev->constraints->always_on = true; } /* * If we want to enable this regulator, make sure that we know the * supplying regulator. */ if (rdev->constraints->always_on || rdev->constraints->boot_on) { if (rdev->supply_name && !rdev->supply) return -EPROBE_DEFER; } ret = machine_constraints_voltage(rdev, rdev->constraints); if (ret != 0) return ret; ret = machine_constraints_current(rdev, rdev->constraints); if (ret != 0) return ret; if (rdev->constraints->ilim_uA && ops->set_input_current_limit) { ret = ops->set_input_current_limit(rdev, rdev->constraints->ilim_uA); if (ret < 0) { rdev_err(rdev, "failed to set input limit: %pe\n", ERR_PTR(ret)); return ret; } } /* do we need to setup our suspend state */ if (rdev->constraints->initial_state) { ret = suspend_set_initial_state(rdev); if (ret < 0) { rdev_err(rdev, "failed to set suspend state: %pe\n", ERR_PTR(ret)); return ret; } } if (rdev->constraints->initial_mode) { if (!ops->set_mode) { rdev_err(rdev, "no set_mode operation\n"); return -EINVAL; } ret = ops->set_mode(rdev, rdev->constraints->initial_mode); if (ret < 0) { rdev_err(rdev, "failed to set initial mode: %pe\n", ERR_PTR(ret)); return ret; } } else if (rdev->constraints->system_load) { /* * We'll only apply the initial system load if an * initial mode wasn't specified. 
*/ drms_uA_update(rdev); } if ((rdev->constraints->ramp_delay || rdev->constraints->ramp_disable) && ops->set_ramp_delay) { ret = ops->set_ramp_delay(rdev, rdev->constraints->ramp_delay); if (ret < 0) { rdev_err(rdev, "failed to set ramp_delay: %pe\n", ERR_PTR(ret)); return ret; } } if (rdev->constraints->pull_down && ops->set_pull_down) { ret = ops->set_pull_down(rdev); if (ret < 0) { rdev_err(rdev, "failed to set pull down: %pe\n", ERR_PTR(ret)); return ret; } } if (rdev->constraints->soft_start && ops->set_soft_start) { ret = ops->set_soft_start(rdev); if (ret < 0) { rdev_err(rdev, "failed to set soft start: %pe\n", ERR_PTR(ret)); return ret; } } /* * Existing logic does not warn if over_current_protection is given as * a constraint but driver does not support that. I think we should * warn about this type of issues as it is possible someone changes * PMIC on board to another type - and the other PMIC's driver does * not support setting protection. Board composer may happily believe * the DT limits are respected - especially if the new PMIC HW also * supports protection but the driver does not. I won't change the logic * without hearing more experienced opinion on this though. * * If warning is seen as a good idea then we can merge handling the * over-curret protection and detection and get rid of this special * handling. 
*/ if (rdev->constraints->over_current_protection && ops->set_over_current_protection) { int lim = rdev->constraints->over_curr_limits.prot; ret = ops->set_over_current_protection(rdev, lim, REGULATOR_SEVERITY_PROT, true); if (ret < 0) { rdev_err(rdev, "failed to set over current protection: %pe\n", ERR_PTR(ret)); return ret; } } if (rdev->constraints->over_current_detection) ret = handle_notify_limits(rdev, ops->set_over_current_protection, &rdev->constraints->over_curr_limits); if (ret) { if (ret != -EOPNOTSUPP) { rdev_err(rdev, "failed to set over current limits: %pe\n", ERR_PTR(ret)); return ret; } rdev_warn(rdev, "IC does not support requested over-current limits\n"); } if (rdev->constraints->over_voltage_detection) ret = handle_notify_limits(rdev, ops->set_over_voltage_protection, &rdev->constraints->over_voltage_limits); if (ret) { if (ret != -EOPNOTSUPP) { rdev_err(rdev, "failed to set over voltage limits %pe\n", ERR_PTR(ret)); return ret; } rdev_warn(rdev, "IC does not support requested over voltage limits\n"); } if (rdev->constraints->under_voltage_detection) ret = handle_notify_limits(rdev, ops->set_under_voltage_protection, &rdev->constraints->under_voltage_limits); if (ret) { if (ret != -EOPNOTSUPP) { rdev_err(rdev, "failed to set under voltage limits %pe\n", ERR_PTR(ret)); return ret; } rdev_warn(rdev, "IC does not support requested under voltage limits\n"); } if (rdev->constraints->over_temp_detection) ret = handle_notify_limits(rdev, ops->set_thermal_protection, &rdev->constraints->temp_limits); if (ret) { if (ret != -EOPNOTSUPP) { rdev_err(rdev, "failed to set temperature limits %pe\n", ERR_PTR(ret)); return ret; } rdev_warn(rdev, "IC does not support requested temperature limits\n"); } if (rdev->constraints->active_discharge && ops->set_active_discharge) { bool ad_state = rdev->constraints->active_discharge == REGULATOR_ACTIVE_DISCHARGE_ENABLE; ret = ops->set_active_discharge(rdev, ad_state); if (ret < 0) { rdev_err(rdev, "failed to set active 
discharge: %pe\n", ERR_PTR(ret)); return ret; } } /* If the constraints say the regulator should be on at this point * and we have control then make sure it is enabled. */ if (rdev->constraints->always_on || rdev->constraints->boot_on) { bool supply_enabled = false; /* We have ensured a potential supply has been resolved above. * * If supplying regulator has already been enabled, * it's not intended to have use_count increment * when rdev is only boot-on. */ if (rdev->supply && (rdev->constraints->always_on || !regulator_is_enabled(rdev->supply))) { ret = (is_locked ? _regulator_enable(rdev->supply) : regulator_enable(rdev->supply)); if (ret < 0) { _regulator_put(rdev->supply); rdev->supply = NULL; return ret; } supply_enabled = true; } ret = _regulator_do_enable(rdev); if (ret < 0 && ret != -EINVAL) { rdev_err(rdev, "failed to enable: %pe\n", ERR_PTR(ret)); if (supply_enabled) regulator_disable(rdev->supply); return ret; } if (rdev->constraints->always_on) rdev->use_count++; } else if (rdev->desc->off_on_delay) { rdev->last_off = ktime_get(); } if (!rdev->constraints->pw_budget_mW) rdev->constraints->pw_budget_mW = INT_MAX; print_constraints(rdev); return 0; } /** * regulator_event_work_fn - process a deferred regulator event * @work: work_struct queued by the notifier * * Calls the regulator's notifier chain in process context while holding * the rdev lock, then releases the device reference. 
*/
static void regulator_event_work_fn(struct work_struct *work)
{
	struct regulator_event_work *evt =
		container_of(work, struct regulator_event_work, work);
	struct regulator_dev *rdev = evt->rdev;
	int status;

	/* The notifier chain is run with the rdev lock held. */
	regulator_lock(rdev);
	status = regulator_notifier_call_chain(rdev, evt->event, NULL);
	regulator_unlock(rdev);

	if (status == NOTIFY_BAD)
		dev_err(rdev_get_dev(rdev), "failed to forward regulator event\n");

	/* Drop the reference taken when the work item was queued. */
	put_device(rdev_get_dev(rdev));
	kfree(evt);
}

/**
 * regulator_event_forward_notifier - supply notifier that defers forwarding
 * @nb: the supply_fwd_nb notifier block embedded in a regulator_dev
 * @event: regulator event code reported by the supply
 * @data: unused
 *
 * Only REGULATOR_EVENT_UNDER_VOLTAGE is forwarded. For that event a work
 * item is allocated with GFP_ATOMIC, a reference on the regulator's device
 * is taken (released again by regulator_event_work_fn()) and the item is
 * queued on system_highpri_wq.
 *
 * Return: NOTIFY_OK when the work was queued, NOTIFY_DONE for events that
 * are not forwarded or when the allocation fails.
 */
static int regulator_event_forward_notifier(struct notifier_block *nb,
					    unsigned long event,
					    void __always_unused *data)
{
	struct regulator_event_work *evt;
	struct regulator_dev *rdev;

	/* Only forward allowed events downstream. */
	if (event != REGULATOR_EVENT_UNDER_VOLTAGE)
		return NOTIFY_DONE;

	evt = kmalloc_obj(*evt, GFP_ATOMIC);
	if (!evt)
		return NOTIFY_DONE;

	rdev = container_of(nb, struct regulator_dev, supply_fwd_nb);

	get_device(rdev_get_dev(rdev));
	evt->rdev = rdev;
	evt->event = event;
	INIT_WORK(&evt->work, regulator_event_work_fn);
	queue_work(system_highpri_wq, &evt->work);

	return NOTIFY_OK;
}

/**
 * register_regulator_event_forwarding - enable supply event forwarding
 * @rdev: regulator device
 *
 * Registers a notifier on the regulator's supply so that supply events
 * are forwarded to the consumer regulator via the deferred work handler.
 *
 * Return: 0 on success, -EALREADY if already enabled, or a negative error code.
*/ static int register_regulator_event_forwarding(struct regulator_dev *rdev) { int ret; if (!rdev->supply) return 0; /* top-level regulator: nothing to forward */ if (rdev->supply_fwd_nb.notifier_call) return -EALREADY; rdev->supply_fwd_nb.notifier_call = regulator_event_forward_notifier; ret = regulator_register_notifier(rdev->supply, &rdev->supply_fwd_nb); if (ret) { dev_err(&rdev->dev, "failed to register supply notifier: %pe\n", ERR_PTR(ret)); rdev->supply_fwd_nb.notifier_call = NULL; return ret; } return 0; } static void unregister_regulator_event_forwarding(struct regulator_dev *rdev) { if (!rdev->supply_fwd_nb.notifier_call) return; regulator_unregister_notifier(rdev->supply, &rdev->supply_fwd_nb); rdev->supply_fwd_nb.notifier_call = NULL; } /** * set_supply - set regulator supply regulator * @rdev: regulator (locked) * @supply_rdev: supply regulator (locked)) * * Called by platform initialisation code to set the supply regulator for this * regulator. This ensures that a regulators supply will also be enabled by the * core if it's child is enabled. * * Return: 0 on success or a negative error number on failure. */ static int set_supply(struct regulator_dev *rdev, struct regulator_dev *supply_rdev) { int err; rdev_dbg(rdev, "supplied by %s\n", rdev_get_name(supply_rdev)); if (!try_module_get(supply_rdev->owner)) return -ENODEV; rdev->supply = create_regulator(supply_rdev, &rdev->dev, "SUPPLY"); if (rdev->supply == NULL) { module_put(supply_rdev->owner); err = -ENOMEM; return err; } supply_rdev->open_count++; return 0; } /** * set_consumer_device_supply - Bind a regulator to a symbolic supply * @rdev: regulator source * @consumer_dev_name: dev_name() string for device supply applies to * @supply: symbolic name for supply * * Allows platform initialisation code to map physical regulator * sources to symbolic names for supplies for use by devices. 
Devices * should use these symbolic names to request regulators, avoiding the * need to provide board-specific regulator names as platform data. * * Return: 0 on success or a negative error number on failure. */ static int set_consumer_device_supply(struct regulator_dev *rdev, const char *consumer_dev_name, const char *supply) { struct regulator_map *node, *new_node; int has_dev; if (supply == NULL) return -EINVAL; if (consumer_dev_name != NULL) has_dev = 1; else has_dev = 0; new_node = kzalloc_obj(struct regulator_map); if (new_node == NULL) return -ENOMEM; new_node->regulator = rdev; new_node->supply = supply; if (has_dev) { new_node->dev_name = kstrdup(consumer_dev_name, GFP_KERNEL); if (new_node->dev_name == NULL) { kfree(new_node); return -ENOMEM; } } mutex_lock(&regulator_list_mutex); list_for_each_entry(node, &regulator_map_list, list) { if (node->dev_name && consumer_dev_name) { if (strcmp(node->dev_name, consumer_dev_name) != 0) continue; } else if (node->dev_name || consumer_dev_name) { continue; } if (strcmp(node->supply, supply) != 0) continue; pr_debug("%s: %s/%s is '%s' supply; fail %s/%s\n", consumer_dev_name, dev_name(&node->regulator->dev), node->regulator->desc->name, supply, dev_name(&rdev->dev), rdev_get_name(rdev)); goto fail; } list_add(&new_node->list, &regulator_map_list); mutex_unlock(&regulator_list_mutex); return 0; fail: mutex_unlock(&regulator_list_mutex); kfree(new_node->dev_name); kfree(new_node); return -EBUSY; } static void unset_regulator_supplies(struct regulator_dev *rdev) { struct regulator_map *node, *n; list_for_each_entry_safe(node, n, &regulator_map_list, list) { if (rdev == node->regulator) { list_del(&node->list); kfree(node->dev_name); kfree(node); } } } #ifdef CONFIG_DEBUG_FS static ssize_t constraint_flags_read_file(struct file *file, char __user *user_buf, size_t count, loff_t *ppos) { const struct regulator *regulator = file->private_data; const struct regulation_constraints *c = regulator->rdev->constraints; char 
*buf; ssize_t ret; if (!c) return 0; buf = kmalloc(PAGE_SIZE, GFP_KERNEL); if (!buf) return -ENOMEM; ret = snprintf(buf, PAGE_SIZE, "always_on: %u\n" "boot_on: %u\n" "apply_uV: %u\n" "ramp_disable: %u\n" "soft_start: %u\n" "pull_down: %u\n" "over_current_protection: %u\n", c->always_on, c->boot_on, c->apply_uV, c->ramp_disable, c->soft_start, c->pull_down, c->over_current_protection); ret = simple_read_from_buffer(user_buf, count, ppos, buf, ret); kfree(buf); return ret; } #endif static const struct file_operations constraint_flags_fops = { #ifdef CONFIG_DEBUG_FS .open = simple_open, .read = constraint_flags_read_file, .llseek = default_llseek, #endif }; static void link_and_create_debugfs(struct regulator *regulator, struct regulator_dev *rdev, struct device *dev) { int err = 0; if (dev) { regulator->dev = dev; /* Add a link to the device sysfs entry */ err = sysfs_create_link_nowarn(&rdev->dev.kobj, &dev->kobj, regulator->supply_name); if (err) { rdev_dbg(rdev, "could not add device link %s: %pe\n", dev->kobj.name, ERR_PTR(err)); /* non-fatal */ } } if (err != -EEXIST) { regulator->debugfs = debugfs_create_dir(regulator->supply_name, rdev->debugfs); if (IS_ERR(regulator->debugfs)) { rdev_dbg(rdev, "Failed to create debugfs directory\n"); regulator->debugfs = NULL; } } if (regulator->debugfs) { debugfs_create_u32("uA_load", 0444, regulator->debugfs, &regulator->uA_load); debugfs_create_u32("min_uV", 0444, regulator->debugfs, &regulator->voltage[PM_SUSPEND_ON].min_uV); debugfs_create_u32("max_uV", 0444, regulator->debugfs, &regulator->voltage[PM_SUSPEND_ON].max_uV); debugfs_create_file("constraint_flags", 0444, regulator->debugfs, regulator, &constraint_flags_fops); } } static struct regulator *create_regulator(struct regulator_dev *rdev, struct device *dev, const char *supply_name) { struct regulator *regulator; lockdep_assert_held_once(&rdev->mutex.base); if (dev) { supply_name = kasprintf(GFP_KERNEL, "%s-%s", dev->kobj.name, supply_name); if (supply_name == 
NULL) return NULL; } else { supply_name = kstrdup_const(supply_name, GFP_KERNEL); if (supply_name == NULL) return NULL; } regulator = kzalloc_obj(*regulator); if (regulator == NULL) { kfree_const(supply_name); return NULL; } regulator->rdev = rdev; regulator->supply_name = supply_name; list_add(&regulator->list, &rdev->consumer_list); /* * Check now if the regulator is an always on regulator - if * it is then we don't need to do nearly so much work for * enable/disable calls. */ if (!regulator_ops_is_valid(rdev, REGULATOR_CHANGE_STATUS) && _regulator_is_enabled(rdev)) regulator->always_on = true; return regulator; } static int _regulator_get_enable_time(struct regulator_dev *rdev) { if (rdev->constraints && rdev->constraints->enable_time) return rdev->constraints->enable_time; if (rdev->desc->ops->enable_time) return rdev->desc->ops->enable_time(rdev); return rdev->desc->enable_time; } static struct regulator_supply_alias *regulator_find_supply_alias( struct device *dev, const char *supply) { struct regulator_supply_alias *map; list_for_each_entry(map, &regulator_supply_alias_list, list) if (map->src_dev == dev && strcmp(map->src_supply, supply) == 0) return map; return NULL; } static void regulator_supply_alias(struct device **dev, const char **supply) { struct regulator_supply_alias *map; mutex_lock(&regulator_list_mutex); map = regulator_find_supply_alias(*dev, *supply); if (map) { dev_dbg(*dev, "Mapping supply %s to %s,%s\n", *supply, map->alias_supply, dev_name(map->alias_dev)); *dev = map->alias_dev; *supply = map->alias_supply; } mutex_unlock(&regulator_list_mutex); } static int regulator_match(struct device *dev, const void *data) { struct regulator_dev *r = dev_to_rdev(dev); return strcmp(rdev_get_name(r), data) == 0; } static struct regulator_dev *regulator_lookup_by_name(const char *name) { struct device *dev; dev = class_find_device(&regulator_class, NULL, name, regulator_match); return dev ? 
dev_to_rdev(dev) : NULL;
}

/*
 * Device-tree based lookup of @supply for @dev.
 *
 * Only attempted when @dev has an OF node. An -ENODEV result from
 * of_regulator_dev_lookup() is turned into NULL; any other value returned
 * by that helper is passed through unchanged. Returns NULL when no OF
 * lookup was attempted.
 */
static struct regulator_dev *regulator_dt_lookup(struct device *dev,
						 const char *supply)
{
	struct regulator_dev *r = NULL;

	if (dev_of_node(dev)) {
		r = of_regulator_dev_lookup(dev, dev_of_node(dev), supply);
		if (PTR_ERR(r) == -ENODEV)
			r = NULL;
	}

	return r;
}

/**
 * regulator_dev_lookup - lookup a regulator device.
 * @dev: device for regulator "consumer".
 * @supply: Supply name or regulator ID.
 *
 * Return: pointer to &struct regulator_dev or ERR_PTR() encoded negative error number.
 *
 * If successful, returns a struct regulator_dev that corresponds to the name
 * @supply and with the embedded struct device refcount incremented by one.
 * The refcount must be dropped by calling put_device().
 * On failure one of the following ERR_PTR() encoded values is returned:
 * -%ENODEV if lookup fails permanently, -%EPROBE_DEFER if lookup could succeed
 * in the future.
 */
static struct regulator_dev *regulator_dev_lookup(struct device *dev,
						  const char *supply)
{
	struct regulator_dev *r = NULL;
	struct regulator_map *map;
	const char *devname = NULL;

	/* May replace both dev and supply by a registered alias. */
	regulator_supply_alias(&dev, &supply);

	/* first do a dt based lookup */
	r = regulator_dt_lookup(dev, supply);
	if (r)
		return r;

	/* if not found, try doing it non-dt way */
	if (dev)
		devname = dev_name(dev);

	mutex_lock(&regulator_list_mutex);
	list_for_each_entry(map, &regulator_map_list, list) {
		/* If the mapping has a device set up it must match */
		if (map->dev_name &&
		    (!devname || strcmp(map->dev_name, devname)))
			continue;

		/* Only accept the mapping if a device reference can be taken. */
		if (strcmp(map->supply, supply) == 0 &&
		    get_device(&map->regulator->dev)) {
			r = map->regulator;
			break;
		}
	}
	mutex_unlock(&regulator_list_mutex);

	if (r)
		return r;

	/* Last resort: treat @supply as the name of a registered regulator. */
	r = regulator_lookup_by_name(supply);
	if (r)
		return r;

	return ERR_PTR(-ENODEV);
}

/*
 * Resolve and attach the supply named by rdev->supply_name.
 *
 * Looks the supply up (device tree first, then the parent device's search
 * path), falls back to the dummy regulator when full constraints are
 * declared, recursively resolves the supply's own supply, and finally
 * attaches it with set_supply() and registers event forwarding. If machine
 * constraints are still pending for @rdev they are retried once the supply
 * is known; on failure the freshly attached supply is released again.
 *
 * Returns 0 on success or when there is nothing to do, otherwise a
 * negative error number.
 */
static int regulator_resolve_supply(struct regulator_dev *rdev)
{
	struct regulator_dev *r;
	struct device *dev = rdev->dev.parent;
	struct ww_acquire_ctx ww_ctx;
	struct regulator *supply;
	bool do_final_setup;
	int ret = 0;

	/* No supply to resolve? */
	if (!rdev->supply_name)
		return 0;

	/* Supply already resolved? (fast-path without locking contention) */
	if (rdev->supply && !rdev->constraints_pending)
		return 0;

	/* first do a dt based lookup on the node described in the virtual
	 * device.
	 */
	r = regulator_dt_lookup(&rdev->dev, rdev->supply_name);

	/* If regulator not found use usual search path in the parent
	 * device.
	 */
	if (!r)
		r = regulator_dev_lookup(dev, rdev->supply_name);

	if (IS_ERR(r)) {
		ret = PTR_ERR(r);

		/* Did the lookup explicitly defer for us? */
		if (ret == -EPROBE_DEFER)
			goto out;

		if (have_full_constraints()) {
			/* Substitute the dummy regulator for the missing supply. */
			r = dummy_regulator_rdev;
			if (!r) {
				ret = -EPROBE_DEFER;
				goto out;
			}
			get_device(&r->dev);
		} else {
			dev_err(dev, "Failed to resolve %s-supply for %s\n",
				rdev->supply_name, rdev->desc->name);
			ret = -EPROBE_DEFER;
			goto out;
		}
	}

	if (r == rdev) {
		dev_err(dev, "Supply for %s (%s) resolved to itself\n",
			rdev->desc->name, rdev->supply_name);
		if (!have_full_constraints()) {
			ret = -EINVAL;
			goto out;
		}
		r = dummy_regulator_rdev;
		if (!r) {
			ret = -EPROBE_DEFER;
			goto out;
		}
		get_device(&r->dev);
	}

	/*
	 * If the supply's parent device is not the same as the
	 * regulator's parent device, then ensure the parent device
	 * is bound before we resolve the supply, in case the parent
	 * device get probe deferred and unregisters the supply.
	 */
	if (r->dev.parent && r->dev.parent != rdev->dev.parent) {
		if (!device_is_bound(r->dev.parent)) {
			put_device(&r->dev);
			ret = -EPROBE_DEFER;
			goto out;
		}
	}

	/* Recursively resolve the supply of the supply */
	ret = regulator_resolve_supply(r);
	if (ret < 0) {
		put_device(&r->dev);
		goto out;
	}

	/*
	 * Recheck rdev->supply with rdev->mutex lock held to avoid a race
	 * between rdev->supply null check and setting rdev->supply in
	 * set_supply() from concurrent tasks.
	 */
	regulator_lock_two(rdev, r, &ww_ctx);

	/* Supply just resolved by a concurrent task? */
	if (rdev->supply) {
		/* Constraints might still be pending due to concurrency. */
		bool done = !rdev->constraints_pending;

		supply = rdev->supply;

		regulator_unlock_two(rdev, r, &ww_ctx);
		/* Drop the reference obtained by the lookup above. */
		put_device(&r->dev);

		/*
		 * Supply resolved by concurrent task, and constraints set as
		 * well (or not required): fast path.
		 */
		if (done)
			goto out;

		do_final_setup = false;
	} else {
		ret = set_supply(rdev, r);
		if (ret < 0) {
			regulator_unlock_two(rdev, r, &ww_ctx);
			put_device(&r->dev);
			goto out;
		}

		supply = rdev->supply;

		/*
		 * Automatically register for event forwarding from the new
		 * supply. This creates the downstream propagation link for
		 * events like under-voltage.
		 */
		ret = register_regulator_event_forwarding(rdev);
		if (ret < 0) {
			rdev_warn(rdev,
				  "Failed to register event forwarding: %pe\n",
				  ERR_PTR(ret));

			/* Both locks are still held, as unset_supply expects. */
			goto unset_supply;
		}

		regulator_unlock_two(rdev, r, &ww_ctx);

		do_final_setup = true;
	}

	/*
	 * Now that we have the supply, we can retry setting the machine
	 * constraints, if necessary.
	 */
	regulator_lock_dependent(rdev, &ww_ctx);
	if (rdev->constraints_pending) {
		if (!rdev->supply) {
			/*
			 * Supply could have been released by another task that
			 * failed to set the constraints or event forwarding.
			 */
			regulator_unlock_dependent(rdev, &ww_ctx);
			ret = -EPROBE_DEFER;
			goto out;
		}

		ret = set_machine_constraints(rdev, true);
		if (ret < 0) {
			regulator_unlock_dependent(rdev, &ww_ctx);

			rdev_warn(rdev,
				  "Failed to set machine constraints: %pe\n",
				  ERR_PTR(ret));

			/* Re-take both locks before touching rdev->supply. */
			regulator_lock_two(rdev, r, &ww_ctx);

			if (supply != rdev->supply) {
				/*
				 * Supply could have been released by another
				 * task that got here before us. If it did, it
				 * will have released 'supply' (i.e. the
				 * previous rdev->supply) and we shouldn't do
				 * that again via unset_supply.
				 */
				regulator_unlock_two(rdev, r, &ww_ctx);
				goto out;
			}

			unregister_regulator_event_forwarding(rdev);
			rdev->constraints_pending = true;
			goto unset_supply;
		}
		rdev->constraints_pending = false;
	}
	regulator_unlock_dependent(rdev, &ww_ctx);

	if (!do_final_setup)
		goto out;

	/* rdev->supply was created in set_supply() */
	link_and_create_debugfs(rdev->supply, rdev->supply->rdev, &rdev->dev);

out:
	return ret;

unset_supply:
	/* Entered with both rdev and r locked; detach and release the supply. */
	lockdep_assert_held_once(&rdev->mutex.base);
	lockdep_assert_held_once(&r->mutex.base);
	rdev->supply = NULL;
	regulator_unlock_two(rdev, supply->rdev, &ww_ctx);

	regulator_put(supply);

	return ret;
}

/*
 * common pre-checks for regulator requests
 *
 * Returns -EINVAL for an out-of-range @get_type or a NULL @id, 0 otherwise.
 */
int _regulator_get_common_check(struct device *dev, const char *id,
				enum regulator_get_type get_type)
{
	if (get_type >= MAX_GET_TYPE) {
		dev_err(dev, "invalid type %d in %s\n", get_type, __func__);
		return -EINVAL;
	}

	if (id == NULL) {
		dev_err(dev, "regulator request with no identifier\n");
		return -EINVAL;
	}

	return 0;
}

/**
 * _regulator_get_common - Common code for regulator requests
 * @rdev: regulator device pointer as returned by *regulator_dev_lookup()
 *       Its reference count is expected to have been incremented.
 * @dev: device used for dev_printk messages
 * @id: Supply name or regulator ID
 * @get_type: enum regulator_get_type value corresponding to type of request
 *
 * Returns: pointer to struct regulator corresponding to @rdev, or ERR_PTR()
 *	    encoded error.
 *
 * This function should be chained with *regulator_dev_lookup() functions.
 */
struct regulator *_regulator_get_common(struct regulator_dev *rdev, struct device *dev,
					const char *id, enum regulator_get_type get_type)
{
	struct regulator *regulator;
	struct device_link *link;
	int ret;

	if (IS_ERR(rdev)) {
		ret = PTR_ERR(rdev);

		/*
		 * If regulator_dev_lookup() fails with error other
		 * than -ENODEV our job here is done, we simply return it.
		 */
		if (ret != -ENODEV)
			return ERR_PTR(ret);

		if (!have_full_constraints()) {
			dev_warn(dev,
				 "incomplete constraints, dummy supplies not allowed (id=%s)\n", id);
			return ERR_PTR(-ENODEV);
		}

		switch (get_type) {
		case NORMAL_GET:
			/*
			 * Assume that a regulator is physically present and
			 * enabled, even if it isn't hooked up, and just
			 * provide a dummy.
			 */
			rdev = dummy_regulator_rdev;
			if (!rdev)
				return ERR_PTR(-EPROBE_DEFER);
			dev_warn(dev, "supply %s not found, using dummy regulator\n", id);
			get_device(&rdev->dev);
			break;

		case EXCLUSIVE_GET:
			dev_warn(dev,
				 "dummy supplies not allowed for exclusive requests (id=%s)\n", id);
			fallthrough;

		default:
			return ERR_PTR(-ENODEV);
		}
	}

	/* From here on every error path drops the device reference on rdev. */
	if (rdev->exclusive) {
		regulator = ERR_PTR(-EPERM);
		put_device(&rdev->dev);
		return regulator;
	}

	if (get_type == EXCLUSIVE_GET && rdev->open_count) {
		regulator = ERR_PTR(-EBUSY);
		put_device(&rdev->dev);
		return regulator;
	}

	/* Defer until every coupled regulator has been resolved. */
	mutex_lock(&regulator_list_mutex);
	ret = (rdev->coupling_desc.n_resolved != rdev->coupling_desc.n_coupled);
	mutex_unlock(&regulator_list_mutex);

	if (ret != 0) {
		regulator = ERR_PTR(-EPROBE_DEFER);
		put_device(&rdev->dev);
		return regulator;
	}

	ret = regulator_resolve_supply(rdev);
	if (ret < 0) {
		regulator = ERR_PTR(ret);
		put_device(&rdev->dev);
		return regulator;
	}

	if (!try_module_get(rdev->owner)) {
		regulator = ERR_PTR(-EPROBE_DEFER);
		put_device(&rdev->dev);
		return regulator;
	}

	/* create_regulator() asserts that the rdev mutex is held. */
	regulator_lock(rdev);
	regulator = create_regulator(rdev, dev, id);
	regulator_unlock(rdev);
	if (regulator == NULL) {
		regulator = ERR_PTR(-ENOMEM);
		module_put(rdev->owner);
		put_device(&rdev->dev);
		return regulator;
	}

	link_and_create_debugfs(regulator, rdev, dev);

	rdev->open_count++;
	if (get_type == EXCLUSIVE_GET) {
		rdev->exclusive = 1;

		/* Seed the use counts from the regulator's current state. */
		ret = _regulator_is_enabled(rdev);
		if (ret > 0) {
			rdev->use_count = 1;
			regulator->enable_count = 1;

			/* Propagate the regulator state to its supply */
			if (rdev->supply) {
				ret = regulator_enable(rdev->supply);
				if (ret < 0) {
					destroy_regulator(regulator);
					module_put(rdev->owner);
					put_device(&rdev->dev);
					return ERR_PTR(ret);
				}
			}
		} else {
			rdev->use_count = 0;
			regulator->enable_count = 0;
		}
	}

	/* Remember whether a device link was created so it can be removed. */
	link = device_link_add(dev, &rdev->dev, DL_FLAG_STATELESS);
	if (!IS_ERR_OR_NULL(link))
		regulator->device_link = true;

	return regulator;
}

/*
 * Internal regulator request function
 *
 * Validates the request, looks the regulator up and hands the result
 * (including an ERR_PTR() lookup result) to _regulator_get_common().
 */
struct regulator *_regulator_get(struct device *dev, const char *id,
				 enum regulator_get_type get_type)
{
	struct regulator_dev *rdev;
	int ret;

	ret = _regulator_get_common_check(dev, id, get_type);
	if (ret)
		return ERR_PTR(ret);

	rdev = regulator_dev_lookup(dev, id);
	return _regulator_get_common(rdev, dev, id, get_type);
}

/**
 * regulator_get - lookup and obtain a reference to a regulator.
 * @dev: device for regulator "consumer"
 * @id: Supply name or regulator ID.
 *
 * Use of supply names configured via set_consumer_device_supply() is
 * strongly encouraged.  It is recommended that the supply name used
 * should match the name used for the supply and/or the relevant
 * device pins in the datasheet.
 *
 * Return: Pointer to a &struct regulator corresponding to the regulator
 *	   producer, or an ERR_PTR() encoded negative error number.
 */
struct regulator *regulator_get(struct device *dev, const char *id)
{
	return _regulator_get(dev, id, NORMAL_GET);
}
EXPORT_SYMBOL_GPL(regulator_get);

/**
 * regulator_get_exclusive - obtain exclusive access to a regulator.
 * @dev: device for regulator "consumer"
 * @id: Supply name or regulator ID.
 *
 * Other consumers will be unable to obtain this regulator while this
 * reference is held and the use count for the regulator will be
 * initialised to reflect the current state of the regulator.
 *
 * This is intended for use by consumers which cannot tolerate shared
 * use of the regulator such as those which need to force the
 * regulator off for correct operation of the hardware they are
 * controlling.
 *
 * Use of supply names configured via set_consumer_device_supply() is
 * strongly encouraged.
It is recommended that the supply name used * should match the name used for the supply and/or the relevant * device pins in the datasheet. * * Return: Pointer to a &struct regulator corresponding to the regulator * producer, or an ERR_PTR() encoded negative error number. */ struct regulator *regulator_get_exclusive(struct device *dev, const char *id) { return _regulator_get(dev, id, EXCLUSIVE_GET); } EXPORT_SYMBOL_GPL(regulator_get_exclusive); /** * regulator_get_optional - obtain optional access to a regulator. * @dev: device for regulator "consumer" * @id: Supply name or regulator ID. * * This is intended for use by consumers for devices which can have * some supplies unconnected in normal use, such as some MMC devices. * It can allow the regulator core to provide stub supplies for other * supplies requested using normal regulator_get() calls without * disrupting the operation of drivers that can handle absent * supplies. * * Use of supply names configured via set_consumer_device_supply() is * strongly encouraged. It is recommended that the supply name used * should match the name used for the supply and/or the relevant * device pins in the datasheet. * * Return: Pointer to a &struct regulator corresponding to the regulator * producer, or an ERR_PTR() encoded negative error number. 
*/ struct regulator *regulator_get_optional(struct device *dev, const char *id) { return _regulator_get(dev, id, OPTIONAL_GET); } EXPORT_SYMBOL_GPL(regulator_get_optional); static void destroy_regulator(struct regulator *regulator) { struct regulator_dev *rdev = regulator->rdev; debugfs_remove_recursive(regulator->debugfs); if (regulator->dev) { if (regulator->device_link) device_link_remove(regulator->dev, &rdev->dev); /* remove any sysfs entries */ sysfs_remove_link(&rdev->dev.kobj, regulator->supply_name); } regulator_lock(rdev); list_del(&regulator->list); rdev->open_count--; rdev->exclusive = 0; regulator_unlock(rdev); kfree_const(regulator->supply_name); kfree(regulator); } /* regulator_list_mutex lock held by regulator_put() */ static void _regulator_put(struct regulator *regulator) { struct regulator_dev *rdev; if (IS_ERR_OR_NULL(regulator)) return; lockdep_assert_held_once(&regulator_list_mutex); /* Docs say you must disable before calling regulator_put() */ WARN_ON(regulator->enable_count); rdev = regulator->rdev; destroy_regulator(regulator); module_put(rdev->owner); put_device(&rdev->dev); } /** * regulator_put - "free" the regulator source * @regulator: regulator source * * Note: drivers must ensure that all regulator_enable calls made on this * regulator source are balanced by regulator_disable calls prior to calling * this function. */ void regulator_put(struct regulator *regulator) { mutex_lock(&regulator_list_mutex); _regulator_put(regulator); mutex_unlock(&regulator_list_mutex); } EXPORT_SYMBOL_GPL(regulator_put); /** * regulator_register_supply_alias - Provide device alias for supply lookup * * @dev: device that will be given as the regulator "consumer" * @id: Supply name or regulator ID * @alias_dev: device that should be used to lookup the supply * @alias_id: Supply name or regulator ID that should be used to lookup the * supply * * All lookups for id on dev will instead be conducted for alias_id on * alias_dev. 
* * Return: 0 on success or a negative error number on failure. */ int regulator_register_supply_alias(struct device *dev, const char *id, struct device *alias_dev, const char *alias_id) { struct regulator_supply_alias *map; struct regulator_supply_alias *new_map; new_map = kzalloc_obj(struct regulator_supply_alias); if (!new_map) return -ENOMEM; mutex_lock(&regulator_list_mutex); map = regulator_find_supply_alias(dev, id); if (map) { mutex_unlock(&regulator_list_mutex); kfree(new_map); return -EEXIST; } new_map->src_dev = dev; new_map->src_supply = id; new_map->alias_dev = alias_dev; new_map->alias_supply = alias_id; list_add(&new_map->list, &regulator_supply_alias_list); mutex_unlock(&regulator_list_mutex); pr_info("Adding alias for supply %s,%s -> %s,%s\n", id, dev_name(dev), alias_id, dev_name(alias_dev)); return 0; } EXPORT_SYMBOL_GPL(regulator_register_supply_alias); /** * regulator_unregister_supply_alias - Remove device alias * * @dev: device that will be given as the regulator "consumer" * @id: Supply name or regulator ID * * Remove a lookup alias if one exists for id on dev. */ void regulator_unregister_supply_alias(struct device *dev, const char *id) { struct regulator_supply_alias *map; mutex_lock(&regulator_list_mutex); map = regulator_find_supply_alias(dev, id); if (map) { list_del(&map->list); kfree(map); } mutex_unlock(&regulator_list_mutex); } EXPORT_SYMBOL_GPL(regulator_unregister_supply_alias); /** * regulator_bulk_register_supply_alias - register multiple aliases * * @dev: device that will be given as the regulator "consumer" * @id: List of supply names or regulator IDs * @alias_dev: device that should be used to lookup the supply * @alias_id: List of supply names or regulator IDs that should be used to * lookup the supply * @num_id: Number of aliases to register * * This helper function allows drivers to register several supply * aliases in one operation. 
If any of the aliases cannot be * registered any aliases that were registered will be removed * before returning to the caller. * * Return: 0 on success or a negative error number on failure. */ int regulator_bulk_register_supply_alias(struct device *dev, const char *const *id, struct device *alias_dev, const char *const *alias_id, int num_id) { int i; int ret; for (i = 0; i < num_id; ++i) { ret = regulator_register_supply_alias(dev, id[i], alias_dev, alias_id[i]); if (ret < 0) goto err; } return 0; err: dev_err(dev, "Failed to create supply alias %s,%s -> %s,%s\n", id[i], dev_name(dev), alias_id[i], dev_name(alias_dev)); while (--i >= 0) regulator_unregister_supply_alias(dev, id[i]); return ret; } EXPORT_SYMBOL_GPL(regulator_bulk_register_supply_alias); /** * regulator_bulk_unregister_supply_alias - unregister multiple aliases * * @dev: device that will be given as the regulator "consumer" * @id: List of supply names or regulator IDs * @num_id: Number of aliases to unregister * * This helper function allows drivers to unregister several supply * aliases in one operation. */ void regulator_bulk_unregister_supply_alias(struct device *dev, const char *const *id, int num_id) { int i; for (i = 0; i < num_id; ++i) regulator_unregister_supply_alias(dev, id[i]); } EXPORT_SYMBOL_GPL(regulator_bulk_unregister_supply_alias); /* Manage enable GPIO list. Same GPIO pin can be shared among regulators */ static int regulator_ena_gpio_request(struct regulator_dev *rdev, const struct regulator_config *config) { struct regulator_enable_gpio *pin, *new_pin; struct gpio_desc *gpiod; gpiod = config->ena_gpiod; new_pin = kzalloc_obj(*new_pin); mutex_lock(&regulator_list_mutex); if (gpiod_is_shared(gpiod)) /* * The sharing of this GPIO pin is managed internally by * GPIOLIB. We don't need to keep track of its enable count. 
*/ goto skip_compare; list_for_each_entry(pin, &regulator_ena_gpio_list, list) { if (gpiod_is_equal(pin->gpiod, gpiod)) { rdev_dbg(rdev, "GPIO is already used\n"); goto update_ena_gpio_to_rdev; } } if (new_pin == NULL) { mutex_unlock(&regulator_list_mutex); return -ENOMEM; } skip_compare: pin = new_pin; new_pin = NULL; pin->gpiod = gpiod; list_add(&pin->list, &regulator_ena_gpio_list); update_ena_gpio_to_rdev: pin->request_count++; rdev->ena_pin = pin; mutex_unlock(&regulator_list_mutex); kfree(new_pin); return 0; } static void regulator_ena_gpio_free(struct regulator_dev *rdev) { struct regulator_enable_gpio *pin, *n; if (!rdev->ena_pin) return; /* Free the GPIO only in case of no use */ list_for_each_entry_safe(pin, n, &regulator_ena_gpio_list, list) { if (pin != rdev->ena_pin) continue; if (--pin->request_count) break; gpiod_put(pin->gpiod); list_del(&pin->list); kfree(pin); break; } rdev->ena_pin = NULL; } /** * regulator_ena_gpio_ctrl - balance enable_count of each GPIO and actual GPIO pin control * @rdev: regulator_dev structure * @enable: enable GPIO at initial use? * * GPIO is enabled in case of initial use. (enable_count is 0) * GPIO is disabled when it is not shared any more. (enable_count <= 1) * * Return: 0 on success or a negative error number on failure. 
*/
static int regulator_ena_gpio_ctrl(struct regulator_dev *rdev, bool enable)
{
	struct regulator_enable_gpio *pin = rdev->ena_pin;
	int ret;

	if (!pin)
		return -EINVAL;

	if (enable) {
		/* Enable GPIO at initial use */
		if (pin->enable_count == 0) {
			ret = gpiod_set_value_cansleep(pin->gpiod, 1);
			if (ret)
				return ret;
		}

		pin->enable_count++;
	} else {
		/* Other enables remain: drop one count and leave the GPIO alone. */
		if (pin->enable_count > 1) {
			pin->enable_count--;
			return 0;
		}

		/* Disable GPIO if not used */
		if (pin->enable_count <= 1) {
			ret = gpiod_set_value_cansleep(pin->gpiod, 0);
			if (ret)
				return ret;

			pin->enable_count = 0;
		}
	}

	return 0;
}

/**
 * _regulator_check_status_enabled - check if regulator status can be
 *				     interpreted as "regulator is enabled"
 * @rdev: the regulator device to check
 *
 * Return:
 * * 1			- if status shows regulator is in enabled state
 * * 0			- if not enabled state
 * * Error Value	- as received from ops->get_status()
 */
static inline int _regulator_check_status_enabled(struct regulator_dev *rdev)
{
	/* Callers must make sure ops->get_status is set; it is not checked here. */
	int ret = rdev->desc->ops->get_status(rdev);

	if (ret < 0) {
		rdev_info(rdev, "get_status returned error: %d\n", ret);
		return ret;
	}

	switch (ret) {
	case REGULATOR_STATUS_OFF:
	case REGULATOR_STATUS_ERROR:
	case REGULATOR_STATUS_UNDEFINED:
		return 0;
	default:
		return 1;
	}
}

/*
 * Switch the regulator on through its enable GPIO or its enable() op, then
 * wait for it to come up: either poll every poll_enabled_time until the
 * enable time has elapsed, or sleep for the whole enable time.
 *
 * Return: 0 on success, -EINVAL if there is neither an enable GPIO nor an
 * enable() op, -ETIMEDOUT if polling never saw the regulator enabled, or
 * the error returned by the GPIO/enable/status call.
 */
static int _regulator_do_enable(struct regulator_dev *rdev)
{
	int ret, delay;

	/* Query before enabling in case configuration dependent.  */
	ret = _regulator_get_enable_time(rdev);
	if (ret >= 0) {
		delay = ret;
	} else {
		/* Not fatal: carry on with no ramp delay. */
		rdev_warn(rdev, "enable_time() failed: %pe\n", ERR_PTR(ret));
		delay = 0;
	}

	trace_regulator_enable(rdev_get_name(rdev));

	if (rdev->desc->off_on_delay) {
		/* if needed, keep a distance of off_on_delay from last time
		 * this regulator was disabled.
		 */
		ktime_t end = ktime_add_us(rdev->last_off, rdev->desc->off_on_delay);
		s64 remaining = ktime_us_delta(end, ktime_get_boottime());

		if (remaining > 0)
			fsleep(remaining);
	}

	if (rdev->ena_pin) {
		/* ena_gpio_state records whether this rdev already holds an enable. */
		if (!rdev->ena_gpio_state) {
			ret = regulator_ena_gpio_ctrl(rdev, true);
			if (ret < 0)
				return ret;
			rdev->ena_gpio_state = 1;
		}
	} else if (rdev->desc->ops->enable) {
		ret = rdev->desc->ops->enable(rdev);
		if (ret < 0)
			return ret;
	} else {
		return -EINVAL;
	}

	/* Allow the regulator to ramp; it would be useful to extend
	 * this for bulk operations so that the regulators can ramp
	 * together.
	 */
	trace_regulator_enable_delay(rdev_get_name(rdev));

	/* If poll_enabled_time is set, poll up to the delay calculated
	 * above, delaying poll_enabled_time uS to check if the regulator
	 * actually got enabled.
	 * If the regulator isn't enabled after our delay helper has expired,
	 * return -ETIMEDOUT.
	 */
	if (rdev->desc->poll_enabled_time) {
		int time_remaining = delay;

		while (time_remaining > 0) {
			fsleep(rdev->desc->poll_enabled_time);

			if (rdev->desc->ops->get_status) {
				ret = _regulator_check_status_enabled(rdev);
				if (ret < 0)
					return ret;
				else if (ret)
					break;
			} else if (rdev->desc->ops->is_enabled(rdev))
				break;

			time_remaining -= rdev->desc->poll_enabled_time;
		}

		/* The loop ran out of budget without seeing the regulator enabled. */
		if (time_remaining <= 0) {
			rdev_err(rdev, "Enabled check timed out\n");
			return -ETIMEDOUT;
		}
	} else {
		fsleep(delay);
	}

	trace_regulator_enable_complete(rdev_get_name(rdev));

	return 0;
}

/**
 * _regulator_handle_consumer_enable - handle that a consumer enabled
 * @regulator: regulator source
 *
 * Some things on a regulator consumer (like the contribution towards total
 * load on the regulator) only have an effect when the consumer wants the
 * regulator enabled.
Explained in example with two consumers of the same * regulator: * consumer A: set_load(100); => total load = 0 * consumer A: regulator_enable(); => total load = 100 * consumer B: set_load(1000); => total load = 100 * consumer B: regulator_enable(); => total load = 1100 * consumer A: regulator_disable(); => total_load = 1000 * * This function (together with _regulator_handle_consumer_disable) is * responsible for keeping track of the refcount for a given regulator consumer * and applying / unapplying these things. * * Return: 0 on success or negative error number on failure. */ static int _regulator_handle_consumer_enable(struct regulator *regulator) { int ret; struct regulator_dev *rdev = regulator->rdev; lockdep_assert_held_once(&rdev->mutex.base); regulator->enable_count++; if (regulator->uA_load && regulator->enable_count == 1) { ret = drms_uA_update(rdev); if (ret) regulator->enable_count--; return ret; } return 0; } /** * _regulator_handle_consumer_disable - handle that a consumer disabled * @regulator: regulator source * * The opposite of _regulator_handle_consumer_enable(). * * Return: 0 on success or a negative error number on failure. 
*/
static int _regulator_handle_consumer_disable(struct regulator *regulator)
{
	struct regulator_dev *rdev = regulator->rdev;

	lockdep_assert_held_once(&rdev->mutex.base);

	/* Refuse to go below zero; the count is left untouched. */
	if (!regulator->enable_count) {
		rdev_err(rdev, "Underflow of regulator enable count\n");
		return -EINVAL;
	}

	regulator->enable_count--;
	/* The last disable of a consumer that has a load triggers an update. */
	if (regulator->uA_load && regulator->enable_count == 0)
		return drms_uA_update(rdev);

	return 0;
}

/*
 * locks held by regulator_enable()
 *
 * Enables the supply first when this regulator has no users yet, balances
 * coupled regulators, accounts for the consumer, and switches the hardware
 * on only if it is not already enabled. On failure the consumer accounting
 * and the supply enable are undone in reverse order.
 */
static int _regulator_enable(struct regulator *regulator)
{
	struct regulator_dev *rdev = regulator->rdev;
	int ret;

	lockdep_assert_held_once(&rdev->mutex.base);

	/* First user: bring up the supply before the regulator itself. */
	if (rdev->use_count == 0 && rdev->supply) {
		ret = _regulator_enable(rdev->supply);
		if (ret < 0)
			return ret;
	}

	/* balance only if there are regulators coupled */
	if (rdev->coupling_desc.n_coupled > 1) {
		ret = regulator_balance_voltage(rdev, PM_SUSPEND_ON);
		if (ret < 0)
			goto err_disable_supply;
	}

	ret = _regulator_handle_consumer_enable(regulator);
	if (ret < 0)
		goto err_disable_supply;

	if (rdev->use_count == 0) {
		/*
		 * The regulator may already be enabled if it's not switchable
		 * or was left on
		 */
		ret = _regulator_is_enabled(rdev);
		if (ret == -EINVAL || ret == 0) {
			if (!regulator_ops_is_valid(rdev,
					REGULATOR_CHANGE_STATUS)) {
				ret = -EPERM;
				goto err_consumer_disable;
			}

			ret = _regulator_do_enable(rdev);
			if (ret < 0)
				goto err_consumer_disable;

			_notifier_call_chain(rdev, REGULATOR_EVENT_ENABLE,
					     NULL);
		} else if (ret < 0) {
			rdev_err(rdev, "is_enabled() failed: %pe\n", ERR_PTR(ret));
			goto err_consumer_disable;
		}
		/* Fallthrough on positive return values - already enabled */
	}

	/* use_count counts enabled consumers, not individual enable calls. */
	if (regulator->enable_count == 1)
		rdev->use_count++;

	return 0;

err_consumer_disable:
	_regulator_handle_consumer_disable(regulator);

err_disable_supply:
	/* Mirrors the supply enable at the top of the function. */
	if (rdev->use_count == 0 && rdev->supply)
		_regulator_disable(rdev->supply);

	return ret;
}

/**
 * regulator_enable - enable regulator output
 * @regulator: regulator source
 *
 * Request that the regulator be enabled with the regulator output at
 * the predefined voltage
or current value.  Calls to regulator_enable()
 * must be balanced with calls to regulator_disable().
 *
 * NOTE: the output value can be set by other drivers, boot loader or may be
 * hardwired in the regulator.
 *
 * Return: 0 on success or a negative error number on failure.
 */
int regulator_enable(struct regulator *regulator)
{
	struct regulator_dev *rdev = regulator->rdev;
	struct ww_acquire_ctx ww_ctx;
	int ret;

	/* _regulator_enable() asserts that the rdev mutex is held. */
	regulator_lock_dependent(rdev, &ww_ctx);
	ret = _regulator_enable(regulator);
	regulator_unlock_dependent(rdev, &ww_ctx);

	return ret;
}
EXPORT_SYMBOL_GPL(regulator_enable);

/*
 * Switch the regulator off through its enable GPIO or its disable() op and
 * record the switch-off time when the descriptor has an off_on_delay.
 * A regulator with neither a GPIO nor a disable() op is left alone and 0
 * is returned.
 */
static int _regulator_do_disable(struct regulator_dev *rdev)
{
	int ret;

	trace_regulator_disable(rdev_get_name(rdev));

	if (rdev->ena_pin) {
		/* Only release the GPIO if this rdev actually holds an enable. */
		if (rdev->ena_gpio_state) {
			ret = regulator_ena_gpio_ctrl(rdev, false);
			if (ret < 0)
				return ret;
			rdev->ena_gpio_state = 0;
		}

	} else if (rdev->desc->ops->disable) {
		ret = rdev->desc->ops->disable(rdev);
		if (ret != 0)
			return ret;
	}

	/* Consumed by _regulator_do_enable() to honour off_on_delay. */
	if (rdev->desc->off_on_delay)
		rdev->last_off = ktime_get_boottime();

	trace_regulator_disable_complete(rdev_get_name(rdev));

	return 0;
}

/*
 * locks held by regulator_disable()
 *
 * Drops one enable of @regulator. When this is the consumer's last enable
 * and the regulator's last user, and the constraints do not mark it
 * always_on, the hardware is switched off (subject to
 * REGULATOR_CHANGE_STATUS) with notifier events around the operation.
 * Afterwards the consumer accounting is updated, coupled regulators are
 * rebalanced and the supply is disabled if nothing uses this regulator.
 */
static int _regulator_disable(struct regulator *regulator)
{
	struct regulator_dev *rdev = regulator->rdev;
	int ret = 0;

	lockdep_assert_held_once(&rdev->mutex.base);

	if (WARN(regulator->enable_count == 0,
		 "unbalanced disables for %s\n", rdev_get_name(rdev)))
		return -EIO;

	if (regulator->enable_count == 1) {
	/* disabling last enable_count from this regulator */
		/* are we the last user and permitted to disable ? */
		if (rdev->use_count == 1 &&
		    (rdev->constraints && !rdev->constraints->always_on)) {

			/* we are last user */
			if (regulator_ops_is_valid(rdev, REGULATOR_CHANGE_STATUS)) {
				/* A notifier may veto the disable. */
				ret = _notifier_call_chain(rdev,
							   REGULATOR_EVENT_PRE_DISABLE,
							   NULL);
				if (ret & NOTIFY_STOP_MASK)
					return -EINVAL;

				ret = _regulator_do_disable(rdev);
				if (ret < 0) {
					rdev_err(rdev, "failed to disable: %pe\n", ERR_PTR(ret));
					_notifier_call_chain(rdev,
							REGULATOR_EVENT_ABORT_DISABLE,
							NULL);
					return ret;
				}
				_notifier_call_chain(rdev, REGULATOR_EVENT_DISABLE,
						NULL);
			}

			rdev->use_count = 0;
		} else if (rdev->use_count > 1) {
			rdev->use_count--;
		}
	}

	/* Each remaining step runs only if everything before it succeeded. */
	if (ret == 0)
		ret = _regulator_handle_consumer_disable(regulator);

	if (ret == 0 && rdev->coupling_desc.n_coupled > 1)
		ret = regulator_balance_voltage(rdev, PM_SUSPEND_ON);

	if (ret == 0 && rdev->use_count == 0 && rdev->supply)
		ret = _regulator_disable(rdev->supply);

	return ret;
}

/**
 * regulator_disable - disable regulator output
 * @regulator: regulator source
 *
 * Disable the regulator output voltage or current.  Calls to
 * regulator_enable() must be balanced with calls to
 * regulator_disable().
 *
 * NOTE: this will only disable the regulator output if no other consumer
 * devices have it enabled, the regulator device supports disabling and
 * machine constraints permit this operation.
 *
 * Return: 0 on success or a negative error number on failure.
*/ int regulator_disable(struct regulator *regulator) { struct regulator_dev *rdev = regulator->rdev; struct ww_acquire_ctx ww_ctx; int ret; regulator_lock_dependent(rdev, &ww_ctx); ret = _regulator_disable(regulator); regulator_unlock_dependent(rdev, &ww_ctx); return ret; } EXPORT_SYMBOL_GPL(regulator_disable); /* locks held by regulator_force_disable() */ static int _regulator_force_disable(struct regulator_dev *rdev) { int ret = 0; lockdep_assert_held_once(&rdev->mutex.base); ret = _notifier_call_chain(rdev, REGULATOR_EVENT_FORCE_DISABLE | REGULATOR_EVENT_PRE_DISABLE, NULL); if (ret & NOTIFY_STOP_MASK) return -EINVAL; ret = _regulator_do_disable(rdev); if (ret < 0) { rdev_err(rdev, "failed to force disable: %pe\n", ERR_PTR(ret)); _notifier_call_chain(rdev, REGULATOR_EVENT_FORCE_DISABLE | REGULATOR_EVENT_ABORT_DISABLE, NULL); return ret; } _notifier_call_chain(rdev, REGULATOR_EVENT_FORCE_DISABLE | REGULATOR_EVENT_DISABLE, NULL); return 0; } /** * regulator_force_disable - force disable regulator output * @regulator: regulator source * * Forcibly disable the regulator output voltage or current. * NOTE: this *will* disable the regulator output even if other consumer * devices have it enabled. This should be used for situations when device * damage will likely occur if the regulator is not disabled (e.g. over temp). * * Return: 0 on success or a negative error number on failure. 
*/ int regulator_force_disable(struct regulator *regulator) { struct regulator_dev *rdev = regulator->rdev; struct ww_acquire_ctx ww_ctx; int ret; regulator_lock_dependent(rdev, &ww_ctx); ret = _regulator_force_disable(regulator->rdev); if (rdev->coupling_desc.n_coupled > 1) regulator_balance_voltage(rdev, PM_SUSPEND_ON); if (regulator->uA_load) { regulator->uA_load = 0; ret = drms_uA_update(rdev); } if (rdev->use_count != 0 && rdev->supply) _regulator_disable(rdev->supply); regulator_unlock_dependent(rdev, &ww_ctx); return ret; } EXPORT_SYMBOL_GPL(regulator_force_disable); static void regulator_disable_work(struct work_struct *work) { struct regulator_dev *rdev = container_of(work, struct regulator_dev, disable_work.work); struct ww_acquire_ctx ww_ctx; int count, i, ret; struct regulator *regulator; int total_count = 0; regulator_lock_dependent(rdev, &ww_ctx); /* * Workqueue functions queue the new work instance while the previous * work instance is being processed. Cancel the queued work instance * as the work instance under processing does the job of the queued * work instance. */ cancel_delayed_work(&rdev->disable_work); list_for_each_entry(regulator, &rdev->consumer_list, list) { count = regulator->deferred_disables; if (!count) continue; total_count += count; regulator->deferred_disables = 0; for (i = 0; i < count; i++) { ret = _regulator_disable(regulator); if (ret != 0) rdev_err(rdev, "Deferred disable failed: %pe\n", ERR_PTR(ret)); } } WARN_ON(!total_count); if (rdev->coupling_desc.n_coupled > 1) regulator_balance_voltage(rdev, PM_SUSPEND_ON); regulator_unlock_dependent(rdev, &ww_ctx); } /** * regulator_disable_deferred - disable regulator output with delay * @regulator: regulator source * @ms: milliseconds until the regulator is disabled * * Execute regulator_disable() on the regulator after a delay. This * is intended for use with devices that require some time to quiesce. 
* * NOTE: this will only disable the regulator output if no other consumer * devices have it enabled, the regulator device supports disabling and * machine constraints permit this operation. * * Return: 0 on success or a negative error number on failure. */ int regulator_disable_deferred(struct regulator *regulator, int ms) { struct regulator_dev *rdev = regulator->rdev; if (!ms) return regulator_disable(regulator); regulator_lock(rdev); regulator->deferred_disables++; mod_delayed_work(system_power_efficient_wq, &rdev->disable_work, msecs_to_jiffies(ms)); regulator_unlock(rdev); return 0; } EXPORT_SYMBOL_GPL(regulator_disable_deferred); static int _regulator_is_enabled(struct regulator_dev *rdev) { /* A GPIO control always takes precedence */ if (rdev->ena_pin) return rdev->ena_gpio_state; /* If we don't know then assume that the regulator is always on */ if (!rdev->desc->ops->is_enabled) return 1; return rdev->desc->ops->is_enabled(rdev); } static int _regulator_list_voltage(struct regulator_dev *rdev, unsigned selector, int lock) { const struct regulator_ops *ops = rdev->desc->ops; int ret; if (rdev->desc->fixed_uV && rdev->desc->n_voltages == 1 && !selector) return rdev->desc->fixed_uV; if (ops->list_voltage) { if (selector >= rdev->desc->n_voltages) return -EINVAL; if (selector < rdev->desc->linear_min_sel) return 0; if (lock) regulator_lock(rdev); ret = ops->list_voltage(rdev, selector); if (lock) regulator_unlock(rdev); } else if (rdev->is_switch && rdev->supply) { ret = _regulator_list_voltage(rdev->supply->rdev, selector, lock); } else { return -EINVAL; } if (ret > 0) { if (ret < rdev->constraints->min_uV) ret = 0; else if (ret > rdev->constraints->max_uV) ret = 0; } return ret; } /** * regulator_is_enabled - is the regulator output enabled * @regulator: regulator source * * Note that the device backing this regulator handle can have multiple * users, so it might be enabled even if regulator_enable() was never * called for this particular source. 
* * Return: Positive if the regulator driver backing the source/client * has requested that the device be enabled, zero if it hasn't, * else a negative error number. */ int regulator_is_enabled(struct regulator *regulator) { int ret; if (regulator->always_on) return 1; regulator_lock(regulator->rdev); ret = _regulator_is_enabled(regulator->rdev); regulator_unlock(regulator->rdev); return ret; } EXPORT_SYMBOL_GPL(regulator_is_enabled); /** * regulator_count_voltages - count regulator_list_voltage() selectors * @regulator: regulator source * * Return: Number of selectors for @regulator, or negative error number. * * Selectors are numbered starting at zero, and typically correspond to * bitfields in hardware registers. */ int regulator_count_voltages(struct regulator *regulator) { struct regulator_dev *rdev = regulator->rdev; if (rdev->desc->n_voltages) return rdev->desc->n_voltages; if (!rdev->is_switch || !rdev->supply) return -EINVAL; return regulator_count_voltages(rdev->supply); } EXPORT_SYMBOL_GPL(regulator_count_voltages); /** * regulator_list_voltage - enumerate supported voltages * @regulator: regulator source * @selector: identify voltage to list * Context: can sleep * * Return: Voltage for @selector that can be passed to regulator_set_voltage(), * 0 if @selector can't be used on this system, or a negative error * number on failure. */ int regulator_list_voltage(struct regulator *regulator, unsigned selector) { return _regulator_list_voltage(regulator->rdev, selector, 1); } EXPORT_SYMBOL_GPL(regulator_list_voltage); /** * regulator_get_regmap - get the regulator's register map * @regulator: regulator source * * Return: Pointer to the &struct regmap for @regulator, or ERR_PTR() * encoded -%EOPNOTSUPP if @regulator doesn't use regmap. */ struct regmap *regulator_get_regmap(struct regulator *regulator) { struct regmap *map = regulator->rdev->regmap; return map ? 
map : ERR_PTR(-EOPNOTSUPP); } EXPORT_SYMBOL_GPL(regulator_get_regmap); /** * regulator_get_hardware_vsel_register - get the HW voltage selector register * @regulator: regulator source * @vsel_reg: voltage selector register, output parameter * @vsel_mask: mask for voltage selector bitfield, output parameter * * Returns the hardware register offset and bitmask used for setting the * regulator voltage. This might be useful when configuring voltage-scaling * hardware or firmware that can make I2C requests behind the kernel's back, * for example. * * Return: 0 on success, or -%EOPNOTSUPP if the regulator does not support * voltage selectors. * * On success, the output parameters @vsel_reg and @vsel_mask are filled in * and 0 is returned, otherwise a negative error number is returned. */ int regulator_get_hardware_vsel_register(struct regulator *regulator, unsigned *vsel_reg, unsigned *vsel_mask) { struct regulator_dev *rdev = regulator->rdev; const struct regulator_ops *ops = rdev->desc->ops; if (ops->set_voltage_sel != regulator_set_voltage_sel_regmap) return -EOPNOTSUPP; *vsel_reg = rdev->desc->vsel_reg; *vsel_mask = rdev->desc->vsel_mask; return 0; } EXPORT_SYMBOL_GPL(regulator_get_hardware_vsel_register); /** * regulator_list_hardware_vsel - get the HW-specific register value for a selector * @regulator: regulator source * @selector: identify voltage to list * * Converts the selector to a hardware-specific voltage selector that can be * directly written to the regulator registers. The address of the voltage * register can be determined by calling @regulator_get_hardware_vsel_register. * * Return: 0 on success, -%EINVAL if the selector is outside the supported * range, or -%EOPNOTSUPP if the regulator does not support voltage * selectors. 
*/ int regulator_list_hardware_vsel(struct regulator *regulator, unsigned selector) { struct regulator_dev *rdev = regulator->rdev; const struct regulator_ops *ops = rdev->desc->ops; if (selector >= rdev->desc->n_voltages) return -EINVAL; if (selector < rdev->desc->linear_min_sel) return 0; if (ops->set_voltage_sel != regulator_set_voltage_sel_regmap) return -EOPNOTSUPP; return selector; } EXPORT_SYMBOL_GPL(regulator_list_hardware_vsel); /** * regulator_hardware_enable - access the HW for enable/disable regulator * @regulator: regulator source * @enable: true for enable, false for disable * * Request that the regulator be enabled/disabled with the regulator output at * the predefined voltage or current value. * * Return: 0 on success or a negative error number on failure. */ int regulator_hardware_enable(struct regulator *regulator, bool enable) { struct regulator_dev *rdev = regulator->rdev; const struct regulator_ops *ops = rdev->desc->ops; int ret = -EOPNOTSUPP; if (!rdev->exclusive || !ops || !ops->enable || !ops->disable) return ret; if (enable) ret = ops->enable(rdev); else ret = ops->disable(rdev); return ret; } EXPORT_SYMBOL_GPL(regulator_hardware_enable); /** * regulator_get_linear_step - return the voltage step size between VSEL values * @regulator: regulator source * * Return: The voltage step size between VSEL values for linear regulators, * or 0 if the regulator isn't a linear regulator. */ unsigned int regulator_get_linear_step(struct regulator *regulator) { struct regulator_dev *rdev = regulator->rdev; return rdev->desc->uV_step; } EXPORT_SYMBOL_GPL(regulator_get_linear_step); /** * regulator_is_supported_voltage - check if a voltage range can be supported * * @regulator: Regulator to check. * @min_uV: Minimum required voltage in uV. * @max_uV: Maximum required voltage in uV. * * Return: 1 if the voltage range is supported, 0 if not, or a negative error * number if @regulator's voltage can't be changed and voltage readback * failed. 
*/
int regulator_is_supported_voltage(struct regulator *regulator,
				   int min_uV, int max_uV)
{
	struct regulator_dev *rdev = regulator->rdev;
	int i, voltages, ret;

	/* If we can't change voltage check the current voltage */
	if (!regulator_ops_is_valid(rdev, REGULATOR_CHANGE_VOLTAGE)) {
		ret = regulator_get_voltage(regulator);
		if (ret >= 0)
			return min_uV <= ret && ret <= max_uV;
		else
			return ret;
	}

	/* Any voltage within constraints range is fine? */
	if (rdev->desc->continuous_voltage_range)
		return min_uV >= rdev->constraints->min_uV &&
				max_uV <= rdev->constraints->max_uV;

	/* A regulator whose selectors cannot be counted supports nothing. */
	ret = regulator_count_voltages(regulator);
	if (ret < 0)
		return 0;
	voltages = ret;

	/* Supported as soon as any selector lands inside the requested range. */
	for (i = 0; i < voltages; i++) {
		ret = regulator_list_voltage(regulator, i);

		if (ret >= min_uV && ret <= max_uV)
			return 1;
	}

	return 0;
}
EXPORT_SYMBOL_GPL(regulator_is_supported_voltage);

/*
 * Map a voltage range to a selector. The driver's map_voltage() op is used
 * when present; otherwise the mapper matching the driver's list_voltage()
 * helper is chosen, with iteration over all selectors as the last resort.
 */
static int regulator_map_voltage(struct regulator_dev *rdev, int min_uV,
				 int max_uV)
{
	const struct regulator_desc *desc = rdev->desc;

	if (desc->ops->map_voltage)
		return desc->ops->map_voltage(rdev, min_uV, max_uV);

	if (desc->ops->list_voltage == regulator_list_voltage_linear)
		return regulator_map_voltage_linear(rdev, min_uV, max_uV);

	if (desc->ops->list_voltage == regulator_list_voltage_linear_range)
		return regulator_map_voltage_linear_range(rdev, min_uV, max_uV);

	if (desc->ops->list_voltage ==
		regulator_list_voltage_pickable_linear_range)
		return regulator_map_voltage_pickable_linear_range(rdev,
							min_uV, max_uV);

	return regulator_map_voltage_iterate(rdev, min_uV, max_uV);
}

/*
 * Call the driver's set_voltage() op bracketed by notifier events:
 * PRE_VOLTAGE_CHANGE beforehand (a notifier may veto the change) and
 * ABORT_VOLTAGE_CHANGE carrying the old voltage if the op fails.
 */
static int _regulator_call_set_voltage(struct regulator_dev *rdev,
				       int min_uV, int max_uV,
				       unsigned *selector)
{
	struct pre_voltage_change_data data;
	int ret;

	data.old_uV = regulator_get_voltage_rdev(rdev);
	data.min_uV = min_uV;
	data.max_uV = max_uV;
	ret = _notifier_call_chain(rdev, REGULATOR_EVENT_PRE_VOLTAGE_CHANGE,
				   &data);
	if (ret & NOTIFY_STOP_MASK)
		return -EINVAL;

	ret = rdev->desc->ops->set_voltage(rdev, min_uV, max_uV, selector);
	if (ret >= 0)
		return ret;

	_notifier_call_chain(rdev, REGULATOR_EVENT_ABORT_VOLTAGE_CHANGE,
			     (void *)data.old_uV);

	return ret;
}

/*
 * Selector flavour of _regulator_call_set_voltage(): the notifier data
 * reports @uV as both the minimum and the maximum of the requested range.
 */
static int _regulator_call_set_voltage_sel(struct regulator_dev *rdev,
					   int uV, unsigned selector)
{
	struct pre_voltage_change_data data;
	int ret;

	data.old_uV = regulator_get_voltage_rdev(rdev);
	data.min_uV = uV;
	data.max_uV = uV;
	ret = _notifier_call_chain(rdev, REGULATOR_EVENT_PRE_VOLTAGE_CHANGE,
				   &data);
	if (ret & NOTIFY_STOP_MASK)
		return -EINVAL;

	ret = rdev->desc->ops->set_voltage_sel(rdev, selector);
	if (ret >= 0)
		return ret;

	_notifier_call_chain(rdev, REGULATOR_EVENT_ABORT_VOLTAGE_CHANGE,
			     (void *)data.old_uV);

	return ret;
}

/*
 * Move an enabled regulator to @new_selector in increments of
 * desc->vsel_step, then apply the final selector through the notifying
 * path. A disabled regulator is set to the final selector directly.
 */
static int _regulator_set_voltage_sel_step(struct regulator_dev *rdev,
					   int uV, int new_selector)
{
	const struct regulator_ops *ops = rdev->desc->ops;
	int diff, old_sel, curr_sel, ret;

	/* Stepping is only needed if the regulator is enabled. */
	if (!_regulator_is_enabled(rdev))
		goto final_set;

	if (!ops->get_voltage_sel)
		return -EINVAL;

	old_sel = ops->get_voltage_sel(rdev);
	if (old_sel < 0)
		return old_sel;

	diff = new_selector - old_sel;
	if (diff == 0)
		return 0; /* No change needed. */

	if (diff > 0) {
		/* Stepping up. */
		for (curr_sel = old_sel + rdev->desc->vsel_step;
		     curr_sel < new_selector;
		     curr_sel += rdev->desc->vsel_step) {
			/*
			 * Call the callback directly instead of using
			 * _regulator_call_set_voltage_sel() as we don't
			 * want to notify anyone yet. Same in the branch
			 * below.
			 */
			ret = ops->set_voltage_sel(rdev, curr_sel);
			if (ret)
				goto try_revert;
		}
	} else {
		/* Stepping down. */
		for (curr_sel = old_sel - rdev->desc->vsel_step;
		     curr_sel > new_selector;
		     curr_sel -= rdev->desc->vsel_step) {
			ret = ops->set_voltage_sel(rdev, curr_sel);
			if (ret)
				goto try_revert;
		}
	}

final_set:
	/* The final selector will trigger the notifiers. */
	return _regulator_call_set_voltage_sel(rdev, uV, new_selector);

try_revert:
	/*
	 * At least try to return to the previous voltage if setting a new
	 * one failed.
	 */
	(void)ops->set_voltage_sel(rdev, old_sel);
	return ret;
}

/*
 * Default settling-time estimate for a change from @old_uV to @new_uV.
 * A ramp delay (constraints first, then descriptor) takes priority over
 * the fixed settling times; with a ramp delay the result is the voltage
 * difference divided by it, rounded up.
 */
static int _regulator_set_voltage_time(struct regulator_dev *rdev,
				       int old_uV, int new_uV)
{
	unsigned int ramp_delay = 0;

	if (rdev->constraints->ramp_delay)
		ramp_delay = rdev->constraints->ramp_delay;
	else if (rdev->desc->ramp_delay)
		ramp_delay = rdev->desc->ramp_delay;
	else if (rdev->constraints->settling_time)
		return rdev->constraints->settling_time;
	else if (rdev->constraints->settling_time_up &&
		 (new_uV > old_uV))
		return rdev->constraints->settling_time_up;
	else if (rdev->constraints->settling_time_down &&
		 (new_uV < old_uV))
		return rdev->constraints->settling_time_down;

	if (ramp_delay == 0)
		return 0;

	return DIV_ROUND_UP(abs(new_uV - old_uV), ramp_delay);
}

/*
 * Program the hardware to a voltage within [min_uV, max_uV] (after adding
 * the constraints' uV_offset), wait out the resulting settling delay and
 * send REGULATOR_EVENT_VOLTAGE_CHANGE with the voltage that was selected.
 */
static int _regulator_do_set_voltage(struct regulator_dev *rdev,
				     int min_uV, int max_uV)
{
	int ret;
	int delay = 0;
	int best_val = 0;
	unsigned int selector;
	int old_selector = -1;
	const struct regulator_ops *ops = rdev->desc->ops;
	int old_uV = regulator_get_voltage_rdev(rdev);

	trace_regulator_set_voltage(rdev_get_name(rdev), min_uV, max_uV);

	min_uV += rdev->constraints->uV_offset;
	max_uV += rdev->constraints->uV_offset;

	/*
	 * If we can't obtain the old selector there is not enough
	 * info to call set_voltage_time_sel().
	 */
	if (_regulator_is_enabled(rdev) &&
	    ops->set_voltage_time_sel && ops->get_voltage_sel) {
		old_selector = ops->get_voltage_sel(rdev);
		if (old_selector < 0)
			return old_selector;
	}

	if (ops->set_voltage) {
		ret = _regulator_call_set_voltage(rdev, min_uV, max_uV,
						  &selector);

		if (ret >= 0) {
			/* Work out which voltage the driver actually picked. */
			if (ops->list_voltage)
				best_val = ops->list_voltage(rdev,
							     selector);
			else
				best_val = regulator_get_voltage_rdev(rdev);
		}

	} else if (ops->set_voltage_sel) {
		ret = regulator_map_voltage(rdev, min_uV, max_uV);
		if (ret >= 0) {
			best_val = ops->list_voltage(rdev, ret);
			/* Only program a selector that really falls in range. */
			if (min_uV <= best_val && max_uV >= best_val) {
				selector = ret;
				if (old_selector == selector)
					ret = 0;
				else if (rdev->desc->vsel_step)
					ret = _regulator_set_voltage_sel_step(
						rdev, best_val, selector);
				else
					ret = _regulator_call_set_voltage_sel(
						rdev, best_val, selector);
			} else {
				ret = -EINVAL;
			}
		}
	} else {
		ret = -EINVAL;
	}

	if (ret)
		goto out;

	if (ops->set_voltage_time_sel) {
		/*
		 * Call set_voltage_time_sel if successfully obtained
		 * old_selector
		 */
		if (old_selector >= 0 && old_selector != selector)
			delay = ops->set_voltage_time_sel(rdev, old_selector,
							  selector);
	} else {
		if (old_uV != best_val) {
			if (ops->set_voltage_time)
				delay = ops->set_voltage_time(rdev, old_uV,
							      best_val);
			else
				delay = _regulator_set_voltage_time(rdev,
								    old_uV,
								    best_val);
		}
	}

	/* A failed delay query is not fatal: warn and skip the wait. */
	if (delay < 0) {
		rdev_warn(rdev, "failed to get delay: %pe\n", ERR_PTR(delay));
		delay = 0;
	}

	/* Insert any necessary delays */
	fsleep(delay);

	if (best_val >= 0) {
		unsigned long data = best_val;

		_notifier_call_chain(rdev, REGULATOR_EVENT_VOLTAGE_CHANGE,
				     (void *)data);
	}

out:
	trace_regulator_set_voltage_complete(rdev_get_name(rdev), best_val);

	return ret;
}

/*
 * Record the voltage to use in suspend state @state. The request is first
 * clamped to the state's own [min_uV, max_uV]; the state's uV is updated
 * only if the mapped voltage falls inside the clamped range.
 */
static int _regulator_do_set_suspend_voltage(struct regulator_dev *rdev,
				  int min_uV, int max_uV, suspend_state_t state)
{
	struct regulator_state *rstate;
	int uV, sel;

	rstate = regulator_get_suspend_state(rdev, state);
	if (rstate == NULL)
		return -EINVAL;

	if (min_uV < rstate->min_uV)
		min_uV = rstate->min_uV;
	if (max_uV > rstate->max_uV)
		max_uV = rstate->max_uV;

	sel = regulator_map_voltage(rdev, min_uV, max_uV);
	if (sel < 0)
		return sel;

	uV = rdev->desc->ops->list_voltage(rdev, sel);
	if (uV >= min_uV && uV <= max_uV)
		rstate->uV = uV;

	return 0;
}

/*
 * Return the absolute difference between the regulator's current voltage
 * and @uV, or the negative error from reading the current voltage.
 */
static int regulator_get_voltage_delta(struct regulator_dev *rdev, int uV)
{
	int current_uV = regulator_get_voltage_rdev(rdev);

	if (current_uV < 0)
		return current_uV;

	return abs(current_uV - uV);
}

/*
 * Record this consumer's requested range for @state and rebalance the
 * regulator. The previous request is restored if balancing fails. For
 * regulators with a max_uV_step constraint, balancing is repeated until
 * the voltage reaches @min_uV or stops converging fast enough.
 */
static int regulator_set_voltage_unlocked(struct regulator *regulator,
					  int min_uV, int max_uV,
					  suspend_state_t state)
{
	struct regulator_dev *rdev = regulator->rdev;
	struct regulator_voltage *voltage = &regulator->voltage[state];
	int ret = 0;
	int current_uV, delta, new_delta;
	int old_min_uV, old_max_uV;

	/* If we're setting the same range as last time the change
	 * should be a noop (some cpufreq implementations use the same
	 * voltage for multiple frequencies, for example).
	 */
	if (voltage->min_uV == min_uV && voltage->max_uV == max_uV)
		goto out;

	/* If we're trying to set a range that overlaps the current voltage,
	 * return successfully even though the regulator does not support
	 * changing the voltage.
	 */
	if (!regulator_ops_is_valid(rdev, REGULATOR_CHANGE_VOLTAGE)) {
		current_uV = regulator_get_voltage_rdev(rdev);
		if (min_uV <= current_uV && current_uV <= max_uV) {
			voltage->min_uV = min_uV;
			voltage->max_uV = max_uV;
			goto out;
		}
	}

	/* sanity check */
	if (!rdev->desc->ops->set_voltage &&
	    !rdev->desc->ops->set_voltage_sel) {
		ret = -EINVAL;
		goto out;
	}

	/* constraints check */
	ret = regulator_check_voltage(rdev, &min_uV, &max_uV);
	if (ret < 0)
		goto out;

	/* restore original values in case of error */
	old_min_uV = voltage->min_uV;
	old_max_uV = voltage->max_uV;
	voltage->min_uV = min_uV;
	voltage->max_uV = max_uV;

	/* for not coupled regulators this will just set the voltage */
	ret = regulator_balance_voltage(rdev, state);
	if (ret < 0) {
		voltage->min_uV = old_min_uV;
		voltage->max_uV = old_max_uV;
	}

	/*
	 * NOTE(review): a balance failure above does not skip this block, so
	 * ret is overwritten here when max_uV_step is set - confirm intended.
	 */
	if (rdev->constraints->max_uV_step > 0) {
		/* For regulators with a maximum voltage step, reaching the desired
		 * voltage might take a few retries.
		 */
		ret = regulator_get_voltage_delta(rdev, min_uV);
		if (ret < 0)
			goto out;

		delta = ret;

		while (delta > 0) {
			ret = regulator_balance_voltage(rdev, state);
			if (ret < 0)
				goto out;

			ret = regulator_get_voltage_delta(rdev, min_uV);
			if (ret < 0)
				goto out;

			new_delta = ret;

			/* check that voltage is converging quickly enough */
			if (delta - new_delta < rdev->constraints->max_uV_step) {
				ret = -EWOULDBLOCK;
				goto out;
			}

			delta = new_delta;
		}
	}

out:
	return ret;
}

int regulator_set_voltage_rdev(struct regulator_dev *rdev, int min_uV,
			       int max_uV, suspend_state_t state)
{
	int best_supply_uV = 0;
	int supply_change_uV = 0;
	int ret;

	if (rdev->supply &&
	    regulator_ops_is_valid(rdev->supply->rdev,
				   REGULATOR_CHANGE_VOLTAGE) &&
	    (rdev->desc->min_dropout_uV || !(rdev->desc->ops->get_voltage ||
					   rdev->desc->ops->get_voltage_sel))) {
		int current_supply_uV;
		int selector;

		selector = regulator_map_voltage(rdev, min_uV, max_uV);
		if (selector < 0) {
			ret = selector;
			goto out;
		}

		best_supply_uV = _regulator_list_voltage(rdev, selector, 0);
		if (best_supply_uV < 0) {
			ret =
best_supply_uV; goto out; } best_supply_uV += rdev->desc->min_dropout_uV; current_supply_uV = regulator_get_voltage_rdev(rdev->supply->rdev); if (current_supply_uV < 0) { ret = current_supply_uV; goto out; } supply_change_uV = best_supply_uV - current_supply_uV; } if (supply_change_uV > 0) { ret = regulator_set_voltage_unlocked(rdev->supply, best_supply_uV, INT_MAX, state); if (ret) { dev_err(&rdev->dev, "Failed to increase supply voltage: %pe\n", ERR_PTR(ret)); goto out; } } if (state == PM_SUSPEND_ON) ret = _regulator_do_set_voltage(rdev, min_uV, max_uV); else ret = _regulator_do_set_suspend_voltage(rdev, min_uV, max_uV, state); if (ret < 0) goto out; if (supply_change_uV < 0) { ret = regulator_set_voltage_unlocked(rdev->supply, best_supply_uV, INT_MAX, state); if (ret) dev_warn(&rdev->dev, "Failed to decrease supply voltage: %pe\n", ERR_PTR(ret)); /* No need to fail here */ ret = 0; } out: return ret; } EXPORT_SYMBOL_GPL(regulator_set_voltage_rdev); static int regulator_limit_voltage_step(struct regulator_dev *rdev, int *current_uV, int *min_uV) { struct regulation_constraints *constraints = rdev->constraints; /* Limit voltage change only if necessary */ if (!constraints->max_uV_step || !_regulator_is_enabled(rdev)) return 1; if (*current_uV < 0) { *current_uV = regulator_get_voltage_rdev(rdev); if (*current_uV < 0) return *current_uV; } if (abs(*current_uV - *min_uV) <= constraints->max_uV_step) return 1; /* Clamp target voltage within the given step */ if (*current_uV < *min_uV) *min_uV = min(*current_uV + constraints->max_uV_step, *min_uV); else *min_uV = max(*current_uV - constraints->max_uV_step, *min_uV); return 0; } static int regulator_get_optimal_voltage(struct regulator_dev *rdev, int *current_uV, int *min_uV, int *max_uV, suspend_state_t state, int n_coupled) { struct coupling_desc *c_desc = &rdev->coupling_desc; struct regulator_dev **c_rdevs = c_desc->coupled_rdevs; struct regulation_constraints *constraints = rdev->constraints; int desired_min_uV = 
0, desired_max_uV = INT_MAX; int max_current_uV = 0, min_current_uV = INT_MAX; int highest_min_uV = 0, target_uV, possible_uV; int i, ret, max_spread; bool done; *current_uV = -1; /* * If there are no coupled regulators, simply set the voltage * demanded by consumers. */ if (n_coupled == 1) { /* * If consumers don't provide any demands, set voltage * to min_uV */ desired_min_uV = constraints->min_uV; desired_max_uV = constraints->max_uV; ret = regulator_check_consumers(rdev, &desired_min_uV, &desired_max_uV, state); if (ret < 0) return ret; done = true; goto finish; } /* Find highest min desired voltage */ for (i = 0; i < n_coupled; i++) { int tmp_min = 0; int tmp_max = INT_MAX; lockdep_assert_held_once(&c_rdevs[i]->mutex.base); ret = regulator_check_consumers(c_rdevs[i], &tmp_min, &tmp_max, state); if (ret < 0) return ret; ret = regulator_check_voltage(c_rdevs[i], &tmp_min, &tmp_max); if (ret < 0) return ret; highest_min_uV = max(highest_min_uV, tmp_min); if (i == 0) { desired_min_uV = tmp_min; desired_max_uV = tmp_max; } } max_spread = constraints->max_spread[0]; /* * Let target_uV be equal to the desired one if possible. * If not, set it to minimum voltage, allowed by other coupled * regulators. */ target_uV = max(desired_min_uV, highest_min_uV - max_spread); /* * Find min and max voltages, which currently aren't violating * max_spread. 
*/ for (i = 1; i < n_coupled; i++) { int tmp_act; if (!_regulator_is_enabled(c_rdevs[i])) continue; tmp_act = regulator_get_voltage_rdev(c_rdevs[i]); if (tmp_act < 0) return tmp_act; min_current_uV = min(tmp_act, min_current_uV); max_current_uV = max(tmp_act, max_current_uV); } /* There aren't any other regulators enabled */ if (max_current_uV == 0) { possible_uV = target_uV; } else { /* * Correct target voltage, so as it currently isn't * violating max_spread */ possible_uV = max(target_uV, max_current_uV - max_spread); possible_uV = min(possible_uV, min_current_uV + max_spread); } if (possible_uV > desired_max_uV) return -EINVAL; done = (possible_uV == target_uV); desired_min_uV = possible_uV; finish: /* Apply max_uV_step constraint if necessary */ if (state == PM_SUSPEND_ON) { ret = regulator_limit_voltage_step(rdev, current_uV, &desired_min_uV); if (ret < 0) return ret; if (ret == 0) done = false; } /* Set current_uV if wasn't done earlier in the code and if necessary */ if (n_coupled > 1 && *current_uV == -1) { if (_regulator_is_enabled(rdev)) { ret = regulator_get_voltage_rdev(rdev); if (ret < 0) return ret; *current_uV = ret; } else { *current_uV = desired_min_uV; } } *min_uV = desired_min_uV; *max_uV = desired_max_uV; return done; } int regulator_do_balance_voltage(struct regulator_dev *rdev, suspend_state_t state, bool skip_coupled) { struct regulator_dev **c_rdevs; struct regulator_dev *best_rdev; struct coupling_desc *c_desc = &rdev->coupling_desc; int i, ret, n_coupled, best_min_uV, best_max_uV, best_c_rdev; unsigned int delta, best_delta; unsigned long c_rdev_done = 0; bool best_c_rdev_done; c_rdevs = c_desc->coupled_rdevs; n_coupled = skip_coupled ? 1 : c_desc->n_coupled; /* * Find the best possible voltage change on each loop. Leave the loop * if there isn't any possible change. 
*/ do { best_c_rdev_done = false; best_delta = 0; best_min_uV = 0; best_max_uV = 0; best_c_rdev = 0; best_rdev = NULL; /* * Find highest difference between optimal voltage * and current voltage. */ for (i = 0; i < n_coupled; i++) { /* * optimal_uV is the best voltage that can be set for * i-th regulator at the moment without violating * max_spread constraint in order to balance * the coupled voltages. */ int optimal_uV = 0, optimal_max_uV = 0, current_uV = 0; if (test_bit(i, &c_rdev_done)) continue; ret = regulator_get_optimal_voltage(c_rdevs[i], &current_uV, &optimal_uV, &optimal_max_uV, state, n_coupled); if (ret < 0) goto out; delta = abs(optimal_uV - current_uV); if (delta && best_delta <= delta) { best_c_rdev_done = ret; best_delta = delta; best_rdev = c_rdevs[i]; best_min_uV = optimal_uV; best_max_uV = optimal_max_uV; best_c_rdev = i; } } /* Nothing to change, return successfully */ if (!best_rdev) { ret = 0; goto out; } ret = regulator_set_voltage_rdev(best_rdev, best_min_uV, best_max_uV, state); if (ret < 0) goto out; if (best_c_rdev_done) set_bit(best_c_rdev, &c_rdev_done); } while (n_coupled > 1); out: return ret; } static int regulator_balance_voltage(struct regulator_dev *rdev, suspend_state_t state) { struct coupling_desc *c_desc = &rdev->coupling_desc; struct regulator_coupler *coupler = c_desc->coupler; bool skip_coupled = false; /* * If system is in a state other than PM_SUSPEND_ON, don't check * other coupled regulators. 
*/ if (state != PM_SUSPEND_ON) skip_coupled = true; if (c_desc->n_resolved < c_desc->n_coupled) { rdev_err(rdev, "Not all coupled regulators registered\n"); return -EPERM; } /* Invoke custom balancer for customized couplers */ if (coupler && coupler->balance_voltage) return coupler->balance_voltage(coupler, rdev, state); return regulator_do_balance_voltage(rdev, state, skip_coupled); } /** * regulator_set_voltage - set regulator output voltage * @regulator: regulator source * @min_uV: Minimum required voltage in uV * @max_uV: Maximum acceptable voltage in uV * * Sets a voltage regulator to the desired output voltage. This can be set * during any regulator state. IOW, regulator can be disabled or enabled. * * If the regulator is enabled then the voltage will change to the new value * immediately otherwise if the regulator is disabled the regulator will * output at the new voltage when enabled. * * NOTE: If the regulator is shared between several devices then the lowest * request voltage that meets the system constraints will be used. * Regulator system constraints must be set for this regulator before * calling this function otherwise this call will fail. * * Return: 0 on success or a negative error number on failure. */ int regulator_set_voltage(struct regulator *regulator, int min_uV, int max_uV) { struct ww_acquire_ctx ww_ctx; int ret; regulator_lock_dependent(regulator->rdev, &ww_ctx); ret = regulator_set_voltage_unlocked(regulator, min_uV, max_uV, PM_SUSPEND_ON); regulator_unlock_dependent(regulator->rdev, &ww_ctx); return ret; } EXPORT_SYMBOL_GPL(regulator_set_voltage); static inline int regulator_suspend_toggle(struct regulator_dev *rdev, suspend_state_t state, bool en) { struct regulator_state *rstate; rstate = regulator_get_suspend_state(rdev, state); if (rstate == NULL) return -EINVAL; if (!rstate->changeable) return -EPERM; rstate->enabled = (en) ? 
ENABLE_IN_SUSPEND : DISABLE_IN_SUSPEND; return 0; } int regulator_suspend_enable(struct regulator_dev *rdev, suspend_state_t state) { return regulator_suspend_toggle(rdev, state, true); } EXPORT_SYMBOL_GPL(regulator_suspend_enable); int regulator_suspend_disable(struct regulator_dev *rdev, suspend_state_t state) { struct regulator *regulator; struct regulator_voltage *voltage; /* * if any consumer wants this regulator device keeping on in * suspend states, don't set it as disabled. */ list_for_each_entry(regulator, &rdev->consumer_list, list) { voltage = &regulator->voltage[state]; if (voltage->min_uV || voltage->max_uV) return 0; } return regulator_suspend_toggle(rdev, state, false); } EXPORT_SYMBOL_GPL(regulator_suspend_disable); static int _regulator_set_suspend_voltage(struct regulator *regulator, int min_uV, int max_uV, suspend_state_t state) { struct regulator_dev *rdev = regulator->rdev; struct regulator_state *rstate; rstate = regulator_get_suspend_state(rdev, state); if (rstate == NULL) return -EINVAL; if (rstate->min_uV == rstate->max_uV) { rdev_err(rdev, "The suspend voltage can't be changed!\n"); return -EPERM; } return regulator_set_voltage_unlocked(regulator, min_uV, max_uV, state); } int regulator_set_suspend_voltage(struct regulator *regulator, int min_uV, int max_uV, suspend_state_t state) { struct ww_acquire_ctx ww_ctx; int ret; /* PM_SUSPEND_ON is handled by regulator_set_voltage() */ if (regulator_check_states(state) || state == PM_SUSPEND_ON) return -EINVAL; regulator_lock_dependent(regulator->rdev, &ww_ctx); ret = _regulator_set_suspend_voltage(regulator, min_uV, max_uV, state); regulator_unlock_dependent(regulator->rdev, &ww_ctx); return ret; } EXPORT_SYMBOL_GPL(regulator_set_suspend_voltage); /** * regulator_set_voltage_time - get raise/fall time * @regulator: regulator source * @old_uV: starting voltage in microvolts * @new_uV: target voltage in microvolts * * Provided with the starting and ending voltage, this function attempts to * 
calculate the time in microseconds required to rise or fall to this new * voltage. * * Return: ramp time in microseconds, or a negative error number if calculation failed. */ int regulator_set_voltage_time(struct regulator *regulator, int old_uV, int new_uV) { struct regulator_dev *rdev = regulator->rdev; const struct regulator_ops *ops = rdev->desc->ops; int old_sel = -1; int new_sel = -1; int voltage; int i; if (ops->set_voltage_time) return ops->set_voltage_time(rdev, old_uV, new_uV); else if (!ops->set_voltage_time_sel) return _regulator_set_voltage_time(rdev, old_uV, new_uV); /* Currently requires operations to do this */ if (!ops->list_voltage || !rdev->desc->n_voltages) return -EINVAL; for (i = 0; i < rdev->desc->n_voltages; i++) { /* We only look for exact voltage matches here */ if (i < rdev->desc->linear_min_sel) continue; if (old_sel >= 0 && new_sel >= 0) break; voltage = regulator_list_voltage(regulator, i); if (voltage < 0) return -EINVAL; if (voltage == 0) continue; if (voltage == old_uV) old_sel = i; if (voltage == new_uV) new_sel = i; } if (old_sel < 0 || new_sel < 0) return -EINVAL; return ops->set_voltage_time_sel(rdev, old_sel, new_sel); } EXPORT_SYMBOL_GPL(regulator_set_voltage_time); /** * regulator_set_voltage_time_sel - get raise/fall time * @rdev: regulator source device * @old_selector: selector for starting voltage * @new_selector: selector for target voltage * * Provided with the starting and target voltage selectors, this function * returns time in microseconds required to rise or fall to this new voltage * * Drivers providing ramp_delay in regulation_constraints can use this as their * set_voltage_time_sel() operation. * * Return: ramp time in microseconds, or a negative error number if calculation failed. 
*/ int regulator_set_voltage_time_sel(struct regulator_dev *rdev, unsigned int old_selector, unsigned int new_selector) { int old_volt, new_volt; /* sanity check */ if (!rdev->desc->ops->list_voltage) return -EINVAL; old_volt = rdev->desc->ops->list_voltage(rdev, old_selector); new_volt = rdev->desc->ops->list_voltage(rdev, new_selector); if (rdev->desc->ops->set_voltage_time) return rdev->desc->ops->set_voltage_time(rdev, old_volt, new_volt); else return _regulator_set_voltage_time(rdev, old_volt, new_volt); } EXPORT_SYMBOL_GPL(regulator_set_voltage_time_sel); int regulator_sync_voltage_rdev(struct regulator_dev *rdev) { int ret; regulator_lock(rdev); if (!rdev->desc->ops->set_voltage && !rdev->desc->ops->set_voltage_sel) { ret = -EINVAL; goto out; } /* balance only, if regulator is coupled */ if (rdev->coupling_desc.n_coupled > 1) ret = regulator_balance_voltage(rdev, PM_SUSPEND_ON); else ret = -EOPNOTSUPP; out: regulator_unlock(rdev); return ret; } /** * regulator_sync_voltage - re-apply last regulator output voltage * @regulator: regulator source * * Re-apply the last configured voltage. This is intended to be used * where some external control source the consumer is cooperating with * has caused the configured voltage to change. * * Return: 0 on success or a negative error number on failure. */ int regulator_sync_voltage(struct regulator *regulator) { struct regulator_dev *rdev = regulator->rdev; struct regulator_voltage *voltage = &regulator->voltage[PM_SUSPEND_ON]; int ret, min_uV, max_uV; if (!regulator_ops_is_valid(rdev, REGULATOR_CHANGE_VOLTAGE)) return 0; regulator_lock(rdev); if (!rdev->desc->ops->set_voltage && !rdev->desc->ops->set_voltage_sel) { ret = -EINVAL; goto out; } /* This is only going to work if we've had a voltage configured. */ if (!voltage->min_uV && !voltage->max_uV) { ret = -EINVAL; goto out; } min_uV = voltage->min_uV; max_uV = voltage->max_uV; /* This should be a paranoia check... 
*/ ret = regulator_check_voltage(rdev, &min_uV, &max_uV); if (ret < 0) goto out; ret = regulator_check_consumers(rdev, &min_uV, &max_uV, 0); if (ret < 0) goto out; /* balance only, if regulator is coupled */ if (rdev->coupling_desc.n_coupled > 1) ret = regulator_balance_voltage(rdev, PM_SUSPEND_ON); else ret = _regulator_do_set_voltage(rdev, min_uV, max_uV); out: regulator_unlock(rdev); return ret; } EXPORT_SYMBOL_GPL(regulator_sync_voltage); int regulator_get_voltage_rdev(struct regulator_dev *rdev) { int sel, ret; bool bypassed; if (rdev->desc->ops->get_bypass) { ret = rdev->desc->ops->get_bypass(rdev, &bypassed); if (ret < 0) return ret; if (bypassed) { /* if bypassed the regulator must have a supply */ if (!rdev->supply) { rdev_err(rdev, "bypassed regulator has no supply!\n"); return -EPROBE_DEFER; } return regulator_get_voltage_rdev(rdev->supply->rdev); } } if (rdev->desc->ops->get_voltage_sel) { sel = rdev->desc->ops->get_voltage_sel(rdev); if (sel < 0) return sel; ret = rdev->desc->ops->list_voltage(rdev, sel); } else if (rdev->desc->ops->get_voltage) { ret = rdev->desc->ops->get_voltage(rdev); } else if (rdev->desc->ops->list_voltage) { ret = rdev->desc->ops->list_voltage(rdev, 0); } else if (rdev->desc->fixed_uV && (rdev->desc->n_voltages == 1)) { ret = rdev->desc->fixed_uV; } else if (rdev->supply) { ret = regulator_get_voltage_rdev(rdev->supply->rdev); } else if (rdev->supply_name) { return -EPROBE_DEFER; } else { return -EINVAL; } if (ret < 0) return ret; return ret - rdev->constraints->uV_offset; } EXPORT_SYMBOL_GPL(regulator_get_voltage_rdev); /** * regulator_get_voltage - get regulator output voltage * @regulator: regulator source * * Return: Current regulator voltage in uV, or a negative error number on failure. * * NOTE: If the regulator is disabled it will return the voltage value. This * function should not be used to determine regulator state. 
*/
int regulator_get_voltage(struct regulator *regulator)
{
	struct regulator_dev *rdev = regulator->rdev;
	struct ww_acquire_ctx ww_ctx;
	int uV;

	/* Read the voltage with the regulator and its dependencies locked */
	regulator_lock_dependent(rdev, &ww_ctx);
	uV = regulator_get_voltage_rdev(rdev);
	regulator_unlock_dependent(rdev, &ww_ctx);

	return uV;
}
EXPORT_SYMBOL_GPL(regulator_get_voltage);

/**
 * regulator_set_current_limit - set regulator output current limit
 * @regulator: regulator source
 * @min_uA: Minimum supported current in uA
 * @max_uA: Maximum supported current in uA
 *
 * Sets current sink to the desired output current. This can be set during
 * any regulator state. IOW, regulator can be disabled or enabled.
 *
 * If the regulator is enabled then the current will change to the new value
 * immediately otherwise if the regulator is disabled the regulator will
 * output at the new current when enabled.
 *
 * NOTE: Regulator system constraints must be set for this regulator before
 * calling this function otherwise this call will fail.
 *
 * Return: 0 on success or a negative error number on failure.
*/ int regulator_set_current_limit(struct regulator *regulator, int min_uA, int max_uA) { struct regulator_dev *rdev = regulator->rdev; int ret; regulator_lock(rdev); /* sanity check */ if (!rdev->desc->ops->set_current_limit) { ret = -EINVAL; goto out; } /* constraints check */ ret = regulator_check_current_limit(rdev, &min_uA, &max_uA); if (ret < 0) goto out; ret = rdev->desc->ops->set_current_limit(rdev, min_uA, max_uA); out: regulator_unlock(rdev); return ret; } EXPORT_SYMBOL_GPL(regulator_set_current_limit); static int _regulator_get_current_limit_unlocked(struct regulator_dev *rdev) { /* sanity check */ if (!rdev->desc->ops->get_current_limit) return -EINVAL; return rdev->desc->ops->get_current_limit(rdev); } static int _regulator_get_current_limit(struct regulator_dev *rdev) { int ret; regulator_lock(rdev); ret = _regulator_get_current_limit_unlocked(rdev); regulator_unlock(rdev); return ret; } /** * regulator_get_current_limit - get regulator output current * @regulator: regulator source * * Return: Current supplied by the specified current sink in uA, * or a negative error number on failure. * * NOTE: If the regulator is disabled it will return the current value. This * function should not be used to determine regulator state. */ int regulator_get_current_limit(struct regulator *regulator) { return _regulator_get_current_limit(regulator->rdev); } EXPORT_SYMBOL_GPL(regulator_get_current_limit); /** * regulator_get_unclaimed_power_budget - get regulator unclaimed power budget * @regulator: regulator source * * Return: Unclaimed power budget of the regulator in mW. 
*/ int regulator_get_unclaimed_power_budget(struct regulator *regulator) { return regulator->rdev->constraints->pw_budget_mW - regulator->rdev->pw_requested_mW; } EXPORT_SYMBOL_GPL(regulator_get_unclaimed_power_budget); /** * regulator_request_power_budget - request power budget on a regulator * @regulator: regulator source * @pw_req: Power requested * * Return: 0 on success or a negative error number on failure. */ int regulator_request_power_budget(struct regulator *regulator, unsigned int pw_req) { struct regulator_dev *rdev = regulator->rdev; int ret = 0, pw_tot_req; regulator_lock(rdev); if (rdev->supply) { ret = regulator_request_power_budget(rdev->supply, pw_req); if (ret < 0) goto out; } pw_tot_req = rdev->pw_requested_mW + pw_req; if (pw_tot_req > rdev->constraints->pw_budget_mW) { rdev_warn(rdev, "power requested %d mW out of budget %d mW", pw_req, rdev->constraints->pw_budget_mW - rdev->pw_requested_mW); regulator_notifier_call_chain(rdev, REGULATOR_EVENT_OVER_CURRENT_WARN, NULL); ret = -ERANGE; goto out; } rdev->pw_requested_mW = pw_tot_req; out: regulator_unlock(rdev); return ret; } EXPORT_SYMBOL_GPL(regulator_request_power_budget); /** * regulator_free_power_budget - free power budget on a regulator * @regulator: regulator source * @pw: Power to be released. * * Return: Power budget of the regulator in mW. 
*/ void regulator_free_power_budget(struct regulator *regulator, unsigned int pw) { struct regulator_dev *rdev = regulator->rdev; int pw_tot_req; regulator_lock(rdev); if (rdev->supply) regulator_free_power_budget(rdev->supply, pw); pw_tot_req = rdev->pw_requested_mW - pw; if (pw_tot_req >= 0) rdev->pw_requested_mW = pw_tot_req; else rdev_warn(rdev, "too much power freed %d mW (already requested %d mW)", pw, rdev->pw_requested_mW); regulator_unlock(rdev); } EXPORT_SYMBOL_GPL(regulator_free_power_budget); /** * regulator_set_mode - set regulator operating mode * @regulator: regulator source * @mode: operating mode - one of the REGULATOR_MODE constants * * Set regulator operating mode to increase regulator efficiency or improve * regulation performance. * * NOTE: Regulator system constraints must be set for this regulator before * calling this function otherwise this call will fail. * * Return: 0 on success or a negative error number on failure. */ int regulator_set_mode(struct regulator *regulator, unsigned int mode) { struct regulator_dev *rdev = regulator->rdev; int ret; int regulator_curr_mode; regulator_lock(rdev); /* sanity check */ if (!rdev->desc->ops->set_mode) { ret = -EINVAL; goto out; } /* return if the same mode is requested */ if (rdev->desc->ops->get_mode) { regulator_curr_mode = rdev->desc->ops->get_mode(rdev); if (regulator_curr_mode == mode) { ret = 0; goto out; } } /* constraints check */ ret = regulator_mode_constrain(rdev, &mode); if (ret < 0) goto out; ret = rdev->desc->ops->set_mode(rdev, mode); out: regulator_unlock(rdev); return ret; } EXPORT_SYMBOL_GPL(regulator_set_mode); static unsigned int _regulator_get_mode_unlocked(struct regulator_dev *rdev) { /* sanity check */ if (!rdev->desc->ops->get_mode) return -EINVAL; return rdev->desc->ops->get_mode(rdev); } static unsigned int _regulator_get_mode(struct regulator_dev *rdev) { int ret; regulator_lock(rdev); ret = _regulator_get_mode_unlocked(rdev); regulator_unlock(rdev); return ret; } /** * 
regulator_get_mode - get regulator operating mode * @regulator: regulator source * * Get the current regulator operating mode. * * Return: Current operating mode as %REGULATOR_MODE_* values, * or a negative error number on failure. */ unsigned int regulator_get_mode(struct regulator *regulator) { return _regulator_get_mode(regulator->rdev); } EXPORT_SYMBOL_GPL(regulator_get_mode); static int rdev_get_cached_err_flags(struct regulator_dev *rdev) { int ret = 0; if (rdev->use_cached_err) { spin_lock(&rdev->err_lock); ret = rdev->cached_err; spin_unlock(&rdev->err_lock); } return ret; } static int _regulator_get_error_flags(struct regulator_dev *rdev, unsigned int *flags) { int cached_flags, ret = 0; regulator_lock(rdev); cached_flags = rdev_get_cached_err_flags(rdev); if (rdev->desc->ops->get_error_flags) ret = rdev->desc->ops->get_error_flags(rdev, flags); else if (!rdev->use_cached_err) ret = -EINVAL; *flags |= cached_flags; regulator_unlock(rdev); return ret; } /** * regulator_get_error_flags - get regulator error information * @regulator: regulator source * @flags: pointer to store error flags * * Get the current regulator error information. * * Return: 0 on success or a negative error number on failure. */ int regulator_get_error_flags(struct regulator *regulator, unsigned int *flags) { return _regulator_get_error_flags(regulator->rdev, flags); } EXPORT_SYMBOL_GPL(regulator_get_error_flags); /** * regulator_set_load - set regulator load * @regulator: regulator source * @uA_load: load current * * Notifies the regulator core of a new device load. This is then used by * DRMS (if enabled by constraints) to set the most efficient regulator * operating mode for the new regulator loading. * * Consumer devices notify their supply regulator of the maximum power * they will require (can be taken from device datasheet in the power * consumption tables) when they change operational status and hence power * state. 
Examples of operational state changes that can affect power * consumption are :- * * o Device is opened / closed. * o Device I/O is about to begin or has just finished. * o Device is idling in between work. * * This information is also exported via sysfs to userspace. * * DRMS will sum the total requested load on the regulator and change * to the most efficient operating mode if platform constraints allow. * * NOTE: when a regulator consumer requests to have a regulator * disabled then any load that consumer requested no longer counts * toward the total requested load. If the regulator is re-enabled * then the previously requested load will start counting again. * * If a regulator is an always-on regulator then an individual consumer's * load will still be removed if that consumer is fully disabled. * * Return: 0 on success or a negative error number on failure. */ int regulator_set_load(struct regulator *regulator, int uA_load) { struct regulator_dev *rdev = regulator->rdev; int old_uA_load; int ret = 0; regulator_lock(rdev); old_uA_load = regulator->uA_load; regulator->uA_load = uA_load; if (regulator->enable_count && old_uA_load != uA_load) { ret = drms_uA_update(rdev); if (ret < 0) regulator->uA_load = old_uA_load; } regulator_unlock(rdev); return ret; } EXPORT_SYMBOL_GPL(regulator_set_load); /** * regulator_allow_bypass - allow the regulator to go into bypass mode * * @regulator: Regulator to configure * @enable: enable or disable bypass mode * * Allow the regulator to go into bypass mode if all other consumers * for the regulator also enable bypass mode and the machine * constraints allow this. Bypass mode means that the regulator is * simply passing the input directly to the output with no regulation. * * Return: 0 on success or if changing bypass is not possible, or * a negative error number on failure. 
*/ int regulator_allow_bypass(struct regulator *regulator, bool enable) { struct regulator_dev *rdev = regulator->rdev; const char *name = rdev_get_name(rdev); int ret = 0; if (!rdev->desc->ops->set_bypass) return 0; if (!regulator_ops_is_valid(rdev, REGULATOR_CHANGE_BYPASS)) return 0; regulator_lock(rdev); if (enable && !regulator->bypass) { rdev->bypass_count++; if (rdev->bypass_count == rdev->open_count) { trace_regulator_bypass_enable(name); ret = rdev->desc->ops->set_bypass(rdev, enable); if (ret != 0) rdev->bypass_count--; else trace_regulator_bypass_enable_complete(name); } } else if (!enable && regulator->bypass) { rdev->bypass_count--; if (rdev->bypass_count != rdev->open_count) { trace_regulator_bypass_disable(name); ret = rdev->desc->ops->set_bypass(rdev, enable); if (ret != 0) rdev->bypass_count++; else trace_regulator_bypass_disable_complete(name); } } if (ret == 0) regulator->bypass = enable; regulator_unlock(rdev); return ret; } EXPORT_SYMBOL_GPL(regulator_allow_bypass); /** * regulator_register_notifier - register regulator event notifier * @regulator: regulator source * @nb: notifier block * * Register notifier block to receive regulator events. * * Return: 0 on success or a negative error number on failure. */ int regulator_register_notifier(struct regulator *regulator, struct notifier_block *nb) { return blocking_notifier_chain_register(&regulator->rdev->notifier, nb); } EXPORT_SYMBOL_GPL(regulator_register_notifier); /** * regulator_unregister_notifier - unregister regulator event notifier * @regulator: regulator source * @nb: notifier block * * Unregister regulator event notifier block. * * Return: 0 on success or a negative error number on failure. */ int regulator_unregister_notifier(struct regulator *regulator, struct notifier_block *nb) { return blocking_notifier_chain_unregister(&regulator->rdev->notifier, nb); } EXPORT_SYMBOL_GPL(regulator_unregister_notifier); /* notify regulator consumers and downstream regulator consumers. 
* Note mutex must be held by caller. */ static int _notifier_call_chain(struct regulator_dev *rdev, unsigned long event, void *data) { /* call rdev chain first */ int ret = blocking_notifier_call_chain(&rdev->notifier, event, data); if (IS_REACHABLE(CONFIG_REGULATOR_NETLINK_EVENTS)) { struct device *parent = rdev->dev.parent; const char *rname = rdev_get_name(rdev); char name[32]; /* Avoid duplicate debugfs directory names */ if (parent && rname == rdev->desc->name) { snprintf(name, sizeof(name), "%s-%s", dev_name(parent), rname); rname = name; } reg_generate_netlink_event(rname, event); } return ret; } int _regulator_bulk_get(struct device *dev, int num_consumers, struct regulator_bulk_data *consumers, enum regulator_get_type get_type) { int i; int ret; for (i = 0; i < num_consumers; i++) consumers[i].consumer = NULL; for (i = 0; i < num_consumers; i++) { consumers[i].consumer = _regulator_get(dev, consumers[i].supply, get_type); if (IS_ERR(consumers[i].consumer)) { ret = dev_err_probe(dev, PTR_ERR(consumers[i].consumer), "Failed to get supply '%s'\n", consumers[i].supply); consumers[i].consumer = NULL; goto err; } if (consumers[i].init_load_uA > 0) { ret = regulator_set_load(consumers[i].consumer, consumers[i].init_load_uA); if (ret) { i++; goto err; } } } return 0; err: while (--i >= 0) regulator_put(consumers[i].consumer); return ret; } /** * regulator_bulk_get - get multiple regulator consumers * * @dev: Device to supply * @num_consumers: Number of consumers to register * @consumers: Configuration of consumers; clients are stored here. * * This helper function allows drivers to get several regulator * consumers in one operation. If any of the regulators cannot be * acquired then any regulators that were allocated will be freed * before returning to the caller. * * Return: 0 on success or a negative error number on failure. 
*/ int regulator_bulk_get(struct device *dev, int num_consumers, struct regulator_bulk_data *consumers) { return _regulator_bulk_get(dev, num_consumers, consumers, NORMAL_GET); } EXPORT_SYMBOL_GPL(regulator_bulk_get); static void regulator_bulk_enable_async(void *data, async_cookie_t cookie) { struct regulator_bulk_data *bulk = data; bulk->ret = regulator_enable(bulk->consumer); } /** * regulator_bulk_enable - enable multiple regulator consumers * * @num_consumers: Number of consumers * @consumers: Consumer data; clients are stored here. * * This convenience API allows consumers to enable multiple regulator * clients in a single API call. If any consumers cannot be enabled * then any others that were enabled will be disabled again prior to * return. * * Return: 0 on success or a negative error number on failure. */ int regulator_bulk_enable(int num_consumers, struct regulator_bulk_data *consumers) { ASYNC_DOMAIN_EXCLUSIVE(async_domain); int i; int ret = 0; for (i = 0; i < num_consumers; i++) { async_schedule_domain(regulator_bulk_enable_async, &consumers[i], &async_domain); } async_synchronize_full_domain(&async_domain); /* If any consumer failed we need to unwind any that succeeded */ for (i = 0; i < num_consumers; i++) { if (consumers[i].ret != 0) { ret = consumers[i].ret; goto err; } } return 0; err: for (i = 0; i < num_consumers; i++) { if (consumers[i].ret < 0) pr_err("Failed to enable %s: %pe\n", consumers[i].supply, ERR_PTR(consumers[i].ret)); else regulator_disable(consumers[i].consumer); } return ret; } EXPORT_SYMBOL_GPL(regulator_bulk_enable); /** * regulator_bulk_disable - disable multiple regulator consumers * * @num_consumers: Number of consumers * @consumers: Consumer data; clients are stored here. * * This convenience API allows consumers to disable multiple regulator * clients in a single API call. If any consumers cannot be disabled * then any others that were disabled will be enabled again prior to * return. 
* * Return: 0 on success or a negative error number on failure. */ int regulator_bulk_disable(int num_consumers, struct regulator_bulk_data *consumers) { int i; int ret, r; for (i = num_consumers - 1; i >= 0; --i) { ret = regulator_disable(consumers[i].consumer); if (ret != 0) goto err; } return 0; err: pr_err("Failed to disable %s: %pe\n", consumers[i].supply, ERR_PTR(ret)); for (++i; i < num_consumers; ++i) { r = regulator_enable(consumers[i].consumer); if (r != 0) pr_err("Failed to re-enable %s: %pe\n", consumers[i].supply, ERR_PTR(r)); } return ret; } EXPORT_SYMBOL_GPL(regulator_bulk_disable); /** * regulator_bulk_force_disable - force disable multiple regulator consumers * * @num_consumers: Number of consumers * @consumers: Consumer data; clients are stored here. * * This convenience API allows consumers to forcibly disable multiple regulator * clients in a single API call. * NOTE: This should be used for situations when device damage will * likely occur if the regulators are not disabled (e.g. over temp). * Although regulator_force_disable function call for some consumers can * return error numbers, the function is called for all consumers. * * Return: 0 on success or a negative error number on failure. */ int regulator_bulk_force_disable(int num_consumers, struct regulator_bulk_data *consumers) { int i; int ret = 0; for (i = 0; i < num_consumers; i++) { consumers[i].ret = regulator_force_disable(consumers[i].consumer); /* Store first error for reporting */ if (consumers[i].ret && !ret) ret = consumers[i].ret; } return ret; } EXPORT_SYMBOL_GPL(regulator_bulk_force_disable); /** * regulator_bulk_free - free multiple regulator consumers * * @num_consumers: Number of consumers * @consumers: Consumer data; clients are stored here. * * This convenience API allows consumers to free multiple regulator * clients in a single API call. 
*/ void regulator_bulk_free(int num_consumers, struct regulator_bulk_data *consumers) { int i; for (i = 0; i < num_consumers; i++) { regulator_put(consumers[i].consumer); consumers[i].consumer = NULL; } } EXPORT_SYMBOL_GPL(regulator_bulk_free); /** * regulator_handle_critical - Handle events for system-critical regulators. * @rdev: The regulator device. * @event: The event being handled. * * This function handles critical events such as under-voltage, over-current, * and unknown errors for regulators deemed system-critical. On detecting such * events, it triggers a hardware protection shutdown with a defined timeout. */ static void regulator_handle_critical(struct regulator_dev *rdev, unsigned long event) { const char *reason = NULL; if (!rdev->constraints->system_critical) return; switch (event) { case REGULATOR_EVENT_UNDER_VOLTAGE: reason = "System critical regulator: voltage drop detected"; break; case REGULATOR_EVENT_OVER_CURRENT: reason = "System critical regulator: over-current detected"; break; case REGULATOR_EVENT_FAIL: reason = "System critical regulator: unknown error"; } if (!reason) return; hw_protection_trigger(reason, rdev->constraints->uv_less_critical_window_ms); } /** * regulator_notifier_call_chain - call regulator event notifier * @rdev: regulator source * @event: notifier block * @data: callback-specific data. * * Called by regulator drivers to notify clients a regulator event has * occurred. * * Return: %NOTIFY_DONE. */ int regulator_notifier_call_chain(struct regulator_dev *rdev, unsigned long event, void *data) { regulator_handle_critical(rdev, event); _notifier_call_chain(rdev, event, data); return NOTIFY_DONE; } EXPORT_SYMBOL_GPL(regulator_notifier_call_chain); /** * regulator_mode_to_status - convert a regulator mode into a status * * @mode: Mode to convert * * Convert a regulator mode into a status. * * Return: %REGULATOR_STATUS_* value corresponding to given mode. 
*/ int regulator_mode_to_status(unsigned int mode) { switch (mode) { case REGULATOR_MODE_FAST: return REGULATOR_STATUS_FAST; case REGULATOR_MODE_NORMAL: return REGULATOR_STATUS_NORMAL; case REGULATOR_MODE_IDLE: return REGULATOR_STATUS_IDLE; case REGULATOR_MODE_STANDBY: return REGULATOR_STATUS_STANDBY; default: return REGULATOR_STATUS_UNDEFINED; } } EXPORT_SYMBOL_GPL(regulator_mode_to_status); static struct attribute *regulator_dev_attrs[] = { &dev_attr_name.attr, &dev_attr_num_users.attr, &dev_attr_type.attr, &dev_attr_microvolts.attr, &dev_attr_microamps.attr, &dev_attr_opmode.attr, &dev_attr_state.attr, &dev_attr_status.attr, &dev_attr_bypass.attr, &dev_attr_requested_microamps.attr, &dev_attr_min_microvolts.attr, &dev_attr_max_microvolts.attr, &dev_attr_min_microamps.attr, &dev_attr_max_microamps.attr, &dev_attr_under_voltage.attr, &dev_attr_over_current.attr, &dev_attr_regulation_out.attr, &dev_attr_fail.attr, &dev_attr_over_temp.attr, &dev_attr_under_voltage_warn.attr, &dev_attr_over_current_warn.attr, &dev_attr_over_voltage_warn.attr, &dev_attr_over_temp_warn.attr, &dev_attr_suspend_standby_state.attr, &dev_attr_suspend_mem_state.attr, &dev_attr_suspend_disk_state.attr, &dev_attr_suspend_standby_microvolts.attr, &dev_attr_suspend_mem_microvolts.attr, &dev_attr_suspend_disk_microvolts.attr, &dev_attr_suspend_standby_mode.attr, &dev_attr_suspend_mem_mode.attr, &dev_attr_suspend_disk_mode.attr, &dev_attr_power_budget_milliwatt.attr, &dev_attr_power_requested_milliwatt.attr, NULL }; /* * To avoid cluttering sysfs (and memory) with useless state, only * create attributes that can be meaningfully displayed. 
*/ static umode_t regulator_attr_is_visible(struct kobject *kobj, struct attribute *attr, int idx) { struct device *dev = kobj_to_dev(kobj); struct regulator_dev *rdev = dev_to_rdev(dev); const struct regulator_ops *ops = rdev->desc->ops; umode_t mode = attr->mode; /* these three are always present */ if (attr == &dev_attr_name.attr || attr == &dev_attr_num_users.attr || attr == &dev_attr_type.attr) return mode; /* some attributes need specific methods to be displayed */ if (attr == &dev_attr_microvolts.attr) { if ((ops->get_voltage && ops->get_voltage(rdev) >= 0) || (ops->get_voltage_sel && ops->get_voltage_sel(rdev) >= 0) || (ops->list_voltage && ops->list_voltage(rdev, 0) >= 0) || (rdev->desc->fixed_uV && rdev->desc->n_voltages == 1)) return mode; return 0; } if (attr == &dev_attr_microamps.attr) return ops->get_current_limit ? mode : 0; if (attr == &dev_attr_opmode.attr) return ops->get_mode ? mode : 0; if (attr == &dev_attr_state.attr) return (rdev->ena_pin || ops->is_enabled) ? mode : 0; if (attr == &dev_attr_status.attr) return ops->get_status ? mode : 0; if (attr == &dev_attr_bypass.attr) return ops->get_bypass ? mode : 0; if (attr == &dev_attr_under_voltage.attr || attr == &dev_attr_over_current.attr || attr == &dev_attr_regulation_out.attr || attr == &dev_attr_fail.attr || attr == &dev_attr_over_temp.attr || attr == &dev_attr_under_voltage_warn.attr || attr == &dev_attr_over_current_warn.attr || attr == &dev_attr_over_voltage_warn.attr || attr == &dev_attr_over_temp_warn.attr) return ops->get_error_flags ? mode : 0; /* constraints need specific supporting methods */ if (attr == &dev_attr_min_microvolts.attr || attr == &dev_attr_max_microvolts.attr) return (ops->set_voltage || ops->set_voltage_sel) ? mode : 0; if (attr == &dev_attr_min_microamps.attr || attr == &dev_attr_max_microamps.attr) return ops->set_current_limit ? 
mode : 0; if (attr == &dev_attr_suspend_standby_state.attr || attr == &dev_attr_suspend_mem_state.attr || attr == &dev_attr_suspend_disk_state.attr) return mode; if (attr == &dev_attr_suspend_standby_microvolts.attr || attr == &dev_attr_suspend_mem_microvolts.attr || attr == &dev_attr_suspend_disk_microvolts.attr) return ops->set_suspend_voltage ? mode : 0; if (attr == &dev_attr_suspend_standby_mode.attr || attr == &dev_attr_suspend_mem_mode.attr || attr == &dev_attr_suspend_disk_mode.attr) return ops->set_suspend_mode ? mode : 0; if (attr == &dev_attr_power_budget_milliwatt.attr || attr == &dev_attr_power_requested_milliwatt.attr) return rdev->constraints->pw_budget_mW != INT_MAX ? mode : 0; return mode; } static const struct attribute_group regulator_dev_group = { .attrs = regulator_dev_attrs, .is_visible = regulator_attr_is_visible, }; static const struct attribute_group *regulator_dev_groups[] = { &regulator_dev_group, NULL }; static void regulator_dev_release(struct device *dev) { struct regulator_dev *rdev = dev_get_drvdata(dev); debugfs_remove_recursive(rdev->debugfs); kfree(rdev->constraints); of_node_put(rdev->dev.of_node); kfree(rdev); } static void rdev_init_debugfs(struct regulator_dev *rdev) { struct device *parent = rdev->dev.parent; const char *rname = rdev_get_name(rdev); char name[NAME_MAX]; /* Avoid duplicate debugfs directory names */ if (parent && rname == rdev->desc->name) { snprintf(name, sizeof(name), "%s-%s", dev_name(parent), rname); rname = name; } rdev->debugfs = debugfs_create_dir(rname, debugfs_root); if (IS_ERR(rdev->debugfs)) rdev_dbg(rdev, "Failed to create debugfs directory\n"); debugfs_create_u32("use_count", 0444, rdev->debugfs, &rdev->use_count); debugfs_create_u32("open_count", 0444, rdev->debugfs, &rdev->open_count); debugfs_create_u32("bypass_count", 0444, rdev->debugfs, &rdev->bypass_count); } int regulator_coupler_register(struct regulator_coupler *coupler) { mutex_lock(&regulator_list_mutex); list_add_tail(&coupler->list, 
&regulator_coupler_list); mutex_unlock(&regulator_list_mutex); return 0; } static struct regulator_coupler * regulator_find_coupler(struct regulator_dev *rdev) { struct regulator_coupler *coupler; int err; /* * Note that regulators are appended to the list and the generic * coupler is registered first, hence it will be attached at last * if nobody cared. */ list_for_each_entry_reverse(coupler, &regulator_coupler_list, list) { err = coupler->attach_regulator(coupler, rdev); if (!err) { if (!coupler->balance_voltage && rdev->coupling_desc.n_coupled > 2) goto err_unsupported; return coupler; } if (err < 0) return ERR_PTR(err); if (err == 1) continue; break; } return ERR_PTR(-EINVAL); err_unsupported: if (coupler->detach_regulator) coupler->detach_regulator(coupler, rdev); rdev_err(rdev, "Voltage balancing for multiple regulator couples is unimplemented\n"); return ERR_PTR(-EPERM); } static void regulator_resolve_coupling(struct regulator_dev *rdev) { struct regulator_coupler *coupler = rdev->coupling_desc.coupler; struct coupling_desc *c_desc = &rdev->coupling_desc; int n_coupled = c_desc->n_coupled; struct regulator_dev *c_rdev; int i; for (i = 1; i < n_coupled; i++) { /* already resolved */ if (c_desc->coupled_rdevs[i]) continue; c_rdev = of_parse_coupled_regulator(rdev, i - 1); if (!c_rdev) continue; if (c_rdev->coupling_desc.coupler != coupler) { rdev_err(rdev, "coupler mismatch with %s\n", rdev_get_name(c_rdev)); return; } c_desc->coupled_rdevs[i] = c_rdev; c_desc->n_resolved++; regulator_resolve_coupling(c_rdev); } } static void regulator_remove_coupling(struct regulator_dev *rdev) { struct regulator_coupler *coupler = rdev->coupling_desc.coupler; struct coupling_desc *__c_desc, *c_desc = &rdev->coupling_desc; struct regulator_dev *__c_rdev, *c_rdev; unsigned int __n_coupled, n_coupled; int i, k; int err; n_coupled = c_desc->n_coupled; for (i = 1; i < n_coupled; i++) { c_rdev = c_desc->coupled_rdevs[i]; if (!c_rdev) continue; regulator_lock(c_rdev); __c_desc = 
&c_rdev->coupling_desc; __n_coupled = __c_desc->n_coupled; for (k = 1; k < __n_coupled; k++) { __c_rdev = __c_desc->coupled_rdevs[k]; if (__c_rdev == rdev) { __c_desc->coupled_rdevs[k] = NULL; __c_desc->n_resolved--; break; } } regulator_unlock(c_rdev); c_desc->coupled_rdevs[i] = NULL; c_desc->n_resolved--; } if (coupler && coupler->detach_regulator) { err = coupler->detach_regulator(coupler, rdev); if (err) rdev_err(rdev, "failed to detach from coupler: %pe\n", ERR_PTR(err)); } rdev->coupling_desc.n_coupled = 0; kfree(rdev->coupling_desc.coupled_rdevs); rdev->coupling_desc.coupled_rdevs = NULL; } static int regulator_init_coupling(struct regulator_dev *rdev) { struct regulator_dev **coupled; int err, n_phandles; if (!IS_ENABLED(CONFIG_OF)) n_phandles = 0; else n_phandles = of_get_n_coupled(rdev); coupled = kzalloc_objs(*coupled, n_phandles + 1); if (!coupled) return -ENOMEM; rdev->coupling_desc.coupled_rdevs = coupled; /* * Every regulator should always have coupling descriptor filled with * at least pointer to itself. 
*/ rdev->coupling_desc.coupled_rdevs[0] = rdev; rdev->coupling_desc.n_coupled = n_phandles + 1; rdev->coupling_desc.n_resolved++; /* regulator isn't coupled */ if (n_phandles == 0) return 0; if (!of_check_coupling_data(rdev)) return -EPERM; mutex_lock(&regulator_list_mutex); rdev->coupling_desc.coupler = regulator_find_coupler(rdev); mutex_unlock(&regulator_list_mutex); if (IS_ERR(rdev->coupling_desc.coupler)) { err = PTR_ERR(rdev->coupling_desc.coupler); rdev_err(rdev, "failed to get coupler: %pe\n", ERR_PTR(err)); return err; } return 0; } static int generic_coupler_attach(struct regulator_coupler *coupler, struct regulator_dev *rdev) { if (rdev->coupling_desc.n_coupled > 2) { rdev_err(rdev, "Voltage balancing for multiple regulator couples is unimplemented\n"); return -EPERM; } if (!rdev->constraints->always_on) { rdev_err(rdev, "Coupling of a non always-on regulator is unimplemented\n"); return -ENOTSUPP; } return 0; } static struct regulator_coupler generic_regulator_coupler = { .attach_regulator = generic_coupler_attach, }; /** * regulator_register - register regulator * @dev: the device that drive the regulator * @regulator_desc: regulator to register * @cfg: runtime configuration for regulator * * Called by regulator drivers to register a regulator. * * Return: Pointer to a valid &struct regulator_dev on success or * an ERR_PTR() encoded negative error number on failure. 
*/
struct regulator_dev *
regulator_register(struct device *dev,
		   const struct regulator_desc *regulator_desc,
		   const struct regulator_config *cfg)
{
	const struct regulator_init_data *init_data;
	struct regulator_config *config = NULL;
	static atomic_t regulator_no = ATOMIC_INIT(-1);
	struct regulator_dev *rdev;
	bool tried_supply_resolve = false;
	/*
	 * Track enable-GPIO descriptors that nobody owns yet so the error
	 * paths can release them: one possibly passed in via @cfg, one
	 * possibly picked up while parsing the device tree.
	 */
	bool dangling_cfg_gpiod = false;
	bool dangling_of_gpiod = false;
	int ret, i;

	if (cfg == NULL)
		return ERR_PTR(-EINVAL);
	if (cfg->ena_gpiod)
		dangling_cfg_gpiod = true;
	if (regulator_desc == NULL) {
		ret = -EINVAL;
		goto rinse;
	}

	WARN_ON(!dev || !cfg->dev);

	if (regulator_desc->name == NULL || regulator_desc->ops == NULL) {
		ret = -EINVAL;
		goto rinse;
	}

	if (regulator_desc->type != REGULATOR_VOLTAGE &&
	    regulator_desc->type != REGULATOR_CURRENT) {
		ret = -EINVAL;
		goto rinse;
	}

	/* Only one of each should be implemented */
	WARN_ON(regulator_desc->ops->get_voltage &&
		regulator_desc->ops->get_voltage_sel);
	WARN_ON(regulator_desc->ops->set_voltage &&
		regulator_desc->ops->set_voltage_sel);

	/* If we're using selectors we must implement list_voltage. */
	if (regulator_desc->ops->get_voltage_sel &&
	    !regulator_desc->ops->list_voltage) {
		ret = -EINVAL;
		goto rinse;
	}
	if (regulator_desc->ops->set_voltage_sel &&
	    !regulator_desc->ops->list_voltage) {
		ret = -EINVAL;
		goto rinse;
	}

	rdev = kzalloc_obj(struct regulator_dev);
	if (rdev == NULL) {
		ret = -ENOMEM;
		goto rinse;
	}
	/*
	 * From here on rdev is released through put_device() (see the
	 * "clean" label), which ends up in the class release callback.
	 */
	device_initialize(&rdev->dev);
	dev_set_drvdata(&rdev->dev, rdev);
	rdev->dev.class = &regulator_class;
	spin_lock_init(&rdev->err_lock);

	/*
	 * Duplicate the config so the driver could override it after
	 * parsing init data.
	 */
	config = kmemdup(cfg, sizeof(*cfg), GFP_KERNEL);
	if (config == NULL) {
		ret = -ENOMEM;
		goto clean;
	}

	/*
	 * DT may override the config->init_data provided if the platform
	 * needs to do so. If so, config->init_data is completely ignored.
	 */
	init_data = regulator_of_get_init_data(dev, regulator_desc, config,
					       &rdev->dev.of_node);

	/*
	 * Sometimes not all resources are probed already so we need to take
	 * that into account. This happens most the time if the ena_gpiod comes
	 * from a gpio extender or something else.
	 */
	if (PTR_ERR(init_data) == -EPROBE_DEFER) {
		ret = -EPROBE_DEFER;
		goto clean;
	}

	/*
	 * We need to keep track of any GPIO descriptor coming from the
	 * device tree until we have handled it over to the core. If the
	 * config that was passed in to this function DOES NOT contain
	 * a descriptor, and the config after this call DOES contain
	 * a descriptor, we definitely got one from parsing the device
	 * tree.
	 */
	if (!cfg->ena_gpiod && config->ena_gpiod)
		dangling_of_gpiod = true;
	/* No init data from DT: fall back to what the driver supplied. */
	if (!init_data) {
		init_data = config->init_data;
		rdev->dev.of_node = of_node_get(config->of_node);
	}

	ww_mutex_init(&rdev->mutex, &regulator_ww_class);
	rdev->reg_data = config->driver_data;
	rdev->owner = regulator_desc->owner;
	rdev->desc = regulator_desc;
	/* regmap: explicit config first, then @dev's, then its parent's */
	if (config->regmap)
		rdev->regmap = config->regmap;
	else if (dev_get_regmap(dev, NULL))
		rdev->regmap = dev_get_regmap(dev, NULL);
	else if (dev->parent)
		rdev->regmap = dev_get_regmap(dev->parent, NULL);
	INIT_LIST_HEAD(&rdev->consumer_list);
	INIT_LIST_HEAD(&rdev->list);
	BLOCKING_INIT_NOTIFIER_HEAD(&rdev->notifier);
	INIT_DELAYED_WORK(&rdev->disable_work, regulator_disable_work);

	/* init data takes precedence over the descriptor for the supply name */
	if (init_data && init_data->supply_regulator)
		rdev->supply_name = init_data->supply_regulator;
	else if (regulator_desc->supply_name)
		rdev->supply_name = regulator_desc->supply_name;

	/* register with sysfs */
	rdev->dev.parent = config->dev;
	dev_set_name(&rdev->dev, "regulator.%lu",
		     (unsigned long) atomic_inc_return(&regulator_no));

	/* set regulator constraints */
	if (init_data)
		rdev->constraints = kmemdup(&init_data->constraints,
					    sizeof(*rdev->constraints),
					    GFP_KERNEL);
	else
		rdev->constraints = kzalloc_obj(*rdev->constraints);
	if (!rdev->constraints) {
		ret = -ENOMEM;
		goto wash;
	}

	if (regulator_desc->init_cb) {
		ret = regulator_desc->init_cb(rdev, config);
		if (ret < 0)
			goto wash;
	}

	if (config->ena_gpiod) {
		ret = regulator_ena_gpio_request(rdev, config);
		if (ret != 0) {
			rdev_err(rdev, "Failed to request enable GPIO: %pe\n",
				 ERR_PTR(ret));
			goto wash;
		}
		/* The regulator core took over the GPIO descriptor */
		dangling_cfg_gpiod = false;
		dangling_of_gpiod = false;
	}

	ret = set_machine_constraints(rdev, false);
	if (ret == -EPROBE_DEFER) {
		/* Regulator might be in bypass mode or an always-on or boot-on
		 * regulator and so needs its supply to set the constraints or
		 * for enable.
		 */
		/* FIXME: this currently triggers a chicken-and-egg problem
		 * when creating -SUPPLY symlink in sysfs to a regulator
		 * that is just being created
		 */
		rdev_dbg(rdev, "will resolve supply early: %s\n",
			 rdev->supply_name);
		ret = regulator_resolve_supply(rdev);
		if (!ret)
			ret = set_machine_constraints(rdev, false);
		else
			rdev_dbg(rdev, "unable to resolve supply early: %pe\n",
				 ERR_PTR(ret));
		tried_supply_resolve = true;
	}
	if (ret < 0) {
		/*
		 * A deferral is not fatal here: registration carries on and
		 * the constraints are flagged as still pending.
		 */
		if (ret != -EPROBE_DEFER)
			goto wash;
		rdev->constraints_pending = true;
	}

	ret = regulator_init_coupling(rdev);
	if (ret < 0)
		goto wash;

	/* add consumers devices */
	if (init_data) {
		for (i = 0; i < init_data->num_consumer_supplies; i++) {
			ret = set_consumer_device_supply(rdev,
				init_data->consumer_supplies[i].dev_name,
				init_data->consumer_supplies[i].supply);
			if (ret < 0) {
				dev_err(dev, "Failed to set supply %s\n",
					init_data->consumer_supplies[i].supply);
				goto unset_supplies;
			}
		}
	}

	/* no way to report or enumerate a voltage: treat it as a switch */
	if (!rdev->desc->ops->get_voltage &&
	    !rdev->desc->ops->list_voltage &&
	    !rdev->desc->fixed_uV)
		rdev->is_switch = true;

	ret = device_add(&rdev->dev);
	if (ret != 0)
		goto unset_supplies;

	if (!tried_supply_resolve) {
		/*
		 * As an optimisation, try to resolve our supply (if any) now to
		 * avoid adding the bus device. Errors are not fatal at this
		 * stage, we'll simply try again later.
		 */
		ret = regulator_resolve_supply(rdev);
		if (ret)
			rdev_dbg(rdev,
				 "unable to resolve supply (ignoring): %pe\n",
				 ERR_PTR(ret));
	}

	/*
	 * If we have a supply but couldn't resolve it yet, register a device
	 * with our bus, so that the bus probe gets called whenever any new
	 * driver binds, allowing us to retry matching supplies and which then
	 * triggers (re)probe of consumers if successful.
	 */
	if (rdev->supply_name && !rdev->supply) {
		device_initialize(&rdev->bdev);
		rdev->bdev.bus = &regulator_bus;
		rdev->bdev.parent = &rdev->dev;
		/*
		 * NOTE(review): this marks the class device (rdev->dev), which
		 * has already been added, rather than the bus device being set
		 * up here (rdev->bdev) - confirm which device is intended.
		 */
		device_set_pm_not_required(&rdev->dev);
		dev_set_name(&rdev->bdev, "%s.bdev", dev_name(&rdev->dev));

		ret = device_add(&rdev->bdev);
		if (ret)
			goto del_cdev_and_bdev;
	}

	rdev_init_debugfs(rdev);

	/* try to resolve regulators coupling since a new one was registered */
	mutex_lock(&regulator_list_mutex);
	regulator_resolve_coupling(rdev);
	mutex_unlock(&regulator_list_mutex);

	/* the duplicated config was only needed during registration */
	kfree(config);
	return rdev;

	/*
	 * Error unwinding. The labels fall through into each other, undoing
	 * the setup steps above in reverse order.
	 */
del_cdev_and_bdev:
	if (rdev->bdev.bus == &regulator_bus)
		put_device(&rdev->bdev);
	device_del(&rdev->dev);
unset_supplies:
	mutex_lock(&regulator_list_mutex);
	unset_regulator_supplies(rdev);
	regulator_remove_coupling(rdev);
	mutex_unlock(&regulator_list_mutex);
wash:
	regulator_put(rdev->supply);
	kfree(rdev->coupling_desc.coupled_rdevs);
	mutex_lock(&regulator_list_mutex);
	regulator_ena_gpio_free(rdev);
	mutex_unlock(&regulator_list_mutex);
clean:
	/* release a DT-provided GPIO that was never handed to the core */
	if (dangling_of_gpiod)
		gpiod_put(config->ena_gpiod);
	kfree(config);
	put_device(&rdev->dev);
rinse:
	/* likewise for a GPIO the caller passed in via @cfg */
	if (dangling_cfg_gpiod)
		gpiod_put(cfg->ena_gpiod);
	return ERR_PTR(ret);
}
EXPORT_SYMBOL_GPL(regulator_register);

/**
 * regulator_unregister - unregister regulator
 * @rdev: regulator to unregister
 *
 * Called by regulator drivers to unregister a regulator.
*/
void regulator_unregister(struct regulator_dev *rdev)
{
	/* a NULL regulator is accepted and ignored */
	if (rdev == NULL)
		return;

	if (rdev->supply) {
		regulator_unregister_notifier(rdev->supply,
					      &rdev->supply_fwd_nb);

		/* one disable of the supply per outstanding use of this regulator */
		while (rdev->use_count--)
			regulator_disable(rdev->supply);
		regulator_put(rdev->supply);
	}

	/* make sure a pending deferred disable has run before tearing down */
	flush_work(&rdev->disable_work.work);

	mutex_lock(&regulator_list_mutex);

	/* consumers still holding the regulator open is a caller bug */
	WARN_ON(rdev->open_count);
	regulator_remove_coupling(rdev);
	unset_regulator_supplies(rdev);
	list_del(&rdev->list);
	regulator_ena_gpio_free(rdev);
	if (rdev->bdev.bus == &regulator_bus)
		/* only if the device was added in the first place */
		device_unregister(&rdev->bdev);
	device_unregister(&rdev->dev);

	mutex_unlock(&regulator_list_mutex);
}
EXPORT_SYMBOL_GPL(regulator_unregister);

#ifdef CONFIG_SUSPEND
/**
 * regulator_suspend - prepare regulators for system wide suspend
 * @dev: ``&struct device`` pointer that is passed to _regulator_suspend()
 *
 * Configure each regulator with its suspend operating parameters for state.
 *
 * Return: 0 on success or a negative error number on failure.
*/ static int regulator_suspend(struct device *dev) { struct regulator_dev *rdev = dev_to_rdev(dev); suspend_state_t state = pm_suspend_target_state; int ret; const struct regulator_state *rstate; rstate = regulator_get_suspend_state_check(rdev, state); if (!rstate) return 0; regulator_lock(rdev); ret = __suspend_set_state(rdev, rstate); regulator_unlock(rdev); return ret; } static int regulator_resume(struct device *dev) { suspend_state_t state = pm_suspend_target_state; struct regulator_dev *rdev = dev_to_rdev(dev); struct regulator_state *rstate; int ret = 0; rstate = regulator_get_suspend_state(rdev, state); if (rstate == NULL) return 0; /* Avoid grabbing the lock if we don't need to */ if (!rdev->desc->ops->resume) return 0; regulator_lock(rdev); if (rstate->enabled == ENABLE_IN_SUSPEND || rstate->enabled == DISABLE_IN_SUSPEND) ret = rdev->desc->ops->resume(rdev); regulator_unlock(rdev); return ret; } #else /* !CONFIG_SUSPEND */ #define regulator_suspend NULL #define regulator_resume NULL #endif /* !CONFIG_SUSPEND */ #ifdef CONFIG_PM static const struct dev_pm_ops __maybe_unused regulator_pm_ops = { .suspend = regulator_suspend, .resume = regulator_resume, }; #endif const struct class regulator_class = { .name = "regulator", .dev_release = regulator_dev_release, .dev_groups = regulator_dev_groups, #ifdef CONFIG_PM .pm = &regulator_pm_ops, #endif }; #define bdev_to_rdev(__bdev) container_of_const(__bdev, struct regulator_dev, bdev) static int regulator_bus_match(struct device *bdev, const struct device_driver *drv) { /* Match always succeeds, we only have one driver */ return 1; } static int regulator_bus_probe(struct device *bdev) { struct regulator_dev *rdev = bdev_to_rdev(bdev); int ret; ret = regulator_resolve_supply(rdev); if (ret) rdev_dbg(rdev, "unable to resolve supply or constraints '%s': %pe\n", rdev->supply_name, ERR_PTR(ret)); else rdev_dbg(rdev, "resolved supply '%s'\n", rdev->supply_name); return ret; } static const struct bus_type regulator_bus = 
{ .name = "regulator", .match = regulator_bus_match, .probe = regulator_bus_probe, }; static struct device_driver regulator_bus_driver = { .name = "regulator-bus-drv", .bus = &regulator_bus, .suppress_bind_attrs = true, .probe_type = PROBE_PREFER_ASYNCHRONOUS, }; /** * regulator_has_full_constraints - the system has fully specified constraints * * Calling this function will cause the regulator API to disable all * regulators which have a zero use count and don't have an always_on * constraint in a late_initcall. * * The intention is that this will become the default behaviour in a * future kernel release so users are encouraged to use this facility * now. */ void regulator_has_full_constraints(void) { has_full_constraints = 1; } EXPORT_SYMBOL_GPL(regulator_has_full_constraints); /** * rdev_get_drvdata - get rdev regulator driver data * @rdev: regulator * * Get rdev regulator driver private data. This call can be used in the * regulator driver context. * * Return: Pointer to regulator driver private data. */ void *rdev_get_drvdata(struct regulator_dev *rdev) { return rdev->reg_data; } EXPORT_SYMBOL_GPL(rdev_get_drvdata); /** * regulator_get_drvdata - get regulator driver data * @regulator: regulator * * Get regulator driver private data. This call can be used in the consumer * driver context when non API regulator specific functions need to be called. * * Return: Pointer to regulator driver private data. */ void *regulator_get_drvdata(struct regulator *regulator) { return regulator->rdev->reg_data; } EXPORT_SYMBOL_GPL(regulator_get_drvdata); /** * regulator_set_drvdata - set regulator driver data * @regulator: regulator * @data: data */ void regulator_set_drvdata(struct regulator *regulator, void *data) { regulator->rdev->reg_data = data; } EXPORT_SYMBOL_GPL(regulator_set_drvdata); /** * rdev_get_id - get regulator ID * @rdev: regulator * * Return: Regulator ID for @rdev. 
*/
int rdev_get_id(struct regulator_dev *rdev)
{
	return rdev->desc->id;
}
EXPORT_SYMBOL_GPL(rdev_get_id);

/* Return the class device embedded in @rdev. */
struct device *rdev_get_dev(struct regulator_dev *rdev)
{
	return &rdev->dev;
}
EXPORT_SYMBOL_GPL(rdev_get_dev);

/* Return the regmap stored in @rdev (may be NULL if none was set). */
struct regmap *rdev_get_regmap(struct regulator_dev *rdev)
{
	return rdev->regmap;
}
EXPORT_SYMBOL_GPL(rdev_get_regmap);

/* Return the driver_data pointer carried by @reg_init_data. */
void *regulator_get_init_drvdata(struct regulator_init_data *reg_init_data)
{
	return reg_init_data->driver_data;
}
EXPORT_SYMBOL_GPL(regulator_get_init_drvdata);

#ifdef CONFIG_DEBUG_FS
/* debugfs "supply_map": one "<regulator> -> <dev>.<supply>" line per mapping. */
static int supply_map_show(struct seq_file *sf, void *data)
{
	struct regulator_map *map;

	/*
	 * NOTE(review): the list is walked without taking a lock in this
	 * function - confirm whether regulator_map_list needs protection here.
	 */
	list_for_each_entry(map, &regulator_map_list, list) {
		seq_printf(sf, "%s -> %s.%s\n",
				rdev_get_name(map->regulator), map->dev_name,
				map->supply);
	}

	return 0;
}
DEFINE_SHOW_ATTRIBUTE(supply_map);

/* Cookie passed to regulator_summary_show_children() via class iteration. */
struct summary_data {
	struct seq_file *s;		/* output file */
	struct regulator_dev *parent;	/* regulator whose children are wanted */
	int level;			/* nesting depth of @parent */
};

static void regulator_summary_show_subtree(struct seq_file *s,
					   struct regulator_dev *rdev,
					   int level);

/*
 * class_for_each_device() callback: print the subtree of every regulator
 * that is supplied by summary_data->parent, one level deeper.
 */
static int regulator_summary_show_children(struct device *dev, void *data)
{
	struct regulator_dev *rdev = dev_to_rdev(dev);
	struct summary_data *summary_data = data;

	if (rdev->supply && rdev->supply->rdev == summary_data->parent)
		regulator_summary_show_subtree(summary_data->s, rdev,
					       summary_data->level + 1);

	return 0;
}

/*
 * Print one row for @rdev, one row per non-regulator consumer and then,
 * recursively, the regulators supplied by @rdev. @level controls the
 * indentation. The *_unlocked helpers are used, so the caller is expected
 * to have taken the regulator locks (see regulator_summary_show()).
 */
static void regulator_summary_show_subtree(struct seq_file *s,
					   struct regulator_dev *rdev,
					   int level)
{
	struct regulation_constraints *c;
	struct regulator *consumer;
	struct summary_data summary_data;
	unsigned int opmode;

	if (!rdev)
		return;

	opmode = _regulator_get_mode_unlocked(rdev);
	/* name column shrinks as the indentation grows, keeping columns aligned */
	seq_printf(s, "%*s%-*s %3d %4d %6d %7s ",
		   level * 3 + 1, "",
		   30 - level * 3, rdev_get_name(rdev),
		   rdev->use_count, rdev->open_count, rdev->bypass_count,
		   regulator_opmode_to_str(opmode));

	seq_printf(s, "%5dmV ", regulator_get_voltage_rdev(rdev) / 1000);
	seq_printf(s, "%5dmA ",
		   _regulator_get_current_limit_unlocked(rdev) / 1000);

	/* min/max columns depend on what kind of regulator this is */
	c = rdev->constraints;
	if (c) {
		switch (rdev->desc->type) {
		case REGULATOR_VOLTAGE:
			seq_printf(s, "%5dmV %5dmV ",
				   c->min_uV / 1000, c->max_uV / 1000);
			break;
		case REGULATOR_CURRENT:
			seq_printf(s, "%5dmA %5dmA ",
				   c->min_uA / 1000, c->max_uA / 1000);
			break;
		}
	}

	seq_puts(s, "\n");

	list_for_each_entry(consumer, &rdev->consumer_list, list) {
		/* regulators consuming this one are printed as subtrees below */
		if (consumer->dev && consumer->dev->class == &regulator_class)
			continue;

		seq_printf(s, "%*s%-*s ",
			   (level + 1) * 3 + 1, "",
			   30 - (level + 1) * 3,
			   consumer->supply_name ? consumer->supply_name :
			   consumer->dev ? dev_name(consumer->dev) : "deviceless");

		switch (rdev->desc->type) {
		case REGULATOR_VOLTAGE:
			/* '*' flags a load requested by a consumer that is not enabled */
			seq_printf(s, "%3d %33dmA%c%5dmV %5dmV",
				   consumer->enable_count,
				   consumer->uA_load / 1000,
				   consumer->uA_load && !consumer->enable_count ?
				   '*' : ' ',
				   consumer->voltage[PM_SUSPEND_ON].min_uV / 1000,
				   consumer->voltage[PM_SUSPEND_ON].max_uV / 1000);
			break;
		case REGULATOR_CURRENT:
			break;
		}

		seq_puts(s, "\n");
	}

	summary_data.s = s;
	summary_data.level = level;
	summary_data.parent = rdev;

	class_for_each_device(&regulator_class, NULL, &summary_data,
			      regulator_summary_show_children);
}

/* Cookie for the lock/unlock class iterations below. */
struct summary_lock_data {
	struct ww_acquire_ctx *ww_ctx;
	/* set to the regulator whose lock attempt returned -EDEADLK */
	struct regulator_dev **new_contended_rdev;
	/* regulator already locked via the slow path before this pass */
	struct regulator_dev **old_contended_rdev;
};

/*
 * class_for_each_device() callback: lock one regulator within the acquire
 * context. The regulator that was contended on the previous pass is already
 * held and is skipped (and forgotten). A non-zero return stops the
 * iteration.
 */
static int regulator_summary_lock_one(struct device *dev, void *data)
{
	struct regulator_dev *rdev = dev_to_rdev(dev);
	struct summary_lock_data *lock_data = data;
	int ret = 0;

	if (rdev != *lock_data->old_contended_rdev) {
		ret = regulator_lock_nested(rdev, lock_data->ww_ctx);

		if (ret == -EDEADLK)
			*lock_data->new_contended_rdev = rdev;
		else
			WARN_ON_ONCE(ret);
	} else {
		*lock_data->old_contended_rdev = NULL;
	}

	return ret;
}

/*
 * class_for_each_device() callback: unlock one regulator. When @data is
 * given, unlocking stops (with -EDEADLK) at the regulator whose lock attempt
 * failed, since it and everything after it were never locked.
 */
static int regulator_summary_unlock_one(struct device *dev, void *data)
{
	struct regulator_dev *rdev = dev_to_rdev(dev);
	struct summary_lock_data *lock_data = data;

	if (lock_data) {
		if (rdev == *lock_data->new_contended_rdev)
			return -EDEADLK;
	}

	regulator_unlock(rdev);

	return 0;
}

/*
 * Try to lock every regulator in the class. On failure, the regulators
 * locked so far are unlocked again before returning the error.
 */
static int regulator_summary_lock_all(struct ww_acquire_ctx *ww_ctx,
				      struct regulator_dev **new_contended_rdev,
				      struct regulator_dev **old_contended_rdev)
{
	struct summary_lock_data lock_data;
	int ret;

	lock_data.ww_ctx = ww_ctx;
	lock_data.new_contended_rdev = new_contended_rdev;
	lock_data.old_contended_rdev = old_contended_rdev;

	ret = class_for_each_device(&regulator_class, NULL, &lock_data,
				    regulator_summary_lock_one);
	if (ret)
		class_for_each_device(&regulator_class, NULL, &lock_data,
				      regulator_summary_unlock_one);

	return ret;
}

/*
 * Take regulator_list_mutex and then every regulator lock, retrying for as
 * long as the attempt ends in -EDEADLK. On a retry the contended lock is
 * first taken through the ww_mutex slow path.
 */
static void regulator_summary_lock(struct ww_acquire_ctx *ww_ctx)
{
	struct regulator_dev *new_contended_rdev = NULL;
	struct regulator_dev *old_contended_rdev = NULL;
	int err;

	mutex_lock(&regulator_list_mutex);

	ww_acquire_init(ww_ctx, &regulator_ww_class);

	do {
		if (new_contended_rdev) {
			ww_mutex_lock_slow(&new_contended_rdev->mutex, ww_ctx);
			old_contended_rdev = new_contended_rdev;
			/* bookkeeping done by hand since the slow path was used directly */
			old_contended_rdev->ref_cnt++;
			old_contended_rdev->mutex_owner = current;
		}

		err = regulator_summary_lock_all(ww_ctx,
						 &new_contended_rdev,
						 &old_contended_rdev);

		/* still set: the pass never reached it, so drop the slow-path lock */
		if (old_contended_rdev)
			regulator_unlock(old_contended_rdev);

	} while (err == -EDEADLK);

	ww_acquire_done(ww_ctx);
}

/* Undo regulator_summary_lock(): unlock everything and drop the list mutex. */
static void regulator_summary_unlock(struct ww_acquire_ctx *ww_ctx)
{
	class_for_each_device(&regulator_class, NULL, NULL,
			      regulator_summary_unlock_one);
	ww_acquire_fini(ww_ctx);

	mutex_unlock(&regulator_list_mutex);
}

/* class_for_each_device() callback: start a subtree at every unsupplied regulator. */
static int regulator_summary_show_roots(struct device *dev, void *data)
{
	struct regulator_dev *rdev = dev_to_rdev(dev);
	struct seq_file *s = data;

	if (!rdev->supply)
		regulator_summary_show_subtree(s, rdev, 0);

	return 0;
}

/* debugfs "regulator_summary": header plus the regulator tree, all locked. */
static int regulator_summary_show(struct seq_file *s, void *data)
{
	struct ww_acquire_ctx ww_ctx;

	/*
	 * NOTE(review): the header text looks whitespace-collapsed relative
	 * to the column widths used by the rows - verify alignment.
	 */
	seq_puts(s, " regulator use open bypass opmode voltage current min max\n");
	seq_puts(s, "---------------------------------------------------------------------------------------\n");

	regulator_summary_lock(&ww_ctx);

	class_for_each_device(&regulator_class, NULL, s,
			      regulator_summary_show_roots);

	regulator_summary_unlock(&ww_ctx);

	return 0;
}
DEFINE_SHOW_ATTRIBUTE(regulator_summary); #endif /* CONFIG_DEBUG_FS */ static int __init regulator_init(void) { int ret; ret = bus_register(&regulator_bus); if (ret) return ret; ret = class_register(&regulator_class); if (ret) goto err_class; ret = driver_register(&regulator_bus_driver); if (ret) goto err_driver; debugfs_root = debugfs_create_dir("regulator", NULL); if (IS_ERR(debugfs_root)) pr_debug("regulator: Failed to create debugfs directory\n"); #ifdef CONFIG_DEBUG_FS debugfs_create_file("supply_map", 0444, debugfs_root, NULL, &supply_map_fops); debugfs_create_file("regulator_summary", 0444, debugfs_root, NULL, &regulator_summary_fops); #endif regulator_dummy_init(); regulator_coupler_register(&generic_regulator_coupler); return 0; err_driver: class_unregister(&regulator_class); err_class: bus_unregister(&regulator_bus); return ret; } /* init early to allow our consumers to complete system booting */ core_initcall(regulator_init); static int regulator_late_cleanup(struct device *dev, void *data) { struct regulator_dev *rdev = dev_to_rdev(dev); struct regulation_constraints *c = rdev->constraints; int ret; if (c && c->always_on) return 0; if (!regulator_ops_is_valid(rdev, REGULATOR_CHANGE_STATUS)) return 0; regulator_lock(rdev); if (rdev->use_count) goto unlock; /* If reading the status failed, assume that it's off. */ if (_regulator_is_enabled(rdev) <= 0) goto unlock; if (have_full_constraints()) { /* We log since this may kill the system if it goes * wrong. */ rdev_info(rdev, "disabling\n"); ret = _regulator_do_disable(rdev); if (ret != 0) rdev_err(rdev, "couldn't disable: %pe\n", ERR_PTR(ret)); } else { /* The intention is that in future we will * assume that full constraints are provided * so warn even if we aren't going to do * anything here. 
*/ rdev_warn(rdev, "incomplete constraints, leaving on\n"); } unlock: regulator_unlock(rdev); return 0; } static bool regulator_ignore_unused; static int __init regulator_ignore_unused_setup(char *__unused) { regulator_ignore_unused = true; return 1; } __setup("regulator_ignore_unused", regulator_ignore_unused_setup); static void regulator_init_complete_work_function(struct work_struct *work) { /* * For debugging purposes, it may be useful to prevent unused * regulators from being disabled. */ if (regulator_ignore_unused) { pr_warn("regulator: Not disabling unused regulators\n"); return; } /* If we have a full configuration then disable any regulators * we have permission to change the status for and which are * not in use or always_on. This is effectively the default * for DT and ACPI as they have full constraints. */ class_for_each_device(&regulator_class, NULL, NULL, regulator_late_cleanup); } static DECLARE_DELAYED_WORK(regulator_init_complete_work, regulator_init_complete_work_function); static int __init regulator_init_complete(void) { /* * Since DT doesn't provide an idiomatic mechanism for * enabling full constraints and since it's much more natural * with DT to provide them just assume that a DT enabled * system has full constraints. */ if (of_have_populated_dt()) has_full_constraints = true; /* * We punt completion for an arbitrary amount of time since * systems like distros will load many drivers from userspace * so consumers might not always be ready yet, this is * particularly an issue with laptops where this might bounce * the display off then on. Ideally we'd get a notification * from userspace when this happens but we don't so just wait * a bit and hope we waited long enough. It'd be better if * we'd only do this on systems that need it, and a kernel * command line option might be useful. */ schedule_delayed_work(&regulator_init_complete_work, msecs_to_jiffies(30000)); return 0; } late_initcall_sync(regulator_init_complete);
4 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271
/* SPDX-License-Identifier: GPL-2.0 */
/*
 * include/linux/irqflags.h
 *
 * IRQ flags tracing: follow the state of the hardirq and softirq flags and
 * provide callbacks for transitions between ON and OFF states.
 *
 * This file gets included from lowlevel asm headers too, to provide
 * wrapped versions of the local_irq_*() APIs, based on the
 * raw_local_irq_*() macros from the lowlevel headers.
 */
#ifndef _LINUX_TRACE_IRQFLAGS_H
#define _LINUX_TRACE_IRQFLAGS_H

#include <linux/irqflags_types.h>
#include <linux/typecheck.h>
#include <linux/cleanup.h>
#include <asm/irqflags.h>
#include <asm/percpu.h>

struct task_struct;

/* Currently lockdep_softirqs_on/off is used only by lockdep */
/* Without CONFIG_PROVE_LOCKING every hook below is an empty inline stub. */
#ifdef CONFIG_PROVE_LOCKING
  extern void lockdep_softirqs_on(unsigned long ip);
  extern void lockdep_softirqs_off(unsigned long ip);
  extern void lockdep_hardirqs_on_prepare(void);
  extern void lockdep_hardirqs_on(unsigned long ip);
  extern void lockdep_hardirqs_off(unsigned long ip);
  extern void lockdep_cleanup_dead_cpu(unsigned int cpu, struct task_struct *idle);
#else
  static inline void lockdep_softirqs_on(unsigned long ip) { }
  static inline void lockdep_softirqs_off(unsigned long ip) { }
  static inline void lockdep_hardirqs_on_prepare(void) { }
  static inline void lockdep_hardirqs_on(unsigned long ip) { }
  static inline void lockdep_hardirqs_off(unsigned long ip) { }
  static inline void lockdep_cleanup_dead_cpu(unsigned int cpu, struct task_struct *idle) {}
#endif

#ifdef CONFIG_TRACE_IRQFLAGS

DECLARE_PER_CPU(int, hardirqs_enabled);
DECLARE_PER_CPU(int, hardirq_context);

extern void trace_hardirqs_on_prepare(void);
extern void trace_hardirqs_off_finish(void);
extern void trace_hardirqs_on(void);
extern void trace_hardirqs_off(void);

/* Accessors for the per-CPU hardirq state and the per-task softirq state. */
# define lockdep_hardirq_context()	(raw_cpu_read(hardirq_context))
# define lockdep_softirq_context(p)	((p)->softirq_context)
# define lockdep_hardirqs_enabled()	(this_cpu_read(hardirqs_enabled))
# define lockdep_softirqs_enabled(p)	((p)->softirqs_enabled)
/*
 * Bump the per-CPU hardirq_context counter; when it becomes 1, i.e. on the
 * outermost entry, reset current->hardirq_threaded.
 */
# define lockdep_hardirq_enter()			\
do {							\
	if (__this_cpu_inc_return(hardirq_context) == 1)\
		current->hardirq_threaded = 0;		\
} while (0)
# define lockdep_hardirq_threaded()		\
do {						\
	current->hardirq_threaded = 1;		\
} while (0)
# define lockdep_hardirq_exit()			\
do {						\
	__this_cpu_dec(hardirq_context);	\
} while (0)

/*
 * Evaluates to true when __hrtimer->is_hard is set. Otherwise sets
 * current->irq_config and evaluates to false.
 * NOTE(review): the result is presumably what callers hand back to
 * lockdep_hrtimer_exit() - confirm at the call sites.
 */
# define lockdep_hrtimer_enter(__hrtimer)		\
({							\
	bool __expires_hardirq = true;			\
							\
	if (!__hrtimer->is_hard) {			\
		current->irq_config = 1;		\
		__expires_hardirq = false;		\
	}						\
	__expires_hardirq;				\
})

/* Clears current->irq_config again unless the argument is true. */
# define lockdep_hrtimer_exit(__expires_hardirq)	\
	do {						\
		if (!__expires_hardirq)			\
			current->irq_config = 0;	\
	} while (0)

# define lockdep_posixtimer_enter()				\
	  do {							\
		  current->irq_config = 1;			\
	  } while (0)

# define lockdep_posixtimer_exit()				\
	  do {							\
		  current->irq_config = 0;			\
	  } while (0)

/* irq_config is only toggled when IRQ_WORK_HARD_IRQ is not set in _flags. */
# define lockdep_irq_work_enter(_flags)					\
	  do {								\
		  if (!((_flags) & IRQ_WORK_HARD_IRQ))			\
			current->irq_config = 1;			\
	  } while (0)
# define lockdep_irq_work_exit(_flags)					\
	  do {								\
		  if (!((_flags) & IRQ_WORK_HARD_IRQ))			\
			current->irq_config = 0;			\
	  } while (0)

#else
/*
 * !CONFIG_TRACE_IRQFLAGS: everything collapses to a no-op or a constant.
 * lockdep_softirq_enter()/exit() are defined here and again, with the same
 * replacement text, by the #else branch of the conditional further down.
 */
# define trace_hardirqs_on_prepare()		do { } while (0)
# define trace_hardirqs_off_finish()		do { } while (0)
# define trace_hardirqs_on()			do { } while (0)
# define trace_hardirqs_off()			do { } while (0)
# define lockdep_hardirq_context()		0
# define lockdep_softirq_context(p)		0
# define lockdep_hardirqs_enabled()		0
# define lockdep_softirqs_enabled(p)		0
# define lockdep_hardirq_enter()		do { } while (0)
# define lockdep_hardirq_threaded()		do { } while (0)
# define lockdep_hardirq_exit()			do { } while (0)
# define lockdep_softirq_enter()		do { } while (0)
# define lockdep_softirq_exit()			do { } while (0)
# define lockdep_hrtimer_enter(__hrtimer)	false
# define lockdep_hrtimer_exit(__context)	do { (void)(__context); } while (0)
# define lockdep_posixtimer_enter()		do { } while (0)
# define lockdep_posixtimer_exit()		do { } while (0)
# define lockdep_irq_work_enter(__work)		do { } while (0)
# define lockdep_irq_work_exit(__work)		do { } while (0)
#endif

/* The per-task softirq_context counter is only kept on !PREEMPT_RT. */
#if defined(CONFIG_TRACE_IRQFLAGS) && !defined(CONFIG_PREEMPT_RT)
# define lockdep_softirq_enter()		\
do {						\
	current->softirq_context++;		\
} while (0)
# define lockdep_softirq_exit()			\
do {						\
	current->softirq_context--;		\
} while (0)

#else
# define lockdep_softirq_enter()		do { } while (0)
# define lockdep_softirq_exit()			do { } while (0)
#endif

#if defined(CONFIG_IRQSOFF_TRACER) || \
	defined(CONFIG_PREEMPT_TRACER)
 extern void stop_critical_timings(void);
 extern void start_critical_timings(void);
#else
# define stop_critical_timings() do { } while (0)
# define start_critical_timings() do { } while (0)
#endif

/*
 * With CONFIG_DEBUG_IRQFLAGS, raw_local_irq_restore() reports a restore
 * that is attempted while arch_irqs_disabled() is false.
 */
#ifdef CONFIG_DEBUG_IRQFLAGS
extern void warn_bogus_irq_restore(void);
#define raw_check_bogus_irq_restore()			\
	do {						\
		if (unlikely(!arch_irqs_disabled()))	\
			warn_bogus_irq_restore();	\
	} while (0)
#else
#define raw_check_bogus_irq_restore() do { } while (0)
#endif

/*
 * Wrap the arch provided IRQ routines to provide appropriate checks.
 */
#define raw_local_irq_disable()		arch_local_irq_disable()
#define raw_local_irq_enable()		arch_local_irq_enable()
#define raw_local_irq_save(flags)			\
	do {						\
		typecheck(unsigned long, flags);	\
		flags = arch_local_irq_save();		\
	} while (0)
#define raw_local_irq_restore(flags)			\
	do {						\
		typecheck(unsigned long, flags);	\
		raw_check_bogus_irq_restore();		\
		arch_local_irq_restore(flags);		\
	} while (0)
#define raw_local_save_flags(flags)			\
	do {						\
		typecheck(unsigned long, flags);	\
		flags = arch_local_save_flags();	\
	} while (0)
#define raw_irqs_disabled_flags(flags)			\
	({						\
		typecheck(unsigned long, flags);	\
		arch_irqs_disabled_flags(flags);	\
	})
#define raw_irqs_disabled()		(arch_irqs_disabled())
#define raw_safe_halt()			arch_safe_halt()

/*
 * The local_irq_*() APIs are equal to the raw_local_irq*()
 * if !TRACE_IRQFLAGS.
 */
#ifdef CONFIG_TRACE_IRQFLAGS

/* The "on" transition is traced before interrupts are actually enabled. */
#define local_irq_enable()				\
	do {						\
		trace_hardirqs_on();			\
		raw_local_irq_enable();			\
	} while (0)

/* The "off" transition is traced only if interrupts were enabled before. */
#define local_irq_disable()				\
	do {						\
		bool was_disabled = raw_irqs_disabled();\
		raw_local_irq_disable();		\
		if (!was_disabled)			\
			trace_hardirqs_off();		\
	} while (0)

#define local_irq_save(flags)				\
	do {						\
		raw_local_irq_save(flags);		\
		if (!raw_irqs_disabled_flags(flags))	\
			trace_hardirqs_off();		\
	} while (0)

/* Traces "on" first, and only when the saved flags had interrupts enabled. */
#define local_irq_restore(flags)			\
	do {						\
		if (!raw_irqs_disabled_flags(flags))	\
			trace_hardirqs_on();		\
		raw_local_irq_restore(flags);		\
	} while (0)

#define safe_halt()				\
	do {					\
		trace_hardirqs_on();		\
		raw_safe_halt();		\
	} while (0)


#else /* !CONFIG_TRACE_IRQFLAGS */

#define local_irq_enable()	do { raw_local_irq_enable(); } while (0)
#define local_irq_disable()	do { raw_local_irq_disable(); } while (0)
#define local_irq_save(flags)	do { raw_local_irq_save(flags); } while (0)
#define local_irq_restore(flags) do { raw_local_irq_restore(flags); } while (0)
#define safe_halt()		do { raw_safe_halt(); } while (0)

#endif /* CONFIG_TRACE_IRQFLAGS */

#define local_save_flags(flags)	raw_local_save_flags(flags)

/*
 * Some architectures don't define arch_irqs_disabled(), so even if either
 * definition would be fine we need to use different ones for the time being
 * to avoid build issues.
 */
#ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
#define irqs_disabled()					\
	({						\
		unsigned long _flags;			\
		raw_local_save_flags(_flags);		\
		raw_irqs_disabled_flags(_flags);	\
	})
#else /* !CONFIG_TRACE_IRQFLAGS_SUPPORT */
#define irqs_disabled()	raw_irqs_disabled()
#endif /* CONFIG_TRACE_IRQFLAGS_SUPPORT */

#define irqs_disabled_flags(flags) raw_irqs_disabled_flags(flags)

/*
 * Guard definitions built from the macros above: "irq" pairs
 * local_irq_disable() with local_irq_enable(), "irqsave" pairs
 * local_irq_save() with local_irq_restore() on a flags member.
 * NOTE(review): the scope-exit behaviour comes from DEFINE_LOCK_GUARD_0,
 * presumably provided by <linux/cleanup.h> - confirm there.
 */
DEFINE_LOCK_GUARD_0(irq, local_irq_disable(), local_irq_enable())
DEFINE_LOCK_GUARD_0(irqsave,
		    local_irq_save(_T->flags),
		    local_irq_restore(_T->flags),
		    unsigned long flags)

#endif
13 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 
527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 
1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 
1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 
1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 2220 
2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282 2283 2284 2285 2286 2287 2288 2289 2290 2291 2292 2293 2294 2295 2296 2297 2298 2299 2300 2301 2302 2303 2304 2305 2306 2307 2308 2309 2310 2311 2312 2313 2314 2315 2316 2317 2318 2319 2320 2321 2322 2323 2324 2325 2326 2327 2328 2329 2330 2331 2332 2333 2334 2335 2336 2337 2338 2339 2340 2341 2342 2343 2344 2345 2346 2347 2348 2349 2350 2351 2352 2353 2354 2355 2356 2357 2358 2359 2360 2361 2362 2363 2364 2365 2366 2367 2368 2369 2370 2371 2372 2373 2374 2375 2376 2377 2378 2379 2380 2381 2382 2383 2384 2385 2386 2387 2388 2389 2390 2391 2392 2393 2394 2395 2396 2397 2398 2399 2400 2401 2402 2403 2404 2405 2406 2407 2408 2409 2410 2411 2412 2413 2414 2415 2416 2417 2418 2419 2420 2421 2422 2423 2424 2425 2426 2427 2428 2429 2430 2431 2432 2433 2434 2435 2436 2437 2438 2439 2440 2441 2442 2443 2444 2445 2446 2447 2448 2449 2450 2451 2452 2453 2454 2455 2456 2457 2458 2459 2460 2461 2462 2463 2464 2465 2466 2467 2468 2469 2470 2471 2472 2473 2474 2475 2476 2477 2478 2479 2480 2481 2482 2483 2484 2485 2486 2487 2488 2489 2490 2491 2492 2493 2494 2495 2496 2497 2498 2499 2500 2501 2502 2503 2504 2505 2506 2507 2508 2509 2510 2511 2512 2513 2514 2515 2516 2517 2518 2519 2520 2521 2522 2523 2524 2525 2526 2527 2528 2529 2530 2531 2532 2533 2534 2535 2536 2537 2538 2539 2540 2541 2542 2543 2544 2545 2546 2547 2548 2549 2550 2551 2552 2553 2554 2555 2556 2557 2558 2559 2560 2561 2562 2563 2564 2565 2566 2567 2568 2569 2570 2571 2572 2573 2574 2575 2576 2577 2578 2579 2580 2581 2582 2583 2584 2585 2586 2587 2588 2589 2590 2591 2592 2593 2594 2595 2596 2597 2598 2599 2600 2601 2602 2603 2604 2605 2606 2607 2608 2609 2610 2611 2612 2613 2614 2615 2616 2617 2618 2619 2620 
2621 2622 2623 2624 2625 2626 2627 2628 2629 2630 2631 2632 2633 2634 2635 2636 2637 2638 2639 2640 2641 2642 2643 2644 2645 2646 2647 2648 2649 2650 2651 2652 2653 2654 2655 2656 2657 2658 2659 2660 2661 2662 2663 2664 2665 2666 2667 2668 2669 2670 2671 2672 2673 2674 2675 2676 2677 2678 2679 2680 2681 2682 2683 2684 2685 2686 2687 2688 2689 2690 2691 2692 2693 2694 2695 2696 2697 2698 2699 2700 2701 2702 2703 2704 2705 2706 2707 2708 2709 2710 2711 2712 2713 2714 2715 2716 2717 2718 2719 2720 2721 2722 2723 2724 2725 2726 2727 2728 2729 2730 2731 2732 2733 2734 2735 2736 2737 2738 2739 2740 2741 2742 2743 2744 2745 2746 2747 2748 2749 2750 2751 2752 2753 2754 2755 2756 2757 2758 2759 2760 2761 2762 2763 2764 2765 2766 2767 2768 2769 2770 2771 2772 2773 2774 2775 2776 2777 2778 2779 2780 2781 2782 2783 2784 2785 2786 2787 2788 2789 2790 2791 2792 2793 2794 2795 2796 2797 2798 2799 2800 2801 2802 2803 2804 2805 2806 2807 2808 2809 2810 2811 2812 2813 2814 2815 2816 2817 2818 2819 2820 2821 2822 2823 2824 2825 2826 2827 2828 2829 2830 2831 2832 2833 2834 2835 2836 2837 2838 2839 2840 2841 2842 2843 2844 2845 2846 2847 2848 2849 2850 2851 2852 2853 2854 2855 2856 2857 2858 2859 2860 2861 2862 2863 2864 2865 2866 2867 2868 2869 2870 2871 2872 2873 2874 2875 2876 2877 2878 2879 2880 2881 2882 2883 2884 2885 2886 2887 2888 2889 2890 2891 2892 2893 2894 2895 2896 2897 2898 2899 2900 2901 2902 2903 2904 2905 2906 2907 2908 2909 2910 2911 2912 2913 2914 2915 2916 2917 2918 2919 2920 2921 2922 2923 2924 2925 2926 2927 2928 2929 2930 2931 2932 2933 2934 2935 2936 2937 2938 2939 2940 2941 2942 2943 2944 2945 2946 2947 2948 2949 2950 2951 2952 2953 2954 2955 2956 2957 2958 2959 2960 2961 2962 2963 2964 2965 2966 2967 2968 2969 2970 2971 2972 2973 2974 2975 2976 2977 2978 2979 2980 2981 2982 2983 2984 2985 2986 2987 2988 2989 2990 2991 2992 2993 2994 2995 2996 2997 2998 2999 3000 3001 3002 3003 3004 3005 3006 3007 3008 3009 3010 3011 3012 3013 3014 3015 3016 3017 3018 3019 3020 
3021 3022 3023 3024 3025 3026 3027 3028 3029 3030 3031 3032 3033 3034 3035 3036 3037 3038 3039 3040 3041 3042 3043 3044 3045 3046 3047 3048 3049 3050 3051 3052 3053 3054 3055 3056 3057 3058 3059 3060 3061 3062 3063 3064 3065 3066 3067 3068 3069 3070 3071 3072 3073 3074 3075 3076 3077 3078 3079 3080 3081 3082 3083 3084 3085 3086 3087 3088 3089 3090 3091 3092 3093 3094 3095 3096 3097 3098 3099 3100 3101 3102 3103 3104 3105 3106 3107 3108 3109 3110 3111 3112 3113 3114 3115 3116 3117 3118 3119 3120 3121 3122 3123 3124 3125 3126 3127 3128 3129 3130 3131 3132 3133 3134 3135 3136 3137 3138 3139 3140 3141 3142 3143 3144 3145 3146 3147 3148 3149 3150 3151 3152 3153 3154 3155 3156 3157 3158 3159 3160 3161 3162 3163 3164 3165 3166 3167 3168 3169 3170 3171 3172 3173 3174 3175 3176 3177 3178 3179 3180 3181 3182 3183 3184 3185 3186 3187 3188 3189 3190 3191 3192 3193 3194 3195 3196 3197 3198 3199 3200 3201 3202 3203 3204 3205 3206 3207 3208 3209 3210 3211 3212 3213 3214 3215 3216 3217 3218 3219 3220 3221 3222 3223 3224 3225 3226 3227 3228 3229 3230 3231 3232 3233 3234 3235 3236 3237 3238 3239 3240 3241 3242 3243 3244 3245 3246 3247 3248 3249 3250 3251 3252 3253 3254 3255 3256 3257 3258 3259 3260 3261 3262 3263 3264 3265 3266 3267 3268 3269 3270 3271 3272 3273 3274 3275 3276 3277 3278 3279 3280 3281 3282 3283 3284 3285 3286 3287 3288 3289 3290 3291 3292 3293 3294 3295 3296 3297 3298 3299 3300 3301 3302 3303 3304 3305 3306 3307 3308 3309 3310 3311 3312 3313 3314 3315 3316 3317 3318 3319 3320 3321 3322 3323 3324 3325 3326 3327 3328 3329 3330 3331 3332 3333 3334 3335 3336 3337 3338 3339 3340 3341 3342 3343 3344 3345 3346 3347 3348 3349 3350 3351 3352 3353 3354 3355 3356 3357 3358 3359 3360 3361 3362 3363 3364 3365 3366 3367 3368 3369 3370 3371 3372 3373 3374 3375 3376 3377 3378 3379 3380 3381 3382 3383 3384 3385 3386 3387 3388 3389 3390 3391 3392 3393 3394 3395 3396 3397 3398 3399 3400 3401 3402 3403 3404 3405 3406 3407 3408 3409 3410 3411 3412 3413 3414 3415 3416 3417 3418 3419 3420 
3421 3422 3423 3424 3425 3426 3427 3428 3429 3430 3431 3432 3433 3434 3435 3436 3437 3438 3439 3440 3441 3442 3443 3444 3445 3446 3447 3448 3449 3450 3451 3452 3453 3454 3455 3456 3457 3458 3459 3460 3461 3462 3463 3464 3465 3466 3467 3468 3469 3470 3471 3472 3473 3474 3475 3476 3477 3478 3479 3480 3481 3482 3483 3484 3485 3486 3487 3488 3489 3490 3491 3492 3493 3494 3495 3496 3497 3498 3499 3500 3501 3502 3503 3504 3505 3506 3507 3508 3509 3510 3511 3512 3513 3514 3515 3516 3517 3518 3519 3520 3521 3522 3523 3524 3525 3526 3527 3528 3529 3530 3531 3532 3533 3534 3535 3536 3537 3538 3539 3540 3541 3542 3543 3544 3545 3546 3547 3548 3549 3550 3551 3552 3553 3554 3555 3556 3557 3558 3559 3560 3561 3562 3563 3564 3565 3566 3567 3568 3569 3570 3571 3572 3573 3574 3575 3576 3577 3578 3579 3580 3581 3582 3583 3584 3585 3586 3587 3588 3589 3590 3591 3592 3593 3594 3595 3596 3597 3598 3599 3600 3601 3602 3603 3604 3605 3606 3607 3608 3609 3610 3611 3612 3613 3614 3615 3616 3617 3618 3619 3620 3621 3622 3623 3624 3625 3626 3627 3628 3629 3630 3631 3632 3633 3634 3635 3636 3637 3638 3639 3640 3641 3642 3643 3644 3645 3646 3647 3648 3649 3650 3651 3652 3653 3654 3655 3656 3657 3658 3659 3660 3661 3662 3663 3664 3665 3666 3667 3668 3669 3670 3671 3672 3673 3674 3675 3676 3677 3678 3679 3680 3681 3682 3683 3684 3685 3686 3687 3688 3689 3690 3691 3692 3693 3694 3695 3696 3697 3698 3699 3700 3701 3702 3703 3704 3705 3706 3707 3708 3709 3710 3711 3712 3713 3714 3715 3716 3717 3718 3719 3720 3721 3722 3723 3724 3725 3726 3727 3728 3729 3730 3731 3732 3733 3734 3735 3736 3737 3738 3739 3740 3741 3742 3743 3744 3745 3746 3747 3748 3749 3750 3751 3752 3753 3754 3755 3756 3757 3758 3759 3760 3761 3762 3763 3764 3765 3766 3767 3768 3769 3770 3771 3772 3773 3774 3775 3776 3777 3778 3779 3780 3781 3782 3783 3784 3785 3786 3787 3788 3789 3790 3791 3792 3793 3794 3795 3796 3797 3798 3799 3800 3801 3802 3803 3804 3805 3806 3807 3808 3809 3810 3811 3812 3813 3814 3815 3816 3817 3818 3819 3820 
3821 3822 3823 3824 3825 3826 3827 3828 3829 3830 3831 3832 3833 3834 3835 3836 3837 3838 3839 3840 3841 3842 3843 3844 3845 3846 3847 3848 3849 3850 3851 3852 3853 3854 3855 3856 3857 3858 3859 3860 3861 3862 3863 3864 3865 3866 3867 3868 3869 3870 3871 3872 3873 3874 3875 3876 3877 3878 3879 3880 3881 3882 3883 3884 3885 3886 3887 3888 3889 3890 3891 3892 3893 3894 3895 3896 3897 3898 3899 3900 3901 3902 3903 3904 3905 3906 3907 3908 3909 3910 3911 3912 3913 3914 3915 3916 3917 3918 3919 3920 3921 3922 3923 3924 3925 3926 3927 3928 3929 3930 3931 3932 3933 3934 3935 3936 3937 3938 3939 3940 3941 3942 3943 3944 3945 3946 3947 3948 3949 3950 3951 3952 3953 3954 3955 3956 3957 3958 3959 3960 3961 3962 3963 3964 3965 3966 3967 3968 3969 3970 3971 3972 3973 3974 3975 3976 3977 3978 3979 3980 3981 3982 3983 3984 3985 3986 3987 3988 3989 3990 3991 3992 3993 3994 3995 3996 3997 3998 3999 4000 4001 4002 4003 4004 4005 4006 4007 4008 4009 4010 4011 4012 4013 4014 4015 4016 4017 4018 4019 4020 4021 4022 4023 4024 4025 4026 4027 4028 4029 4030 4031 4032 4033 4034 4035 4036 4037 4038 4039 4040 4041 4042 4043 4044 4045 4046 4047 4048 4049 4050 4051 4052 4053 4054 4055 4056 4057 4058 4059 4060 4061 4062 4063 4064 4065 4066 4067 4068 4069 4070 4071 4072 4073 4074 4075 4076 4077 4078 4079 4080 4081 4082 4083 4084 4085 4086 4087 4088 4089 4090 4091 4092 4093 4094 4095 4096 4097 4098 4099 4100 4101 4102 4103 4104 4105 4106 4107 4108 4109 4110 4111 4112 4113 4114 4115 4116 4117 4118 4119 4120 4121 4122 4123 4124 4125 4126 4127 4128 4129 4130 4131 4132 4133 4134 4135 4136 4137 4138 4139 4140 4141 4142 4143 4144 4145 4146 4147 4148 4149 4150 4151 4152 4153 4154 4155 4156 4157 4158 4159 4160 4161 4162 4163 4164 4165 4166 4167 4168 4169 4170 4171 4172 4173 4174 4175 4176 4177 4178 4179 4180 4181 4182 4183 4184 4185 4186 4187 4188 4189 4190 4191 4192 4193 4194 4195 4196 4197 4198 4199 4200 4201 4202 4203 4204 4205 4206 4207 4208 4209 4210 4211 4212 4213 4214 4215 4216 4217 4218 4219 4220 
4221 4222 4223 4224 4225 4226 4227 4228 4229 4230 4231 4232 4233 4234 4235 4236 4237 4238 4239 4240 4241 4242 4243 4244 4245 4246 4247 4248 4249 4250 4251 4252 4253 4254 4255 4256 4257 4258 4259 4260 4261 4262 4263 4264 4265 4266 4267 4268 4269 4270 4271 4272 4273 4274 4275 4276 4277 4278 4279 4280 4281 4282 4283 4284 4285 4286 4287 4288 4289 4290 4291 4292 4293 4294 4295 4296 4297 4298 4299 4300 4301 4302 4303 4304 4305 4306 4307 4308 4309 4310 4311 4312 4313 4314 4315 4316 4317 4318 4319 4320 4321 4322 4323 4324 4325 4326 4327 4328 4329 4330 4331 4332 4333 4334 4335 4336 4337 4338 4339 4340 4341 4342 4343 4344 4345 4346 4347 4348 4349 4350 4351 4352 4353 4354 4355 4356 4357 4358 4359 4360 4361 4362 4363 4364 4365 4366 4367 4368 4369 4370 4371 4372 4373 4374 4375 4376 4377 4378 4379 4380 4381 4382 4383 4384 4385 4386 4387 4388 // SPDX-License-Identifier: GPL-2.0 /* * Copyright (C) 2008 Red Hat. All rights reserved. */ #include <linux/pagemap.h> #include <linux/sched.h> #include <linux/sched/signal.h> #include <linux/slab.h> #include <linux/math64.h> #include <linux/ratelimit.h> #include <linux/error-injection.h> #include <linux/sched/mm.h> #include <linux/string_choices.h> #include "extent-tree.h" #include "fs.h" #include "messages.h" #include "misc.h" #include "free-space-cache.h" #include "transaction.h" #include "disk-io.h" #include "extent_io.h" #include "space-info.h" #include "block-group.h" #include "discard.h" #include "subpage.h" #include "inode-item.h" #include "accessors.h" #include "file-item.h" #include "file.h" #include "super.h" #include "relocation.h" #define BITS_PER_BITMAP (PAGE_SIZE * 8UL) #define MAX_CACHE_BYTES_PER_GIG SZ_64K #define FORCE_EXTENT_THRESHOLD SZ_1M static struct kmem_cache *btrfs_free_space_cachep; static struct kmem_cache *btrfs_free_space_bitmap_cachep; struct btrfs_trim_range { u64 start; u64 bytes; struct list_head list; }; static int link_free_space(struct btrfs_free_space_ctl *ctl, struct btrfs_free_space *info); static 
void unlink_free_space(struct btrfs_free_space_ctl *ctl,
		       struct btrfs_free_space *info, bool update_stat);
static int search_bitmap(struct btrfs_free_space_ctl *ctl,
			 struct btrfs_free_space *bitmap_info, u64 *offset,
			 u64 *bytes, bool for_alloc);
static void free_bitmap(struct btrfs_free_space_ctl *ctl,
			struct btrfs_free_space *bitmap_info);
static void bitmap_clear_bits(struct btrfs_free_space_ctl *ctl,
			      struct btrfs_free_space *info, u64 offset,
			      u64 bytes, bool update_stats);

/*
 * Finalize a crc32c value: store the bitwise complement of @crc at @result
 * as a little-endian 32-bit value (@result does not have to be aligned).
 */
static void btrfs_crc32c_final(u32 crc, u8 *result)
{
	put_unaligned_le32(~crc, result);
}

/*
 * Remove every entry from ctl->free_space_offset, always taking the last
 * node of the rb-tree until the tree is empty.
 *
 * Entries without a bitmap are unlinked and returned to
 * btrfs_free_space_cachep; entries with a bitmap are handed to free_bitmap().
 *
 * NOTE(review): cond_resched_lock() is called on ctl->tree_lock after each
 * entry, which implies the caller holds that lock - confirm against callers.
 */
static void __btrfs_remove_free_space_cache(struct btrfs_free_space_ctl *ctl)
{
	struct btrfs_free_space *info;
	struct rb_node *node;

	while ((node = rb_last(&ctl->free_space_offset)) != NULL) {
		info = rb_entry(node, struct btrfs_free_space, offset_index);
		if (!info->bitmap) {
			unlink_free_space(ctl, info, true);
			kmem_cache_free(btrfs_free_space_cachep, info);
		} else {
			free_bitmap(ctl, info);
		}

		cond_resched_lock(&ctl->tree_lock);
	}
}

/*
 * Find and load the inode referenced by a free space header item.
 *
 * Searches @root for the key (BTRFS_FREE_SPACE_OBJECTID, 0, @offset), reads
 * the inode location key stored in the header found there and loads that
 * inode with btrfs_iget_path().  @path is released before returning on every
 * path visible here except a failed search (ret < 0).
 *
 * Returns the VFS inode on success, ERR_PTR(-ENOENT) when the search does not
 * hit the key exactly, or the error from the search / inode load otherwise.
 */
static struct inode *__lookup_free_space_inode(struct btrfs_root *root,
					       struct btrfs_path *path,
					       u64 offset)
{
	struct btrfs_key key;
	struct btrfs_key location;
	struct btrfs_disk_key disk_key;
	struct btrfs_free_space_header *header;
	struct extent_buffer *leaf;
	struct btrfs_inode *inode;
	unsigned nofs_flag;
	int ret;

	key.objectid = BTRFS_FREE_SPACE_OBJECTID;
	key.type = 0;
	key.offset = offset;

	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
	if (ret < 0)
		return ERR_PTR(ret);
	if (ret > 0) {
		btrfs_release_path(path);
		return ERR_PTR(-ENOENT);
	}

	/* The header item stores the key of the inode backing the cache. */
	leaf = path->nodes[0];
	header = btrfs_item_ptr(leaf, path->slots[0],
				struct btrfs_free_space_header);
	btrfs_free_space_key(leaf, header, &disk_key);
	btrfs_disk_key_to_cpu(&location, &disk_key);
	btrfs_release_path(path);

	/*
	 * We are often under a trans handle at this point, so we need to make
	 * sure NOFS is set to keep us from deadlocking.
	 */
	nofs_flag = memalloc_nofs_save();
	inode = btrfs_iget_path(location.objectid, root, path);
	btrfs_release_path(path);
	memalloc_nofs_restore(nofs_flag);
	if (IS_ERR(inode))
		return ERR_CAST(inode);

	/* Strip __GFP_FS and __GFP_HIGHMEM from the mapping's allocation mask. */
	mapping_set_gfp_mask(inode->vfs_inode.i_mapping,
			mapping_gfp_constraint(inode->vfs_inode.i_mapping,
			~(__GFP_FS | __GFP_HIGHMEM)));

	return &inode->vfs_inode;
}

/*
 * Get the free space cache inode of @block_group.
 *
 * If the block group already has an inode attached and igrab() on it
 * succeeds, that inode is returned.  Otherwise the inode is looked up in
 * fs_info->tree_root using block_group->start as the header key offset.
 *
 * After a lookup, under block_group->lock:
 *  - an inode that does not have both BTRFS_INODE_NODATASUM and
 *    BTRFS_INODE_NODATACOW set gets both flags and the block group's
 *    disk_cache_state is set to BTRFS_DC_CLEAR;
 *  - if BLOCK_GROUP_FLAG_IREF was not yet set, it is set and an extra
 *    igrab() reference is stored in block_group->inode.
 *
 * Returns the inode or the ERR_PTR from the lookup.
 */
struct inode *lookup_free_space_inode(struct btrfs_block_group *block_group,
				      struct btrfs_path *path)
{
	struct btrfs_fs_info *fs_info = block_group->fs_info;
	struct inode *inode = NULL;
	u32 flags = BTRFS_INODE_NODATASUM | BTRFS_INODE_NODATACOW;

	spin_lock(&block_group->lock);
	if (block_group->inode)
		inode = igrab(&block_group->inode->vfs_inode);
	spin_unlock(&block_group->lock);
	if (inode)
		return inode;

	inode = __lookup_free_space_inode(fs_info->tree_root, path,
					  block_group->start);
	if (IS_ERR(inode))
		return inode;

	spin_lock(&block_group->lock);
	if (!((BTRFS_I(inode)->flags & flags) == flags)) {
		btrfs_info(fs_info, "Old style space inode found, converting.");
		BTRFS_I(inode)->flags |= BTRFS_INODE_NODATASUM |
					 BTRFS_INODE_NODATACOW;
		block_group->disk_cache_state = BTRFS_DC_CLEAR;
	}

	if (!test_and_set_bit(BLOCK_GROUP_FLAG_IREF, &block_group->runtime_flags))
		block_group->inode = BTRFS_I(igrab(inode));
	spin_unlock(&block_group->lock);

	return inode;
}

/*
 * Create the on-disk items for a free space cache inode.
 *
 * Two items are inserted into @root:
 *  1. an inode item for @ino, zeroed and then filled in (size/nbytes/uid/gid
 *     of 0, mode S_IFREG | 0600, nlink 1, generation and transid taken from
 *     the transaction, block group field set to @offset);
 *  2. a free space header keyed (BTRFS_FREE_SPACE_OBJECTID, 0, @offset),
 *     zeroed, whose stored key is the disk key of the inode item above.
 *
 * @path is released before returning, except when the inode item insertion
 * itself fails.  Returns 0 on success or the error of the failing insertion.
 */
static int __create_free_space_inode(struct btrfs_root *root,
				     struct btrfs_trans_handle *trans,
				     struct btrfs_path *path,
				     u64 ino, u64 offset)
{
	struct btrfs_key key;
	struct btrfs_disk_key disk_key;
	struct btrfs_free_space_header *header;
	struct btrfs_inode_item *inode_item;
	struct extent_buffer *leaf;
	/* We inline CRCs for the free disk space cache */
	const u64 flags = BTRFS_INODE_NOCOMPRESS | BTRFS_INODE_PREALLOC |
			  BTRFS_INODE_NODATASUM | BTRFS_INODE_NODATACOW;
	int ret;

	ret = btrfs_insert_empty_inode(trans, root, path, ino);
	if (ret)
		return ret;

	leaf = path->nodes[0];
	inode_item = btrfs_item_ptr(leaf, path->slots[0],
				    struct btrfs_inode_item);
	/* Remember the inode item's key; it is stored in the header below. */
	btrfs_item_key(leaf, &disk_key, path->slots[0]);
	memzero_extent_buffer(leaf, (unsigned long)inode_item,
			      sizeof(*inode_item));
	btrfs_set_inode_generation(leaf, inode_item, trans->transid);
	btrfs_set_inode_size(leaf, inode_item, 0);
	btrfs_set_inode_nbytes(leaf, inode_item, 0);
	btrfs_set_inode_uid(leaf, inode_item, 0);
	btrfs_set_inode_gid(leaf, inode_item, 0);
	btrfs_set_inode_mode(leaf, inode_item, S_IFREG | 0600);
	btrfs_set_inode_flags(leaf, inode_item, flags);
	btrfs_set_inode_nlink(leaf, inode_item, 1);
	btrfs_set_inode_transid(leaf, inode_item, trans->transid);
	btrfs_set_inode_block_group(leaf, inode_item, offset);
	btrfs_release_path(path);

	key.objectid = BTRFS_FREE_SPACE_OBJECTID;
	key.type = 0;
	key.offset = offset;
	ret = btrfs_insert_empty_item(trans, root, path, &key,
				      sizeof(struct btrfs_free_space_header));
	if (ret < 0) {
		btrfs_release_path(path);
		return ret;
	}

	leaf = path->nodes[0];
	header = btrfs_item_ptr(leaf, path->slots[0],
				struct btrfs_free_space_header);
	memzero_extent_buffer(leaf, (unsigned long)header, sizeof(*header));
	btrfs_set_free_space_key(leaf, header, &disk_key);
	btrfs_release_path(path);

	return 0;
}

/*
 * Allocate a fresh objectid from the tree root and create the free space
 * cache inode items for @block_group in the tree root, keyed by
 * block_group->start.
 *
 * Returns 0 on success or a negative error from the objectid allocation or
 * from __create_free_space_inode().
 */
int create_free_space_inode(struct btrfs_trans_handle *trans,
			    struct btrfs_block_group *block_group,
			    struct btrfs_path *path)
{
	int ret;
	u64 ino;

	ret = btrfs_get_free_objectid(trans->fs_info->tree_root, &ino);
	if (ret < 0)
		return ret;

	return __create_free_space_inode(trans->fs_info->tree_root, trans, path,
					 ino, block_group->start);
}

/*
 * Remove the free space cache inode of @block_group and delete its free
 * space header item from the tree root.
 *
 * inode is an optional sink: if it is NULL, btrfs_remove_free_space_inode
 * handles lookup, otherwise it takes ownership and iputs the inode.
 * Don't reuse an inode pointer after passing it into this function.
 *
 * Returns 0 on success, and also when the inode lookup fails with -ENOENT or
 * when the header item is not found; a negative error otherwise.
 */
int btrfs_remove_free_space_inode(struct btrfs_trans_handle *trans,
				  struct inode *inode,
				  struct btrfs_block_group *block_group)
{
	/*
	 * NOTE(review): no explicit btrfs_free_path() appears below;
	 * presumably BTRFS_PATH_AUTO_FREE releases the path at scope exit.
	 */
	BTRFS_PATH_AUTO_FREE(path);
	struct btrfs_key key;
	int ret = 0;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	if (!inode)
		inode = lookup_free_space_inode(block_group, path);
	if (IS_ERR(inode)) {
		/* A missing inode is not an error: ret is still 0 here. */
		if (PTR_ERR(inode) != -ENOENT)
			ret = PTR_ERR(inode);
		return ret;
	}
	ret = btrfs_orphan_add(trans, BTRFS_I(inode));
	if (ret) {
		/* Still drop the reference we own before bailing out. */
		btrfs_add_delayed_iput(BTRFS_I(inode));
		return ret;
	}
	clear_nlink(inode);
	/* One for the block groups ref */
	spin_lock(&block_group->lock);
	if (test_and_clear_bit(BLOCK_GROUP_FLAG_IREF, &block_group->runtime_flags)) {
		block_group->inode = NULL;
		/* iput() is called only after the spinlock has been dropped. */
		spin_unlock(&block_group->lock);
		iput(inode);
	} else {
		spin_unlock(&block_group->lock);
	}
	/* One for the lookup ref */
	btrfs_add_delayed_iput(BTRFS_I(inode));

	key.objectid = BTRFS_FREE_SPACE_OBJECTID;
	key.type = 0;
	key.offset = block_group->start;
	ret = btrfs_search_slot(trans, trans->fs_info->tree_root, &key, path,
				-1, 1);
	if (ret) {
		/* ret > 0 (no exact match) is reported as success. */
		if (ret > 0)
			ret = 0;
		return ret;
	}
	return btrfs_del_item(trans, trans->fs_info->tree_root, path);
}

/*
 * Truncate a free space cache inode to size 0 and update its inode item.
 *
 * @block_group is optional.  When given, cache_write_mutex of the running
 * transaction is held for the rest of the function, the block group is
 * taken off its io_list (waiting for the cache IO and dropping a block group
 * reference) if it is queued there, and its disk_cache_state is set to
 * BTRFS_DC_CLEAR.
 *
 * Any non-zero result - including -ENOMEM from the path allocation - aborts
 * the transaction before being returned.
 */
int btrfs_truncate_free_space_cache(struct btrfs_trans_handle *trans,
				    struct btrfs_block_group *block_group,
				    struct inode *vfs_inode)
{
	struct btrfs_truncate_control control = {
		.inode = BTRFS_I(vfs_inode),
		.new_size = 0,
		.ino = btrfs_ino(BTRFS_I(vfs_inode)),
		.min_type = BTRFS_EXTENT_DATA_KEY,
		.clear_extent_range = true,
	};
	struct btrfs_inode *inode = BTRFS_I(vfs_inode);
	struct btrfs_root *root = inode->root;
	struct extent_state *cached_state = NULL;
	int ret = 0;
	bool locked = false;

	if (block_group) {
		BTRFS_PATH_AUTO_FREE(path);

		path = btrfs_alloc_path();
		if (!path) {
			ret = -ENOMEM;
			goto fail;
		}
		/* From here on the fail path must drop cache_write_mutex. */
		locked = true;
		mutex_lock(&trans->transaction->cache_write_mutex);
		if (!list_empty(&block_group->io_list)) {
			list_del_init(&block_group->io_list);

			btrfs_wait_cache_io(trans, block_group, path);
			btrfs_put_block_group(block_group);
		}

		/*
		 * now that we've truncated the cache away, it's no longer
		 * setup or written
		 */
		spin_lock(&block_group->lock);
		block_group->disk_cache_state = BTRFS_DC_CLEAR;
		spin_unlock(&block_group->lock);
	}

	btrfs_i_size_write(inode, 0);
	truncate_pagecache(vfs_inode, 0);

	/* Lock the whole file range while extent maps and items are dropped. */
	btrfs_lock_extent(&inode->io_tree, 0, (u64)-1, &cached_state);
	btrfs_drop_extent_map_range(inode, 0, (u64)-1, false);

	/*
	 * We skip the throttling logic for free space cache inodes, so we don't
	 * need to check for -EAGAIN.
	 */
	ret = btrfs_truncate_inode_items(trans, root, &control);

	/* Accounting is updated from @control even if the truncate failed. */
	inode_sub_bytes(&inode->vfs_inode, control.sub_bytes);
	btrfs_inode_safe_disk_i_size_write(inode, control.last_size);

	btrfs_unlock_extent(&inode->io_tree, 0, (u64)-1, &cached_state);
	if (ret)
		goto fail;

	ret = btrfs_update_inode(trans, inode);

fail:
	if (locked)
		mutex_unlock(&trans->transaction->cache_write_mutex);
	if (ret)
		btrfs_abort_transaction(trans, ret);

	return ret;
}

/*
 * Start synchronous readahead over the whole cache file, from page index 0
 * up to the index of the page holding the last byte.
 *
 * NOTE(review): with i_size == 0 the "- 1" happens before the shift, so
 * last_index would not be 0; presumably callers never pass an empty inode -
 * confirm.
 */
static void readahead_cache(struct inode *inode)
{
	struct file_ra_state ra;
	pgoff_t last_index;

	file_ra_state_init(&ra, inode->i_mapping);
	last_index = (i_size_read(inode) - 1) >> PAGE_SHIFT;

	page_cache_sync_readahead(inode->i_mapping, &ra, NULL, 0, last_index);
}

/*
 * Initialize @io_ctl for reading or writing the cache file @inode.
 *
 * The number of pages is i_size rounded up to whole pages.  For a write
 * (@write != 0) the first page must have room for one u32 crc per page plus
 * a u64 generation, otherwise -ENOSPC is returned and @io_ctl is left
 * untouched.  Otherwise @io_ctl is zeroed and a zero-initialized array of
 * page pointers is allocated with GFP_NOFS.
 *
 * Returns 0, -ENOSPC or -ENOMEM.
 */
static int io_ctl_init(struct btrfs_io_ctl *io_ctl, struct inode *inode,
		       int write)
{
	int num_pages;

	num_pages = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE);

	/* Make sure we can fit our crcs and generation into the first page */
	if (write && (num_pages * sizeof(u32) + sizeof(u64)) > PAGE_SIZE)
		return -ENOSPC;

	memset(io_ctl, 0, sizeof(struct btrfs_io_ctl));

	io_ctl->pages = kzalloc_objs(struct page *, num_pages, GFP_NOFS);
	if (!io_ctl->pages)
		return -ENOMEM;

	io_ctl->num_pages = num_pages;
	io_ctl->fs_info = inode_to_fs_info(inode);
	io_ctl->inode = inode;

	return 0;
}
ALLOW_ERROR_INJECTION(io_ctl_init, ERRNO);

/*
 * Free the page pointer array of @io_ctl and clear the pointer.  The pages
 * themselves are not touched here.
 */
static void io_ctl_free(struct btrfs_io_ctl *io_ctl)
{
	kfree(io_ctl->pages);
	io_ctl->pages = NULL;
}

/*
 * Forget the currently mapped page: reset the cur/orig cursors to NULL.
 * io_ctl->index, io_ctl->page and io_ctl->size are left as they are.
 */
static void io_ctl_unmap_page(struct btrfs_io_ctl *io_ctl)
{
	if (io_ctl->cur) {
		io_ctl->cur = NULL;
		io_ctl->orig = NULL;
	}
}

/*
 * Map the next page of @io_ctl: take pages[index], advance index, point both
 * cur and orig at the start of the page and reset the remaining size to
 * PAGE_SIZE.  If @clear is non-zero the page contents are zeroed.
 *
 * Asserts that there is a page left to map.
 */
static void io_ctl_map_page(struct btrfs_io_ctl *io_ctl, int clear)
{
	ASSERT(io_ctl->index < io_ctl->num_pages);
	io_ctl->page = io_ctl->pages[io_ctl->index++];
	io_ctl->cur = page_address(io_ctl->page);
	io_ctl->orig = io_ctl->cur;
	io_ctl->size = PAGE_SIZE;
	if (clear)
		clear_page(io_ctl->cur);
}

/*
 * Release every page held in io_ctl->pages: clear the btrfs "checked" state
 * for the page's range, unlock it and drop the page reference.  NULL slots
 * are skipped, so this is usable on a partially populated array.
 *
 * NOTE(review): the slots are not reset to NULL afterwards.
 */
static void io_ctl_drop_pages(struct btrfs_io_ctl *io_ctl)
{
	int i;

	io_ctl_unmap_page(io_ctl);

	for (i = 0; i < io_ctl->num_pages; i++) {
		if (io_ctl->pages[i]) {
			btrfs_folio_clear_checked(io_ctl->fs_info,
					page_folio(io_ctl->pages[i]),
					page_offset(io_ctl->pages[i]),
					PAGE_SIZE);
			unlock_page(io_ctl->pages[i]);
			put_page(io_ctl->pages[i]);
		}
	}
}

/*
 * Populate io_ctl->pages with locked page cache folios for indexes
 * 0 .. num_pages - 1 of the cache inode, creating them if necessary.
 *
 * When @uptodate is true, every folio that is not already uptodate is read
 * in, and the function fails with -EIO if the folio no longer belongs to the
 * inode's mapping or is still not uptodate afterwards.
 *
 * On any failure all pages acquired so far are released through
 * io_ctl_drop_pages().  On success the dirty-for-io state of every page is
 * cleared and 0 is returned.
 */
static int io_ctl_prepare_pages(struct btrfs_io_ctl *io_ctl, bool uptodate)
{
	struct folio *folio;
	struct inode *inode = io_ctl->inode;
	gfp_t mask = btrfs_alloc_write_mask(inode->i_mapping);
	int i;

	for (i = 0; i < io_ctl->num_pages; i++) {
		int ret;

		folio = __filemap_get_folio(inode->i_mapping, i,
					    FGP_LOCK | FGP_ACCESSED | FGP_CREAT,
					    mask);
		if (IS_ERR(folio)) {
			io_ctl_drop_pages(io_ctl);
			return PTR_ERR(folio);
		}

		ret = set_folio_extent_mapped(folio);
		if (ret < 0) {
			/*
			 * This folio is not in io_ctl->pages yet, so it has to
			 * be released by hand before dropping the others.
			 */
			folio_unlock(folio);
			folio_put(folio);
			io_ctl_drop_pages(io_ctl);
			return ret;
		}

		io_ctl->pages[i] = &folio->page;
		if (uptodate && !folio_test_uptodate(folio)) {
			/*
			 * NOTE(review): the folio is re-locked right after the
			 * read, so btrfs_read_folio() presumably returns with
			 * it unlocked - confirm.
			 */
			btrfs_read_folio(NULL, folio);
			folio_lock(folio);
			if (folio->mapping != inode->i_mapping) {
				btrfs_err(BTRFS_I(inode)->root->fs_info,
					  "free space cache page truncated");
				io_ctl_drop_pages(io_ctl);
				return -EIO;
			}
			if (!folio_test_uptodate(folio)) {
				btrfs_err(BTRFS_I(inode)->root->fs_info,
					   "error reading free space cache");
				io_ctl_drop_pages(io_ctl);
				return -EIO;
			}
		}
	}

	for (i = 0; i < io_ctl->num_pages; i++)
		clear_page_dirty_for_io(io_ctl->pages[i]);

	return 0;
}

/*
 * Map (and zero) the next page and write @generation into it as a
 * little-endian u64, placed right after an array of one u32 crc slot per
 * page.  The cursor ends up just past the generation.
 */
static void io_ctl_set_generation(struct btrfs_io_ctl *io_ctl, u64 generation)
{
	io_ctl_map_page(io_ctl, 1);

	/*
	 * Skip the csum areas.  If we don't check crcs then we just have a
	 * 64bit chunk at the front of the first page.
	 *
	 * NOTE(review): the crc area is skipped unconditionally below; the
	 * "don't check crcs" case is not visible in this function.
	 */
	io_ctl->cur += (sizeof(u32) * io_ctl->num_pages);
	io_ctl->size -= sizeof(u64) + (sizeof(u32) * io_ctl->num_pages);

	put_unaligned_le64(generation, io_ctl->cur);
	io_ctl->cur += sizeof(u64);
}

/*
 * Verify that the generation stored after the crc area of the currently
 * mapped page equals @generation.
 *
 * This function does not map a page itself.
 * NOTE(review): presumably the first page was mapped by a preceding
 * io_ctl_check_crc() call - confirm against the caller.
 *
 * Returns 0 and leaves the cursor just past the generation on a match;
 * on a mismatch logs a rate-limited error, unmaps the page and returns -EIO.
 */
static int io_ctl_check_generation(struct btrfs_io_ctl *io_ctl, u64 generation)
{
	u64 cache_gen;

	/*
	 * Skip the crc area.  If we don't check crcs then we just have a 64bit
	 * chunk at the front of the first page.
	 *
	 * NOTE(review): as in io_ctl_set_generation(), the skip below is
	 * unconditional.
	 */
	io_ctl->cur += sizeof(u32) * io_ctl->num_pages;
	io_ctl->size -= sizeof(u64) + (sizeof(u32) * io_ctl->num_pages);

	cache_gen = get_unaligned_le64(io_ctl->cur);
	if (cache_gen != generation) {
		btrfs_err_rl(io_ctl->fs_info,
			"space cache generation (%llu) does not match inode (%llu)",
				cache_gen, generation);
		io_ctl_unmap_page(io_ctl);
		return -EIO;
	}
	io_ctl->cur += sizeof(u64);
	return 0;
}

/*
 * Checksum the currently mapped page (io_ctl->orig) and store the result in
 * crc slot @index at the start of page 0, then unmap the page.
 *
 * For @index == 0 the crc array itself is excluded from the checksummed
 * range.  Callers in this file pass io_ctl->index - 1, i.e. the index of the
 * page that is currently mapped.
 */
static void io_ctl_set_crc(struct btrfs_io_ctl *io_ctl, int index)
{
	u32 *tmp;
	u32 crc = ~(u32)0;
	unsigned offset = 0;

	if (index == 0)
		offset = sizeof(u32) * io_ctl->num_pages;

	crc = crc32c(crc, io_ctl->orig + offset, PAGE_SIZE - offset);
	/* Finalize in place: crc is overwritten with its le32 complement. */
	btrfs_crc32c_final(crc, (u8 *)&crc);
	io_ctl_unmap_page(io_ctl);
	tmp = page_address(io_ctl->pages[0]);
	tmp += index;
	*tmp = crc;
}

/*
 * Map the next page (without clearing it) and compare its checksum against
 * the value stored in crc slot @index of page 0.
 *
 * The page that gets mapped is pages[io_ctl->index], not pages[@index].
 * NOTE(review): the caller is presumably expected to keep the two in step -
 * confirm.
 *
 * Returns 0 with the page left mapped on a match; on a mismatch logs a
 * rate-limited error, unmaps the page and returns -EIO.
 */
static int io_ctl_check_crc(struct btrfs_io_ctl *io_ctl, int index)
{
	u32 *tmp, val;
	u32 crc = ~(u32)0;
	unsigned offset = 0;

	if (index == 0)
		offset = sizeof(u32) * io_ctl->num_pages;

	tmp = page_address(io_ctl->pages[0]);
	tmp += index;
	val = *tmp;

	io_ctl_map_page(io_ctl, 0);
	crc = crc32c(crc, io_ctl->orig + offset, PAGE_SIZE - offset);
	btrfs_crc32c_final(crc, (u8 *)&crc);
	if (val != crc) {
		btrfs_err_rl(io_ctl->fs_info,
			"csum mismatch on free space cache");
		io_ctl_unmap_page(io_ctl);
		return -EIO;
	}

	return 0;
}

/*
 * Append one free space entry (@offset, @bytes) at the cursor.
 *
 * @bitmap is only tested for NULL here: it selects the entry type
 * (BTRFS_FREE_SPACE_BITMAP vs BTRFS_FREE_SPACE_EXTENT); the bitmap data
 * itself is not written by this function.
 *
 * When the current page cannot hold another entry, its crc is written and,
 * if pages remain, the next page is mapped and zeroed.
 *
 * Returns 0, or -ENOSPC if no page is currently mapped.
 */
static int io_ctl_add_entry(struct btrfs_io_ctl *io_ctl, u64 offset, u64 bytes,
			    void *bitmap)
{
	struct btrfs_free_space_entry *entry;

	if (!io_ctl->cur)
		return -ENOSPC;

	entry = io_ctl->cur;
	put_unaligned_le64(offset, &entry->offset);
	put_unaligned_le64(bytes, &entry->bytes);
	entry->type = (bitmap) ? BTRFS_FREE_SPACE_BITMAP :
		BTRFS_FREE_SPACE_EXTENT;
	io_ctl->cur += sizeof(struct btrfs_free_space_entry);
	io_ctl->size -= sizeof(struct btrfs_free_space_entry);

	/* Still room for at least one more entry in this page. */
	if (io_ctl->size >= sizeof(struct btrfs_free_space_entry))
		return 0;

	io_ctl_set_crc(io_ctl, io_ctl->index - 1);

	/* No more pages to map */
	if (io_ctl->index >= io_ctl->num_pages)
		return 0;

	/* map the next page */
	io_ctl_map_page(io_ctl, 1);
	return 0;
}

/*
 * Write one page worth of @bitmap data into a page of its own.
 *
 * If the current page already contains data, it is checksummed first and the
 * next page is mapped.  After the copy the bitmap page is checksummed and,
 * if pages remain, the following page is mapped (not cleared).
 *
 * Returns 0, or -ENOSPC if no page is mapped on entry or no page is left for
 * the bitmap.
 */
static int io_ctl_add_bitmap(struct btrfs_io_ctl *io_ctl, void *bitmap)
{
	if (!io_ctl->cur)
		return -ENOSPC;

	/*
	 * If we aren't at the start of the current page, unmap this one and
	 * map the next one if there is any left.
	 */
	if (io_ctl->cur != io_ctl->orig) {
		io_ctl_set_crc(io_ctl, io_ctl->index - 1);
		if (io_ctl->index >= io_ctl->num_pages)
			return -ENOSPC;
		io_ctl_map_page(io_ctl, 0);
	}

	copy_page(io_ctl->cur, bitmap);
	io_ctl_set_crc(io_ctl, io_ctl->index - 1);
	if (io_ctl->index < io_ctl->num_pages)
		io_ctl_map_page(io_ctl, 0);
	return 0;
}

/*
 * Finish the page being written and fill every remaining page with zeroes,
 * writing a crc for each of them.
 */
static void io_ctl_zero_remaining_pages(struct btrfs_io_ctl *io_ctl)
{
	/*
	 * If we're not on the boundary we know we've modified the page and we
	 * need to crc the page.
	 */
	if (io_ctl->cur != io_ctl->orig)
		io_ctl_set_crc(io_ctl, io_ctl->index - 1);
	else
		io_ctl_unmap_page(io_ctl);

	while (io_ctl->index < io_ctl->num_pages) {
		io_ctl_map_page(io_ctl, 1);
		io_ctl_set_crc(io_ctl, io_ctl->index - 1);
	}
}

static int io_ctl_read_entry(struct btrfs_io_ctl *io_ctl,
			    struct btrfs_free_space *entry, u8 *type)
{
	struct btrfs_free_space_entry *e;
	int ret;

	if (!io_ctl->cur) {