Total coverage: 361753 (19%)of 1995341
153 153 153 153 152 152 152 27 27 27 53 27 52 52 52 144 150 150 144 144 18 18 18 144 144 150 150 144 143 144 144 144 144 150 150 150 150 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 // SPDX-License-Identifier: GPL-2.0-or-later /* * LED state routines for driver control interface * Copyright (c) 2021 by Jaroslav Kysela <perex@perex.cz> */ #include <linux/slab.h> #include <linux/module.h> #include <linux/leds.h> #include <sound/core.h> #include <sound/control.h> MODULE_AUTHOR("Jaroslav Kysela <perex@perex.cz>"); MODULE_DESCRIPTION("ALSA control interface to LED trigger code."); MODULE_LICENSE("GPL"); #define MAX_LED (((SNDRV_CTL_ELEM_ACCESS_MIC_LED - SNDRV_CTL_ELEM_ACCESS_SPK_LED) \ >> SNDRV_CTL_ELEM_ACCESS_LED_SHIFT) + 1) #define to_led_card_dev(_dev) \ container_of(_dev, struct snd_ctl_led_card, dev) enum snd_ctl_led_mode { MODE_FOLLOW_MUTE = 0, MODE_FOLLOW_ROUTE, MODE_OFF, MODE_ON, }; struct snd_ctl_led_card { struct device dev; int number; struct snd_ctl_led *led; }; struct snd_ctl_led { struct device dev; struct list_head controls; const char *name; unsigned int group; enum led_audio trigger_type; enum snd_ctl_led_mode mode; struct snd_ctl_led_card *cards[SNDRV_CARDS]; }; struct snd_ctl_led_ctl { struct list_head list; struct snd_card *card; unsigned int access; struct snd_kcontrol *kctl; unsigned int index_offset; }; static DEFINE_MUTEX(snd_ctl_led_mutex); static bool snd_ctl_led_card_valid[SNDRV_CARDS]; static struct led_trigger *snd_ctl_ledtrig_audio[NUM_AUDIO_LEDS]; static struct snd_ctl_led snd_ctl_leds[MAX_LED] = { { .name = "speaker", .group = (SNDRV_CTL_ELEM_ACCESS_SPK_LED >> SNDRV_CTL_ELEM_ACCESS_LED_SHIFT) - 1, .trigger_type = LED_AUDIO_MUTE, .mode = MODE_FOLLOW_MUTE, }, { .name = "mic", .group = (SNDRV_CTL_ELEM_ACCESS_MIC_LED >> SNDRV_CTL_ELEM_ACCESS_LED_SHIFT) - 1, .trigger_type = LED_AUDIO_MICMUTE, .mode = MODE_FOLLOW_MUTE, }, }; static void snd_ctl_led_sysfs_add(struct snd_card *card); static void snd_ctl_led_sysfs_remove(struct snd_card *card); #define UPDATE_ROUTE(route, cb) \ do { \ int route2 = (cb); \ if (route2 >= 0) \ route = route < 0 ? route2 : (route | route2); \ } while (0) static inline unsigned int access_to_group(unsigned int access) { return ((access & SNDRV_CTL_ELEM_ACCESS_LED_MASK) >> SNDRV_CTL_ELEM_ACCESS_LED_SHIFT) - 1; } static inline unsigned int group_to_access(unsigned int group) { return (group + 1) << SNDRV_CTL_ELEM_ACCESS_LED_SHIFT; } static struct snd_ctl_led *snd_ctl_led_get_by_access(unsigned int access) { unsigned int group = access_to_group(access); if (group >= MAX_LED) return NULL; return &snd_ctl_leds[group]; } /* * A note for callers: * The two static variables info and value are protected using snd_ctl_led_mutex. */ static int snd_ctl_led_get(struct snd_ctl_led_ctl *lctl) { static struct snd_ctl_elem_info info; static struct snd_ctl_elem_value value; struct snd_kcontrol *kctl = lctl->kctl; unsigned int i; int result; memset(&info, 0, sizeof(info)); info.id = kctl->id; info.id.index += lctl->index_offset; info.id.numid += lctl->index_offset; result = kctl->info(kctl, &info); if (result < 0) return -1; memset(&value, 0, sizeof(value)); value.id = info.id; result = kctl->get(kctl, &value); if (result < 0) return -1; if (info.type == SNDRV_CTL_ELEM_TYPE_BOOLEAN || info.type == SNDRV_CTL_ELEM_TYPE_INTEGER) { for (i = 0; i < info.count; i++) if (value.value.integer.value[i] != info.value.integer.min) return 1; } else if (info.type == SNDRV_CTL_ELEM_TYPE_INTEGER64) { for (i = 0; i < info.count; i++) if (value.value.integer64.value[i] != info.value.integer64.min) return 1; } return 0; } static void snd_ctl_led_set_state(struct snd_card *card, unsigned int access, struct snd_kcontrol *kctl, unsigned int ioff) { struct snd_ctl_led *led; struct snd_ctl_led_ctl *lctl; int route; bool found; led = snd_ctl_led_get_by_access(access); if (!led) return; route = -1; found = false; scoped_guard(mutex, &snd_ctl_led_mutex) { /* the card may not be registered (active) at this point */ if (card && !snd_ctl_led_card_valid[card->number]) return; list_for_each_entry(lctl, &led->controls, list) { if (lctl->kctl == kctl && lctl->index_offset == ioff) found = true; UPDATE_ROUTE(route, snd_ctl_led_get(lctl)); } if (!found && kctl && card) { lctl = kzalloc(sizeof(*lctl), GFP_KERNEL); if (lctl) { lctl->card = card; lctl->access = access; lctl->kctl = kctl; lctl->index_offset = ioff; list_add(&lctl->list, &led->controls); UPDATE_ROUTE(route, snd_ctl_led_get(lctl)); } } } switch (led->mode) { case MODE_OFF: route = 1; break; case MODE_ON: route = 0; break; case MODE_FOLLOW_ROUTE: if (route >= 0) route ^= 1; break; case MODE_FOLLOW_MUTE: /* noop */ break; } if (route >= 0) { struct led_trigger *trig = snd_ctl_ledtrig_audio[led->trigger_type]; led_trigger_event(trig, route ? LED_OFF : LED_ON); } } static struct snd_ctl_led_ctl *snd_ctl_led_find(struct snd_kcontrol *kctl, unsigned int ioff) { struct list_head *controls; struct snd_ctl_led_ctl *lctl; unsigned int group; for (group = 0; group < MAX_LED; group++) { controls = &snd_ctl_leds[group].controls; list_for_each_entry(lctl, controls, list) if (lctl->kctl == kctl && lctl->index_offset == ioff) return lctl; } return NULL; } static unsigned int snd_ctl_led_remove(struct snd_kcontrol *kctl, unsigned int ioff, unsigned int access) { struct snd_ctl_led_ctl *lctl; unsigned int ret = 0; guard(mutex)(&snd_ctl_led_mutex); lctl = snd_ctl_led_find(kctl, ioff); if (lctl && (access == 0 || access != lctl->access)) { ret = lctl->access; list_del(&lctl->list); kfree(lctl); } return ret; } static void snd_ctl_led_notify(struct snd_card *card, unsigned int mask, struct snd_kcontrol *kctl, unsigned int ioff) { struct snd_kcontrol_volatile *vd; unsigned int access, access2; if (mask == SNDRV_CTL_EVENT_MASK_REMOVE) { access = snd_ctl_led_remove(kctl, ioff, 0); if (access) snd_ctl_led_set_state(card, access, NULL, 0); } else if (mask & SNDRV_CTL_EVENT_MASK_INFO) { vd = &kctl->vd[ioff]; access = vd->access & SNDRV_CTL_ELEM_ACCESS_LED_MASK; access2 = snd_ctl_led_remove(kctl, ioff, access); if (access2) snd_ctl_led_set_state(card, access2, NULL, 0); if (access) snd_ctl_led_set_state(card, access, kctl, ioff); } else if ((mask & (SNDRV_CTL_EVENT_MASK_ADD | SNDRV_CTL_EVENT_MASK_VALUE)) != 0) { vd = &kctl->vd[ioff]; access = vd->access & SNDRV_CTL_ELEM_ACCESS_LED_MASK; if (access) snd_ctl_led_set_state(card, access, kctl, ioff); } } DEFINE_FREE(snd_card_unref, struct snd_card *, if (_T) snd_card_unref(_T)) static int snd_ctl_led_set_id(int card_number, struct snd_ctl_elem_id *id, unsigned int group, bool set) { struct snd_card *card __free(snd_card_unref) = NULL; struct snd_kcontrol *kctl; struct snd_kcontrol_volatile *vd; unsigned int ioff, access, new_access; card = snd_card_ref(card_number); if (!card) return -ENXIO; guard(rwsem_write)(&card->controls_rwsem); kctl = snd_ctl_find_id(card, id); if (!kctl) return -ENOENT; ioff = snd_ctl_get_ioff(kctl, id); vd = &kctl->vd[ioff]; access = vd->access & SNDRV_CTL_ELEM_ACCESS_LED_MASK; if (access != 0 && access != group_to_access(group)) return -EXDEV; new_access = vd->access & ~SNDRV_CTL_ELEM_ACCESS_LED_MASK; if (set) new_access |= group_to_access(group); if (new_access != vd->access) { vd->access = new_access; snd_ctl_led_notify(card, SNDRV_CTL_EVENT_MASK_INFO, kctl, ioff); } return 0; } static void snd_ctl_led_refresh(void) { unsigned int group; for (group = 0; group < MAX_LED; group++) snd_ctl_led_set_state(NULL, group_to_access(group), NULL, 0); } static void snd_ctl_led_ctl_destroy(struct snd_ctl_led_ctl *lctl) { list_del(&lctl->list); kfree(lctl); } static void snd_ctl_led_clean(struct snd_card *card) { unsigned int group; struct snd_ctl_led_ctl *lctl, *_lctl; struct snd_ctl_led *led; for (group = 0; group < MAX_LED; group++) { led = &snd_ctl_leds[group]; list_for_each_entry_safe(lctl, _lctl, &led->controls, list) if (!card || lctl->card == card) snd_ctl_led_ctl_destroy(lctl); } } static int snd_ctl_led_reset(int card_number, unsigned int group) { struct snd_card *card __free(snd_card_unref) = NULL; struct snd_ctl_led_ctl *lctl, *_lctl; struct snd_ctl_led *led; struct snd_kcontrol_volatile *vd; bool change = false; card = snd_card_ref(card_number); if (!card) return -ENXIO; scoped_guard(mutex, &snd_ctl_led_mutex) { if (!snd_ctl_led_card_valid[card_number]) return -ENXIO; led = &snd_ctl_leds[group]; list_for_each_entry_safe(lctl, _lctl, &led->controls, list) if (lctl->card == card) { vd = &lctl->kctl->vd[lctl->index_offset]; vd->access &= ~group_to_access(group); snd_ctl_led_ctl_destroy(lctl); change = true; } } if (change) snd_ctl_led_set_state(NULL, group_to_access(group), NULL, 0); return 0; } static void snd_ctl_led_register(struct snd_card *card) { struct snd_kcontrol *kctl; unsigned int ioff; if (snd_BUG_ON(card->number < 0 || card->number >= ARRAY_SIZE(snd_ctl_led_card_valid))) return; scoped_guard(mutex, &snd_ctl_led_mutex) snd_ctl_led_card_valid[card->number] = true; /* the register callback is already called with held card->controls_rwsem */ list_for_each_entry(kctl, &card->controls, list) for (ioff = 0; ioff < kctl->count; ioff++) snd_ctl_led_notify(card, SNDRV_CTL_EVENT_MASK_VALUE, kctl, ioff); snd_ctl_led_refresh(); snd_ctl_led_sysfs_add(card); } static void snd_ctl_led_disconnect(struct snd_card *card) { snd_ctl_led_sysfs_remove(card); scoped_guard(mutex, &snd_ctl_led_mutex) { snd_ctl_led_card_valid[card->number] = false; snd_ctl_led_clean(card); } snd_ctl_led_refresh(); } static void snd_ctl_led_card_release(struct device *dev) { struct snd_ctl_led_card *led_card = to_led_card_dev(dev); kfree(led_card); } static void snd_ctl_led_release(struct device *dev) { } static void snd_ctl_led_dev_release(struct device *dev) { } /* * sysfs */ static ssize_t mode_show(struct device *dev, struct device_attribute *attr, char *buf) { struct snd_ctl_led *led = container_of(dev, struct snd_ctl_led, dev); const char *str = NULL; switch (led->mode) { case MODE_FOLLOW_MUTE: str = "follow-mute"; break; case MODE_FOLLOW_ROUTE: str = "follow-route"; break; case MODE_ON: str = "on"; break; case MODE_OFF: str = "off"; break; } return sysfs_emit(buf, "%s\n", str); } static ssize_t mode_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct snd_ctl_led *led = container_of(dev, struct snd_ctl_led, dev); char _buf[16]; size_t l = min(count, sizeof(_buf) - 1); enum snd_ctl_led_mode mode; memcpy(_buf, buf, l); _buf[l] = '\0'; if (strstr(_buf, "mute")) mode = MODE_FOLLOW_MUTE; else if (strstr(_buf, "route")) mode = MODE_FOLLOW_ROUTE; else if (strncmp(_buf, "off", 3) == 0 || strncmp(_buf, "0", 1) == 0) mode = MODE_OFF; else if (strncmp(_buf, "on", 2) == 0 || strncmp(_buf, "1", 1) == 0) mode = MODE_ON; else return count; scoped_guard(mutex, &snd_ctl_led_mutex) led->mode = mode; snd_ctl_led_set_state(NULL, group_to_access(led->group), NULL, 0); return count; } static ssize_t brightness_show(struct device *dev, struct device_attribute *attr, char *buf) { struct snd_ctl_led *led = container_of(dev, struct snd_ctl_led, dev); struct led_trigger *trig = snd_ctl_ledtrig_audio[led->trigger_type]; return sysfs_emit(buf, "%u\n", led_trigger_get_brightness(trig)); } static DEVICE_ATTR_RW(mode); static DEVICE_ATTR_RO(brightness); static struct attribute *snd_ctl_led_dev_attrs[] = { &dev_attr_mode.attr, &dev_attr_brightness.attr, NULL, }; static const struct attribute_group snd_ctl_led_dev_attr_group = { .attrs = snd_ctl_led_dev_attrs, }; static const struct attribute_group *snd_ctl_led_dev_attr_groups[] = { &snd_ctl_led_dev_attr_group, NULL, }; static char *find_eos(char *s) { while (*s && *s != ',') s++; if (*s) s++; return s; } static char *parse_uint(char *s, unsigned int *val) { unsigned long long res; if (kstrtoull(s, 10, &res)) res = 0; *val = res; return find_eos(s); } static char *parse_string(char *s, char *val, size_t val_size) { if (*s == '"' || *s == '\'') { char c = *s; s++; while (*s && *s != c) { if (val_size > 1) { *val++ = *s; val_size--; } s++; } } else { while (*s && *s != ',') { if (val_size > 1) { *val++ = *s; val_size--; } s++; } } *val = '\0'; if (*s) s++; return s; } static char *parse_iface(char *s, snd_ctl_elem_iface_t *val) { if (!strncasecmp(s, "card", 4)) *val = SNDRV_CTL_ELEM_IFACE_CARD; else if (!strncasecmp(s, "mixer", 5)) *val = SNDRV_CTL_ELEM_IFACE_MIXER; return find_eos(s); } /* * These types of input strings are accepted: * * unsigned integer - numid (equivaled to numid=UINT) * string - basic mixer name (equivalent to iface=MIXER,name=STR) * numid=UINT * [iface=MIXER,][device=UINT,][subdevice=UINT,]name=STR[,index=UINT] */ static ssize_t set_led_id(struct snd_ctl_led_card *led_card, const char *buf, size_t count, bool attach) { char buf2[256], *s, *os; struct snd_ctl_elem_id id; int err; if (strscpy(buf2, buf, sizeof(buf2)) < 0) return -E2BIG; memset(&id, 0, sizeof(id)); id.iface = SNDRV_CTL_ELEM_IFACE_MIXER; s = buf2; while (*s) { os = s; if (!strncasecmp(s, "numid=", 6)) { s = parse_uint(s + 6, &id.numid); } else if (!strncasecmp(s, "iface=", 6)) { s = parse_iface(s + 6, &id.iface); } else if (!strncasecmp(s, "device=", 7)) { s = parse_uint(s + 7, &id.device); } else if (!strncasecmp(s, "subdevice=", 10)) { s = parse_uint(s + 10, &id.subdevice); } else if (!strncasecmp(s, "name=", 5)) { s = parse_string(s + 5, id.name, sizeof(id.name)); } else if (!strncasecmp(s, "index=", 6)) { s = parse_uint(s + 6, &id.index); } else if (s == buf2) { while (*s) { if (*s < '0' || *s > '9') break; s++; } if (*s == '\0') parse_uint(buf2, &id.numid); else { for (; *s >= ' '; s++); *s = '\0'; strscpy(id.name, buf2, sizeof(id.name)); } break; } if (*s == ',') s++; if (s == os) break; } err = snd_ctl_led_set_id(led_card->number, &id, led_card->led->group, attach); if (err < 0) return err; return count; } static ssize_t attach_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct snd_ctl_led_card *led_card = container_of(dev, struct snd_ctl_led_card, dev); return set_led_id(led_card, buf, count, true); } static ssize_t detach_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct snd_ctl_led_card *led_card = container_of(dev, struct snd_ctl_led_card, dev); return set_led_id(led_card, buf, count, false); } static ssize_t reset_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct snd_ctl_led_card *led_card = container_of(dev, struct snd_ctl_led_card, dev); int err; if (count > 0 && buf[0] == '1') { err = snd_ctl_led_reset(led_card->number, led_card->led->group); if (err < 0) return err; } return count; } static ssize_t list_show(struct device *dev, struct device_attribute *attr, char *buf) { struct snd_ctl_led_card *led_card = container_of(dev, struct snd_ctl_led_card, dev); struct snd_card *card __free(snd_card_unref) = NULL; struct snd_ctl_led_ctl *lctl; size_t l = 0; card = snd_card_ref(led_card->number); if (!card) return -ENXIO; guard(rwsem_read)(&card->controls_rwsem); guard(mutex)(&snd_ctl_led_mutex); if (snd_ctl_led_card_valid[led_card->number]) { list_for_each_entry(lctl, &led_card->led->controls, list) { if (lctl->card != card) continue; if (l) l += sysfs_emit_at(buf, l, " "); l += sysfs_emit_at(buf, l, "%u", lctl->kctl->id.numid + lctl->index_offset); } } return l; } static DEVICE_ATTR_WO(attach); static DEVICE_ATTR_WO(detach); static DEVICE_ATTR_WO(reset); static DEVICE_ATTR_RO(list); static struct attribute *snd_ctl_led_card_attrs[] = { &dev_attr_attach.attr, &dev_attr_detach.attr, &dev_attr_reset.attr, &dev_attr_list.attr, NULL, }; static const struct attribute_group snd_ctl_led_card_attr_group = { .attrs = snd_ctl_led_card_attrs, }; static const struct attribute_group *snd_ctl_led_card_attr_groups[] = { &snd_ctl_led_card_attr_group, NULL, }; static struct device snd_ctl_led_dev; static void snd_ctl_led_sysfs_add(struct snd_card *card) { unsigned int group; struct snd_ctl_led_card *led_card; struct snd_ctl_led *led; char link_name[32]; for (group = 0; group < MAX_LED; group++) { led = &snd_ctl_leds[group]; led_card = kzalloc(sizeof(*led_card), GFP_KERNEL); if (!led_card) goto cerr2; led_card->number = card->number; led_card->led = led; device_initialize(&led_card->dev); led_card->dev.release = snd_ctl_led_card_release; if (dev_set_name(&led_card->dev, "card%d", card->number) < 0) goto cerr; led_card->dev.parent = &led->dev; led_card->dev.groups = snd_ctl_led_card_attr_groups; if (device_add(&led_card->dev)) goto cerr; led->cards[card->number] = led_card; snprintf(link_name, sizeof(link_name), "led-%s", led->name); if (sysfs_create_link(&card->ctl_dev->kobj, &led_card->dev.kobj, link_name)) dev_err(card->dev, "%s: can't create symlink to controlC%i device\n", __func__, card->number); if (sysfs_create_link(&led_card->dev.kobj, &card->card_dev.kobj, "card")) dev_err(card->dev, "%s: can't create symlink to card%i\n", __func__, card->number); continue; cerr: put_device(&led_card->dev); cerr2: dev_err(card->dev, "snd_ctl_led: unable to add card%d", card->number); } } static void snd_ctl_led_sysfs_remove(struct snd_card *card) { unsigned int group; struct snd_ctl_led_card *led_card; struct snd_ctl_led *led; char link_name[32]; for (group = 0; group < MAX_LED; group++) { led = &snd_ctl_leds[group]; led_card = led->cards[card->number]; if (!led_card) continue; snprintf(link_name, sizeof(link_name), "led-%s", led->name); sysfs_remove_link(&card->ctl_dev->kobj, link_name); sysfs_remove_link(&led_card->dev.kobj, "card"); device_unregister(&led_card->dev); led->cards[card->number] = NULL; } } /* * Control layer registration */ static struct snd_ctl_layer_ops snd_ctl_led_lops = { .module_name = SND_CTL_LAYER_MODULE_LED, .lregister = snd_ctl_led_register, .ldisconnect = snd_ctl_led_disconnect, .lnotify = snd_ctl_led_notify, }; static int __init snd_ctl_led_init(void) { struct snd_ctl_led *led; unsigned int group; led_trigger_register_simple("audio-mute", &snd_ctl_ledtrig_audio[LED_AUDIO_MUTE]); led_trigger_register_simple("audio-micmute", &snd_ctl_ledtrig_audio[LED_AUDIO_MICMUTE]); device_initialize(&snd_ctl_led_dev); snd_ctl_led_dev.class = &sound_class; snd_ctl_led_dev.release = snd_ctl_led_dev_release; dev_set_name(&snd_ctl_led_dev, "ctl-led"); if (device_add(&snd_ctl_led_dev)) { put_device(&snd_ctl_led_dev); return -ENOMEM; } for (group = 0; group < MAX_LED; group++) { led = &snd_ctl_leds[group]; INIT_LIST_HEAD(&led->controls); device_initialize(&led->dev); led->dev.parent = &snd_ctl_led_dev; led->dev.release = snd_ctl_led_release; led->dev.groups = snd_ctl_led_dev_attr_groups; dev_set_name(&led->dev, led->name); if (device_add(&led->dev)) { put_device(&led->dev); for (; group > 0; group--) { led = &snd_ctl_leds[group - 1]; device_unregister(&led->dev); } device_unregister(&snd_ctl_led_dev); return -ENOMEM; } } snd_ctl_register_layer(&snd_ctl_led_lops); return 0; } static void __exit snd_ctl_led_exit(void) { struct snd_ctl_led *led; struct snd_card *card; unsigned int group, card_number; snd_ctl_disconnect_layer(&snd_ctl_led_lops); for (card_number = 0; card_number < SNDRV_CARDS; card_number++) { if (!snd_ctl_led_card_valid[card_number]) continue; card = snd_card_ref(card_number); if (card) { snd_ctl_led_sysfs_remove(card); snd_card_unref(card); } } for (group = 0; group < MAX_LED; group++) { led = &snd_ctl_leds[group]; device_unregister(&led->dev); } device_unregister(&snd_ctl_led_dev); snd_ctl_led_clean(NULL); led_trigger_unregister_simple(snd_ctl_ledtrig_audio[LED_AUDIO_MUTE]); led_trigger_unregister_simple(snd_ctl_ledtrig_audio[LED_AUDIO_MICMUTE]); } module_init(snd_ctl_led_init) module_exit(snd_ctl_led_exit) MODULE_ALIAS("ledtrig:audio-mute"); MODULE_ALIAS("ledtrig:audio-micmute");
1 1 1 1 1 1 1 8 8 8 7 3 6 5 4 4 3 1 1 3 3 3 3 2 1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 // SPDX-License-Identifier: GPL-2.0-only #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/module.h> #include <linux/kernel.h> #include <linux/capability.h> #include <linux/if.h> #include <linux/inetdevice.h> #include <linux/ip.h> #include <linux/list.h> #include <linux/rculist.h> #include <linux/skbuff.h> #include <linux/slab.h> #include <linux/tcp.h> #include <net/ip.h> #include <net/tcp.h> #include <linux/netfilter/nfnetlink.h> #include <linux/netfilter/x_tables.h> #include <net/netfilter/nf_log.h> #include <linux/netfilter/nfnetlink_osf.h> /* * Indexed by dont-fragment bit. * It is the only constant value in the fingerprint. */ struct list_head nf_osf_fingers[2]; EXPORT_SYMBOL_GPL(nf_osf_fingers); static inline int nf_osf_ttl(const struct sk_buff *skb, int ttl_check, unsigned char f_ttl) { struct in_device *in_dev = __in_dev_get_rcu(skb->dev); const struct iphdr *ip = ip_hdr(skb); const struct in_ifaddr *ifa; int ret = 0; if (ttl_check == NF_OSF_TTL_TRUE) return ip->ttl == f_ttl; if (ttl_check == NF_OSF_TTL_NOCHECK) return 1; else if (ip->ttl <= f_ttl) return 1; in_dev_for_each_ifa_rcu(ifa, in_dev) { if (inet_ifa_match(ip->saddr, ifa)) { ret = (ip->ttl == f_ttl); break; } } return ret; } struct nf_osf_hdr_ctx { bool df; u16 window; u16 totlen; const unsigned char *optp; unsigned int optsize; }; static bool nf_osf_match_one(const struct sk_buff *skb, const struct nf_osf_user_finger *f, int ttl_check, struct nf_osf_hdr_ctx *ctx) { const __u8 *optpinit = ctx->optp; unsigned int check_WSS = 0; int fmatch = FMATCH_WRONG; int foptsize, optnum; u16 mss = 0; if (ctx->totlen != f->ss || !nf_osf_ttl(skb, ttl_check, f->ttl)) return false; /* * Should not happen if userspace parser was written correctly. */ if (f->wss.wc >= OSF_WSS_MAX) return false; /* Check options */ foptsize = 0; for (optnum = 0; optnum < f->opt_num; ++optnum) foptsize += f->opt[optnum].length; if (foptsize > MAX_IPOPTLEN || ctx->optsize > MAX_IPOPTLEN || ctx->optsize != foptsize) return false; check_WSS = f->wss.wc; for (optnum = 0; optnum < f->opt_num; ++optnum) { if (f->opt[optnum].kind == *ctx->optp) { __u32 len = f->opt[optnum].length; const __u8 *optend = ctx->optp + len; fmatch = FMATCH_OK; switch (*ctx->optp) { case OSFOPT_MSS: mss = ctx->optp[3]; mss <<= 8; mss |= ctx->optp[2]; mss = ntohs((__force __be16)mss); break; case OSFOPT_TS: break; } ctx->optp = optend; } else fmatch = FMATCH_OPT_WRONG; if (fmatch != FMATCH_OK) break; } if (fmatch != FMATCH_OPT_WRONG) { fmatch = FMATCH_WRONG; switch (check_WSS) { case OSF_WSS_PLAIN: if (f->wss.val == 0 || ctx->window == f->wss.val) fmatch = FMATCH_OK; break; case OSF_WSS_MSS: /* * Some smart modems decrease mangle MSS to * SMART_MSS_2, so we check standard, decreased * and the one provided in the fingerprint MSS * values. */ #define SMART_MSS_1 1460 #define SMART_MSS_2 1448 if (ctx->window == f->wss.val * mss || ctx->window == f->wss.val * SMART_MSS_1 || ctx->window == f->wss.val * SMART_MSS_2) fmatch = FMATCH_OK; break; case OSF_WSS_MTU: if (ctx->window == f->wss.val * (mss + 40) || ctx->window == f->wss.val * (SMART_MSS_1 + 40) || ctx->window == f->wss.val * (SMART_MSS_2 + 40)) fmatch = FMATCH_OK; break; case OSF_WSS_MODULO: if ((ctx->window % f->wss.val) == 0) fmatch = FMATCH_OK; break; } } if (fmatch != FMATCH_OK) ctx->optp = optpinit; return fmatch == FMATCH_OK; } static const struct tcphdr *nf_osf_hdr_ctx_init(struct nf_osf_hdr_ctx *ctx, const struct sk_buff *skb, const struct iphdr *ip, unsigned char *opts, struct tcphdr *_tcph) { const struct tcphdr *tcp; tcp = skb_header_pointer(skb, ip_hdrlen(skb), sizeof(struct tcphdr), _tcph); if (!tcp) return NULL; if (!tcp->syn) return NULL; ctx->totlen = ntohs(ip->tot_len); ctx->df = ntohs(ip->frag_off) & IP_DF; ctx->window = ntohs(tcp->window); if (tcp->doff * 4 > sizeof(struct tcphdr)) { ctx->optsize = tcp->doff * 4 - sizeof(struct tcphdr); ctx->optp = skb_header_pointer(skb, ip_hdrlen(skb) + sizeof(struct tcphdr), ctx->optsize, opts); if (!ctx->optp) return NULL; } return tcp; } bool nf_osf_match(const struct sk_buff *skb, u_int8_t family, int hooknum, struct net_device *in, struct net_device *out, const struct nf_osf_info *info, struct net *net, const struct list_head *nf_osf_fingers) { const struct iphdr *ip = ip_hdr(skb); const struct nf_osf_user_finger *f; unsigned char opts[MAX_IPOPTLEN]; const struct nf_osf_finger *kf; int fcount = 0, ttl_check; int fmatch = FMATCH_WRONG; struct nf_osf_hdr_ctx ctx; const struct tcphdr *tcp; struct tcphdr _tcph; memset(&ctx, 0, sizeof(ctx)); tcp = nf_osf_hdr_ctx_init(&ctx, skb, ip, opts, &_tcph); if (!tcp) return false; ttl_check = (info->flags & NF_OSF_TTL) ? info->ttl : 0; list_for_each_entry_rcu(kf, &nf_osf_fingers[ctx.df], finger_entry) { f = &kf->finger; if (!(info->flags & NF_OSF_LOG) && strcmp(info->genre, f->genre)) continue; if (!nf_osf_match_one(skb, f, ttl_check, &ctx)) continue; fmatch = FMATCH_OK; fcount++; if (info->flags & NF_OSF_LOG) nf_log_packet(net, family, hooknum, skb, in, out, NULL, "%s [%s:%s] : %pI4:%d -> %pI4:%d hops=%d\n", f->genre, f->version, f->subtype, &ip->saddr, ntohs(tcp->source), &ip->daddr, ntohs(tcp->dest), f->ttl - ip->ttl); if ((info->flags & NF_OSF_LOG) && info->loglevel == NF_OSF_LOGLEVEL_FIRST) break; } if (!fcount && (info->flags & NF_OSF_LOG)) nf_log_packet(net, family, hooknum, skb, in, out, NULL, "Remote OS is not known: %pI4:%u -> %pI4:%u\n", &ip->saddr, ntohs(tcp->source), &ip->daddr, ntohs(tcp->dest)); if (fcount) fmatch = FMATCH_OK; return fmatch == FMATCH_OK; } EXPORT_SYMBOL_GPL(nf_osf_match); bool nf_osf_find(const struct sk_buff *skb, const struct list_head *nf_osf_fingers, const int ttl_check, struct nf_osf_data *data) { const struct iphdr *ip = ip_hdr(skb); const struct nf_osf_user_finger *f; unsigned char opts[MAX_IPOPTLEN]; const struct nf_osf_finger *kf; struct nf_osf_hdr_ctx ctx; const struct tcphdr *tcp; struct tcphdr _tcph; bool found = false; memset(&ctx, 0, sizeof(ctx)); tcp = nf_osf_hdr_ctx_init(&ctx, skb, ip, opts, &_tcph); if (!tcp) return false; list_for_each_entry_rcu(kf, &nf_osf_fingers[ctx.df], finger_entry) { f = &kf->finger; if (!nf_osf_match_one(skb, f, ttl_check, &ctx)) continue; data->genre = f->genre; data->version = f->version; found = true; break; } return found; } EXPORT_SYMBOL_GPL(nf_osf_find); static const struct nla_policy nfnl_osf_policy[OSF_ATTR_MAX + 1] = { [OSF_ATTR_FINGER] = { .len = sizeof(struct nf_osf_user_finger) }, }; static int nfnl_osf_add_callback(struct sk_buff *skb, const struct nfnl_info *info, const struct nlattr * const osf_attrs[]) { struct nf_osf_user_finger *f; struct nf_osf_finger *kf = NULL, *sf; int err = 0; if (!capable(CAP_NET_ADMIN)) return -EPERM; if (!osf_attrs[OSF_ATTR_FINGER]) return -EINVAL; if (!(info->nlh->nlmsg_flags & NLM_F_CREATE)) return -EINVAL; f = nla_data(osf_attrs[OSF_ATTR_FINGER]); if (f->opt_num > ARRAY_SIZE(f->opt)) return -EINVAL; if (!memchr(f->genre, 0, MAXGENRELEN) || !memchr(f->subtype, 0, MAXGENRELEN) || !memchr(f->version, 0, MAXGENRELEN)) return -EINVAL; kf = kmalloc(sizeof(struct nf_osf_finger), GFP_KERNEL); if (!kf) return -ENOMEM; memcpy(&kf->finger, f, sizeof(struct nf_osf_user_finger)); list_for_each_entry(sf, &nf_osf_fingers[!!f->df], finger_entry) { if (memcmp(&sf->finger, f, sizeof(struct nf_osf_user_finger))) continue; kfree(kf); kf = NULL; if (info->nlh->nlmsg_flags & NLM_F_EXCL) err = -EEXIST; break; } /* * We are protected by nfnl mutex. */ if (kf) list_add_tail_rcu(&kf->finger_entry, &nf_osf_fingers[!!f->df]); return err; } static int nfnl_osf_remove_callback(struct sk_buff *skb, const struct nfnl_info *info, const struct nlattr * const osf_attrs[]) { struct nf_osf_user_finger *f; struct nf_osf_finger *sf; int err = -ENOENT; if (!capable(CAP_NET_ADMIN)) return -EPERM; if (!osf_attrs[OSF_ATTR_FINGER]) return -EINVAL; f = nla_data(osf_attrs[OSF_ATTR_FINGER]); list_for_each_entry(sf, &nf_osf_fingers[!!f->df], finger_entry) { if (memcmp(&sf->finger, f, sizeof(struct nf_osf_user_finger))) continue; /* * We are protected by nfnl mutex. */ list_del_rcu(&sf->finger_entry); kfree_rcu(sf, rcu_head); err = 0; break; } return err; } static const struct nfnl_callback nfnl_osf_callbacks[OSF_MSG_MAX] = { [OSF_MSG_ADD] = { .call = nfnl_osf_add_callback, .type = NFNL_CB_MUTEX, .attr_count = OSF_ATTR_MAX, .policy = nfnl_osf_policy, }, [OSF_MSG_REMOVE] = { .call = nfnl_osf_remove_callback, .type = NFNL_CB_MUTEX, .attr_count = OSF_ATTR_MAX, .policy = nfnl_osf_policy, }, }; static const struct nfnetlink_subsystem nfnl_osf_subsys = { .name = "osf", .subsys_id = NFNL_SUBSYS_OSF, .cb_count = OSF_MSG_MAX, .cb = nfnl_osf_callbacks, }; static int __init nfnl_osf_init(void) { int err = -EINVAL; int i; for (i = 0; i < ARRAY_SIZE(nf_osf_fingers); ++i) INIT_LIST_HEAD(&nf_osf_fingers[i]); err = nfnetlink_subsys_register(&nfnl_osf_subsys); if (err < 0) { pr_err("Failed to register OSF nsfnetlink helper (%d)\n", err); goto err_out_exit; } return 0; err_out_exit: return err; } static void __exit nfnl_osf_fini(void) { struct nf_osf_finger *f; int i; nfnetlink_subsys_unregister(&nfnl_osf_subsys); rcu_read_lock(); for (i = 0; i < ARRAY_SIZE(nf_osf_fingers); ++i) { list_for_each_entry_rcu(f, &nf_osf_fingers[i], finger_entry) { list_del_rcu(&f->finger_entry); kfree_rcu(f, rcu_head); } } rcu_read_unlock(); rcu_barrier(); } module_init(nfnl_osf_init); module_exit(nfnl_osf_fini); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("Passive OS fingerprint matching"); MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_OSF);
19 19 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 // SPDX-License-Identifier: GPL-2.0 #include <linux/ceph/ceph_debug.h> #include <linux/bvec.h> #include <linux/crc32c.h> #include <linux/net.h> #include <linux/socket.h> #include <net/sock.h> #include <linux/ceph/ceph_features.h> #include <linux/ceph/decode.h> #include <linux/ceph/libceph.h> #include <linux/ceph/messenger.h> /* static tag bytes (protocol control messages) */ static char tag_msg = CEPH_MSGR_TAG_MSG; static char tag_ack = CEPH_MSGR_TAG_ACK; static char tag_keepalive = CEPH_MSGR_TAG_KEEPALIVE; static char tag_keepalive2 = CEPH_MSGR_TAG_KEEPALIVE2; /* * If @buf is NULL, discard up to @len bytes. */ static int ceph_tcp_recvmsg(struct socket *sock, void *buf, size_t len) { struct kvec iov = {buf, len}; struct msghdr msg = { .msg_flags = MSG_DONTWAIT | MSG_NOSIGNAL }; int r; if (!buf) msg.msg_flags |= MSG_TRUNC; iov_iter_kvec(&msg.msg_iter, ITER_DEST, &iov, 1, len); r = sock_recvmsg(sock, &msg, msg.msg_flags); if (r == -EAGAIN) r = 0; return r; } static int ceph_tcp_recvpage(struct socket *sock, struct page *page, int page_offset, size_t length) { struct bio_vec bvec; struct msghdr msg = { .msg_flags = MSG_DONTWAIT | MSG_NOSIGNAL }; int r; BUG_ON(page_offset + length > PAGE_SIZE); bvec_set_page(&bvec, page, length, page_offset); iov_iter_bvec(&msg.msg_iter, ITER_DEST, &bvec, 1, length); r = sock_recvmsg(sock, &msg, msg.msg_flags); if (r == -EAGAIN) r = 0; return r; } /* * write something. @more is true if caller will be sending more data * shortly. */ static int ceph_tcp_sendmsg(struct socket *sock, struct kvec *iov, size_t kvlen, size_t len, bool more) { struct msghdr msg = { .msg_flags = MSG_DONTWAIT | MSG_NOSIGNAL }; int r; if (more) msg.msg_flags |= MSG_MORE; else msg.msg_flags |= MSG_EOR; /* superfluous, but what the hell */ r = kernel_sendmsg(sock, &msg, iov, kvlen, len); if (r == -EAGAIN) r = 0; return r; } /* * @more: MSG_MORE or 0. */ static int ceph_tcp_sendpage(struct socket *sock, struct page *page, int offset, size_t size, int more) { struct msghdr msg = { .msg_flags = MSG_DONTWAIT | MSG_NOSIGNAL | more, }; struct bio_vec bvec; int ret; /* * MSG_SPLICE_PAGES cannot properly handle pages with page_count == 0, * we need to fall back to sendmsg if that's the case. * * Same goes for slab pages: skb_can_coalesce() allows * coalescing neighboring slab objects into a single frag which * triggers one of hardened usercopy checks. */ if (sendpage_ok(page)) msg.msg_flags |= MSG_SPLICE_PAGES; bvec_set_page(&bvec, page, size, offset); iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, &bvec, 1, size); ret = sock_sendmsg(sock, &msg); if (ret == -EAGAIN) ret = 0; return ret; } static void con_out_kvec_reset(struct ceph_connection *con) { BUG_ON(con->v1.out_skip); con->v1.out_kvec_left = 0; con->v1.out_kvec_bytes = 0; con->v1.out_kvec_cur = &con->v1.out_kvec[0]; } static void con_out_kvec_add(struct ceph_connection *con, size_t size, void *data) { int index = con->v1.out_kvec_left; BUG_ON(con->v1.out_skip); BUG_ON(index >= ARRAY_SIZE(con->v1.out_kvec)); con->v1.out_kvec[index].iov_len = size; con->v1.out_kvec[index].iov_base = data; con->v1.out_kvec_left++; con->v1.out_kvec_bytes += size; } /* * Chop off a kvec from the end. Return residual number of bytes for * that kvec, i.e. how many bytes would have been written if the kvec * hadn't been nuked. */ static int con_out_kvec_skip(struct ceph_connection *con) { int skip = 0; if (con->v1.out_kvec_bytes > 0) { skip = con->v1.out_kvec_cur[con->v1.out_kvec_left - 1].iov_len; BUG_ON(con->v1.out_kvec_bytes < skip); BUG_ON(!con->v1.out_kvec_left); con->v1.out_kvec_bytes -= skip; con->v1.out_kvec_left--; } return skip; } static size_t sizeof_footer(struct ceph_connection *con) { return (con->peer_features & CEPH_FEATURE_MSG_AUTH) ? sizeof(struct ceph_msg_footer) : sizeof(struct ceph_msg_footer_old); } static void prepare_message_data(struct ceph_msg *msg, u32 data_len) { /* Initialize data cursor if it's not a sparse read */ u64 len = msg->sparse_read_total ? : data_len; ceph_msg_data_cursor_init(&msg->cursor, msg, len); } /* * Prepare footer for currently outgoing message, and finish things * off. Assumes out_kvec* are already valid.. we just add on to the end. */ static void prepare_write_message_footer(struct ceph_connection *con) { struct ceph_msg *m = con->out_msg; m->footer.flags |= CEPH_MSG_FOOTER_COMPLETE; dout("prepare_write_message_footer %p\n", con); con_out_kvec_add(con, sizeof_footer(con), &m->footer); if (con->peer_features & CEPH_FEATURE_MSG_AUTH) { if (con->ops->sign_message) con->ops->sign_message(m); else m->footer.sig = 0; } else { m->old_footer.flags = m->footer.flags; } con->v1.out_more = m->more_to_follow; con->v1.out_msg_done = true; } /* * Prepare headers for the next outgoing message. */ static void prepare_write_message(struct ceph_connection *con) { struct ceph_msg *m; u32 crc; con_out_kvec_reset(con); con->v1.out_msg_done = false; /* Sneak an ack in there first? If we can get it into the same * TCP packet that's a good thing. */ if (con->in_seq > con->in_seq_acked) { con->in_seq_acked = con->in_seq; con_out_kvec_add(con, sizeof (tag_ack), &tag_ack); con->v1.out_temp_ack = cpu_to_le64(con->in_seq_acked); con_out_kvec_add(con, sizeof(con->v1.out_temp_ack), &con->v1.out_temp_ack); } ceph_con_get_out_msg(con); m = con->out_msg; dout("prepare_write_message %p seq %lld type %d len %d+%d+%zd\n", m, con->out_seq, le16_to_cpu(m->hdr.type), le32_to_cpu(m->hdr.front_len), le32_to_cpu(m->hdr.middle_len), m->data_length); WARN_ON(m->front.iov_len != le32_to_cpu(m->hdr.front_len)); WARN_ON(m->data_length != le32_to_cpu(m->hdr.data_len)); /* tag + hdr + front + middle */ con_out_kvec_add(con, sizeof (tag_msg), &tag_msg); con_out_kvec_add(con, sizeof(con->v1.out_hdr), &con->v1.out_hdr); con_out_kvec_add(con, m->front.iov_len, m->front.iov_base); if (m->middle) con_out_kvec_add(con, m->middle->vec.iov_len, m->middle->vec.iov_base); /* fill in hdr crc and finalize hdr */ crc = crc32c(0, &m->hdr, offsetof(struct ceph_msg_header, crc)); con->out_msg->hdr.crc = cpu_to_le32(crc); memcpy(&con->v1.out_hdr, &con->out_msg->hdr, sizeof(con->v1.out_hdr)); /* fill in front and middle crc, footer */ crc = crc32c(0, m->front.iov_base, m->front.iov_len); con->out_msg->footer.front_crc = cpu_to_le32(crc); if (m->middle) { crc = crc32c(0, m->middle->vec.iov_base, m->middle->vec.iov_len); con->out_msg->footer.middle_crc = cpu_to_le32(crc); } else con->out_msg->footer.middle_crc = 0; dout("%s front_crc %u middle_crc %u\n", __func__, le32_to_cpu(con->out_msg->footer.front_crc), le32_to_cpu(con->out_msg->footer.middle_crc)); con->out_msg->footer.flags = 0; /* is there a data payload? */ con->out_msg->footer.data_crc = 0; if (m->data_length) { prepare_message_data(con->out_msg, m->data_length); con->v1.out_more = 1; /* data + footer will follow */ } else { /* no, queue up footer too and be done */ prepare_write_message_footer(con); } ceph_con_flag_set(con, CEPH_CON_F_WRITE_PENDING); } /* * Prepare an ack. */ static void prepare_write_ack(struct ceph_connection *con) { dout("prepare_write_ack %p %llu -> %llu\n", con, con->in_seq_acked, con->in_seq); con->in_seq_acked = con->in_seq; con_out_kvec_reset(con); con_out_kvec_add(con, sizeof (tag_ack), &tag_ack); con->v1.out_temp_ack = cpu_to_le64(con->in_seq_acked); con_out_kvec_add(con, sizeof(con->v1.out_temp_ack), &con->v1.out_temp_ack); con->v1.out_more = 1; /* more will follow.. eventually.. */ ceph_con_flag_set(con, CEPH_CON_F_WRITE_PENDING); } /* * Prepare to share the seq during handshake */ static void prepare_write_seq(struct ceph_connection *con) { dout("prepare_write_seq %p %llu -> %llu\n", con, con->in_seq_acked, con->in_seq); con->in_seq_acked = con->in_seq; con_out_kvec_reset(con); con->v1.out_temp_ack = cpu_to_le64(con->in_seq_acked); con_out_kvec_add(con, sizeof(con->v1.out_temp_ack), &con->v1.out_temp_ack); ceph_con_flag_set(con, CEPH_CON_F_WRITE_PENDING); } /* * Prepare to write keepalive byte. */ static void prepare_write_keepalive(struct ceph_connection *con) { dout("prepare_write_keepalive %p\n", con); con_out_kvec_reset(con); if (con->peer_features & CEPH_FEATURE_MSGR_KEEPALIVE2) { struct timespec64 now; ktime_get_real_ts64(&now); con_out_kvec_add(con, sizeof(tag_keepalive2), &tag_keepalive2); ceph_encode_timespec64(&con->v1.out_temp_keepalive2, &now); con_out_kvec_add(con, sizeof(con->v1.out_temp_keepalive2), &con->v1.out_temp_keepalive2); } else { con_out_kvec_add(con, sizeof(tag_keepalive), &tag_keepalive); } ceph_con_flag_set(con, CEPH_CON_F_WRITE_PENDING); } /* * Connection negotiation. */ static int get_connect_authorizer(struct ceph_connection *con) { struct ceph_auth_handshake *auth; int auth_proto; if (!con->ops->get_authorizer) { con->v1.auth = NULL; con->v1.out_connect.authorizer_protocol = CEPH_AUTH_UNKNOWN; con->v1.out_connect.authorizer_len = 0; return 0; } auth = con->ops->get_authorizer(con, &auth_proto, con->v1.auth_retry); if (IS_ERR(auth)) return PTR_ERR(auth); con->v1.auth = auth; con->v1.out_connect.authorizer_protocol = cpu_to_le32(auth_proto); con->v1.out_connect.authorizer_len = cpu_to_le32(auth->authorizer_buf_len); return 0; } /* * We connected to a peer and are saying hello. */ static void prepare_write_banner(struct ceph_connection *con) { con_out_kvec_add(con, strlen(CEPH_BANNER), CEPH_BANNER); con_out_kvec_add(con, sizeof (con->msgr->my_enc_addr), &con->msgr->my_enc_addr); con->v1.out_more = 0; ceph_con_flag_set(con, CEPH_CON_F_WRITE_PENDING); } static void __prepare_write_connect(struct ceph_connection *con) { con_out_kvec_add(con, sizeof(con->v1.out_connect), &con->v1.out_connect); if (con->v1.auth) con_out_kvec_add(con, con->v1.auth->authorizer_buf_len, con->v1.auth->authorizer_buf); con->v1.out_more = 0; ceph_con_flag_set(con, CEPH_CON_F_WRITE_PENDING); } static int prepare_write_connect(struct ceph_connection *con) { unsigned int global_seq = ceph_get_global_seq(con->msgr, 0); int proto; int ret; switch (con->peer_name.type) { case CEPH_ENTITY_TYPE_MON: proto = CEPH_MONC_PROTOCOL; break; case CEPH_ENTITY_TYPE_OSD: proto = CEPH_OSDC_PROTOCOL; break; case CEPH_ENTITY_TYPE_MDS: proto = CEPH_MDSC_PROTOCOL; break; default: BUG(); } dout("prepare_write_connect %p cseq=%d gseq=%d proto=%d\n", con, con->v1.connect_seq, global_seq, proto); con->v1.out_connect.features = cpu_to_le64(from_msgr(con->msgr)->supported_features); con->v1.out_connect.host_type = cpu_to_le32(CEPH_ENTITY_TYPE_CLIENT); con->v1.out_connect.connect_seq = cpu_to_le32(con->v1.connect_seq); con->v1.out_connect.global_seq = cpu_to_le32(global_seq); con->v1.out_connect.protocol_version = cpu_to_le32(proto); con->v1.out_connect.flags = 0; ret = get_connect_authorizer(con); if (ret) return ret; __prepare_write_connect(con); return 0; } /* * write as much of pending kvecs to the socket as we can. * 1 -> done * 0 -> socket full, but more to do * <0 -> error */ static int write_partial_kvec(struct ceph_connection *con) { int ret; dout("write_partial_kvec %p %d left\n", con, con->v1.out_kvec_bytes); while (con->v1.out_kvec_bytes > 0) { ret = ceph_tcp_sendmsg(con->sock, con->v1.out_kvec_cur, con->v1.out_kvec_left, con->v1.out_kvec_bytes, con->v1.out_more); if (ret <= 0) goto out; con->v1.out_kvec_bytes -= ret; if (!con->v1.out_kvec_bytes) break; /* done */ /* account for full iov entries consumed */ while (ret >= con->v1.out_kvec_cur->iov_len) { BUG_ON(!con->v1.out_kvec_left); ret -= con->v1.out_kvec_cur->iov_len; con->v1.out_kvec_cur++; con->v1.out_kvec_left--; } /* and for a partially-consumed entry */ if (ret) { con->v1.out_kvec_cur->iov_len -= ret; con->v1.out_kvec_cur->iov_base += ret; } } con->v1.out_kvec_left = 0; ret = 1; out: dout("write_partial_kvec %p %d left in %d kvecs ret = %d\n", con, con->v1.out_kvec_bytes, con->v1.out_kvec_left, ret); return ret; /* done! */ } /* * Write as much message data payload as we can. If we finish, queue * up the footer. * 1 -> done, footer is now queued in out_kvec[]. * 0 -> socket full, but more to do * <0 -> error */ static int write_partial_message_data(struct ceph_connection *con) { struct ceph_msg *msg = con->out_msg; struct ceph_msg_data_cursor *cursor = &msg->cursor; bool do_datacrc = !ceph_test_opt(from_msgr(con->msgr), NOCRC); u32 crc; dout("%s %p msg %p\n", __func__, con, msg); if (!msg->num_data_items) return -EINVAL; /* * Iterate through each page that contains data to be * written, and send as much as possible for each. * * If we are calculating the data crc (the default), we will * need to map the page. If we have no pages, they have * been revoked, so use the zero page. */ crc = do_datacrc ? le32_to_cpu(msg->footer.data_crc) : 0; while (cursor->total_resid) { struct page *page; size_t page_offset; size_t length; int ret; if (!cursor->resid) { ceph_msg_data_advance(cursor, 0); continue; } page = ceph_msg_data_next(cursor, &page_offset, &length); ret = ceph_tcp_sendpage(con->sock, page, page_offset, length, MSG_MORE); if (ret <= 0) { if (do_datacrc) msg->footer.data_crc = cpu_to_le32(crc); return ret; } if (do_datacrc && cursor->need_crc) crc = ceph_crc32c_page(crc, page, page_offset, length); ceph_msg_data_advance(cursor, (size_t)ret); } dout("%s %p msg %p done\n", __func__, con, msg); /* prepare and queue up footer, too */ if (do_datacrc) msg->footer.data_crc = cpu_to_le32(crc); else msg->footer.flags |= CEPH_MSG_FOOTER_NOCRC; con_out_kvec_reset(con); prepare_write_message_footer(con); return 1; /* must return > 0 to indicate success */ } /* * write some zeros */ static int write_partial_skip(struct ceph_connection *con) { int ret; dout("%s %p %d left\n", __func__, con, con->v1.out_skip); while (con->v1.out_skip > 0) { size_t size = min(con->v1.out_skip, (int)PAGE_SIZE); ret = ceph_tcp_sendpage(con->sock, ceph_zero_page, 0, size, MSG_MORE); if (ret <= 0) goto out; con->v1.out_skip -= ret; } ret = 1; out: return ret; } /* * Prepare to read connection handshake, or an ack. */ static void prepare_read_banner(struct ceph_connection *con) { dout("prepare_read_banner %p\n", con); con->v1.in_base_pos = 0; } static void prepare_read_connect(struct ceph_connection *con) { dout("prepare_read_connect %p\n", con); con->v1.in_base_pos = 0; } static void prepare_read_ack(struct ceph_connection *con) { dout("prepare_read_ack %p\n", con); con->v1.in_base_pos = 0; } static void prepare_read_seq(struct ceph_connection *con) { dout("prepare_read_seq %p\n", con); con->v1.in_base_pos = 0; con->v1.in_tag = CEPH_MSGR_TAG_SEQ; } static void prepare_read_tag(struct ceph_connection *con) { dout("prepare_read_tag %p\n", con); con->v1.in_base_pos = 0; con->v1.in_tag = CEPH_MSGR_TAG_READY; } static void prepare_read_keepalive_ack(struct ceph_connection *con) { dout("prepare_read_keepalive_ack %p\n", con); con->v1.in_base_pos = 0; } /* * Prepare to read a message. */ static int prepare_read_message(struct ceph_connection *con) { dout("prepare_read_message %p\n", con); BUG_ON(con->in_msg != NULL); con->v1.in_base_pos = 0; con->in_front_crc = con->in_middle_crc = con->in_data_crc = 0; return 0; } static int read_partial(struct ceph_connection *con, int end, int size, void *object) { while (con->v1.in_base_pos < end) { int left = end - con->v1.in_base_pos; int have = size - left; int ret = ceph_tcp_recvmsg(con->sock, object + have, left); if (ret <= 0) return ret; con->v1.in_base_pos += ret; } return 1; } /* * Read all or part of the connect-side handshake on a new connection */ static int read_partial_banner(struct ceph_connection *con) { int size; int end; int ret; dout("read_partial_banner %p at %d\n", con, con->v1.in_base_pos); /* peer's banner */ size = strlen(CEPH_BANNER); end = size; ret = read_partial(con, end, size, con->v1.in_banner); if (ret <= 0) goto out; size = sizeof(con->v1.actual_peer_addr); end += size; ret = read_partial(con, end, size, &con->v1.actual_peer_addr); if (ret <= 0) goto out; ceph_decode_banner_addr(&con->v1.actual_peer_addr); size = sizeof(con->v1.peer_addr_for_me); end += size; ret = read_partial(con, end, size, &con->v1.peer_addr_for_me); if (ret <= 0) goto out; ceph_decode_banner_addr(&con->v1.peer_addr_for_me); out: return ret; } static int read_partial_connect(struct ceph_connection *con) { int size; int end; int ret; dout("read_partial_connect %p at %d\n", con, con->v1.in_base_pos); size = sizeof(con->v1.in_reply); end = size; ret = read_partial(con, end, size, &con->v1.in_reply); if (ret <= 0) goto out; if (con->v1.auth) { size = le32_to_cpu(con->v1.in_reply.authorizer_len); if (size > con->v1.auth->authorizer_reply_buf_len) { pr_err("authorizer reply too big: %d > %zu\n", size, con->v1.auth->authorizer_reply_buf_len); ret = -EINVAL; goto out; } end += size; ret = read_partial(con, end, size, con->v1.auth->authorizer_reply_buf); if (ret <= 0) goto out; } dout("read_partial_connect %p tag %d, con_seq = %u, g_seq = %u\n", con, con->v1.in_reply.tag, le32_to_cpu(con->v1.in_reply.connect_seq), le32_to_cpu(con->v1.in_reply.global_seq)); out: return ret; } /* * Verify the hello banner looks okay. */ static int verify_hello(struct ceph_connection *con) { if (memcmp(con->v1.in_banner, CEPH_BANNER, strlen(CEPH_BANNER))) { pr_err("connect to %s got bad banner\n", ceph_pr_addr(&con->peer_addr)); con->error_msg = "protocol error, bad banner"; return -1; } return 0; } static int process_banner(struct ceph_connection *con) { struct ceph_entity_addr *my_addr = &con->msgr->inst.addr; dout("process_banner on %p\n", con); if (verify_hello(con) < 0) return -1; /* * Make sure the other end is who we wanted. note that the other * end may not yet know their ip address, so if it's 0.0.0.0, give * them the benefit of the doubt. */ if (memcmp(&con->peer_addr, &con->v1.actual_peer_addr, sizeof(con->peer_addr)) != 0 && !(ceph_addr_is_blank(&con->v1.actual_peer_addr) && con->v1.actual_peer_addr.nonce == con->peer_addr.nonce)) { pr_warn("wrong peer, want %s/%u, got %s/%u\n", ceph_pr_addr(&con->peer_addr), le32_to_cpu(con->peer_addr.nonce), ceph_pr_addr(&con->v1.actual_peer_addr), le32_to_cpu(con->v1.actual_peer_addr.nonce)); con->error_msg = "wrong peer at address"; return -1; } /* * did we learn our address? */ if (ceph_addr_is_blank(my_addr)) { memcpy(&my_addr->in_addr, &con->v1.peer_addr_for_me.in_addr, sizeof(con->v1.peer_addr_for_me.in_addr)); ceph_addr_set_port(my_addr, 0); ceph_encode_my_addr(con->msgr); dout("process_banner learned my addr is %s\n", ceph_pr_addr(my_addr)); } return 0; } static int process_connect(struct ceph_connection *con) { u64 sup_feat = from_msgr(con->msgr)->supported_features; u64 req_feat = from_msgr(con->msgr)->required_features; u64 server_feat = le64_to_cpu(con->v1.in_reply.features); int ret; dout("process_connect on %p tag %d\n", con, con->v1.in_tag); if (con->v1.auth) { int len = le32_to_cpu(con->v1.in_reply.authorizer_len); /* * Any connection that defines ->get_authorizer() * should also define ->add_authorizer_challenge() and * ->verify_authorizer_reply(). * * See get_connect_authorizer(). */ if (con->v1.in_reply.tag == CEPH_MSGR_TAG_CHALLENGE_AUTHORIZER) { ret = con->ops->add_authorizer_challenge( con, con->v1.auth->authorizer_reply_buf, len); if (ret < 0) return ret; con_out_kvec_reset(con); __prepare_write_connect(con); prepare_read_connect(con); return 0; } if (len) { ret = con->ops->verify_authorizer_reply(con); if (ret < 0) { con->error_msg = "bad authorize reply"; return ret; } } } switch (con->v1.in_reply.tag) { case CEPH_MSGR_TAG_FEATURES: pr_err("%s%lld %s feature set mismatch," " my %llx < server's %llx, missing %llx\n", ENTITY_NAME(con->peer_name), ceph_pr_addr(&con->peer_addr), sup_feat, server_feat, server_feat & ~sup_feat); con->error_msg = "missing required protocol features"; return -1; case CEPH_MSGR_TAG_BADPROTOVER: pr_err("%s%lld %s protocol version mismatch," " my %d != server's %d\n", ENTITY_NAME(con->peer_name), ceph_pr_addr(&con->peer_addr), le32_to_cpu(con->v1.out_connect.protocol_version), le32_to_cpu(con->v1.in_reply.protocol_version)); con->error_msg = "protocol version mismatch"; return -1; case CEPH_MSGR_TAG_BADAUTHORIZER: con->v1.auth_retry++; dout("process_connect %p got BADAUTHORIZER attempt %d\n", con, con->v1.auth_retry); if (con->v1.auth_retry == 2) { con->error_msg = "connect authorization failure"; return -1; } con_out_kvec_reset(con); ret = prepare_write_connect(con); if (ret < 0) return ret; prepare_read_connect(con); break; case CEPH_MSGR_TAG_RESETSESSION: /* * If we connected with a large connect_seq but the peer * has no record of a session with us (no connection, or * connect_seq == 0), they will send RESETSESION to indicate * that they must have reset their session, and may have * dropped messages. */ dout("process_connect got RESET peer seq %u\n", le32_to_cpu(con->v1.in_reply.connect_seq)); pr_info("%s%lld %s session reset\n", ENTITY_NAME(con->peer_name), ceph_pr_addr(&con->peer_addr)); ceph_con_reset_session(con); con_out_kvec_reset(con); ret = prepare_write_connect(con); if (ret < 0) return ret; prepare_read_connect(con); /* Tell ceph about it. */ mutex_unlock(&con->mutex); if (con->ops->peer_reset) con->ops->peer_reset(con); mutex_lock(&con->mutex); if (con->state != CEPH_CON_S_V1_CONNECT_MSG) return -EAGAIN; break; case CEPH_MSGR_TAG_RETRY_SESSION: /* * If we sent a smaller connect_seq than the peer has, try * again with a larger value. */ dout("process_connect got RETRY_SESSION my seq %u, peer %u\n", le32_to_cpu(con->v1.out_connect.connect_seq), le32_to_cpu(con->v1.in_reply.connect_seq)); con->v1.connect_seq = le32_to_cpu(con->v1.in_reply.connect_seq); con_out_kvec_reset(con); ret = prepare_write_connect(con); if (ret < 0) return ret; prepare_read_connect(con); break; case CEPH_MSGR_TAG_RETRY_GLOBAL: /* * If we sent a smaller global_seq than the peer has, try * again with a larger value. */ dout("process_connect got RETRY_GLOBAL my %u peer_gseq %u\n", con->v1.peer_global_seq, le32_to_cpu(con->v1.in_reply.global_seq)); ceph_get_global_seq(con->msgr, le32_to_cpu(con->v1.in_reply.global_seq)); con_out_kvec_reset(con); ret = prepare_write_connect(con); if (ret < 0) return ret; prepare_read_connect(con); break; case CEPH_MSGR_TAG_SEQ: case CEPH_MSGR_TAG_READY: if (req_feat & ~server_feat) { pr_err("%s%lld %s protocol feature mismatch," " my required %llx > server's %llx, need %llx\n", ENTITY_NAME(con->peer_name), ceph_pr_addr(&con->peer_addr), req_feat, server_feat, req_feat & ~server_feat); con->error_msg = "missing required protocol features"; return -1; } WARN_ON(con->state != CEPH_CON_S_V1_CONNECT_MSG); con->state = CEPH_CON_S_OPEN; con->v1.auth_retry = 0; /* we authenticated; clear flag */ con->v1.peer_global_seq = le32_to_cpu(con->v1.in_reply.global_seq); con->v1.connect_seq++; con->peer_features = server_feat; dout("process_connect got READY gseq %d cseq %d (%d)\n", con->v1.peer_global_seq, le32_to_cpu(con->v1.in_reply.connect_seq), con->v1.connect_seq); WARN_ON(con->v1.connect_seq != le32_to_cpu(con->v1.in_reply.connect_seq)); if (con->v1.in_reply.flags & CEPH_MSG_CONNECT_LOSSY) ceph_con_flag_set(con, CEPH_CON_F_LOSSYTX); con->delay = 0; /* reset backoff memory */ if (con->v1.in_reply.tag == CEPH_MSGR_TAG_SEQ) { prepare_write_seq(con); prepare_read_seq(con); } else { prepare_read_tag(con); } break; case CEPH_MSGR_TAG_WAIT: /* * If there is a connection race (we are opening * connections to each other), one of us may just have * to WAIT. This shouldn't happen if we are the * client. */ con->error_msg = "protocol error, got WAIT as client"; return -1; default: con->error_msg = "protocol error, garbage tag during connect"; return -1; } return 0; } /* * read (part of) an ack */ static int read_partial_ack(struct ceph_connection *con) { int size = sizeof(con->v1.in_temp_ack); int end = size; return read_partial(con, end, size, &con->v1.in_temp_ack); } /* * We can finally discard anything that's been acked. */ static void process_ack(struct ceph_connection *con) { u64 ack = le64_to_cpu(con->v1.in_temp_ack); if (con->v1.in_tag == CEPH_MSGR_TAG_ACK) ceph_con_discard_sent(con, ack); else ceph_con_discard_requeued(con, ack); prepare_read_tag(con); } static int read_partial_message_chunk(struct ceph_connection *con, struct kvec *section, unsigned int sec_len, u32 *crc) { int ret, left; BUG_ON(!section); while (section->iov_len < sec_len) { BUG_ON(section->iov_base == NULL); left = sec_len - section->iov_len; ret = ceph_tcp_recvmsg(con->sock, (char *)section->iov_base + section->iov_len, left); if (ret <= 0) return ret; section->iov_len += ret; } if (section->iov_len == sec_len) *crc = crc32c(*crc, section->iov_base, section->iov_len); return 1; } static inline int read_partial_message_section(struct ceph_connection *con, struct kvec *section, unsigned int sec_len, u32 *crc) { *crc = 0; return read_partial_message_chunk(con, section, sec_len, crc); } static int read_partial_sparse_msg_extent(struct ceph_connection *con, u32 *crc) { struct ceph_msg_data_cursor *cursor = &con->in_msg->cursor; bool do_bounce = ceph_test_opt(from_msgr(con->msgr), RXBOUNCE); if (do_bounce && unlikely(!con->bounce_page)) { con->bounce_page = alloc_page(GFP_NOIO); if (!con->bounce_page) { pr_err("failed to allocate bounce page\n"); return -ENOMEM; } } while (cursor->sr_resid > 0) { struct page *page, *rpage; size_t off, len; int ret; page = ceph_msg_data_next(cursor, &off, &len); rpage = do_bounce ? con->bounce_page : page; /* clamp to what remains in extent */ len = min_t(int, len, cursor->sr_resid); ret = ceph_tcp_recvpage(con->sock, rpage, (int)off, len); if (ret <= 0) return ret; *crc = ceph_crc32c_page(*crc, rpage, off, ret); ceph_msg_data_advance(cursor, (size_t)ret); cursor->sr_resid -= ret; if (do_bounce) memcpy_page(page, off, rpage, off, ret); } return 1; } static int read_partial_sparse_msg_data(struct ceph_connection *con) { struct ceph_msg_data_cursor *cursor = &con->in_msg->cursor; bool do_datacrc = !ceph_test_opt(from_msgr(con->msgr), NOCRC); u32 crc = 0; int ret = 1; if (do_datacrc) crc = con->in_data_crc; while (cursor->total_resid) { if (con->v1.in_sr_kvec.iov_base) ret = read_partial_message_chunk(con, &con->v1.in_sr_kvec, con->v1.in_sr_len, &crc); else if (cursor->sr_resid > 0) ret = read_partial_sparse_msg_extent(con, &crc); if (ret <= 0) break; memset(&con->v1.in_sr_kvec, 0, sizeof(con->v1.in_sr_kvec)); ret = con->ops->sparse_read(con, cursor, (char **)&con->v1.in_sr_kvec.iov_base); if (ret <= 0) { ret = ret ? ret : 1; /* must return > 0 to indicate success */ break; } con->v1.in_sr_len = ret; } if (do_datacrc) con->in_data_crc = crc; return ret; } static int read_partial_msg_data(struct ceph_connection *con) { struct ceph_msg_data_cursor *cursor = &con->in_msg->cursor; bool do_datacrc = !ceph_test_opt(from_msgr(con->msgr), NOCRC); struct page *page; size_t page_offset; size_t length; u32 crc = 0; int ret; if (do_datacrc) crc = con->in_data_crc; while (cursor->total_resid) { if (!cursor->resid) { ceph_msg_data_advance(cursor, 0); continue; } page = ceph_msg_data_next(cursor, &page_offset, &length); ret = ceph_tcp_recvpage(con->sock, page, page_offset, length); if (ret <= 0) { if (do_datacrc) con->in_data_crc = crc; return ret; } if (do_datacrc) crc = ceph_crc32c_page(crc, page, page_offset, ret); ceph_msg_data_advance(cursor, (size_t)ret); } if (do_datacrc) con->in_data_crc = crc; return 1; /* must return > 0 to indicate success */ } static int read_partial_msg_data_bounce(struct ceph_connection *con) { struct ceph_msg_data_cursor *cursor = &con->in_msg->cursor; struct page *page; size_t off, len; u32 crc; int ret; if (unlikely(!con->bounce_page)) { con->bounce_page = alloc_page(GFP_NOIO); if (!con->bounce_page) { pr_err("failed to allocate bounce page\n"); return -ENOMEM; } } crc = con->in_data_crc; while (cursor->total_resid) { if (!cursor->resid) { ceph_msg_data_advance(cursor, 0); continue; } page = ceph_msg_data_next(cursor, &off, &len); ret = ceph_tcp_recvpage(con->sock, con->bounce_page, 0, len); if (ret <= 0) { con->in_data_crc = crc; return ret; } crc = crc32c(crc, page_address(con->bounce_page), ret); memcpy_to_page(page, off, page_address(con->bounce_page), ret); ceph_msg_data_advance(cursor, ret); } con->in_data_crc = crc; return 1; /* must return > 0 to indicate success */ } /* * read (part of) a message. */ static int read_partial_message(struct ceph_connection *con) { struct ceph_msg *m = con->in_msg; int size; int end; int ret; unsigned int front_len, middle_len, data_len; bool do_datacrc = !ceph_test_opt(from_msgr(con->msgr), NOCRC); bool need_sign = (con->peer_features & CEPH_FEATURE_MSG_AUTH); u64 seq; u32 crc; dout("read_partial_message con %p msg %p\n", con, m); /* header */ size = sizeof(con->v1.in_hdr); end = size; ret = read_partial(con, end, size, &con->v1.in_hdr); if (ret <= 0) return ret; crc = crc32c(0, &con->v1.in_hdr, offsetof(struct ceph_msg_header, crc)); if (cpu_to_le32(crc) != con->v1.in_hdr.crc) { pr_err("read_partial_message bad hdr crc %u != expected %u\n", crc, con->v1.in_hdr.crc); return -EBADMSG; } front_len = le32_to_cpu(con->v1.in_hdr.front_len); if (front_len > CEPH_MSG_MAX_FRONT_LEN) return -EIO; middle_len = le32_to_cpu(con->v1.in_hdr.middle_len); if (middle_len > CEPH_MSG_MAX_MIDDLE_LEN) return -EIO; data_len = le32_to_cpu(con->v1.in_hdr.data_len); if (data_len > CEPH_MSG_MAX_DATA_LEN) return -EIO; /* verify seq# */ seq = le64_to_cpu(con->v1.in_hdr.seq); if ((s64)seq - (s64)con->in_seq < 1) { pr_info("skipping %s%lld %s seq %lld expected %lld\n", ENTITY_NAME(con->peer_name), ceph_pr_addr(&con->peer_addr), seq, con->in_seq + 1); con->v1.in_base_pos = -front_len - middle_len - data_len - sizeof_footer(con); con->v1.in_tag = CEPH_MSGR_TAG_READY; return 1; } else if ((s64)seq - (s64)con->in_seq > 1) { pr_err("read_partial_message bad seq %lld expected %lld\n", seq, con->in_seq + 1); con->error_msg = "bad message sequence # for incoming message"; return -EBADE; } /* allocate message? */ if (!con->in_msg) { int skip = 0; dout("got hdr type %d front %d data %d\n", con->v1.in_hdr.type, front_len, data_len); ret = ceph_con_in_msg_alloc(con, &con->v1.in_hdr, &skip); if (ret < 0) return ret; BUG_ON((!con->in_msg) ^ skip); if (skip) { /* skip this message */ dout("alloc_msg said skip message\n"); con->v1.in_base_pos = -front_len - middle_len - data_len - sizeof_footer(con); con->v1.in_tag = CEPH_MSGR_TAG_READY; con->in_seq++; return 1; } BUG_ON(!con->in_msg); BUG_ON(con->in_msg->con != con); m = con->in_msg; m->front.iov_len = 0; /* haven't read it yet */ if (m->middle) m->middle->vec.iov_len = 0; /* prepare for data payload, if any */ if (data_len) prepare_message_data(con->in_msg, data_len); } /* front */ ret = read_partial_message_section(con, &m->front, front_len, &con->in_front_crc); if (ret <= 0) return ret; /* middle */ if (m->middle) { ret = read_partial_message_section(con, &m->middle->vec, middle_len, &con->in_middle_crc); if (ret <= 0) return ret; } /* (page) data */ if (data_len) { if (!m->num_data_items) return -EIO; if (m->sparse_read_total) ret = read_partial_sparse_msg_data(con); else if (ceph_test_opt(from_msgr(con->msgr), RXBOUNCE)) ret = read_partial_msg_data_bounce(con); else ret = read_partial_msg_data(con); if (ret <= 0) return ret; } /* footer */ size = sizeof_footer(con); end += size; ret = read_partial(con, end, size, &m->footer); if (ret <= 0) return ret; if (!need_sign) { m->footer.flags = m->old_footer.flags; m->footer.sig = 0; } dout("read_partial_message got msg %p %d (%u) + %d (%u) + %d (%u)\n", m, front_len, m->footer.front_crc, middle_len, m->footer.middle_crc, data_len, m->footer.data_crc); /* crc ok? */ if (con->in_front_crc != le32_to_cpu(m->footer.front_crc)) { pr_err("read_partial_message %p front crc %u != exp. %u\n", m, con->in_front_crc, m->footer.front_crc); return -EBADMSG; } if (con->in_middle_crc != le32_to_cpu(m->footer.middle_crc)) { pr_err("read_partial_message %p middle crc %u != exp %u\n", m, con->in_middle_crc, m->footer.middle_crc); return -EBADMSG; } if (do_datacrc && (m->footer.flags & CEPH_MSG_FOOTER_NOCRC) == 0 && con->in_data_crc != le32_to_cpu(m->footer.data_crc)) { pr_err("read_partial_message %p data crc %u != exp. %u\n", m, con->in_data_crc, le32_to_cpu(m->footer.data_crc)); return -EBADMSG; } if (need_sign && con->ops->check_message_signature && con->ops->check_message_signature(m)) { pr_err("read_partial_message %p signature check failed\n", m); return -EBADMSG; } return 1; /* done! */ } static int read_keepalive_ack(struct ceph_connection *con) { struct ceph_timespec ceph_ts; size_t size = sizeof(ceph_ts); int ret = read_partial(con, size, size, &ceph_ts); if (ret <= 0) return ret; ceph_decode_timespec64(&con->last_keepalive_ack, &ceph_ts); prepare_read_tag(con); return 1; } /* * Read what we can from the socket. */ int ceph_con_v1_try_read(struct ceph_connection *con) { int ret = -1; more: dout("try_read start %p state %d\n", con, con->state); if (con->state != CEPH_CON_S_V1_BANNER && con->state != CEPH_CON_S_V1_CONNECT_MSG && con->state != CEPH_CON_S_OPEN) return 0; BUG_ON(!con->sock); dout("try_read tag %d in_base_pos %d\n", con->v1.in_tag, con->v1.in_base_pos); if (con->state == CEPH_CON_S_V1_BANNER) { ret = read_partial_banner(con); if (ret <= 0) goto out; ret = process_banner(con); if (ret < 0) goto out; con->state = CEPH_CON_S_V1_CONNECT_MSG; /* * Received banner is good, exchange connection info. * Do not reset out_kvec, as sending our banner raced * with receiving peer banner after connect completed. */ ret = prepare_write_connect(con); if (ret < 0) goto out; prepare_read_connect(con); /* Send connection info before awaiting response */ goto out; } if (con->state == CEPH_CON_S_V1_CONNECT_MSG) { ret = read_partial_connect(con); if (ret <= 0) goto out; ret = process_connect(con); if (ret < 0) goto out; goto more; } WARN_ON(con->state != CEPH_CON_S_OPEN); if (con->v1.in_base_pos < 0) { /* * skipping + discarding content. */ ret = ceph_tcp_recvmsg(con->sock, NULL, -con->v1.in_base_pos); if (ret <= 0) goto out; dout("skipped %d / %d bytes\n", ret, -con->v1.in_base_pos); con->v1.in_base_pos += ret; if (con->v1.in_base_pos) goto more; } if (con->v1.in_tag == CEPH_MSGR_TAG_READY) { /* * what's next? */ ret = ceph_tcp_recvmsg(con->sock, &con->v1.in_tag, 1); if (ret <= 0) goto out; dout("try_read got tag %d\n", con->v1.in_tag); switch (con->v1.in_tag) { case CEPH_MSGR_TAG_MSG: prepare_read_message(con); break; case CEPH_MSGR_TAG_ACK: prepare_read_ack(con); break; case CEPH_MSGR_TAG_KEEPALIVE2_ACK: prepare_read_keepalive_ack(con); break; case CEPH_MSGR_TAG_CLOSE: ceph_con_close_socket(con); con->state = CEPH_CON_S_CLOSED; goto out; default: goto bad_tag; } } if (con->v1.in_tag == CEPH_MSGR_TAG_MSG) { ret = read_partial_message(con); if (ret <= 0) { switch (ret) { case -EBADMSG: con->error_msg = "bad crc/signature"; fallthrough; case -EBADE: ret = -EIO; break; case -EIO: con->error_msg = "io error"; break; } goto out; } if (con->v1.in_tag == CEPH_MSGR_TAG_READY) goto more; ceph_con_process_message(con); if (con->state == CEPH_CON_S_OPEN) prepare_read_tag(con); goto more; } if (con->v1.in_tag == CEPH_MSGR_TAG_ACK || con->v1.in_tag == CEPH_MSGR_TAG_SEQ) { /* * the final handshake seq exchange is semantically * equivalent to an ACK */ ret = read_partial_ack(con); if (ret <= 0) goto out; process_ack(con); goto more; } if (con->v1.in_tag == CEPH_MSGR_TAG_KEEPALIVE2_ACK) { ret = read_keepalive_ack(con); if (ret <= 0) goto out; goto more; } out: dout("try_read done on %p ret %d\n", con, ret); return ret; bad_tag: pr_err("try_read bad tag %d\n", con->v1.in_tag); con->error_msg = "protocol error, garbage tag"; ret = -1; goto out; } /* * Write something to the socket. Called in a worker thread when the * socket appears to be writeable and we have something ready to send. */ int ceph_con_v1_try_write(struct ceph_connection *con) { int ret = 1; dout("try_write start %p state %d\n", con, con->state); if (con->state != CEPH_CON_S_PREOPEN && con->state != CEPH_CON_S_V1_BANNER && con->state != CEPH_CON_S_V1_CONNECT_MSG && con->state != CEPH_CON_S_OPEN) return 0; /* open the socket first? */ if (con->state == CEPH_CON_S_PREOPEN) { BUG_ON(con->sock); con->state = CEPH_CON_S_V1_BANNER; con_out_kvec_reset(con); prepare_write_banner(con); prepare_read_banner(con); BUG_ON(con->in_msg); con->v1.in_tag = CEPH_MSGR_TAG_READY; dout("try_write initiating connect on %p new state %d\n", con, con->state); ret = ceph_tcp_connect(con); if (ret < 0) { con->error_msg = "connect error"; goto out; } } more: dout("try_write out_kvec_bytes %d\n", con->v1.out_kvec_bytes); BUG_ON(!con->sock); /* kvec data queued? */ if (con->v1.out_kvec_left) { ret = write_partial_kvec(con); if (ret <= 0) goto out; } if (con->v1.out_skip) { ret = write_partial_skip(con); if (ret <= 0) goto out; } /* msg pages? */ if (con->out_msg) { if (con->v1.out_msg_done) { ceph_msg_put(con->out_msg); con->out_msg = NULL; /* we're done with this one */ goto do_next; } ret = write_partial_message_data(con); if (ret == 1) goto more; /* we need to send the footer, too! */ if (ret == 0) goto out; if (ret < 0) { dout("try_write write_partial_message_data err %d\n", ret); goto out; } } do_next: if (con->state == CEPH_CON_S_OPEN) { if (ceph_con_flag_test_and_clear(con, CEPH_CON_F_KEEPALIVE_PENDING)) { prepare_write_keepalive(con); goto more; } /* is anything else pending? */ if (!list_empty(&con->out_queue)) { prepare_write_message(con); goto more; } if (con->in_seq > con->in_seq_acked) { prepare_write_ack(con); goto more; } } /* Nothing to do! */ ceph_con_flag_clear(con, CEPH_CON_F_WRITE_PENDING); dout("try_write nothing else to write.\n"); ret = 0; out: dout("try_write done on %p ret %d\n", con, ret); return ret; } void ceph_con_v1_revoke(struct ceph_connection *con) { struct ceph_msg *msg = con->out_msg; WARN_ON(con->v1.out_skip); /* footer */ if (con->v1.out_msg_done) { con->v1.out_skip += con_out_kvec_skip(con); } else { WARN_ON(!msg->data_length); con->v1.out_skip += sizeof_footer(con); } /* data, middle, front */ if (msg->data_length) con->v1.out_skip += msg->cursor.total_resid; if (msg->middle) con->v1.out_skip += con_out_kvec_skip(con); con->v1.out_skip += con_out_kvec_skip(con); dout("%s con %p out_kvec_bytes %d out_skip %d\n", __func__, con, con->v1.out_kvec_bytes, con->v1.out_skip); } void ceph_con_v1_revoke_incoming(struct ceph_connection *con) { unsigned int front_len = le32_to_cpu(con->v1.in_hdr.front_len); unsigned int middle_len = le32_to_cpu(con->v1.in_hdr.middle_len); unsigned int data_len = le32_to_cpu(con->v1.in_hdr.data_len); /* skip rest of message */ con->v1.in_base_pos = con->v1.in_base_pos - sizeof(struct ceph_msg_header) - front_len - middle_len - data_len - sizeof(struct ceph_msg_footer); con->v1.in_tag = CEPH_MSGR_TAG_READY; con->in_seq++; dout("%s con %p in_base_pos %d\n", __func__, con, con->v1.in_base_pos); } bool ceph_con_v1_opened(struct ceph_connection *con) { return con->v1.connect_seq; } void ceph_con_v1_reset_session(struct ceph_connection *con) { con->v1.connect_seq = 0; con->v1.peer_global_seq = 0; } void ceph_con_v1_reset_protocol(struct ceph_connection *con) { con->v1.out_skip = 0; }
387 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 /* SPDX-License-Identifier: GPL-2.0 */ /* * Tick related global functions */ #ifndef _LINUX_TICK_H #define _LINUX_TICK_H #include <linux/clockchips.h> #include <linux/irqflags.h> #include <linux/percpu.h> #include <linux/context_tracking_state.h> #include <linux/cpumask.h> #include <linux/sched.h> #include <linux/rcupdate.h> #include <linux/static_key.h> #ifdef CONFIG_GENERIC_CLOCKEVENTS extern void __init tick_init(void); /* Should be core only, but ARM BL switcher requires it */ extern void tick_suspend_local(void); /* Should be core only, but XEN resume magic and ARM BL switcher require it */ extern void tick_resume_local(void); #else /* CONFIG_GENERIC_CLOCKEVENTS */ static inline void tick_init(void) { } static inline void tick_suspend_local(void) { } static inline void tick_resume_local(void) { } #endif /* !CONFIG_GENERIC_CLOCKEVENTS */ #if defined(CONFIG_GENERIC_CLOCKEVENTS) && defined(CONFIG_HOTPLUG_CPU) extern int tick_cpu_dying(unsigned int cpu); extern void tick_assert_timekeeping_handover(void); #else #define tick_cpu_dying NULL static inline void tick_assert_timekeeping_handover(void) { } #endif #if defined(CONFIG_GENERIC_CLOCKEVENTS) && defined(CONFIG_SUSPEND) extern void tick_freeze(void); extern void tick_unfreeze(void); #else static inline void tick_freeze(void) { } static inline void tick_unfreeze(void) { } #endif #ifdef CONFIG_TICK_ONESHOT extern void tick_irq_enter(void); # ifndef arch_needs_cpu # define arch_needs_cpu() (0) # endif # else static inline void tick_irq_enter(void) { } #endif #if defined(CONFIG_GENERIC_CLOCKEVENTS_BROADCAST) && defined(CONFIG_TICK_ONESHOT) extern void hotplug_cpu__broadcast_tick_pull(int dead_cpu); #else static inline void hotplug_cpu__broadcast_tick_pull(int dead_cpu) { } #endif enum tick_broadcast_mode { TICK_BROADCAST_OFF, TICK_BROADCAST_ON, TICK_BROADCAST_FORCE, }; enum tick_broadcast_state { TICK_BROADCAST_EXIT, TICK_BROADCAST_ENTER, }; extern struct static_key_false arch_needs_tick_broadcast; #ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST extern void tick_broadcast_control(enum tick_broadcast_mode mode); #else static inline void tick_broadcast_control(enum tick_broadcast_mode mode) { } #endif /* BROADCAST */ #ifdef CONFIG_GENERIC_CLOCKEVENTS extern int tick_broadcast_oneshot_control(enum tick_broadcast_state state); #else static inline int tick_broadcast_oneshot_control(enum tick_broadcast_state state) { return 0; } #endif static inline void tick_broadcast_enable(void) { tick_broadcast_control(TICK_BROADCAST_ON); } static inline void tick_broadcast_disable(void) { tick_broadcast_control(TICK_BROADCAST_OFF); } static inline void tick_broadcast_force(void) { tick_broadcast_control(TICK_BROADCAST_FORCE); } static inline int tick_broadcast_enter(void) { return tick_broadcast_oneshot_control(TICK_BROADCAST_ENTER); } static inline void tick_broadcast_exit(void) { tick_broadcast_oneshot_control(TICK_BROADCAST_EXIT); } enum tick_dep_bits { TICK_DEP_BIT_POSIX_TIMER = 0, TICK_DEP_BIT_PERF_EVENTS = 1, TICK_DEP_BIT_SCHED = 2, TICK_DEP_BIT_CLOCK_UNSTABLE = 3, TICK_DEP_BIT_RCU = 4, TICK_DEP_BIT_RCU_EXP = 5 }; #define TICK_DEP_BIT_MAX TICK_DEP_BIT_RCU_EXP #define TICK_DEP_MASK_NONE 0 #define TICK_DEP_MASK_POSIX_TIMER (1 << TICK_DEP_BIT_POSIX_TIMER) #define TICK_DEP_MASK_PERF_EVENTS (1 << TICK_DEP_BIT_PERF_EVENTS) #define TICK_DEP_MASK_SCHED (1 << TICK_DEP_BIT_SCHED) #define TICK_DEP_MASK_CLOCK_UNSTABLE (1 << TICK_DEP_BIT_CLOCK_UNSTABLE) #define TICK_DEP_MASK_RCU (1 << TICK_DEP_BIT_RCU) #define TICK_DEP_MASK_RCU_EXP (1 << TICK_DEP_BIT_RCU_EXP) #ifdef CONFIG_NO_HZ_COMMON extern bool tick_nohz_enabled; extern bool tick_nohz_tick_stopped(void); extern bool tick_nohz_tick_stopped_cpu(int cpu); extern void tick_nohz_idle_stop_tick(void); extern void tick_nohz_idle_retain_tick(void); extern void tick_nohz_idle_restart_tick(void); extern void tick_nohz_idle_enter(void); extern void tick_nohz_idle_exit(void); extern void tick_nohz_irq_exit(void); extern bool tick_nohz_idle_got_tick(void); extern ktime_t tick_nohz_get_next_hrtimer(void); extern ktime_t tick_nohz_get_sleep_length(ktime_t *delta_next); extern unsigned long tick_nohz_get_idle_calls_cpu(int cpu); extern u64 get_cpu_idle_time_us(int cpu, u64 *last_update_time); extern u64 get_cpu_iowait_time_us(int cpu, u64 *last_update_time); #else /* !CONFIG_NO_HZ_COMMON */ #define tick_nohz_enabled (0) static inline int tick_nohz_tick_stopped(void) { return 0; } static inline int tick_nohz_tick_stopped_cpu(int cpu) { return 0; } static inline void tick_nohz_idle_stop_tick(void) { } static inline void tick_nohz_idle_retain_tick(void) { } static inline void tick_nohz_idle_restart_tick(void) { } static inline void tick_nohz_idle_enter(void) { } static inline void tick_nohz_idle_exit(void) { } static inline bool tick_nohz_idle_got_tick(void) { return false; } static inline ktime_t tick_nohz_get_next_hrtimer(void) { /* Next wake up is the tick period, assume it starts now */ return ktime_add(ktime_get(), TICK_NSEC); } static inline ktime_t tick_nohz_get_sleep_length(ktime_t *delta_next) { *delta_next = TICK_NSEC; return *delta_next; } static inline u64 get_cpu_idle_time_us(int cpu, u64 *unused) { return -1; } static inline u64 get_cpu_iowait_time_us(int cpu, u64 *unused) { return -1; } #endif /* !CONFIG_NO_HZ_COMMON */ /* * Mask of CPUs that are nohz_full. * * Users should be guarded by CONFIG_NO_HZ_FULL or a tick_nohz_full_cpu() * check. */ extern cpumask_var_t tick_nohz_full_mask; #ifdef CONFIG_NO_HZ_FULL extern bool tick_nohz_full_running; static inline bool tick_nohz_full_enabled(void) { if (!context_tracking_enabled()) return false; return tick_nohz_full_running; } /* * Check if a CPU is part of the nohz_full subset. Arrange for evaluating * the cpu expression (typically smp_processor_id()) _after_ the static * key. */ #define tick_nohz_full_cpu(_cpu) ({ \ bool __ret = false; \ if (tick_nohz_full_enabled()) \ __ret = cpumask_test_cpu((_cpu), tick_nohz_full_mask); \ __ret; \ }) static inline void tick_nohz_full_add_cpus_to(struct cpumask *mask) { if (tick_nohz_full_enabled()) cpumask_or(mask, mask, tick_nohz_full_mask); } extern void tick_nohz_dep_set(enum tick_dep_bits bit); extern void tick_nohz_dep_clear(enum tick_dep_bits bit); extern void tick_nohz_dep_set_cpu(int cpu, enum tick_dep_bits bit); extern void tick_nohz_dep_clear_cpu(int cpu, enum tick_dep_bits bit); extern void tick_nohz_dep_set_task(struct task_struct *tsk, enum tick_dep_bits bit); extern void tick_nohz_dep_clear_task(struct task_struct *tsk, enum tick_dep_bits bit); extern void tick_nohz_dep_set_signal(struct task_struct *tsk, enum tick_dep_bits bit); extern void tick_nohz_dep_clear_signal(struct signal_struct *signal, enum tick_dep_bits bit); extern bool tick_nohz_cpu_hotpluggable(unsigned int cpu); /* * The below are tick_nohz_[set,clear]_dep() wrappers that optimize off-cases * on top of static keys. */ static inline void tick_dep_set(enum tick_dep_bits bit) { if (tick_nohz_full_enabled()) tick_nohz_dep_set(bit); } static inline void tick_dep_clear(enum tick_dep_bits bit) { if (tick_nohz_full_enabled()) tick_nohz_dep_clear(bit); } static inline void tick_dep_set_cpu(int cpu, enum tick_dep_bits bit) { if (tick_nohz_full_cpu(cpu)) tick_nohz_dep_set_cpu(cpu, bit); } static inline void tick_dep_clear_cpu(int cpu, enum tick_dep_bits bit) { if (tick_nohz_full_cpu(cpu)) tick_nohz_dep_clear_cpu(cpu, bit); } static inline void tick_dep_set_task(struct task_struct *tsk, enum tick_dep_bits bit) { if (tick_nohz_full_enabled()) tick_nohz_dep_set_task(tsk, bit); } static inline void tick_dep_clear_task(struct task_struct *tsk, enum tick_dep_bits bit) { if (tick_nohz_full_enabled()) tick_nohz_dep_clear_task(tsk, bit); } static inline void tick_dep_init_task(struct task_struct *tsk) { atomic_set(&tsk->tick_dep_mask, 0); } static inline void tick_dep_set_signal(struct task_struct *tsk, enum tick_dep_bits bit) { if (tick_nohz_full_enabled()) tick_nohz_dep_set_signal(tsk, bit); } static inline void tick_dep_clear_signal(struct signal_struct *signal, enum tick_dep_bits bit) { if (tick_nohz_full_enabled()) tick_nohz_dep_clear_signal(signal, bit); } extern void tick_nohz_full_kick_cpu(int cpu); extern void __tick_nohz_task_switch(void); extern void __init tick_nohz_full_setup(cpumask_var_t cpumask); #else static inline bool tick_nohz_full_enabled(void) { return false; } static inline bool tick_nohz_full_cpu(int cpu) { return false; } static inline void tick_nohz_full_add_cpus_to(struct cpumask *mask) { } static inline void tick_nohz_dep_set_cpu(int cpu, enum tick_dep_bits bit) { } static inline void tick_nohz_dep_clear_cpu(int cpu, enum tick_dep_bits bit) { } static inline bool tick_nohz_cpu_hotpluggable(unsigned int cpu) { return true; } static inline void tick_dep_set(enum tick_dep_bits bit) { } static inline void tick_dep_clear(enum tick_dep_bits bit) { } static inline void tick_dep_set_cpu(int cpu, enum tick_dep_bits bit) { } static inline void tick_dep_clear_cpu(int cpu, enum tick_dep_bits bit) { } static inline void tick_dep_set_task(struct task_struct *tsk, enum tick_dep_bits bit) { } static inline void tick_dep_clear_task(struct task_struct *tsk, enum tick_dep_bits bit) { } static inline void tick_dep_init_task(struct task_struct *tsk) { } static inline void tick_dep_set_signal(struct task_struct *tsk, enum tick_dep_bits bit) { } static inline void tick_dep_clear_signal(struct signal_struct *signal, enum tick_dep_bits bit) { } static inline void tick_nohz_full_kick_cpu(int cpu) { } static inline void __tick_nohz_task_switch(void) { } static inline void tick_nohz_full_setup(cpumask_var_t cpumask) { } #endif static inline void tick_nohz_task_switch(void) { if (tick_nohz_full_enabled()) __tick_nohz_task_switch(); } static inline void tick_nohz_user_enter_prepare(void) { if (tick_nohz_full_cpu(smp_processor_id())) rcu_nocb_flush_deferred_wakeup(); } #endif
10 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef BLKTRACE_H #define BLKTRACE_H #include <linux/blk-mq.h> #include <linux/relay.h> #include <linux/compat.h> #include <uapi/linux/blktrace_api.h> #include <linux/list.h> #include <linux/blk_types.h> #if defined(CONFIG_BLK_DEV_IO_TRACE) #include <linux/sysfs.h> struct blk_trace { int trace_state; struct rchan *rchan; unsigned long __percpu *sequence; unsigned char __percpu *msg_data; u16 act_mask; u64 start_lba; u64 end_lba; u32 pid; u32 dev; struct dentry *dir; struct list_head running_list; atomic_t dropped; }; extern int blk_trace_ioctl(struct block_device *, unsigned, char __user *); extern void blk_trace_shutdown(struct request_queue *); __printf(3, 4) void __blk_trace_note_message(struct blk_trace *bt, struct cgroup_subsys_state *css, const char *fmt, ...); /** * blk_add_trace_msg - Add a (simple) message to the blktrace stream * @q: queue the io is for * @fmt: format to print message in * args... Variable argument list for format * * Description: * Records a (simple) message onto the blktrace stream. * * NOTE: BLK_TN_MAX_MSG characters are output at most. * NOTE: Can not use 'static inline' due to presence of var args... * **/ #define blk_add_cgroup_trace_msg(q, css, fmt, ...) \ do { \ struct blk_trace *bt; \ \ rcu_read_lock(); \ bt = rcu_dereference((q)->blk_trace); \ if (unlikely(bt)) \ __blk_trace_note_message(bt, css, fmt, ##__VA_ARGS__);\ rcu_read_unlock(); \ } while (0) #define blk_add_trace_msg(q, fmt, ...) \ blk_add_cgroup_trace_msg(q, NULL, fmt, ##__VA_ARGS__) #define BLK_TN_MAX_MSG 128 static inline bool blk_trace_note_message_enabled(struct request_queue *q) { struct blk_trace *bt; bool ret; rcu_read_lock(); bt = rcu_dereference(q->blk_trace); ret = bt && (bt->act_mask & BLK_TC_NOTIFY); rcu_read_unlock(); return ret; } extern void blk_add_driver_data(struct request *rq, void *data, size_t len); extern int blk_trace_setup(struct request_queue *q, char *name, dev_t dev, struct block_device *bdev, char __user *arg); extern int blk_trace_startstop(struct request_queue *q, int start); extern int blk_trace_remove(struct request_queue *q); #else /* !CONFIG_BLK_DEV_IO_TRACE */ # define blk_trace_ioctl(bdev, cmd, arg) (-ENOTTY) # define blk_trace_shutdown(q) do { } while (0) # define blk_add_driver_data(rq, data, len) do {} while (0) # define blk_trace_setup(q, name, dev, bdev, arg) (-ENOTTY) # define blk_trace_startstop(q, start) (-ENOTTY) # define blk_add_trace_msg(q, fmt, ...) do { } while (0) # define blk_add_cgroup_trace_msg(q, cg, fmt, ...) do { } while (0) # define blk_trace_note_message_enabled(q) (false) static inline int blk_trace_remove(struct request_queue *q) { return -ENOTTY; } #endif /* CONFIG_BLK_DEV_IO_TRACE */ #ifdef CONFIG_COMPAT struct compat_blk_user_trace_setup { char name[BLKTRACE_BDEV_SIZE]; u16 act_mask; u32 buf_size; u32 buf_nr; compat_u64 start_lba; compat_u64 end_lba; u32 pid; }; #define BLKTRACESETUP32 _IOWR(0x12, 115, struct compat_blk_user_trace_setup) #endif void blk_fill_rwbs(char *rwbs, blk_opf_t opf); static inline sector_t blk_rq_trace_sector(struct request *rq) { /* * Tracing should ignore starting sector for passthrough requests and * requests where starting sector didn't get set. */ if (blk_rq_is_passthrough(rq) || blk_rq_pos(rq) == (sector_t)-1) return 0; return blk_rq_pos(rq); } static inline unsigned int blk_rq_trace_nr_sectors(struct request *rq) { return blk_rq_is_passthrough(rq) ? 0 : blk_rq_sectors(rq); } #endif
26 2 26 1 11028 11055 4 17 18 18 22 22 22 22 26 12579 12579 26 3197 3204 3211 3206 3212 249 27 27 1 25 1 25 27 25 21 26 5 5 27 27 22 27 27 23 26 26 25 26 26 26 405 408 408 408 407 408 10998 11031 10981 11032 11002 10985 3566 3572 3570 3570 3560 57 57 57 54 103 492 492 492 492 491 263 265 262 264 258 261 264 362 361 13546 1 1 1 1 1 1 1 1 5 5 5 5 5 2 4 4 4 5 21 21 21 21 1 20 3 18 18 3 18 18 18 17 18 4 8 7 6 9 12 7 9 9 12 9 12 12 1 12 1 1 1 7 7 7 7 7 7 7 7 12 12 12 12 7 7 7 7 12 12 12 6 6687 6686 6062 6078 6078 6076 1229 9 9 9 9 9 9 9 26 26 24 24 24 23 24 24 23 24 7 6 2 27 28 2 31 24 24 23 6 17 30 16 34 34 29 21 5 16 21 34 18 18 26 3 26 25 26 12 25 3 26 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282 2283 2284 2285 2286 2287 2288 2289 2290 2291 2292 2293 2294 2295 2296 2297 2298 2299 2300 2301 2302 2303 2304 2305 2306 2307 2308 2309 2310 2311 2312 2313 2314 2315 2316 2317 2318 2319 2320 2321 2322 2323 2324 2325 2326 2327 2328 2329 2330 2331 2332 2333 2334 2335 2336 2337 2338 2339 2340 2341 2342 2343 2344 2345 2346 2347 2348 2349 2350 2351 2352 2353 2354 2355 2356 2357 2358 2359 2360 2361 2362 2363 2364 2365 2366 2367 2368 2369 2370 2371 2372 2373 2374 2375 2376 2377 2378 2379 2380 2381 2382 2383 2384 2385 2386 2387 2388 2389 2390 2391 2392 2393 2394 2395 2396 2397 2398 2399 2400 2401 2402 2403 2404 2405 2406 2407 2408 2409 2410 2411 2412 2413 2414 2415 2416 2417 2418 2419 2420 2421 2422 2423 2424 2425 2426 2427 2428 2429 2430 2431 2432 2433 2434 2435 2436 2437 2438 2439 2440 2441 2442 2443 2444 2445 2446 2447 2448 2449 2450 2451 2452 2453 2454 2455 2456 2457 2458 2459 2460 2461 2462 2463 2464 2465 2466 2467 2468 2469 2470 2471 2472 2473 2474 2475 2476 2477 2478 2479 2480 2481 2482 2483 2484 2485 2486 2487 2488 2489 2490 2491 2492 2493 2494 2495 2496 2497 2498 2499 2500 2501 2502 2503 2504 2505 2506 2507 2508 2509 2510 2511 2512 2513 2514 2515 2516 2517 2518 2519 2520 2521 2522 2523 2524 2525 2526 2527 2528 2529 2530 2531 2532 2533 2534 2535 2536 2537 2538 2539 2540 2541 2542 2543 2544 2545 2546 2547 2548 2549 2550 2551 2552 2553 2554 2555 2556 2557 2558 2559 2560 2561 2562 2563 2564 2565 2566 2567 2568 2569 2570 2571 2572 2573 2574 2575 // SPDX-License-Identifier: GPL-2.0 /* * Kernel timekeeping code and accessor functions. Based on code from * timer.c, moved in commit 8524070b7982. */ #include <linux/timekeeper_internal.h> #include <linux/module.h> #include <linux/interrupt.h> #include <linux/percpu.h> #include <linux/init.h> #include <linux/mm.h> #include <linux/nmi.h> #include <linux/sched.h> #include <linux/sched/loadavg.h> #include <linux/sched/clock.h> #include <linux/syscore_ops.h> #include <linux/clocksource.h> #include <linux/jiffies.h> #include <linux/time.h> #include <linux/timex.h> #include <linux/tick.h> #include <linux/stop_machine.h> #include <linux/pvclock_gtod.h> #include <linux/compiler.h> #include <linux/audit.h> #include <linux/random.h> #include "tick-internal.h" #include "ntp_internal.h" #include "timekeeping_internal.h" #define TK_CLEAR_NTP (1 << 0) #define TK_CLOCK_WAS_SET (1 << 1) #define TK_UPDATE_ALL (TK_CLEAR_NTP | TK_CLOCK_WAS_SET) enum timekeeping_adv_mode { /* Update timekeeper when a tick has passed */ TK_ADV_TICK, /* Update timekeeper on a direct frequency change */ TK_ADV_FREQ }; /* * The most important data for readout fits into a single 64 byte * cache line. */ struct tk_data { seqcount_raw_spinlock_t seq; struct timekeeper timekeeper; struct timekeeper shadow_timekeeper; raw_spinlock_t lock; } ____cacheline_aligned; static struct tk_data tk_core; /* flag for if timekeeping is suspended */ int __read_mostly timekeeping_suspended; /** * struct tk_fast - NMI safe timekeeper * @seq: Sequence counter for protecting updates. The lowest bit * is the index for the tk_read_base array * @base: tk_read_base array. Access is indexed by the lowest bit of * @seq. * * See @update_fast_timekeeper() below. */ struct tk_fast { seqcount_latch_t seq; struct tk_read_base base[2]; }; /* Suspend-time cycles value for halted fast timekeeper. */ static u64 cycles_at_suspend; static u64 dummy_clock_read(struct clocksource *cs) { if (timekeeping_suspended) return cycles_at_suspend; return local_clock(); } static struct clocksource dummy_clock = { .read = dummy_clock_read, }; /* * Boot time initialization which allows local_clock() to be utilized * during early boot when clocksources are not available. local_clock() * returns nanoseconds already so no conversion is required, hence mult=1 * and shift=0. When the first proper clocksource is installed then * the fast time keepers are updated with the correct values. */ #define FAST_TK_INIT \ { \ .clock = &dummy_clock, \ .mask = CLOCKSOURCE_MASK(64), \ .mult = 1, \ .shift = 0, \ } static struct tk_fast tk_fast_mono ____cacheline_aligned = { .seq = SEQCNT_LATCH_ZERO(tk_fast_mono.seq), .base[0] = FAST_TK_INIT, .base[1] = FAST_TK_INIT, }; static struct tk_fast tk_fast_raw ____cacheline_aligned = { .seq = SEQCNT_LATCH_ZERO(tk_fast_raw.seq), .base[0] = FAST_TK_INIT, .base[1] = FAST_TK_INIT, }; unsigned long timekeeper_lock_irqsave(void) { unsigned long flags; raw_spin_lock_irqsave(&tk_core.lock, flags); return flags; } void timekeeper_unlock_irqrestore(unsigned long flags) { raw_spin_unlock_irqrestore(&tk_core.lock, flags); } /* * Multigrain timestamps require tracking the latest fine-grained timestamp * that has been issued, and never returning a coarse-grained timestamp that is * earlier than that value. * * mg_floor represents the latest fine-grained time that has been handed out as * a file timestamp on the system. This is tracked as a monotonic ktime_t, and * converted to a realtime clock value on an as-needed basis. * * Maintaining mg_floor ensures the multigrain interfaces never issue a * timestamp earlier than one that has been previously issued. * * The exception to this rule is when there is a backward realtime clock jump. If * such an event occurs, a timestamp can appear to be earlier than a previous one. */ static __cacheline_aligned_in_smp atomic64_t mg_floor; static inline void tk_normalize_xtime(struct timekeeper *tk) { while (tk->tkr_mono.xtime_nsec >= ((u64)NSEC_PER_SEC << tk->tkr_mono.shift)) { tk->tkr_mono.xtime_nsec -= (u64)NSEC_PER_SEC << tk->tkr_mono.shift; tk->xtime_sec++; } while (tk->tkr_raw.xtime_nsec >= ((u64)NSEC_PER_SEC << tk->tkr_raw.shift)) { tk->tkr_raw.xtime_nsec -= (u64)NSEC_PER_SEC << tk->tkr_raw.shift; tk->raw_sec++; } } static inline struct timespec64 tk_xtime(const struct timekeeper *tk) { struct timespec64 ts; ts.tv_sec = tk->xtime_sec; ts.tv_nsec = (long)(tk->tkr_mono.xtime_nsec >> tk->tkr_mono.shift); return ts; } static void tk_set_xtime(struct timekeeper *tk, const struct timespec64 *ts) { tk->xtime_sec = ts->tv_sec; tk->tkr_mono.xtime_nsec = (u64)ts->tv_nsec << tk->tkr_mono.shift; } static void tk_xtime_add(struct timekeeper *tk, const struct timespec64 *ts) { tk->xtime_sec += ts->tv_sec; tk->tkr_mono.xtime_nsec += (u64)ts->tv_nsec << tk->tkr_mono.shift; tk_normalize_xtime(tk); } static void tk_set_wall_to_mono(struct timekeeper *tk, struct timespec64 wtm) { struct timespec64 tmp; /* * Verify consistency of: offset_real = -wall_to_monotonic * before modifying anything */ set_normalized_timespec64(&tmp, -tk->wall_to_monotonic.tv_sec, -tk->wall_to_monotonic.tv_nsec); WARN_ON_ONCE(tk->offs_real != timespec64_to_ktime(tmp)); tk->wall_to_monotonic = wtm; set_normalized_timespec64(&tmp, -wtm.tv_sec, -wtm.tv_nsec); /* Paired with READ_ONCE() in ktime_mono_to_any() */ WRITE_ONCE(tk->offs_real, timespec64_to_ktime(tmp)); WRITE_ONCE(tk->offs_tai, ktime_add(tk->offs_real, ktime_set(tk->tai_offset, 0))); } static inline void tk_update_sleep_time(struct timekeeper *tk, ktime_t delta) { /* Paired with READ_ONCE() in ktime_mono_to_any() */ WRITE_ONCE(tk->offs_boot, ktime_add(tk->offs_boot, delta)); /* * Timespec representation for VDSO update to avoid 64bit division * on every update. */ tk->monotonic_to_boot = ktime_to_timespec64(tk->offs_boot); } /* * tk_clock_read - atomic clocksource read() helper * * This helper is necessary to use in the read paths because, while the * seqcount ensures we don't return a bad value while structures are updated, * it doesn't protect from potential crashes. There is the possibility that * the tkr's clocksource may change between the read reference, and the * clock reference passed to the read function. This can cause crashes if * the wrong clocksource is passed to the wrong read function. * This isn't necessary to use when holding the tk_core.lock or doing * a read of the fast-timekeeper tkrs (which is protected by its own locking * and update logic). */ static inline u64 tk_clock_read(const struct tk_read_base *tkr) { struct clocksource *clock = READ_ONCE(tkr->clock); return clock->read(clock); } /** * tk_setup_internals - Set up internals to use clocksource clock. * * @tk: The target timekeeper to setup. * @clock: Pointer to clocksource. * * Calculates a fixed cycle/nsec interval for a given clocksource/adjustment * pair and interval request. * * Unless you're the timekeeping code, you should not be using this! */ static void tk_setup_internals(struct timekeeper *tk, struct clocksource *clock) { u64 interval; u64 tmp, ntpinterval; struct clocksource *old_clock; ++tk->cs_was_changed_seq; old_clock = tk->tkr_mono.clock; tk->tkr_mono.clock = clock; tk->tkr_mono.mask = clock->mask; tk->tkr_mono.cycle_last = tk_clock_read(&tk->tkr_mono); tk->tkr_raw.clock = clock; tk->tkr_raw.mask = clock->mask; tk->tkr_raw.cycle_last = tk->tkr_mono.cycle_last; /* Do the ns -> cycle conversion first, using original mult */ tmp = NTP_INTERVAL_LENGTH; tmp <<= clock->shift; ntpinterval = tmp; tmp += clock->mult/2; do_div(tmp, clock->mult); if (tmp == 0) tmp = 1; interval = (u64) tmp; tk->cycle_interval = interval; /* Go back from cycles -> shifted ns */ tk->xtime_interval = interval * clock->mult; tk->xtime_remainder = ntpinterval - tk->xtime_interval; tk->raw_interval = interval * clock->mult; /* if changing clocks, convert xtime_nsec shift units */ if (old_clock) { int shift_change = clock->shift - old_clock->shift; if (shift_change < 0) { tk->tkr_mono.xtime_nsec >>= -shift_change; tk->tkr_raw.xtime_nsec >>= -shift_change; } else { tk->tkr_mono.xtime_nsec <<= shift_change; tk->tkr_raw.xtime_nsec <<= shift_change; } } tk->tkr_mono.shift = clock->shift; tk->tkr_raw.shift = clock->shift; tk->ntp_error = 0; tk->ntp_error_shift = NTP_SCALE_SHIFT - clock->shift; tk->ntp_tick = ntpinterval << tk->ntp_error_shift; /* * The timekeeper keeps its own mult values for the currently * active clocksource. These value will be adjusted via NTP * to counteract clock drifting. */ tk->tkr_mono.mult = clock->mult; tk->tkr_raw.mult = clock->mult; tk->ntp_err_mult = 0; tk->skip_second_overflow = 0; } /* Timekeeper helper functions. */ static noinline u64 delta_to_ns_safe(const struct tk_read_base *tkr, u64 delta) { return mul_u64_u32_add_u64_shr(delta, tkr->mult, tkr->xtime_nsec, tkr->shift); } static inline u64 timekeeping_cycles_to_ns(const struct tk_read_base *tkr, u64 cycles) { /* Calculate the delta since the last update_wall_time() */ u64 mask = tkr->mask, delta = (cycles - tkr->cycle_last) & mask; /* * This detects both negative motion and the case where the delta * overflows the multiplication with tkr->mult. */ if (unlikely(delta > tkr->clock->max_cycles)) { /* * Handle clocksource inconsistency between CPUs to prevent * time from going backwards by checking for the MSB of the * mask being set in the delta. */ if (delta & ~(mask >> 1)) return tkr->xtime_nsec >> tkr->shift; return delta_to_ns_safe(tkr, delta); } return ((delta * tkr->mult) + tkr->xtime_nsec) >> tkr->shift; } static __always_inline u64 timekeeping_get_ns(const struct tk_read_base *tkr) { return timekeeping_cycles_to_ns(tkr, tk_clock_read(tkr)); } /** * update_fast_timekeeper - Update the fast and NMI safe monotonic timekeeper. * @tkr: Timekeeping readout base from which we take the update * @tkf: Pointer to NMI safe timekeeper * * We want to use this from any context including NMI and tracing / * instrumenting the timekeeping code itself. * * Employ the latch technique; see @write_seqcount_latch. * * So if a NMI hits the update of base[0] then it will use base[1] * which is still consistent. In the worst case this can result is a * slightly wrong timestamp (a few nanoseconds). See * @ktime_get_mono_fast_ns. */ static void update_fast_timekeeper(const struct tk_read_base *tkr, struct tk_fast *tkf) { struct tk_read_base *base = tkf->base; /* Force readers off to base[1] */ write_seqcount_latch_begin(&tkf->seq); /* Update base[0] */ memcpy(base, tkr, sizeof(*base)); /* Force readers back to base[0] */ write_seqcount_latch(&tkf->seq); /* Update base[1] */ memcpy(base + 1, base, sizeof(*base)); write_seqcount_latch_end(&tkf->seq); } static __always_inline u64 __ktime_get_fast_ns(struct tk_fast *tkf) { struct tk_read_base *tkr; unsigned int seq; u64 now; do { seq = read_seqcount_latch(&tkf->seq); tkr = tkf->base + (seq & 0x01); now = ktime_to_ns(tkr->base); now += timekeeping_get_ns(tkr); } while (read_seqcount_latch_retry(&tkf->seq, seq)); return now; } /** * ktime_get_mono_fast_ns - Fast NMI safe access to clock monotonic * * This timestamp is not guaranteed to be monotonic across an update. * The timestamp is calculated by: * * now = base_mono + clock_delta * slope * * So if the update lowers the slope, readers who are forced to the * not yet updated second array are still using the old steeper slope. * * tmono * ^ * | o n * | o n * | u * | o * |o * |12345678---> reader order * * o = old slope * u = update * n = new slope * * So reader 6 will observe time going backwards versus reader 5. * * While other CPUs are likely to be able to observe that, the only way * for a CPU local observation is when an NMI hits in the middle of * the update. Timestamps taken from that NMI context might be ahead * of the following timestamps. Callers need to be aware of that and * deal with it. */ u64 notrace ktime_get_mono_fast_ns(void) { return __ktime_get_fast_ns(&tk_fast_mono); } EXPORT_SYMBOL_GPL(ktime_get_mono_fast_ns); /** * ktime_get_raw_fast_ns - Fast NMI safe access to clock monotonic raw * * Contrary to ktime_get_mono_fast_ns() this is always correct because the * conversion factor is not affected by NTP/PTP correction. */ u64 notrace ktime_get_raw_fast_ns(void) { return __ktime_get_fast_ns(&tk_fast_raw); } EXPORT_SYMBOL_GPL(ktime_get_raw_fast_ns); /** * ktime_get_boot_fast_ns - NMI safe and fast access to boot clock. * * To keep it NMI safe since we're accessing from tracing, we're not using a * separate timekeeper with updates to monotonic clock and boot offset * protected with seqcounts. This has the following minor side effects: * * (1) Its possible that a timestamp be taken after the boot offset is updated * but before the timekeeper is updated. If this happens, the new boot offset * is added to the old timekeeping making the clock appear to update slightly * earlier: * CPU 0 CPU 1 * timekeeping_inject_sleeptime64() * __timekeeping_inject_sleeptime(tk, delta); * timestamp(); * timekeeping_update_staged(tkd, TK_CLEAR_NTP...); * * (2) On 32-bit systems, the 64-bit boot offset (tk->offs_boot) may be * partially updated. Since the tk->offs_boot update is a rare event, this * should be a rare occurrence which postprocessing should be able to handle. * * The caveats vs. timestamp ordering as documented for ktime_get_mono_fast_ns() * apply as well. */ u64 notrace ktime_get_boot_fast_ns(void) { struct timekeeper *tk = &tk_core.timekeeper; return (ktime_get_mono_fast_ns() + ktime_to_ns(data_race(tk->offs_boot))); } EXPORT_SYMBOL_GPL(ktime_get_boot_fast_ns); /** * ktime_get_tai_fast_ns - NMI safe and fast access to tai clock. * * The same limitations as described for ktime_get_boot_fast_ns() apply. The * mono time and the TAI offset are not read atomically which may yield wrong * readouts. However, an update of the TAI offset is an rare event e.g., caused * by settime or adjtimex with an offset. The user of this function has to deal * with the possibility of wrong timestamps in post processing. */ u64 notrace ktime_get_tai_fast_ns(void) { struct timekeeper *tk = &tk_core.timekeeper; return (ktime_get_mono_fast_ns() + ktime_to_ns(data_race(tk->offs_tai))); } EXPORT_SYMBOL_GPL(ktime_get_tai_fast_ns); /** * ktime_get_real_fast_ns: - NMI safe and fast access to clock realtime. * * See ktime_get_mono_fast_ns() for documentation of the time stamp ordering. */ u64 ktime_get_real_fast_ns(void) { struct tk_fast *tkf = &tk_fast_mono; struct tk_read_base *tkr; u64 baser, delta; unsigned int seq; do { seq = raw_read_seqcount_latch(&tkf->seq); tkr = tkf->base + (seq & 0x01); baser = ktime_to_ns(tkr->base_real); delta = timekeeping_get_ns(tkr); } while (raw_read_seqcount_latch_retry(&tkf->seq, seq)); return baser + delta; } EXPORT_SYMBOL_GPL(ktime_get_real_fast_ns); /** * halt_fast_timekeeper - Prevent fast timekeeper from accessing clocksource. * @tk: Timekeeper to snapshot. * * It generally is unsafe to access the clocksource after timekeeping has been * suspended, so take a snapshot of the readout base of @tk and use it as the * fast timekeeper's readout base while suspended. It will return the same * number of cycles every time until timekeeping is resumed at which time the * proper readout base for the fast timekeeper will be restored automatically. */ static void halt_fast_timekeeper(const struct timekeeper *tk) { static struct tk_read_base tkr_dummy; const struct tk_read_base *tkr = &tk->tkr_mono; memcpy(&tkr_dummy, tkr, sizeof(tkr_dummy)); cycles_at_suspend = tk_clock_read(tkr); tkr_dummy.clock = &dummy_clock; tkr_dummy.base_real = tkr->base + tk->offs_real; update_fast_timekeeper(&tkr_dummy, &tk_fast_mono); tkr = &tk->tkr_raw; memcpy(&tkr_dummy, tkr, sizeof(tkr_dummy)); tkr_dummy.clock = &dummy_clock; update_fast_timekeeper(&tkr_dummy, &tk_fast_raw); } static RAW_NOTIFIER_HEAD(pvclock_gtod_chain); static void update_pvclock_gtod(struct timekeeper *tk, bool was_set) { raw_notifier_call_chain(&pvclock_gtod_chain, was_set, tk); } /** * pvclock_gtod_register_notifier - register a pvclock timedata update listener * @nb: Pointer to the notifier block to register */ int pvclock_gtod_register_notifier(struct notifier_block *nb) { struct timekeeper *tk = &tk_core.timekeeper; int ret; guard(raw_spinlock_irqsave)(&tk_core.lock); ret = raw_notifier_chain_register(&pvclock_gtod_chain, nb); update_pvclock_gtod(tk, true); return ret; } EXPORT_SYMBOL_GPL(pvclock_gtod_register_notifier); /** * pvclock_gtod_unregister_notifier - unregister a pvclock * timedata update listener * @nb: Pointer to the notifier block to unregister */ int pvclock_gtod_unregister_notifier(struct notifier_block *nb) { guard(raw_spinlock_irqsave)(&tk_core.lock); return raw_notifier_chain_unregister(&pvclock_gtod_chain, nb); } EXPORT_SYMBOL_GPL(pvclock_gtod_unregister_notifier); /* * tk_update_leap_state - helper to update the next_leap_ktime */ static inline void tk_update_leap_state(struct timekeeper *tk) { tk->next_leap_ktime = ntp_get_next_leap(); if (tk->next_leap_ktime != KTIME_MAX) /* Convert to monotonic time */ tk->next_leap_ktime = ktime_sub(tk->next_leap_ktime, tk->offs_real); } /* * Leap state update for both shadow and the real timekeeper * Separate to spare a full memcpy() of the timekeeper. */ static void tk_update_leap_state_all(struct tk_data *tkd) { write_seqcount_begin(&tkd->seq); tk_update_leap_state(&tkd->shadow_timekeeper); tkd->timekeeper.next_leap_ktime = tkd->shadow_timekeeper.next_leap_ktime; write_seqcount_end(&tkd->seq); } /* * Update the ktime_t based scalar nsec members of the timekeeper */ static inline void tk_update_ktime_data(struct timekeeper *tk) { u64 seconds; u32 nsec; /* * The xtime based monotonic readout is: * nsec = (xtime_sec + wtm_sec) * 1e9 + wtm_nsec + now(); * The ktime based monotonic readout is: * nsec = base_mono + now(); * ==> base_mono = (xtime_sec + wtm_sec) * 1e9 + wtm_nsec */ seconds = (u64)(tk->xtime_sec + tk->wall_to_monotonic.tv_sec); nsec = (u32) tk->wall_to_monotonic.tv_nsec; tk->tkr_mono.base = ns_to_ktime(seconds * NSEC_PER_SEC + nsec); /* * The sum of the nanoseconds portions of xtime and * wall_to_monotonic can be greater/equal one second. Take * this into account before updating tk->ktime_sec. */ nsec += (u32)(tk->tkr_mono.xtime_nsec >> tk->tkr_mono.shift); if (nsec >= NSEC_PER_SEC) seconds++; tk->ktime_sec = seconds; /* Update the monotonic raw base */ tk->tkr_raw.base = ns_to_ktime(tk->raw_sec * NSEC_PER_SEC); } /* * Restore the shadow timekeeper from the real timekeeper. */ static void timekeeping_restore_shadow(struct tk_data *tkd) { lockdep_assert_held(&tkd->lock); memcpy(&tkd->shadow_timekeeper, &tkd->timekeeper, sizeof(tkd->timekeeper)); } static void timekeeping_update_from_shadow(struct tk_data *tkd, unsigned int action) { struct timekeeper *tk = &tk_core.shadow_timekeeper; lockdep_assert_held(&tkd->lock); /* * Block out readers before running the updates below because that * updates VDSO and other time related infrastructure. Not blocking * the readers might let a reader see time going backwards when * reading from the VDSO after the VDSO update and then reading in * the kernel from the timekeeper before that got updated. */ write_seqcount_begin(&tkd->seq); if (action & TK_CLEAR_NTP) { tk->ntp_error = 0; ntp_clear(); } tk_update_leap_state(tk); tk_update_ktime_data(tk); update_vsyscall(tk); update_pvclock_gtod(tk, action & TK_CLOCK_WAS_SET); tk->tkr_mono.base_real = tk->tkr_mono.base + tk->offs_real; update_fast_timekeeper(&tk->tkr_mono, &tk_fast_mono); update_fast_timekeeper(&tk->tkr_raw, &tk_fast_raw); if (action & TK_CLOCK_WAS_SET) tk->clock_was_set_seq++; /* * Update the real timekeeper. * * We could avoid this memcpy() by switching pointers, but that has * the downside that the reader side does not longer benefit from * the cacheline optimized data layout of the timekeeper and requires * another indirection. */ memcpy(&tkd->timekeeper, tk, sizeof(*tk)); write_seqcount_end(&tkd->seq); } /** * timekeeping_forward_now - update clock to the current time * @tk: Pointer to the timekeeper to update * * Forward the current clock to update its state since the last call to * update_wall_time(). This is useful before significant clock changes, * as it avoids having to deal with this time offset explicitly. */ static void timekeeping_forward_now(struct timekeeper *tk) { u64 cycle_now, delta; cycle_now = tk_clock_read(&tk->tkr_mono); delta = clocksource_delta(cycle_now, tk->tkr_mono.cycle_last, tk->tkr_mono.mask, tk->tkr_mono.clock->max_raw_delta); tk->tkr_mono.cycle_last = cycle_now; tk->tkr_raw.cycle_last = cycle_now; while (delta > 0) { u64 max = tk->tkr_mono.clock->max_cycles; u64 incr = delta < max ? delta : max; tk->tkr_mono.xtime_nsec += incr * tk->tkr_mono.mult; tk->tkr_raw.xtime_nsec += incr * tk->tkr_raw.mult; tk_normalize_xtime(tk); delta -= incr; } } /** * ktime_get_real_ts64 - Returns the time of day in a timespec64. * @ts: pointer to the timespec to be set * * Returns the time of day in a timespec64 (WARN if suspended). */ void ktime_get_real_ts64(struct timespec64 *ts) { struct timekeeper *tk = &tk_core.timekeeper; unsigned int seq; u64 nsecs; WARN_ON(timekeeping_suspended); do { seq = read_seqcount_begin(&tk_core.seq); ts->tv_sec = tk->xtime_sec; nsecs = timekeeping_get_ns(&tk->tkr_mono); } while (read_seqcount_retry(&tk_core.seq, seq)); ts->tv_nsec = 0; timespec64_add_ns(ts, nsecs); } EXPORT_SYMBOL(ktime_get_real_ts64); ktime_t ktime_get(void) { struct timekeeper *tk = &tk_core.timekeeper; unsigned int seq; ktime_t base; u64 nsecs; WARN_ON(timekeeping_suspended); do { seq = read_seqcount_begin(&tk_core.seq); base = tk->tkr_mono.base; nsecs = timekeeping_get_ns(&tk->tkr_mono); } while (read_seqcount_retry(&tk_core.seq, seq)); return ktime_add_ns(base, nsecs); } EXPORT_SYMBOL_GPL(ktime_get); u32 ktime_get_resolution_ns(void) { struct timekeeper *tk = &tk_core.timekeeper; unsigned int seq; u32 nsecs; WARN_ON(timekeeping_suspended); do { seq = read_seqcount_begin(&tk_core.seq); nsecs = tk->tkr_mono.mult >> tk->tkr_mono.shift; } while (read_seqcount_retry(&tk_core.seq, seq)); return nsecs; } EXPORT_SYMBOL_GPL(ktime_get_resolution_ns); static ktime_t *offsets[TK_OFFS_MAX] = { [TK_OFFS_REAL] = &tk_core.timekeeper.offs_real, [TK_OFFS_BOOT] = &tk_core.timekeeper.offs_boot, [TK_OFFS_TAI] = &tk_core.timekeeper.offs_tai, }; ktime_t ktime_get_with_offset(enum tk_offsets offs) { struct timekeeper *tk = &tk_core.timekeeper; unsigned int seq; ktime_t base, *offset = offsets[offs]; u64 nsecs; WARN_ON(timekeeping_suspended); do { seq = read_seqcount_begin(&tk_core.seq); base = ktime_add(tk->tkr_mono.base, *offset); nsecs = timekeeping_get_ns(&tk->tkr_mono); } while (read_seqcount_retry(&tk_core.seq, seq)); return ktime_add_ns(base, nsecs); } EXPORT_SYMBOL_GPL(ktime_get_with_offset); ktime_t ktime_get_coarse_with_offset(enum tk_offsets offs) { struct timekeeper *tk = &tk_core.timekeeper; unsigned int seq; ktime_t base, *offset = offsets[offs]; u64 nsecs; WARN_ON(timekeeping_suspended); do { seq = read_seqcount_begin(&tk_core.seq); base = ktime_add(tk->tkr_mono.base, *offset); nsecs = tk->tkr_mono.xtime_nsec >> tk->tkr_mono.shift; } while (read_seqcount_retry(&tk_core.seq, seq)); return ktime_add_ns(base, nsecs); } EXPORT_SYMBOL_GPL(ktime_get_coarse_with_offset); /** * ktime_mono_to_any() - convert monotonic time to any other time * @tmono: time to convert. * @offs: which offset to use */ ktime_t ktime_mono_to_any(ktime_t tmono, enum tk_offsets offs) { ktime_t *offset = offsets[offs]; unsigned int seq; ktime_t tconv; if (IS_ENABLED(CONFIG_64BIT)) { /* * Paired with WRITE_ONCE()s in tk_set_wall_to_mono() and * tk_update_sleep_time(). */ return ktime_add(tmono, READ_ONCE(*offset)); } do { seq = read_seqcount_begin(&tk_core.seq); tconv = ktime_add(tmono, *offset); } while (read_seqcount_retry(&tk_core.seq, seq)); return tconv; } EXPORT_SYMBOL_GPL(ktime_mono_to_any); /** * ktime_get_raw - Returns the raw monotonic time in ktime_t format */ ktime_t ktime_get_raw(void) { struct timekeeper *tk = &tk_core.timekeeper; unsigned int seq; ktime_t base; u64 nsecs; do { seq = read_seqcount_begin(&tk_core.seq); base = tk->tkr_raw.base; nsecs = timekeeping_get_ns(&tk->tkr_raw); } while (read_seqcount_retry(&tk_core.seq, seq)); return ktime_add_ns(base, nsecs); } EXPORT_SYMBOL_GPL(ktime_get_raw); /** * ktime_get_ts64 - get the monotonic clock in timespec64 format * @ts: pointer to timespec variable * * The function calculates the monotonic clock from the realtime * clock and the wall_to_monotonic offset and stores the result * in normalized timespec64 format in the variable pointed to by @ts. */ void ktime_get_ts64(struct timespec64 *ts) { struct timekeeper *tk = &tk_core.timekeeper; struct timespec64 tomono; unsigned int seq; u64 nsec; WARN_ON(timekeeping_suspended); do { seq = read_seqcount_begin(&tk_core.seq); ts->tv_sec = tk->xtime_sec; nsec = timekeeping_get_ns(&tk->tkr_mono); tomono = tk->wall_to_monotonic; } while (read_seqcount_retry(&tk_core.seq, seq)); ts->tv_sec += tomono.tv_sec; ts->tv_nsec = 0; timespec64_add_ns(ts, nsec + tomono.tv_nsec); } EXPORT_SYMBOL_GPL(ktime_get_ts64); /** * ktime_get_seconds - Get the seconds portion of CLOCK_MONOTONIC * * Returns the seconds portion of CLOCK_MONOTONIC with a single non * serialized read. tk->ktime_sec is of type 'unsigned long' so this * works on both 32 and 64 bit systems. On 32 bit systems the readout * covers ~136 years of uptime which should be enough to prevent * premature wrap arounds. */ time64_t ktime_get_seconds(void) { struct timekeeper *tk = &tk_core.timekeeper; WARN_ON(timekeeping_suspended); return tk->ktime_sec; } EXPORT_SYMBOL_GPL(ktime_get_seconds); /** * ktime_get_real_seconds - Get the seconds portion of CLOCK_REALTIME * * Returns the wall clock seconds since 1970. * * For 64bit systems the fast access to tk->xtime_sec is preserved. On * 32bit systems the access must be protected with the sequence * counter to provide "atomic" access to the 64bit tk->xtime_sec * value. */ time64_t ktime_get_real_seconds(void) { struct timekeeper *tk = &tk_core.timekeeper; time64_t seconds; unsigned int seq; if (IS_ENABLED(CONFIG_64BIT)) return tk->xtime_sec; do { seq = read_seqcount_begin(&tk_core.seq); seconds = tk->xtime_sec; } while (read_seqcount_retry(&tk_core.seq, seq)); return seconds; } EXPORT_SYMBOL_GPL(ktime_get_real_seconds); /** * __ktime_get_real_seconds - The same as ktime_get_real_seconds * but without the sequence counter protect. This internal function * is called just when timekeeping lock is already held. */ noinstr time64_t __ktime_get_real_seconds(void) { struct timekeeper *tk = &tk_core.timekeeper; return tk->xtime_sec; } /** * ktime_get_snapshot - snapshots the realtime/monotonic raw clocks with counter * @systime_snapshot: pointer to struct receiving the system time snapshot */ void ktime_get_snapshot(struct system_time_snapshot *systime_snapshot) { struct timekeeper *tk = &tk_core.timekeeper; unsigned int seq; ktime_t base_raw; ktime_t base_real; ktime_t base_boot; u64 nsec_raw; u64 nsec_real; u64 now; WARN_ON_ONCE(timekeeping_suspended); do { seq = read_seqcount_begin(&tk_core.seq); now = tk_clock_read(&tk->tkr_mono); systime_snapshot->cs_id = tk->tkr_mono.clock->id; systime_snapshot->cs_was_changed_seq = tk->cs_was_changed_seq; systime_snapshot->clock_was_set_seq = tk->clock_was_set_seq; base_real = ktime_add(tk->tkr_mono.base, tk_core.timekeeper.offs_real); base_boot = ktime_add(tk->tkr_mono.base, tk_core.timekeeper.offs_boot); base_raw = tk->tkr_raw.base; nsec_real = timekeeping_cycles_to_ns(&tk->tkr_mono, now); nsec_raw = timekeeping_cycles_to_ns(&tk->tkr_raw, now); } while (read_seqcount_retry(&tk_core.seq, seq)); systime_snapshot->cycles = now; systime_snapshot->real = ktime_add_ns(base_real, nsec_real); systime_snapshot->boot = ktime_add_ns(base_boot, nsec_real); systime_snapshot->raw = ktime_add_ns(base_raw, nsec_raw); } EXPORT_SYMBOL_GPL(ktime_get_snapshot); /* Scale base by mult/div checking for overflow */ static int scale64_check_overflow(u64 mult, u64 div, u64 *base) { u64 tmp, rem; tmp = div64_u64_rem(*base, div, &rem); if (((int)sizeof(u64)*8 - fls64(mult) < fls64(tmp)) || ((int)sizeof(u64)*8 - fls64(mult) < fls64(rem))) return -EOVERFLOW; tmp *= mult; rem = div64_u64(rem * mult, div); *base = tmp + rem; return 0; } /** * adjust_historical_crosststamp - adjust crosstimestamp previous to current interval * @history: Snapshot representing start of history * @partial_history_cycles: Cycle offset into history (fractional part) * @total_history_cycles: Total history length in cycles * @discontinuity: True indicates clock was set on history period * @ts: Cross timestamp that should be adjusted using * partial/total ratio * * Helper function used by get_device_system_crosststamp() to correct the * crosstimestamp corresponding to the start of the current interval to the * system counter value (timestamp point) provided by the driver. The * total_history_* quantities are the total history starting at the provided * reference point and ending at the start of the current interval. The cycle * count between the driver timestamp point and the start of the current * interval is partial_history_cycles. */ static int adjust_historical_crosststamp(struct system_time_snapshot *history, u64 partial_history_cycles, u64 total_history_cycles, bool discontinuity, struct system_device_crosststamp *ts) { struct timekeeper *tk = &tk_core.timekeeper; u64 corr_raw, corr_real; bool interp_forward; int ret; if (total_history_cycles == 0 || partial_history_cycles == 0) return 0; /* Interpolate shortest distance from beginning or end of history */ interp_forward = partial_history_cycles > total_history_cycles / 2; partial_history_cycles = interp_forward ? total_history_cycles - partial_history_cycles : partial_history_cycles; /* * Scale the monotonic raw time delta by: * partial_history_cycles / total_history_cycles */ corr_raw = (u64)ktime_to_ns( ktime_sub(ts->sys_monoraw, history->raw)); ret = scale64_check_overflow(partial_history_cycles, total_history_cycles, &corr_raw); if (ret) return ret; /* * If there is a discontinuity in the history, scale monotonic raw * correction by: * mult(real)/mult(raw) yielding the realtime correction * Otherwise, calculate the realtime correction similar to monotonic * raw calculation */ if (discontinuity) { corr_real = mul_u64_u32_div (corr_raw, tk->tkr_mono.mult, tk->tkr_raw.mult); } else { corr_real = (u64)ktime_to_ns( ktime_sub(ts->sys_realtime, history->real)); ret = scale64_check_overflow(partial_history_cycles, total_history_cycles, &corr_real); if (ret) return ret; } /* Fixup monotonic raw and real time time values */ if (interp_forward) { ts->sys_monoraw = ktime_add_ns(history->raw, corr_raw); ts->sys_realtime = ktime_add_ns(history->real, corr_real); } else { ts->sys_monoraw = ktime_sub_ns(ts->sys_monoraw, corr_raw); ts->sys_realtime = ktime_sub_ns(ts->sys_realtime, corr_real); } return 0; } /* * timestamp_in_interval - true if ts is chronologically in [start, end] * * True if ts occurs chronologically at or after start, and before or at end. */ static bool timestamp_in_interval(u64 start, u64 end, u64 ts) { if (ts >= start && ts <= end) return true; if (start > end && (ts >= start || ts <= end)) return true; return false; } static bool convert_clock(u64 *val, u32 numerator, u32 denominator) { u64 rem, res; if (!numerator || !denominator) return false; res = div64_u64_rem(*val, denominator, &rem) * numerator; *val = res + div_u64(rem * numerator, denominator); return true; } static bool convert_base_to_cs(struct system_counterval_t *scv) { struct clocksource *cs = tk_core.timekeeper.tkr_mono.clock; struct clocksource_base *base; u32 num, den; /* The timestamp was taken from the time keeper clock source */ if (cs->id == scv->cs_id) return true; /* * Check whether cs_id matches the base clock. Prevent the compiler from * re-evaluating @base as the clocksource might change concurrently. */ base = READ_ONCE(cs->base); if (!base || base->id != scv->cs_id) return false; num = scv->use_nsecs ? cs->freq_khz : base->numerator; den = scv->use_nsecs ? USEC_PER_SEC : base->denominator; if (!convert_clock(&scv->cycles, num, den)) return false; scv->cycles += base->offset; return true; } static bool convert_cs_to_base(u64 *cycles, enum clocksource_ids base_id) { struct clocksource *cs = tk_core.timekeeper.tkr_mono.clock; struct clocksource_base *base; /* * Check whether base_id matches the base clock. Prevent the compiler from * re-evaluating @base as the clocksource might change concurrently. */ base = READ_ONCE(cs->base); if (!base || base->id != base_id) return false; *cycles -= base->offset; if (!convert_clock(cycles, base->denominator, base->numerator)) return false; return true; } static bool convert_ns_to_cs(u64 *delta) { struct tk_read_base *tkr = &tk_core.timekeeper.tkr_mono; if (BITS_TO_BYTES(fls64(*delta) + tkr->shift) >= sizeof(*delta)) return false; *delta = div_u64((*delta << tkr->shift) - tkr->xtime_nsec, tkr->mult); return true; } /** * ktime_real_to_base_clock() - Convert CLOCK_REALTIME timestamp to a base clock timestamp * @treal: CLOCK_REALTIME timestamp to convert * @base_id: base clocksource id * @cycles: pointer to store the converted base clock timestamp * * Converts a supplied, future realtime clock value to the corresponding base clock value. * * Return: true if the conversion is successful, false otherwise. */ bool ktime_real_to_base_clock(ktime_t treal, enum clocksource_ids base_id, u64 *cycles) { struct timekeeper *tk = &tk_core.timekeeper; unsigned int seq; u64 delta; do { seq = read_seqcount_begin(&tk_core.seq); if ((u64)treal < tk->tkr_mono.base_real) return false; delta = (u64)treal - tk->tkr_mono.base_real; if (!convert_ns_to_cs(&delta)) return false; *cycles = tk->tkr_mono.cycle_last + delta; if (!convert_cs_to_base(cycles, base_id)) return false; } while (read_seqcount_retry(&tk_core.seq, seq)); return true; } EXPORT_SYMBOL_GPL(ktime_real_to_base_clock); /** * get_device_system_crosststamp - Synchronously capture system/device timestamp * @get_time_fn: Callback to get simultaneous device time and * system counter from the device driver * @ctx: Context passed to get_time_fn() * @history_begin: Historical reference point used to interpolate system * time when counter provided by the driver is before the current interval * @xtstamp: Receives simultaneously captured system and device time * * Reads a timestamp from a device and correlates it to system time */ int get_device_system_crosststamp(int (*get_time_fn) (ktime_t *device_time, struct system_counterval_t *sys_counterval, void *ctx), void *ctx, struct system_time_snapshot *history_begin, struct system_device_crosststamp *xtstamp) { struct system_counterval_t system_counterval; struct timekeeper *tk = &tk_core.timekeeper; u64 cycles, now, interval_start; unsigned int clock_was_set_seq = 0; ktime_t base_real, base_raw; u64 nsec_real, nsec_raw; u8 cs_was_changed_seq; unsigned int seq; bool do_interp; int ret; do { seq = read_seqcount_begin(&tk_core.seq); /* * Try to synchronously capture device time and a system * counter value calling back into the device driver */ ret = get_time_fn(&xtstamp->device, &system_counterval, ctx); if (ret) return ret; /* * Verify that the clocksource ID associated with the captured * system counter value is the same as for the currently * installed timekeeper clocksource */ if (system_counterval.cs_id == CSID_GENERIC || !convert_base_to_cs(&system_counterval)) return -ENODEV; cycles = system_counterval.cycles; /* * Check whether the system counter value provided by the * device driver is on the current timekeeping interval. */ now = tk_clock_read(&tk->tkr_mono); interval_start = tk->tkr_mono.cycle_last; if (!timestamp_in_interval(interval_start, now, cycles)) { clock_was_set_seq = tk->clock_was_set_seq; cs_was_changed_seq = tk->cs_was_changed_seq; cycles = interval_start; do_interp = true; } else { do_interp = false; } base_real = ktime_add(tk->tkr_mono.base, tk_core.timekeeper.offs_real); base_raw = tk->tkr_raw.base; nsec_real = timekeeping_cycles_to_ns(&tk->tkr_mono, cycles); nsec_raw = timekeeping_cycles_to_ns(&tk->tkr_raw, cycles); } while (read_seqcount_retry(&tk_core.seq, seq)); xtstamp->sys_realtime = ktime_add_ns(base_real, nsec_real); xtstamp->sys_monoraw = ktime_add_ns(base_raw, nsec_raw); /* * Interpolate if necessary, adjusting back from the start of the * current interval */ if (do_interp) { u64 partial_history_cycles, total_history_cycles; bool discontinuity; /* * Check that the counter value is not before the provided * history reference and that the history doesn't cross a * clocksource change */ if (!history_begin || !timestamp_in_interval(history_begin->cycles, cycles, system_counterval.cycles) || history_begin->cs_was_changed_seq != cs_was_changed_seq) return -EINVAL; partial_history_cycles = cycles - system_counterval.cycles; total_history_cycles = cycles - history_begin->cycles; discontinuity = history_begin->clock_was_set_seq != clock_was_set_seq; ret = adjust_historical_crosststamp(history_begin, partial_history_cycles, total_history_cycles, discontinuity, xtstamp); if (ret) return ret; } return 0; } EXPORT_SYMBOL_GPL(get_device_system_crosststamp); /** * timekeeping_clocksource_has_base - Check whether the current clocksource * is based on given a base clock * @id: base clocksource ID * * Note: The return value is a snapshot which can become invalid right * after the function returns. * * Return: true if the timekeeper clocksource has a base clock with @id, * false otherwise */ bool timekeeping_clocksource_has_base(enum clocksource_ids id) { /* * This is a snapshot, so no point in using the sequence * count. Just prevent the compiler from re-evaluating @base as the * clocksource might change concurrently. */ struct clocksource_base *base = READ_ONCE(tk_core.timekeeper.tkr_mono.clock->base); return base ? base->id == id : false; } EXPORT_SYMBOL_GPL(timekeeping_clocksource_has_base); /** * do_settimeofday64 - Sets the time of day. * @ts: pointer to the timespec64 variable containing the new time * * Sets the time of day to the new time and update NTP and notify hrtimers */ int do_settimeofday64(const struct timespec64 *ts) { struct timespec64 ts_delta, xt; if (!timespec64_valid_settod(ts)) return -EINVAL; scoped_guard (raw_spinlock_irqsave, &tk_core.lock) { struct timekeeper *tks = &tk_core.shadow_timekeeper; timekeeping_forward_now(tks); xt = tk_xtime(tks); ts_delta = timespec64_sub(*ts, xt); if (timespec64_compare(&tks->wall_to_monotonic, &ts_delta) > 0) { timekeeping_restore_shadow(&tk_core); return -EINVAL; } tk_set_wall_to_mono(tks, timespec64_sub(tks->wall_to_monotonic, ts_delta)); tk_set_xtime(tks, ts); timekeeping_update_from_shadow(&tk_core, TK_UPDATE_ALL); } /* Signal hrtimers about time change */ clock_was_set(CLOCK_SET_WALL); audit_tk_injoffset(ts_delta); add_device_randomness(ts, sizeof(*ts)); return 0; } EXPORT_SYMBOL(do_settimeofday64); /** * timekeeping_inject_offset - Adds or subtracts from the current time. * @ts: Pointer to the timespec variable containing the offset * * Adds or subtracts an offset value from the current time. */ static int timekeeping_inject_offset(const struct timespec64 *ts) { if (ts->tv_nsec < 0 || ts->tv_nsec >= NSEC_PER_SEC) return -EINVAL; scoped_guard (raw_spinlock_irqsave, &tk_core.lock) { struct timekeeper *tks = &tk_core.shadow_timekeeper; struct timespec64 tmp; timekeeping_forward_now(tks); /* Make sure the proposed value is valid */ tmp = timespec64_add(tk_xtime(tks), *ts); if (timespec64_compare(&tks->wall_to_monotonic, ts) > 0 || !timespec64_valid_settod(&tmp)) { timekeeping_restore_shadow(&tk_core); return -EINVAL; } tk_xtime_add(tks, ts); tk_set_wall_to_mono(tks, timespec64_sub(tks->wall_to_monotonic, *ts)); timekeeping_update_from_shadow(&tk_core, TK_UPDATE_ALL); } /* Signal hrtimers about time change */ clock_was_set(CLOCK_SET_WALL); return 0; } /* * Indicates if there is an offset between the system clock and the hardware * clock/persistent clock/rtc. */ int persistent_clock_is_local; /* * Adjust the time obtained from the CMOS to be UTC time instead of * local time. * * This is ugly, but preferable to the alternatives. Otherwise we * would either need to write a program to do it in /etc/rc (and risk * confusion if the program gets run more than once; it would also be * hard to make the program warp the clock precisely n hours) or * compile in the timezone information into the kernel. Bad, bad.... * * - TYT, 1992-01-01 * * The best thing to do is to keep the CMOS clock in universal time (UTC) * as real UNIX machines always do it. This avoids all headaches about * daylight saving times and warping kernel clocks. */ void timekeeping_warp_clock(void) { if (sys_tz.tz_minuteswest != 0) { struct timespec64 adjust; persistent_clock_is_local = 1; adjust.tv_sec = sys_tz.tz_minuteswest * 60; adjust.tv_nsec = 0; timekeeping_inject_offset(&adjust); } } /* * __timekeeping_set_tai_offset - Sets the TAI offset from UTC and monotonic */ static void __timekeeping_set_tai_offset(struct timekeeper *tk, s32 tai_offset) { tk->tai_offset = tai_offset; tk->offs_tai = ktime_add(tk->offs_real, ktime_set(tai_offset, 0)); } /* * change_clocksource - Swaps clocksources if a new one is available * * Accumulates current time interval and initializes new clocksource */ static int change_clocksource(void *data) { struct clocksource *new = data, *old = NULL; /* * If the clocksource is in a module, get a module reference. * Succeeds for built-in code (owner == NULL) as well. Abort if the * reference can't be acquired. */ if (!try_module_get(new->owner)) return 0; /* Abort if the device can't be enabled */ if (new->enable && new->enable(new) != 0) { module_put(new->owner); return 0; } scoped_guard (raw_spinlock_irqsave, &tk_core.lock) { struct timekeeper *tks = &tk_core.shadow_timekeeper; timekeeping_forward_now(tks); old = tks->tkr_mono.clock; tk_setup_internals(tks, new); timekeeping_update_from_shadow(&tk_core, TK_UPDATE_ALL); } if (old) { if (old->disable) old->disable(old); module_put(old->owner); } return 0; } /** * timekeeping_notify - Install a new clock source * @clock: pointer to the clock source * * This function is called from clocksource.c after a new, better clock * source has been registered. The caller holds the clocksource_mutex. */ int timekeeping_notify(struct clocksource *clock) { struct timekeeper *tk = &tk_core.timekeeper; if (tk->tkr_mono.clock == clock) return 0; stop_machine(change_clocksource, clock, NULL); tick_clock_notify(); return tk->tkr_mono.clock == clock ? 0 : -1; } /** * ktime_get_raw_ts64 - Returns the raw monotonic time in a timespec * @ts: pointer to the timespec64 to be set * * Returns the raw monotonic time (completely un-modified by ntp) */ void ktime_get_raw_ts64(struct timespec64 *ts) { struct timekeeper *tk = &tk_core.timekeeper; unsigned int seq; u64 nsecs; do { seq = read_seqcount_begin(&tk_core.seq); ts->tv_sec = tk->raw_sec; nsecs = timekeeping_get_ns(&tk->tkr_raw); } while (read_seqcount_retry(&tk_core.seq, seq)); ts->tv_nsec = 0; timespec64_add_ns(ts, nsecs); } EXPORT_SYMBOL(ktime_get_raw_ts64); /** * timekeeping_valid_for_hres - Check if timekeeping is suitable for hres */ int timekeeping_valid_for_hres(void) { struct timekeeper *tk = &tk_core.timekeeper; unsigned int seq; int ret; do { seq = read_seqcount_begin(&tk_core.seq); ret = tk->tkr_mono.clock->flags & CLOCK_SOURCE_VALID_FOR_HRES; } while (read_seqcount_retry(&tk_core.seq, seq)); return ret; } /** * timekeeping_max_deferment - Returns max time the clocksource can be deferred */ u64 timekeeping_max_deferment(void) { struct timekeeper *tk = &tk_core.timekeeper; unsigned int seq; u64 ret; do { seq = read_seqcount_begin(&tk_core.seq); ret = tk->tkr_mono.clock->max_idle_ns; } while (read_seqcount_retry(&tk_core.seq, seq)); return ret; } /** * read_persistent_clock64 - Return time from the persistent clock. * @ts: Pointer to the storage for the readout value * * Weak dummy function for arches that do not yet support it. * Reads the time from the battery backed persistent clock. * Returns a timespec with tv_sec=0 and tv_nsec=0 if unsupported. * * XXX - Do be sure to remove it once all arches implement it. */ void __weak read_persistent_clock64(struct timespec64 *ts) { ts->tv_sec = 0; ts->tv_nsec = 0; } /** * read_persistent_wall_and_boot_offset - Read persistent clock, and also offset * from the boot. * @wall_time: current time as returned by persistent clock * @boot_offset: offset that is defined as wall_time - boot_time * * Weak dummy function for arches that do not yet support it. * * The default function calculates offset based on the current value of * local_clock(). This way architectures that support sched_clock() but don't * support dedicated boot time clock will provide the best estimate of the * boot time. */ void __weak __init read_persistent_wall_and_boot_offset(struct timespec64 *wall_time, struct timespec64 *boot_offset) { read_persistent_clock64(wall_time); *boot_offset = ns_to_timespec64(local_clock()); } static __init void tkd_basic_setup(struct tk_data *tkd) { raw_spin_lock_init(&tkd->lock); seqcount_raw_spinlock_init(&tkd->seq, &tkd->lock); } /* * Flag reflecting whether timekeeping_resume() has injected sleeptime. * * The flag starts of false and is only set when a suspend reaches * timekeeping_suspend(), timekeeping_resume() sets it to false when the * timekeeper clocksource is not stopping across suspend and has been * used to update sleep time. If the timekeeper clocksource has stopped * then the flag stays true and is used by the RTC resume code to decide * whether sleeptime must be injected and if so the flag gets false then. * * If a suspend fails before reaching timekeeping_resume() then the flag * stays false and prevents erroneous sleeptime injection. */ static bool suspend_timing_needed; /* Flag for if there is a persistent clock on this platform */ static bool persistent_clock_exists; /* * timekeeping_init - Initializes the clocksource and common timekeeping values */ void __init timekeeping_init(void) { struct timespec64 wall_time, boot_offset, wall_to_mono; struct timekeeper *tks = &tk_core.shadow_timekeeper; struct clocksource *clock; tkd_basic_setup(&tk_core); read_persistent_wall_and_boot_offset(&wall_time, &boot_offset); if (timespec64_valid_settod(&wall_time) && timespec64_to_ns(&wall_time) > 0) { persistent_clock_exists = true; } else if (timespec64_to_ns(&wall_time) != 0) { pr_warn("Persistent clock returned invalid value"); wall_time = (struct timespec64){0}; } if (timespec64_compare(&wall_time, &boot_offset) < 0) boot_offset = (struct timespec64){0}; /* * We want set wall_to_mono, so the following is true: * wall time + wall_to_mono = boot time */ wall_to_mono = timespec64_sub(boot_offset, wall_time); guard(raw_spinlock_irqsave)(&tk_core.lock); ntp_init(); clock = clocksource_default_clock(); if (clock->enable) clock->enable(clock); tk_setup_internals(tks, clock); tk_set_xtime(tks, &wall_time); tks->raw_sec = 0; tk_set_wall_to_mono(tks, wall_to_mono); timekeeping_update_from_shadow(&tk_core, TK_CLOCK_WAS_SET); } /* time in seconds when suspend began for persistent clock */ static struct timespec64 timekeeping_suspend_time; /** * __timekeeping_inject_sleeptime - Internal function to add sleep interval * @tk: Pointer to the timekeeper to be updated * @delta: Pointer to the delta value in timespec64 format * * Takes a timespec offset measuring a suspend interval and properly * adds the sleep offset to the timekeeping variables. */ static void __timekeeping_inject_sleeptime(struct timekeeper *tk, const struct timespec64 *delta) { if (!timespec64_valid_strict(delta)) { printk_deferred(KERN_WARNING "__timekeeping_inject_sleeptime: Invalid " "sleep delta value!\n"); return; } tk_xtime_add(tk, delta); tk_set_wall_to_mono(tk, timespec64_sub(tk->wall_to_monotonic, *delta)); tk_update_sleep_time(tk, timespec64_to_ktime(*delta)); tk_debug_account_sleep_time(delta); } #if defined(CONFIG_PM_SLEEP) && defined(CONFIG_RTC_HCTOSYS_DEVICE) /* * We have three kinds of time sources to use for sleep time * injection, the preference order is: * 1) non-stop clocksource * 2) persistent clock (ie: RTC accessible when irqs are off) * 3) RTC * * 1) and 2) are used by timekeeping, 3) by RTC subsystem. * If system has neither 1) nor 2), 3) will be used finally. * * * If timekeeping has injected sleeptime via either 1) or 2), * 3) becomes needless, so in this case we don't need to call * rtc_resume(), and this is what timekeeping_rtc_skipresume() * means. */ bool timekeeping_rtc_skipresume(void) { return !suspend_timing_needed; } /* * 1) can be determined whether to use or not only when doing * timekeeping_resume() which is invoked after rtc_suspend(), * so we can't skip rtc_suspend() surely if system has 1). * * But if system has 2), 2) will definitely be used, so in this * case we don't need to call rtc_suspend(), and this is what * timekeeping_rtc_skipsuspend() means. */ bool timekeeping_rtc_skipsuspend(void) { return persistent_clock_exists; } /** * timekeeping_inject_sleeptime64 - Adds suspend interval to timeekeeping values * @delta: pointer to a timespec64 delta value * * This hook is for architectures that cannot support read_persistent_clock64 * because their RTC/persistent clock is only accessible when irqs are enabled. * and also don't have an effective nonstop clocksource. * * This function should only be called by rtc_resume(), and allows * a suspend offset to be injected into the timekeeping values. */ void timekeeping_inject_sleeptime64(const struct timespec64 *delta) { scoped_guard(raw_spinlock_irqsave, &tk_core.lock) { struct timekeeper *tks = &tk_core.shadow_timekeeper; suspend_timing_needed = false; timekeeping_forward_now(tks); __timekeeping_inject_sleeptime(tks, delta); timekeeping_update_from_shadow(&tk_core, TK_UPDATE_ALL); } /* Signal hrtimers about time change */ clock_was_set(CLOCK_SET_WALL | CLOCK_SET_BOOT); } #endif /** * timekeeping_resume - Resumes the generic timekeeping subsystem. */ void timekeeping_resume(void) { struct timekeeper *tks = &tk_core.shadow_timekeeper; struct clocksource *clock = tks->tkr_mono.clock; struct timespec64 ts_new, ts_delta; bool inject_sleeptime = false; u64 cycle_now, nsec; unsigned long flags; read_persistent_clock64(&ts_new); clockevents_resume(); clocksource_resume(); raw_spin_lock_irqsave(&tk_core.lock, flags); /* * After system resumes, we need to calculate the suspended time and * compensate it for the OS time. There are 3 sources that could be * used: Nonstop clocksource during suspend, persistent clock and rtc * device. * * One specific platform may have 1 or 2 or all of them, and the * preference will be: * suspend-nonstop clocksource -> persistent clock -> rtc * The less preferred source will only be tried if there is no better * usable source. The rtc part is handled separately in rtc core code. */ cycle_now = tk_clock_read(&tks->tkr_mono); nsec = clocksource_stop_suspend_timing(clock, cycle_now); if (nsec > 0) { ts_delta = ns_to_timespec64(nsec); inject_sleeptime = true; } else if (timespec64_compare(&ts_new, &timekeeping_suspend_time) > 0) { ts_delta = timespec64_sub(ts_new, timekeeping_suspend_time); inject_sleeptime = true; } if (inject_sleeptime) { suspend_timing_needed = false; __timekeeping_inject_sleeptime(tks, &ts_delta); } /* Re-base the last cycle value */ tks->tkr_mono.cycle_last = cycle_now; tks->tkr_raw.cycle_last = cycle_now; tks->ntp_error = 0; timekeeping_suspended = 0; timekeeping_update_from_shadow(&tk_core, TK_CLOCK_WAS_SET); raw_spin_unlock_irqrestore(&tk_core.lock, flags); touch_softlockup_watchdog(); /* Resume the clockevent device(s) and hrtimers */ tick_resume(); /* Notify timerfd as resume is equivalent to clock_was_set() */ timerfd_resume(); } int timekeeping_suspend(void) { struct timekeeper *tks = &tk_core.shadow_timekeeper; struct timespec64 delta, delta_delta; static struct timespec64 old_delta; struct clocksource *curr_clock; unsigned long flags; u64 cycle_now; read_persistent_clock64(&timekeeping_suspend_time); /* * On some systems the persistent_clock can not be detected at * timekeeping_init by its return value, so if we see a valid * value returned, update the persistent_clock_exists flag. */ if (timekeeping_suspend_time.tv_sec || timekeeping_suspend_time.tv_nsec) persistent_clock_exists = true; suspend_timing_needed = true; raw_spin_lock_irqsave(&tk_core.lock, flags); timekeeping_forward_now(tks); timekeeping_suspended = 1; /* * Since we've called forward_now, cycle_last stores the value * just read from the current clocksource. Save this to potentially * use in suspend timing. */ curr_clock = tks->tkr_mono.clock; cycle_now = tks->tkr_mono.cycle_last; clocksource_start_suspend_timing(curr_clock, cycle_now); if (persistent_clock_exists) { /* * To avoid drift caused by repeated suspend/resumes, * which each can add ~1 second drift error, * try to compensate so the difference in system time * and persistent_clock time stays close to constant. */ delta = timespec64_sub(tk_xtime(tks), timekeeping_suspend_time); delta_delta = timespec64_sub(delta, old_delta); if (abs(delta_delta.tv_sec) >= 2) { /* * if delta_delta is too large, assume time correction * has occurred and set old_delta to the current delta. */ old_delta = delta; } else { /* Otherwise try to adjust old_system to compensate */ timekeeping_suspend_time = timespec64_add(timekeeping_suspend_time, delta_delta); } } timekeeping_update_from_shadow(&tk_core, 0); halt_fast_timekeeper(tks); raw_spin_unlock_irqrestore(&tk_core.lock, flags); tick_suspend(); clocksource_suspend(); clockevents_suspend(); return 0; } /* sysfs resume/suspend bits for timekeeping */ static struct syscore_ops timekeeping_syscore_ops = { .resume = timekeeping_resume, .suspend = timekeeping_suspend, }; static int __init timekeeping_init_ops(void) { register_syscore_ops(&timekeeping_syscore_ops); return 0; } device_initcall(timekeeping_init_ops); /* * Apply a multiplier adjustment to the timekeeper */ static __always_inline void timekeeping_apply_adjustment(struct timekeeper *tk, s64 offset, s32 mult_adj) { s64 interval = tk->cycle_interval; if (mult_adj == 0) { return; } else if (mult_adj == -1) { interval = -interval; offset = -offset; } else if (mult_adj != 1) { interval *= mult_adj; offset *= mult_adj; } /* * So the following can be confusing. * * To keep things simple, lets assume mult_adj == 1 for now. * * When mult_adj != 1, remember that the interval and offset values * have been appropriately scaled so the math is the same. * * The basic idea here is that we're increasing the multiplier * by one, this causes the xtime_interval to be incremented by * one cycle_interval. This is because: * xtime_interval = cycle_interval * mult * So if mult is being incremented by one: * xtime_interval = cycle_interval * (mult + 1) * Its the same as: * xtime_interval = (cycle_interval * mult) + cycle_interval * Which can be shortened to: * xtime_interval += cycle_interval * * So offset stores the non-accumulated cycles. Thus the current * time (in shifted nanoseconds) is: * now = (offset * adj) + xtime_nsec * Now, even though we're adjusting the clock frequency, we have * to keep time consistent. In other words, we can't jump back * in time, and we also want to avoid jumping forward in time. * * So given the same offset value, we need the time to be the same * both before and after the freq adjustment. * now = (offset * adj_1) + xtime_nsec_1 * now = (offset * adj_2) + xtime_nsec_2 * So: * (offset * adj_1) + xtime_nsec_1 = * (offset * adj_2) + xtime_nsec_2 * And we know: * adj_2 = adj_1 + 1 * So: * (offset * adj_1) + xtime_nsec_1 = * (offset * (adj_1+1)) + xtime_nsec_2 * (offset * adj_1) + xtime_nsec_1 = * (offset * adj_1) + offset + xtime_nsec_2 * Canceling the sides: * xtime_nsec_1 = offset + xtime_nsec_2 * Which gives us: * xtime_nsec_2 = xtime_nsec_1 - offset * Which simplifies to: * xtime_nsec -= offset */ if ((mult_adj > 0) && (tk->tkr_mono.mult + mult_adj < mult_adj)) { /* NTP adjustment caused clocksource mult overflow */ WARN_ON_ONCE(1); return; } tk->tkr_mono.mult += mult_adj; tk->xtime_interval += interval; tk->tkr_mono.xtime_nsec -= offset; } /* * Adjust the timekeeper's multiplier to the correct frequency * and also to reduce the accumulated error value. */ static void timekeeping_adjust(struct timekeeper *tk, s64 offset) { u64 ntp_tl = ntp_tick_length(); u32 mult; /* * Determine the multiplier from the current NTP tick length. * Avoid expensive division when the tick length doesn't change. */ if (likely(tk->ntp_tick == ntp_tl)) { mult = tk->tkr_mono.mult - tk->ntp_err_mult; } else { tk->ntp_tick = ntp_tl; mult = div64_u64((tk->ntp_tick >> tk->ntp_error_shift) - tk->xtime_remainder, tk->cycle_interval); } /* * If the clock is behind the NTP time, increase the multiplier by 1 * to catch up with it. If it's ahead and there was a remainder in the * tick division, the clock will slow down. Otherwise it will stay * ahead until the tick length changes to a non-divisible value. */ tk->ntp_err_mult = tk->ntp_error > 0 ? 1 : 0; mult += tk->ntp_err_mult; timekeeping_apply_adjustment(tk, offset, mult - tk->tkr_mono.mult); if (unlikely(tk->tkr_mono.clock->maxadj && (abs(tk->tkr_mono.mult - tk->tkr_mono.clock->mult) > tk->tkr_mono.clock->maxadj))) { printk_once(KERN_WARNING "Adjusting %s more than 11%% (%ld vs %ld)\n", tk->tkr_mono.clock->name, (long)tk->tkr_mono.mult, (long)tk->tkr_mono.clock->mult + tk->tkr_mono.clock->maxadj); } /* * It may be possible that when we entered this function, xtime_nsec * was very small. Further, if we're slightly speeding the clocksource * in the code above, its possible the required corrective factor to * xtime_nsec could cause it to underflow. * * Now, since we have already accumulated the second and the NTP * subsystem has been notified via second_overflow(), we need to skip * the next update. */ if (unlikely((s64)tk->tkr_mono.xtime_nsec < 0)) { tk->tkr_mono.xtime_nsec += (u64)NSEC_PER_SEC << tk->tkr_mono.shift; tk->xtime_sec--; tk->skip_second_overflow = 1; } } /* * accumulate_nsecs_to_secs - Accumulates nsecs into secs * * Helper function that accumulates the nsecs greater than a second * from the xtime_nsec field to the xtime_secs field. * It also calls into the NTP code to handle leapsecond processing. */ static inline unsigned int accumulate_nsecs_to_secs(struct timekeeper *tk) { u64 nsecps = (u64)NSEC_PER_SEC << tk->tkr_mono.shift; unsigned int clock_set = 0; while (tk->tkr_mono.xtime_nsec >= nsecps) { int leap; tk->tkr_mono.xtime_nsec -= nsecps; tk->xtime_sec++; /* * Skip NTP update if this second was accumulated before, * i.e. xtime_nsec underflowed in timekeeping_adjust() */ if (unlikely(tk->skip_second_overflow)) { tk->skip_second_overflow = 0; continue; } /* Figure out if its a leap sec and apply if needed */ leap = second_overflow(tk->xtime_sec); if (unlikely(leap)) { struct timespec64 ts; tk->xtime_sec += leap; ts.tv_sec = leap; ts.tv_nsec = 0; tk_set_wall_to_mono(tk, timespec64_sub(tk->wall_to_monotonic, ts)); __timekeeping_set_tai_offset(tk, tk->tai_offset - leap); clock_set = TK_CLOCK_WAS_SET; } } return clock_set; } /* * logarithmic_accumulation - shifted accumulation of cycles * * This functions accumulates a shifted interval of cycles into * a shifted interval nanoseconds. Allows for O(log) accumulation * loop. * * Returns the unconsumed cycles. */ static u64 logarithmic_accumulation(struct timekeeper *tk, u64 offset, u32 shift, unsigned int *clock_set) { u64 interval = tk->cycle_interval << shift; u64 snsec_per_sec; /* If the offset is smaller than a shifted interval, do nothing */ if (offset < interval) return offset; /* Accumulate one shifted interval */ offset -= interval; tk->tkr_mono.cycle_last += interval; tk->tkr_raw.cycle_last += interval; tk->tkr_mono.xtime_nsec += tk->xtime_interval << shift; *clock_set |= accumulate_nsecs_to_secs(tk); /* Accumulate raw time */ tk->tkr_raw.xtime_nsec += tk->raw_interval << shift; snsec_per_sec = (u64)NSEC_PER_SEC << tk->tkr_raw.shift; while (tk->tkr_raw.xtime_nsec >= snsec_per_sec) { tk->tkr_raw.xtime_nsec -= snsec_per_sec; tk->raw_sec++; } /* Accumulate error between NTP and clock interval */ tk->ntp_error += tk->ntp_tick << shift; tk->ntp_error -= (tk->xtime_interval + tk->xtime_remainder) << (tk->ntp_error_shift + shift); return offset; } /* * timekeeping_advance - Updates the timekeeper to the current time and * current NTP tick length */ static bool timekeeping_advance(enum timekeeping_adv_mode mode) { struct timekeeper *tk = &tk_core.shadow_timekeeper; struct timekeeper *real_tk = &tk_core.timekeeper; unsigned int clock_set = 0; int shift = 0, maxshift; u64 offset; guard(raw_spinlock_irqsave)(&tk_core.lock); /* Make sure we're fully resumed: */ if (unlikely(timekeeping_suspended)) return false; offset = clocksource_delta(tk_clock_read(&tk->tkr_mono), tk->tkr_mono.cycle_last, tk->tkr_mono.mask, tk->tkr_mono.clock->max_raw_delta); /* Check if there's really nothing to do */ if (offset < real_tk->cycle_interval && mode == TK_ADV_TICK) return false; /* * With NO_HZ we may have to accumulate many cycle_intervals * (think "ticks") worth of time at once. To do this efficiently, * we calculate the largest doubling multiple of cycle_intervals * that is smaller than the offset. We then accumulate that * chunk in one go, and then try to consume the next smaller * doubled multiple. */ shift = ilog2(offset) - ilog2(tk->cycle_interval); shift = max(0, shift); /* Bound shift to one less than what overflows tick_length */ maxshift = (64 - (ilog2(ntp_tick_length())+1)) - 1; shift = min(shift, maxshift); while (offset >= tk->cycle_interval) { offset = logarithmic_accumulation(tk, offset, shift, &clock_set); if (offset < tk->cycle_interval<<shift) shift--; } /* Adjust the multiplier to correct NTP error */ timekeeping_adjust(tk, offset); /* * Finally, make sure that after the rounding * xtime_nsec isn't larger than NSEC_PER_SEC */ clock_set |= accumulate_nsecs_to_secs(tk); timekeeping_update_from_shadow(&tk_core, clock_set); return !!clock_set; } /** * update_wall_time - Uses the current clocksource to increment the wall time * */ void update_wall_time(void) { if (timekeeping_advance(TK_ADV_TICK)) clock_was_set_delayed(); } /** * getboottime64 - Return the real time of system boot. * @ts: pointer to the timespec64 to be set * * Returns the wall-time of boot in a timespec64. * * This is based on the wall_to_monotonic offset and the total suspend * time. Calls to settimeofday will affect the value returned (which * basically means that however wrong your real time clock is at boot time, * you get the right time here). */ void getboottime64(struct timespec64 *ts) { struct timekeeper *tk = &tk_core.timekeeper; ktime_t t = ktime_sub(tk->offs_real, tk->offs_boot); *ts = ktime_to_timespec64(t); } EXPORT_SYMBOL_GPL(getboottime64); void ktime_get_coarse_real_ts64(struct timespec64 *ts) { struct timekeeper *tk = &tk_core.timekeeper; unsigned int seq; do { seq = read_seqcount_begin(&tk_core.seq); *ts = tk_xtime(tk); } while (read_seqcount_retry(&tk_core.seq, seq)); } EXPORT_SYMBOL(ktime_get_coarse_real_ts64); /** * ktime_get_coarse_real_ts64_mg - return latter of coarse grained time or floor * @ts: timespec64 to be filled * * Fetch the global mg_floor value, convert it to realtime and compare it * to the current coarse-grained time. Fill @ts with whichever is * latest. Note that this is a filesystem-specific interface and should be * avoided outside of that context. */ void ktime_get_coarse_real_ts64_mg(struct timespec64 *ts) { struct timekeeper *tk = &tk_core.timekeeper; u64 floor = atomic64_read(&mg_floor); ktime_t f_real, offset, coarse; unsigned int seq; do { seq = read_seqcount_begin(&tk_core.seq); *ts = tk_xtime(tk); offset = tk_core.timekeeper.offs_real; } while (read_seqcount_retry(&tk_core.seq, seq)); coarse = timespec64_to_ktime(*ts); f_real = ktime_add(floor, offset); if (ktime_after(f_real, coarse)) *ts = ktime_to_timespec64(f_real); } /** * ktime_get_real_ts64_mg - attempt to update floor value and return result * @ts: pointer to the timespec to be set * * Get a monotonic fine-grained time value and attempt to swap it into * mg_floor. If that succeeds then accept the new floor value. If it fails * then another task raced in during the interim time and updated the * floor. Since any update to the floor must be later than the previous * floor, either outcome is acceptable. * * Typically this will be called after calling ktime_get_coarse_real_ts64_mg(), * and determining that the resulting coarse-grained timestamp did not effect * a change in ctime. Any more recent floor value would effect a change to * ctime, so there is no need to retry the atomic64_try_cmpxchg() on failure. * * @ts will be filled with the latest floor value, regardless of the outcome of * the cmpxchg. Note that this is a filesystem specific interface and should be * avoided outside of that context. */ void ktime_get_real_ts64_mg(struct timespec64 *ts) { struct timekeeper *tk = &tk_core.timekeeper; ktime_t old = atomic64_read(&mg_floor); ktime_t offset, mono; unsigned int seq; u64 nsecs; do { seq = read_seqcount_begin(&tk_core.seq); ts->tv_sec = tk->xtime_sec; mono = tk->tkr_mono.base; nsecs = timekeeping_get_ns(&tk->tkr_mono); offset = tk_core.timekeeper.offs_real; } while (read_seqcount_retry(&tk_core.seq, seq)); mono = ktime_add_ns(mono, nsecs); /* * Attempt to update the floor with the new time value. As any * update must be later then the existing floor, and would effect * a change to ctime from the perspective of the current task, * accept the resulting floor value regardless of the outcome of * the swap. */ if (atomic64_try_cmpxchg(&mg_floor, &old, mono)) { ts->tv_nsec = 0; timespec64_add_ns(ts, nsecs); timekeeping_inc_mg_floor_swaps(); } else { /* * Another task changed mg_floor since "old" was fetched. * "old" has been updated with the latest value of "mg_floor". * That value is newer than the previous floor value, which * is enough to effect a change to ctime. Accept it. */ *ts = ktime_to_timespec64(ktime_add(old, offset)); } } void ktime_get_coarse_ts64(struct timespec64 *ts) { struct timekeeper *tk = &tk_core.timekeeper; struct timespec64 now, mono; unsigned int seq; do { seq = read_seqcount_begin(&tk_core.seq); now = tk_xtime(tk); mono = tk->wall_to_monotonic; } while (read_seqcount_retry(&tk_core.seq, seq)); set_normalized_timespec64(ts, now.tv_sec + mono.tv_sec, now.tv_nsec + mono.tv_nsec); } EXPORT_SYMBOL(ktime_get_coarse_ts64); /* * Must hold jiffies_lock */ void do_timer(unsigned long ticks) { jiffies_64 += ticks; calc_global_load(); } /** * ktime_get_update_offsets_now - hrtimer helper * @cwsseq: pointer to check and store the clock was set sequence number * @offs_real: pointer to storage for monotonic -> realtime offset * @offs_boot: pointer to storage for monotonic -> boottime offset * @offs_tai: pointer to storage for monotonic -> clock tai offset * * Returns current monotonic time and updates the offsets if the * sequence number in @cwsseq and timekeeper.clock_was_set_seq are * different. * * Called from hrtimer_interrupt() or retrigger_next_event() */ ktime_t ktime_get_update_offsets_now(unsigned int *cwsseq, ktime_t *offs_real, ktime_t *offs_boot, ktime_t *offs_tai) { struct timekeeper *tk = &tk_core.timekeeper; unsigned int seq; ktime_t base; u64 nsecs; do { seq = read_seqcount_begin(&tk_core.seq); base = tk->tkr_mono.base; nsecs = timekeeping_get_ns(&tk->tkr_mono); base = ktime_add_ns(base, nsecs); if (*cwsseq != tk->clock_was_set_seq) { *cwsseq = tk->clock_was_set_seq; *offs_real = tk->offs_real; *offs_boot = tk->offs_boot; *offs_tai = tk->offs_tai; } /* Handle leapsecond insertion adjustments */ if (unlikely(base >= tk->next_leap_ktime)) *offs_real = ktime_sub(tk->offs_real, ktime_set(1, 0)); } while (read_seqcount_retry(&tk_core.seq, seq)); return base; } /* * timekeeping_validate_timex - Ensures the timex is ok for use in do_adjtimex */ static int timekeeping_validate_timex(const struct __kernel_timex *txc) { if (txc->modes & ADJ_ADJTIME) { /* singleshot must not be used with any other mode bits */ if (!(txc->modes & ADJ_OFFSET_SINGLESHOT)) return -EINVAL; if (!(txc->modes & ADJ_OFFSET_READONLY) && !capable(CAP_SYS_TIME)) return -EPERM; } else { /* In order to modify anything, you gotta be super-user! */ if (txc->modes && !capable(CAP_SYS_TIME)) return -EPERM; /* * if the quartz is off by more than 10% then * something is VERY wrong! */ if (txc->modes & ADJ_TICK && (txc->tick < 900000/USER_HZ || txc->tick > 1100000/USER_HZ)) return -EINVAL; } if (txc->modes & ADJ_SETOFFSET) { /* In order to inject time, you gotta be super-user! */ if (!capable(CAP_SYS_TIME)) return -EPERM; /* * Validate if a timespec/timeval used to inject a time * offset is valid. Offsets can be positive or negative, so * we don't check tv_sec. The value of the timeval/timespec * is the sum of its fields,but *NOTE*: * The field tv_usec/tv_nsec must always be non-negative and * we can't have more nanoseconds/microseconds than a second. */ if (txc->time.tv_usec < 0) return -EINVAL; if (txc->modes & ADJ_NANO) { if (txc->time.tv_usec >= NSEC_PER_SEC) return -EINVAL; } else { if (txc->time.tv_usec >= USEC_PER_SEC) return -EINVAL; } } /* * Check for potential multiplication overflows that can * only happen on 64-bit systems: */ if ((txc->modes & ADJ_FREQUENCY) && (BITS_PER_LONG == 64)) { if (LLONG_MIN / PPM_SCALE > txc->freq) return -EINVAL; if (LLONG_MAX / PPM_SCALE < txc->freq) return -EINVAL; } return 0; } /** * random_get_entropy_fallback - Returns the raw clock source value, * used by random.c for platforms with no valid random_get_entropy(). */ unsigned long random_get_entropy_fallback(void) { struct tk_read_base *tkr = &tk_core.timekeeper.tkr_mono; struct clocksource *clock = READ_ONCE(tkr->clock); if (unlikely(timekeeping_suspended || !clock)) return 0; return clock->read(clock); } EXPORT_SYMBOL_GPL(random_get_entropy_fallback); /** * do_adjtimex() - Accessor function to NTP __do_adjtimex function * @txc: Pointer to kernel_timex structure containing NTP parameters */ int do_adjtimex(struct __kernel_timex *txc) { struct audit_ntp_data ad; bool offset_set = false; bool clock_set = false; struct timespec64 ts; int ret; /* Validate the data before disabling interrupts */ ret = timekeeping_validate_timex(txc); if (ret) return ret; add_device_randomness(txc, sizeof(*txc)); if (txc->modes & ADJ_SETOFFSET) { struct timespec64 delta; delta.tv_sec = txc->time.tv_sec; delta.tv_nsec = txc->time.tv_usec; if (!(txc->modes & ADJ_NANO)) delta.tv_nsec *= 1000; ret = timekeeping_inject_offset(&delta); if (ret) return ret; offset_set = delta.tv_sec != 0; audit_tk_injoffset(delta); } audit_ntp_init(&ad); ktime_get_real_ts64(&ts); add_device_randomness(&ts, sizeof(ts)); scoped_guard (raw_spinlock_irqsave, &tk_core.lock) { struct timekeeper *tks = &tk_core.shadow_timekeeper; s32 orig_tai, tai; orig_tai = tai = tks->tai_offset; ret = __do_adjtimex(txc, &ts, &tai, &ad); if (tai != orig_tai) { __timekeeping_set_tai_offset(tks, tai); timekeeping_update_from_shadow(&tk_core, TK_CLOCK_WAS_SET); clock_set = true; } else { tk_update_leap_state_all(&tk_core); } } audit_ntp_log(&ad); /* Update the multiplier immediately if frequency was set directly */ if (txc->modes & (ADJ_FREQUENCY | ADJ_TICK)) clock_set |= timekeeping_advance(TK_ADV_FREQ); if (clock_set) clock_was_set(CLOCK_SET_WALL); ntp_notify_cmos_timer(offset_set); return ret; } #ifdef CONFIG_NTP_PPS /** * hardpps() - Accessor function to NTP __hardpps function * @phase_ts: Pointer to timespec64 structure representing phase timestamp * @raw_ts: Pointer to timespec64 structure representing raw timestamp */ void hardpps(const struct timespec64 *phase_ts, const struct timespec64 *raw_ts) { guard(raw_spinlock_irqsave)(&tk_core.lock); __hardpps(phase_ts, raw_ts); } EXPORT_SYMBOL(hardpps); #endif /* CONFIG_NTP_PPS */
5 5 21 21 2 21 126 4 128 126 123 124 25 97 68 43 116 118 59 128 93 1 93 93 92 20 71 67 93 4 3 88 92 90 92 2 87 43 43 111 111 110 111 2 1 1 2 7 2 2 2 2 1 7 6 6 6 1 5 6 6 22 12 22 3 2 2 2 2 2 2 3 9 9 9 6 6 7 6 5 4 4 4 4 4 7 3 3 2 1 1 1 1 1 3 56 1 7 10 10 3 9 1 8 1 5 4 4 3 5 1 3 2 3 8 8 8 6 3 1 2 1 1 3 37 35 37 9 9 9 9 4 4 4 3 2 5 25 24 23 12 19 9 10 19 8 25 13 11 4 8 12 5 13 7 11 10 10 1 3 9 9 9 4 1 6 6 16 2 2 16 16 16 16 7 2 3 2 7 10 10 7 7 7 7 7 6 4 15 15 1 14 15 2 2 2 2 2 3 3 2 2 2 2 3 2 3 3 3 2 2 11 8 1 7 8 3 2 2 2 1 1 1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 // SPDX-License-Identifier: GPL-2.0-only /* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com * Copyright (c) 2016,2017 Facebook */ #include <linux/bpf.h> #include <linux/btf.h> #include <linux/err.h> #include <linux/slab.h> #include <linux/mm.h> #include <linux/filter.h> #include <linux/perf_event.h> #include <uapi/linux/btf.h> #include <linux/rcupdate_trace.h> #include <linux/btf_ids.h> #include "map_in_map.h" #define ARRAY_CREATE_FLAG_MASK \ (BPF_F_NUMA_NODE | BPF_F_MMAPABLE | BPF_F_ACCESS_MASK | \ BPF_F_PRESERVE_ELEMS | BPF_F_INNER_MAP) static void bpf_array_free_percpu(struct bpf_array *array) { int i; for (i = 0; i < array->map.max_entries; i++) { free_percpu(array->pptrs[i]); cond_resched(); } } static int bpf_array_alloc_percpu(struct bpf_array *array) { void __percpu *ptr; int i; for (i = 0; i < array->map.max_entries; i++) { ptr = bpf_map_alloc_percpu(&array->map, array->elem_size, 8, GFP_USER | __GFP_NOWARN); if (!ptr) { bpf_array_free_percpu(array); return -ENOMEM; } array->pptrs[i] = ptr; cond_resched(); } return 0; } /* Called from syscall */ int array_map_alloc_check(union bpf_attr *attr) { bool percpu = attr->map_type == BPF_MAP_TYPE_PERCPU_ARRAY; int numa_node = bpf_map_attr_numa_node(attr); /* check sanity of attributes */ if (attr->max_entries == 0 || attr->key_size != 4 || attr->value_size == 0 || attr->map_flags & ~ARRAY_CREATE_FLAG_MASK || !bpf_map_flags_access_ok(attr->map_flags) || (percpu && numa_node != NUMA_NO_NODE)) return -EINVAL; if (attr->map_type != BPF_MAP_TYPE_ARRAY && attr->map_flags & (BPF_F_MMAPABLE | BPF_F_INNER_MAP)) return -EINVAL; if (attr->map_type != BPF_MAP_TYPE_PERF_EVENT_ARRAY && attr->map_flags & BPF_F_PRESERVE_ELEMS) return -EINVAL; /* avoid overflow on round_up(map->value_size) */ if (attr->value_size > INT_MAX) return -E2BIG; /* percpu map value size is bound by PCPU_MIN_UNIT_SIZE */ if (percpu && round_up(attr->value_size, 8) > PCPU_MIN_UNIT_SIZE) return -E2BIG; return 0; } static struct bpf_map *array_map_alloc(union bpf_attr *attr) { bool percpu = attr->map_type == BPF_MAP_TYPE_PERCPU_ARRAY; int numa_node = bpf_map_attr_numa_node(attr); u32 elem_size, index_mask, max_entries; bool bypass_spec_v1 = bpf_bypass_spec_v1(NULL); u64 array_size, mask64; struct bpf_array *array; elem_size = round_up(attr->value_size, 8); max_entries = attr->max_entries; /* On 32 bit archs roundup_pow_of_two() with max_entries that has * upper most bit set in u32 space is undefined behavior due to * resulting 1U << 32, so do it manually here in u64 space. */ mask64 = fls_long(max_entries - 1); mask64 = 1ULL << mask64; mask64 -= 1; index_mask = mask64; if (!bypass_spec_v1) { /* round up array size to nearest power of 2, * since cpu will speculate within index_mask limits */ max_entries = index_mask + 1; /* Check for overflows. */ if (max_entries < attr->max_entries) return ERR_PTR(-E2BIG); } array_size = sizeof(*array); if (percpu) { array_size += (u64) max_entries * sizeof(void *); } else { /* rely on vmalloc() to return page-aligned memory and * ensure array->value is exactly page-aligned */ if (attr->map_flags & BPF_F_MMAPABLE) { array_size = PAGE_ALIGN(array_size); array_size += PAGE_ALIGN((u64) max_entries * elem_size); } else { array_size += (u64) max_entries * elem_size; } } /* allocate all map elements and zero-initialize them */ if (attr->map_flags & BPF_F_MMAPABLE) { void *data; /* kmalloc'ed memory can't be mmap'ed, use explicit vmalloc */ data = bpf_map_area_mmapable_alloc(array_size, numa_node); if (!data) return ERR_PTR(-ENOMEM); array = data + PAGE_ALIGN(sizeof(struct bpf_array)) - offsetof(struct bpf_array, value); } else { array = bpf_map_area_alloc(array_size, numa_node); } if (!array) return ERR_PTR(-ENOMEM); array->index_mask = index_mask; array->map.bypass_spec_v1 = bypass_spec_v1; /* copy mandatory map attributes */ bpf_map_init_from_attr(&array->map, attr); array->elem_size = elem_size; if (percpu && bpf_array_alloc_percpu(array)) { bpf_map_area_free(array); return ERR_PTR(-ENOMEM); } return &array->map; } static void *array_map_elem_ptr(struct bpf_array* array, u32 index) { return array->value + (u64)array->elem_size * index; } /* Called from syscall or from eBPF program */ static void *array_map_lookup_elem(struct bpf_map *map, void *key) { struct bpf_array *array = container_of(map, struct bpf_array, map); u32 index = *(u32 *)key; if (unlikely(index >= array->map.max_entries)) return NULL; return array->value + (u64)array->elem_size * (index & array->index_mask); } static int array_map_direct_value_addr(const struct bpf_map *map, u64 *imm, u32 off) { struct bpf_array *array = container_of(map, struct bpf_array, map); if (map->max_entries != 1) return -ENOTSUPP; if (off >= map->value_size) return -EINVAL; *imm = (unsigned long)array->value; return 0; } static int array_map_direct_value_meta(const struct bpf_map *map, u64 imm, u32 *off) { struct bpf_array *array = container_of(map, struct bpf_array, map); u64 base = (unsigned long)array->value; u64 range = array->elem_size; if (map->max_entries != 1) return -ENOTSUPP; if (imm < base || imm >= base + range) return -ENOENT; *off = imm - base; return 0; } /* emit BPF instructions equivalent to C code of array_map_lookup_elem() */ static int array_map_gen_lookup(struct bpf_map *map, struct bpf_insn *insn_buf) { struct bpf_array *array = container_of(map, struct bpf_array, map); struct bpf_insn *insn = insn_buf; u32 elem_size = array->elem_size; const int ret = BPF_REG_0; const int map_ptr = BPF_REG_1; const int index = BPF_REG_2; if (map->map_flags & BPF_F_INNER_MAP) return -EOPNOTSUPP; *insn++ = BPF_ALU64_IMM(BPF_ADD, map_ptr, offsetof(struct bpf_array, value)); *insn++ = BPF_LDX_MEM(BPF_W, ret, index, 0); if (!map->bypass_spec_v1) { *insn++ = BPF_JMP_IMM(BPF_JGE, ret, map->max_entries, 4); *insn++ = BPF_ALU32_IMM(BPF_AND, ret, array->index_mask); } else { *insn++ = BPF_JMP_IMM(BPF_JGE, ret, map->max_entries, 3); } if (is_power_of_2(elem_size)) { *insn++ = BPF_ALU64_IMM(BPF_LSH, ret, ilog2(elem_size)); } else { *insn++ = BPF_ALU64_IMM(BPF_MUL, ret, elem_size); } *insn++ = BPF_ALU64_REG(BPF_ADD, ret, map_ptr); *insn++ = BPF_JMP_IMM(BPF_JA, 0, 0, 1); *insn++ = BPF_MOV64_IMM(ret, 0); return insn - insn_buf; } /* Called from eBPF program */ static void *percpu_array_map_lookup_elem(struct bpf_map *map, void *key) { struct bpf_array *array = container_of(map, struct bpf_array, map); u32 index = *(u32 *)key; if (unlikely(index >= array->map.max_entries)) return NULL; return this_cpu_ptr(array->pptrs[index & array->index_mask]); } /* emit BPF instructions equivalent to C code of percpu_array_map_lookup_elem() */ static int percpu_array_map_gen_lookup(struct bpf_map *map, struct bpf_insn *insn_buf) { struct bpf_array *array = container_of(map, struct bpf_array, map); struct bpf_insn *insn = insn_buf; if (!bpf_jit_supports_percpu_insn()) return -EOPNOTSUPP; if (map->map_flags & BPF_F_INNER_MAP) return -EOPNOTSUPP; BUILD_BUG_ON(offsetof(struct bpf_array, map) != 0); *insn++ = BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, offsetof(struct bpf_array, pptrs)); *insn++ = BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_2, 0); if (!map->bypass_spec_v1) { *insn++ = BPF_JMP_IMM(BPF_JGE, BPF_REG_0, map->max_entries, 6); *insn++ = BPF_ALU32_IMM(BPF_AND, BPF_REG_0, array->index_mask); } else { *insn++ = BPF_JMP_IMM(BPF_JGE, BPF_REG_0, map->max_entries, 5); } *insn++ = BPF_ALU64_IMM(BPF_LSH, BPF_REG_0, 3); *insn++ = BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1); *insn++ = BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0); *insn++ = BPF_MOV64_PERCPU_REG(BPF_REG_0, BPF_REG_0); *insn++ = BPF_JMP_IMM(BPF_JA, 0, 0, 1); *insn++ = BPF_MOV64_IMM(BPF_REG_0, 0); return insn - insn_buf; } static void *percpu_array_map_lookup_percpu_elem(struct bpf_map *map, void *key, u32 cpu) { struct bpf_array *array = container_of(map, struct bpf_array, map); u32 index = *(u32 *)key; if (cpu >= nr_cpu_ids) return NULL; if (unlikely(index >= array->map.max_entries)) return NULL; return per_cpu_ptr(array->pptrs[index & array->index_mask], cpu); } int bpf_percpu_array_copy(struct bpf_map *map, void *key, void *value) { struct bpf_array *array = container_of(map, struct bpf_array, map); u32 index = *(u32 *)key; void __percpu *pptr; int cpu, off = 0; u32 size; if (unlikely(index >= array->map.max_entries)) return -ENOENT; /* per_cpu areas are zero-filled and bpf programs can only * access 'value_size' of them, so copying rounded areas * will not leak any kernel data */ size = array->elem_size; rcu_read_lock(); pptr = array->pptrs[index & array->index_mask]; for_each_possible_cpu(cpu) { copy_map_value_long(map, value + off, per_cpu_ptr(pptr, cpu)); check_and_init_map_value(map, value + off); off += size; } rcu_read_unlock(); return 0; } /* Called from syscall */ static int array_map_get_next_key(struct bpf_map *map, void *key, void *next_key) { struct bpf_array *array = container_of(map, struct bpf_array, map); u32 index = key ? *(u32 *)key : U32_MAX; u32 *next = (u32 *)next_key; if (index >= array->map.max_entries) { *next = 0; return 0; } if (index == array->map.max_entries - 1) return -ENOENT; *next = index + 1; return 0; } /* Called from syscall or from eBPF program */ static long array_map_update_elem(struct bpf_map *map, void *key, void *value, u64 map_flags) { struct bpf_array *array = container_of(map, struct bpf_array, map); u32 index = *(u32 *)key; char *val; if (unlikely((map_flags & ~BPF_F_LOCK) > BPF_EXIST)) /* unknown flags */ return -EINVAL; if (unlikely(index >= array->map.max_entries)) /* all elements were pre-allocated, cannot insert a new one */ return -E2BIG; if (unlikely(map_flags & BPF_NOEXIST)) /* all elements already exist */ return -EEXIST; if (unlikely((map_flags & BPF_F_LOCK) && !btf_record_has_field(map->record, BPF_SPIN_LOCK))) return -EINVAL; if (array->map.map_type == BPF_MAP_TYPE_PERCPU_ARRAY) { val = this_cpu_ptr(array->pptrs[index & array->index_mask]); copy_map_value(map, val, value); bpf_obj_free_fields(array->map.record, val); } else { val = array->value + (u64)array->elem_size * (index & array->index_mask); if (map_flags & BPF_F_LOCK) copy_map_value_locked(map, val, value, false); else copy_map_value(map, val, value); bpf_obj_free_fields(array->map.record, val); } return 0; } int bpf_percpu_array_update(struct bpf_map *map, void *key, void *value, u64 map_flags) { struct bpf_array *array = container_of(map, struct bpf_array, map); u32 index = *(u32 *)key; void __percpu *pptr; int cpu, off = 0; u32 size; if (unlikely(map_flags > BPF_EXIST)) /* unknown flags */ return -EINVAL; if (unlikely(index >= array->map.max_entries)) /* all elements were pre-allocated, cannot insert a new one */ return -E2BIG; if (unlikely(map_flags == BPF_NOEXIST)) /* all elements already exist */ return -EEXIST; /* the user space will provide round_up(value_size, 8) bytes that * will be copied into per-cpu area. bpf programs can only access * value_size of it. During lookup the same extra bytes will be * returned or zeros which were zero-filled by percpu_alloc, * so no kernel data leaks possible */ size = array->elem_size; rcu_read_lock(); pptr = array->pptrs[index & array->index_mask]; for_each_possible_cpu(cpu) { copy_map_value_long(map, per_cpu_ptr(pptr, cpu), value + off); bpf_obj_free_fields(array->map.record, per_cpu_ptr(pptr, cpu)); off += size; } rcu_read_unlock(); return 0; } /* Called from syscall or from eBPF program */ static long array_map_delete_elem(struct bpf_map *map, void *key) { return -EINVAL; } static void *array_map_vmalloc_addr(struct bpf_array *array) { return (void *)round_down((unsigned long)array, PAGE_SIZE); } static void array_map_free_timers_wq(struct bpf_map *map) { struct bpf_array *array = container_of(map, struct bpf_array, map); int i; /* We don't reset or free fields other than timer and workqueue * on uref dropping to zero. */ if (btf_record_has_field(map->record, BPF_TIMER | BPF_WORKQUEUE)) { for (i = 0; i < array->map.max_entries; i++) { if (btf_record_has_field(map->record, BPF_TIMER)) bpf_obj_free_timer(map->record, array_map_elem_ptr(array, i)); if (btf_record_has_field(map->record, BPF_WORKQUEUE)) bpf_obj_free_workqueue(map->record, array_map_elem_ptr(array, i)); } } } /* Called when map->refcnt goes to zero, either from workqueue or from syscall */ static void array_map_free(struct bpf_map *map) { struct bpf_array *array = container_of(map, struct bpf_array, map); int i; if (!IS_ERR_OR_NULL(map->record)) { if (array->map.map_type == BPF_MAP_TYPE_PERCPU_ARRAY) { for (i = 0; i < array->map.max_entries; i++) { void __percpu *pptr = array->pptrs[i & array->index_mask]; int cpu; for_each_possible_cpu(cpu) { bpf_obj_free_fields(map->record, per_cpu_ptr(pptr, cpu)); cond_resched(); } } } else { for (i = 0; i < array->map.max_entries; i++) bpf_obj_free_fields(map->record, array_map_elem_ptr(array, i)); } } if (array->map.map_type == BPF_MAP_TYPE_PERCPU_ARRAY) bpf_array_free_percpu(array); if (array->map.map_flags & BPF_F_MMAPABLE) bpf_map_area_free(array_map_vmalloc_addr(array)); else bpf_map_area_free(array); } static void array_map_seq_show_elem(struct bpf_map *map, void *key, struct seq_file *m) { void *value; rcu_read_lock(); value = array_map_lookup_elem(map, key); if (!value) { rcu_read_unlock(); return; } if (map->btf_key_type_id) seq_printf(m, "%u: ", *(u32 *)key); btf_type_seq_show(map->btf, map->btf_value_type_id, value, m); seq_putc(m, '\n'); rcu_read_unlock(); } static void percpu_array_map_seq_show_elem(struct bpf_map *map, void *key, struct seq_file *m) { struct bpf_array *array = container_of(map, struct bpf_array, map); u32 index = *(u32 *)key; void __percpu *pptr; int cpu; rcu_read_lock(); seq_printf(m, "%u: {\n", *(u32 *)key); pptr = array->pptrs[index & array->index_mask]; for_each_possible_cpu(cpu) { seq_printf(m, "\tcpu%d: ", cpu); btf_type_seq_show(map->btf, map->btf_value_type_id, per_cpu_ptr(pptr, cpu), m); seq_putc(m, '\n'); } seq_puts(m, "}\n"); rcu_read_unlock(); } static int array_map_check_btf(const struct bpf_map *map, const struct btf *btf, const struct btf_type *key_type, const struct btf_type *value_type) { u32 int_data; /* One exception for keyless BTF: .bss/.data/.rodata map */ if (btf_type_is_void(key_type)) { if (map->map_type != BPF_MAP_TYPE_ARRAY || map->max_entries != 1) return -EINVAL; if (BTF_INFO_KIND(value_type->info) != BTF_KIND_DATASEC) return -EINVAL; return 0; } if (BTF_INFO_KIND(key_type->info) != BTF_KIND_INT) return -EINVAL; int_data = *(u32 *)(key_type + 1); /* bpf array can only take a u32 key. This check makes sure * that the btf matches the attr used during map_create. */ if (BTF_INT_BITS(int_data) != 32 || BTF_INT_OFFSET(int_data)) return -EINVAL; return 0; } static int array_map_mmap(struct bpf_map *map, struct vm_area_struct *vma) { struct bpf_array *array = container_of(map, struct bpf_array, map); pgoff_t pgoff = PAGE_ALIGN(sizeof(*array)) >> PAGE_SHIFT; if (!(map->map_flags & BPF_F_MMAPABLE)) return -EINVAL; if (vma->vm_pgoff * PAGE_SIZE + (vma->vm_end - vma->vm_start) > PAGE_ALIGN((u64)array->map.max_entries * array->elem_size)) return -EINVAL; return remap_vmalloc_range(vma, array_map_vmalloc_addr(array), vma->vm_pgoff + pgoff); } static bool array_map_meta_equal(const struct bpf_map *meta0, const struct bpf_map *meta1) { if (!bpf_map_meta_equal(meta0, meta1)) return false; return meta0->map_flags & BPF_F_INNER_MAP ? true : meta0->max_entries == meta1->max_entries; } struct bpf_iter_seq_array_map_info { struct bpf_map *map; void *percpu_value_buf; u32 index; }; static void *bpf_array_map_seq_start(struct seq_file *seq, loff_t *pos) { struct bpf_iter_seq_array_map_info *info = seq->private; struct bpf_map *map = info->map; struct bpf_array *array; u32 index; if (info->index >= map->max_entries) return NULL; if (*pos == 0) ++*pos; array = container_of(map, struct bpf_array, map); index = info->index & array->index_mask; if (info->percpu_value_buf) return (void *)(uintptr_t)array->pptrs[index]; return array_map_elem_ptr(array, index); } static void *bpf_array_map_seq_next(struct seq_file *seq, void *v, loff_t *pos) { struct bpf_iter_seq_array_map_info *info = seq->private; struct bpf_map *map = info->map; struct bpf_array *array; u32 index; ++*pos; ++info->index; if (info->index >= map->max_entries) return NULL; array = container_of(map, struct bpf_array, map); index = info->index & array->index_mask; if (info->percpu_value_buf) return (void *)(uintptr_t)array->pptrs[index]; return array_map_elem_ptr(array, index); } static int __bpf_array_map_seq_show(struct seq_file *seq, void *v) { struct bpf_iter_seq_array_map_info *info = seq->private; struct bpf_iter__bpf_map_elem ctx = {}; struct bpf_map *map = info->map; struct bpf_array *array = container_of(map, struct bpf_array, map); struct bpf_iter_meta meta; struct bpf_prog *prog; int off = 0, cpu = 0; void __percpu *pptr; u32 size; meta.seq = seq; prog = bpf_iter_get_info(&meta, v == NULL); if (!prog) return 0; ctx.meta = &meta; ctx.map = info->map; if (v) { ctx.key = &info->index; if (!info->percpu_value_buf) { ctx.value = v; } else { pptr = (void __percpu *)(uintptr_t)v; size = array->elem_size; for_each_possible_cpu(cpu) { copy_map_value_long(map, info->percpu_value_buf + off, per_cpu_ptr(pptr, cpu)); check_and_init_map_value(map, info->percpu_value_buf + off); off += size; } ctx.value = info->percpu_value_buf; } } return bpf_iter_run_prog(prog, &ctx); } static int bpf_array_map_seq_show(struct seq_file *seq, void *v) { return __bpf_array_map_seq_show(seq, v); } static void bpf_array_map_seq_stop(struct seq_file *seq, void *v) { if (!v) (void)__bpf_array_map_seq_show(seq, NULL); } static int bpf_iter_init_array_map(void *priv_data, struct bpf_iter_aux_info *aux) { struct bpf_iter_seq_array_map_info *seq_info = priv_data; struct bpf_map *map = aux->map; struct bpf_array *array = container_of(map, struct bpf_array, map); void *value_buf; u32 buf_size; if (map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) { buf_size = array->elem_size * num_possible_cpus(); value_buf = kmalloc(buf_size, GFP_USER | __GFP_NOWARN); if (!value_buf) return -ENOMEM; seq_info->percpu_value_buf = value_buf; } /* bpf_iter_attach_map() acquires a map uref, and the uref may be * released before or in the middle of iterating map elements, so * acquire an extra map uref for iterator. */ bpf_map_inc_with_uref(map); seq_info->map = map; return 0; } static void bpf_iter_fini_array_map(void *priv_data) { struct bpf_iter_seq_array_map_info *seq_info = priv_data; bpf_map_put_with_uref(seq_info->map); kfree(seq_info->percpu_value_buf); } static const struct seq_operations bpf_array_map_seq_ops = { .start = bpf_array_map_seq_start, .next = bpf_array_map_seq_next, .stop = bpf_array_map_seq_stop, .show = bpf_array_map_seq_show, }; static const struct bpf_iter_seq_info iter_seq_info = { .seq_ops = &bpf_array_map_seq_ops, .init_seq_private = bpf_iter_init_array_map, .fini_seq_private = bpf_iter_fini_array_map, .seq_priv_size = sizeof(struct bpf_iter_seq_array_map_info), }; static long bpf_for_each_array_elem(struct bpf_map *map, bpf_callback_t callback_fn, void *callback_ctx, u64 flags) { u32 i, key, num_elems = 0; struct bpf_array *array; bool is_percpu; u64 ret = 0; void *val; cant_migrate(); if (flags != 0) return -EINVAL; is_percpu = map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY; array = container_of(map, struct bpf_array, map); for (i = 0; i < map->max_entries; i++) { if (is_percpu) val = this_cpu_ptr(array->pptrs[i]); else val = array_map_elem_ptr(array, i); num_elems++; key = i; ret = callback_fn((u64)(long)map, (u64)(long)&key, (u64)(long)val, (u64)(long)callback_ctx, 0); /* return value: 0 - continue, 1 - stop and return */ if (ret) break; } return num_elems; } static u64 array_map_mem_usage(const struct bpf_map *map) { struct bpf_array *array = container_of(map, struct bpf_array, map); bool percpu = map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY; u32 elem_size = array->elem_size; u64 entries = map->max_entries; u64 usage = sizeof(*array); if (percpu) { usage += entries * sizeof(void *); usage += entries * elem_size * num_possible_cpus(); } else { if (map->map_flags & BPF_F_MMAPABLE) { usage = PAGE_ALIGN(usage); usage += PAGE_ALIGN(entries * elem_size); } else { usage += entries * elem_size; } } return usage; } BTF_ID_LIST_SINGLE(array_map_btf_ids, struct, bpf_array) const struct bpf_map_ops array_map_ops = { .map_meta_equal = array_map_meta_equal, .map_alloc_check = array_map_alloc_check, .map_alloc = array_map_alloc, .map_free = array_map_free, .map_get_next_key = array_map_get_next_key, .map_release_uref = array_map_free_timers_wq, .map_lookup_elem = array_map_lookup_elem, .map_update_elem = array_map_update_elem, .map_delete_elem = array_map_delete_elem, .map_gen_lookup = array_map_gen_lookup, .map_direct_value_addr = array_map_direct_value_addr, .map_direct_value_meta = array_map_direct_value_meta, .map_mmap = array_map_mmap, .map_seq_show_elem = array_map_seq_show_elem, .map_check_btf = array_map_check_btf, .map_lookup_batch = generic_map_lookup_batch, .map_update_batch = generic_map_update_batch, .map_set_for_each_callback_args = map_set_for_each_callback_args, .map_for_each_callback = bpf_for_each_array_elem, .map_mem_usage = array_map_mem_usage, .map_btf_id = &array_map_btf_ids[0], .iter_seq_info = &iter_seq_info, }; const struct bpf_map_ops percpu_array_map_ops = { .map_meta_equal = bpf_map_meta_equal, .map_alloc_check = array_map_alloc_check, .map_alloc = array_map_alloc, .map_free = array_map_free, .map_get_next_key = array_map_get_next_key, .map_lookup_elem = percpu_array_map_lookup_elem, .map_gen_lookup = percpu_array_map_gen_lookup, .map_update_elem = array_map_update_elem, .map_delete_elem = array_map_delete_elem, .map_lookup_percpu_elem = percpu_array_map_lookup_percpu_elem, .map_seq_show_elem = percpu_array_map_seq_show_elem, .map_check_btf = array_map_check_btf, .map_lookup_batch = generic_map_lookup_batch, .map_update_batch = generic_map_update_batch, .map_set_for_each_callback_args = map_set_for_each_callback_args, .map_for_each_callback = bpf_for_each_array_elem, .map_mem_usage = array_map_mem_usage, .map_btf_id = &array_map_btf_ids[0], .iter_seq_info = &iter_seq_info, }; static int fd_array_map_alloc_check(union bpf_attr *attr) { /* only file descriptors can be stored in this type of map */ if (attr->value_size != sizeof(u32)) return -EINVAL; /* Program read-only/write-only not supported for special maps yet. */ if (attr->map_flags & (BPF_F_RDONLY_PROG | BPF_F_WRONLY_PROG)) return -EINVAL; return array_map_alloc_check(attr); } static void fd_array_map_free(struct bpf_map *map) { struct bpf_array *array = container_of(map, struct bpf_array, map); int i; /* make sure it's empty */ for (i = 0; i < array->map.max_entries; i++) BUG_ON(array->ptrs[i] != NULL); bpf_map_area_free(array); } static void *fd_array_map_lookup_elem(struct bpf_map *map, void *key) { return ERR_PTR(-EOPNOTSUPP); } /* only called from syscall */ int bpf_fd_array_map_lookup_elem(struct bpf_map *map, void *key, u32 *value) { void **elem, *ptr; int ret = 0; if (!map->ops->map_fd_sys_lookup_elem) return -ENOTSUPP; rcu_read_lock(); elem = array_map_lookup_elem(map, key); if (elem && (ptr = READ_ONCE(*elem))) *value = map->ops->map_fd_sys_lookup_elem(ptr); else ret = -ENOENT; rcu_read_unlock(); return ret; } /* only called from syscall */ int bpf_fd_array_map_update_elem(struct bpf_map *map, struct file *map_file, void *key, void *value, u64 map_flags) { struct bpf_array *array = container_of(map, struct bpf_array, map); void *new_ptr, *old_ptr; u32 index = *(u32 *)key, ufd; if (map_flags != BPF_ANY) return -EINVAL; if (index >= array->map.max_entries) return -E2BIG; ufd = *(u32 *)value; new_ptr = map->ops->map_fd_get_ptr(map, map_file, ufd); if (IS_ERR(new_ptr)) return PTR_ERR(new_ptr); if (map->ops->map_poke_run) { mutex_lock(&array->aux->poke_mutex); old_ptr = xchg(array->ptrs + index, new_ptr); map->ops->map_poke_run(map, index, old_ptr, new_ptr); mutex_unlock(&array->aux->poke_mutex); } else { old_ptr = xchg(array->ptrs + index, new_ptr); } if (old_ptr) map->ops->map_fd_put_ptr(map, old_ptr, true); return 0; } static long __fd_array_map_delete_elem(struct bpf_map *map, void *key, bool need_defer) { struct bpf_array *array = container_of(map, struct bpf_array, map); void *old_ptr; u32 index = *(u32 *)key; if (index >= array->map.max_entries) return -E2BIG; if (map->ops->map_poke_run) { mutex_lock(&array->aux->poke_mutex); old_ptr = xchg(array->ptrs + index, NULL); map->ops->map_poke_run(map, index, old_ptr, NULL); mutex_unlock(&array->aux->poke_mutex); } else { old_ptr = xchg(array->ptrs + index, NULL); } if (old_ptr) { map->ops->map_fd_put_ptr(map, old_ptr, need_defer); return 0; } else { return -ENOENT; } } static long fd_array_map_delete_elem(struct bpf_map *map, void *key) { return __fd_array_map_delete_elem(map, key, true); } static void *prog_fd_array_get_ptr(struct bpf_map *map, struct file *map_file, int fd) { struct bpf_prog *prog = bpf_prog_get(fd); bool is_extended; if (IS_ERR(prog)) return prog; if (prog->type == BPF_PROG_TYPE_EXT || !bpf_prog_map_compatible(map, prog)) { bpf_prog_put(prog); return ERR_PTR(-EINVAL); } mutex_lock(&prog->aux->ext_mutex); is_extended = prog->aux->is_extended; if (!is_extended) prog->aux->prog_array_member_cnt++; mutex_unlock(&prog->aux->ext_mutex); if (is_extended) { /* Extended prog can not be tail callee. It's to prevent a * potential infinite loop like: * tail callee prog entry -> tail callee prog subprog -> * freplace prog entry --tailcall-> tail callee prog entry. */ bpf_prog_put(prog); return ERR_PTR(-EBUSY); } return prog; } static void prog_fd_array_put_ptr(struct bpf_map *map, void *ptr, bool need_defer) { struct bpf_prog *prog = ptr; mutex_lock(&prog->aux->ext_mutex); prog->aux->prog_array_member_cnt--; mutex_unlock(&prog->aux->ext_mutex); /* bpf_prog is freed after one RCU or tasks trace grace period */ bpf_prog_put(prog); } static u32 prog_fd_array_sys_lookup_elem(void *ptr) { return ((struct bpf_prog *)ptr)->aux->id; } /* decrement refcnt of all bpf_progs that are stored in this map */ static void bpf_fd_array_map_clear(struct bpf_map *map, bool need_defer) { struct bpf_array *array = container_of(map, struct bpf_array, map); int i; for (i = 0; i < array->map.max_entries; i++) __fd_array_map_delete_elem(map, &i, need_defer); } static void prog_array_map_seq_show_elem(struct bpf_map *map, void *key, struct seq_file *m) { void **elem, *ptr; u32 prog_id; rcu_read_lock(); elem = array_map_lookup_elem(map, key); if (elem) { ptr = READ_ONCE(*elem); if (ptr) { seq_printf(m, "%u: ", *(u32 *)key); prog_id = prog_fd_array_sys_lookup_elem(ptr); btf_type_seq_show(map->btf, map->btf_value_type_id, &prog_id, m); seq_putc(m, '\n'); } } rcu_read_unlock(); } struct prog_poke_elem { struct list_head list; struct bpf_prog_aux *aux; }; static int prog_array_map_poke_track(struct bpf_map *map, struct bpf_prog_aux *prog_aux) { struct prog_poke_elem *elem; struct bpf_array_aux *aux; int ret = 0; aux = container_of(map, struct bpf_array, map)->aux; mutex_lock(&aux->poke_mutex); list_for_each_entry(elem, &aux->poke_progs, list) { if (elem->aux == prog_aux) goto out; } elem = kmalloc(sizeof(*elem), GFP_KERNEL); if (!elem) { ret = -ENOMEM; goto out; } INIT_LIST_HEAD(&elem->list); /* We must track the program's aux info at this point in time * since the program pointer itself may not be stable yet, see * also comment in prog_array_map_poke_run(). */ elem->aux = prog_aux; list_add_tail(&elem->list, &aux->poke_progs); out: mutex_unlock(&aux->poke_mutex); return ret; } static void prog_array_map_poke_untrack(struct bpf_map *map, struct bpf_prog_aux *prog_aux) { struct prog_poke_elem *elem, *tmp; struct bpf_array_aux *aux; aux = container_of(map, struct bpf_array, map)->aux; mutex_lock(&aux->poke_mutex); list_for_each_entry_safe(elem, tmp, &aux->poke_progs, list) { if (elem->aux == prog_aux) { list_del_init(&elem->list); kfree(elem); break; } } mutex_unlock(&aux->poke_mutex); } void __weak bpf_arch_poke_desc_update(struct bpf_jit_poke_descriptor *poke, struct bpf_prog *new, struct bpf_prog *old) { WARN_ON_ONCE(1); } static void prog_array_map_poke_run(struct bpf_map *map, u32 key, struct bpf_prog *old, struct bpf_prog *new) { struct prog_poke_elem *elem; struct bpf_array_aux *aux; aux = container_of(map, struct bpf_array, map)->aux; WARN_ON_ONCE(!mutex_is_locked(&aux->poke_mutex)); list_for_each_entry(elem, &aux->poke_progs, list) { struct bpf_jit_poke_descriptor *poke; int i; for (i = 0; i < elem->aux->size_poke_tab; i++) { poke = &elem->aux->poke_tab[i]; /* Few things to be aware of: * * 1) We can only ever access aux in this context, but * not aux->prog since it might not be stable yet and * there could be danger of use after free otherwise. * 2) Initially when we start tracking aux, the program * is not JITed yet and also does not have a kallsyms * entry. We skip these as poke->tailcall_target_stable * is not active yet. The JIT will do the final fixup * before setting it stable. The various * poke->tailcall_target_stable are successively * activated, so tail call updates can arrive from here * while JIT is still finishing its final fixup for * non-activated poke entries. * 3) Also programs reaching refcount of zero while patching * is in progress is okay since we're protected under * poke_mutex and untrack the programs before the JIT * buffer is freed. */ if (!READ_ONCE(poke->tailcall_target_stable)) continue; if (poke->reason != BPF_POKE_REASON_TAIL_CALL) continue; if (poke->tail_call.map != map || poke->tail_call.key != key) continue; bpf_arch_poke_desc_update(poke, new, old); } } } static void prog_array_map_clear_deferred(struct work_struct *work) { struct bpf_map *map = container_of(work, struct bpf_array_aux, work)->map; bpf_fd_array_map_clear(map, true); bpf_map_put(map); } static void prog_array_map_clear(struct bpf_map *map) { struct bpf_array_aux *aux = container_of(map, struct bpf_array, map)->aux; bpf_map_inc(map); schedule_work(&aux->work); } static struct bpf_map *prog_array_map_alloc(union bpf_attr *attr) { struct bpf_array_aux *aux; struct bpf_map *map; aux = kzalloc(sizeof(*aux), GFP_KERNEL_ACCOUNT); if (!aux) return ERR_PTR(-ENOMEM); INIT_WORK(&aux->work, prog_array_map_clear_deferred); INIT_LIST_HEAD(&aux->poke_progs); mutex_init(&aux->poke_mutex); map = array_map_alloc(attr); if (IS_ERR(map)) { kfree(aux); return map; } container_of(map, struct bpf_array, map)->aux = aux; aux->map = map; return map; } static void prog_array_map_free(struct bpf_map *map) { struct prog_poke_elem *elem, *tmp; struct bpf_array_aux *aux; aux = container_of(map, struct bpf_array, map)->aux; list_for_each_entry_safe(elem, tmp, &aux->poke_progs, list) { list_del_init(&elem->list); kfree(elem); } kfree(aux); fd_array_map_free(map); } /* prog_array->aux->{type,jited} is a runtime binding. * Doing static check alone in the verifier is not enough. * Thus, prog_array_map cannot be used as an inner_map * and map_meta_equal is not implemented. */ const struct bpf_map_ops prog_array_map_ops = { .map_alloc_check = fd_array_map_alloc_check, .map_alloc = prog_array_map_alloc, .map_free = prog_array_map_free, .map_poke_track = prog_array_map_poke_track, .map_poke_untrack = prog_array_map_poke_untrack, .map_poke_run = prog_array_map_poke_run, .map_get_next_key = array_map_get_next_key, .map_lookup_elem = fd_array_map_lookup_elem, .map_delete_elem = fd_array_map_delete_elem, .map_fd_get_ptr = prog_fd_array_get_ptr, .map_fd_put_ptr = prog_fd_array_put_ptr, .map_fd_sys_lookup_elem = prog_fd_array_sys_lookup_elem, .map_release_uref = prog_array_map_clear, .map_seq_show_elem = prog_array_map_seq_show_elem, .map_mem_usage = array_map_mem_usage, .map_btf_id = &array_map_btf_ids[0], }; static struct bpf_event_entry *bpf_event_entry_gen(struct file *perf_file, struct file *map_file) { struct bpf_event_entry *ee; ee = kzalloc(sizeof(*ee), GFP_KERNEL); if (ee) { ee->event = perf_file->private_data; ee->perf_file = perf_file; ee->map_file = map_file; } return ee; } static void __bpf_event_entry_free(struct rcu_head *rcu) { struct bpf_event_entry *ee; ee = container_of(rcu, struct bpf_event_entry, rcu); fput(ee->perf_file); kfree(ee); } static void bpf_event_entry_free_rcu(struct bpf_event_entry *ee) { call_rcu(&ee->rcu, __bpf_event_entry_free); } static void *perf_event_fd_array_get_ptr(struct bpf_map *map, struct file *map_file, int fd) { struct bpf_event_entry *ee; struct perf_event *event; struct file *perf_file; u64 value; perf_file = perf_event_get(fd); if (IS_ERR(perf_file)) return perf_file; ee = ERR_PTR(-EOPNOTSUPP); event = perf_file->private_data; if (perf_event_read_local(event, &value, NULL, NULL) == -EOPNOTSUPP) goto err_out; ee = bpf_event_entry_gen(perf_file, map_file); if (ee) return ee; ee = ERR_PTR(-ENOMEM); err_out: fput(perf_file); return ee; } static void perf_event_fd_array_put_ptr(struct bpf_map *map, void *ptr, bool need_defer) { /* bpf_perf_event is freed after one RCU grace period */ bpf_event_entry_free_rcu(ptr); } static void perf_event_fd_array_release(struct bpf_map *map, struct file *map_file) { struct bpf_array *array = container_of(map, struct bpf_array, map); struct bpf_event_entry *ee; int i; if (map->map_flags & BPF_F_PRESERVE_ELEMS) return; rcu_read_lock(); for (i = 0; i < array->map.max_entries; i++) { ee = READ_ONCE(array->ptrs[i]); if (ee && ee->map_file == map_file) __fd_array_map_delete_elem(map, &i, true); } rcu_read_unlock(); } static void perf_event_fd_array_map_free(struct bpf_map *map) { if (map->map_flags & BPF_F_PRESERVE_ELEMS) bpf_fd_array_map_clear(map, false); fd_array_map_free(map); } const struct bpf_map_ops perf_event_array_map_ops = { .map_meta_equal = bpf_map_meta_equal, .map_alloc_check = fd_array_map_alloc_check, .map_alloc = array_map_alloc, .map_free = perf_event_fd_array_map_free, .map_get_next_key = array_map_get_next_key, .map_lookup_elem = fd_array_map_lookup_elem, .map_delete_elem = fd_array_map_delete_elem, .map_fd_get_ptr = perf_event_fd_array_get_ptr, .map_fd_put_ptr = perf_event_fd_array_put_ptr, .map_release = perf_event_fd_array_release, .map_check_btf = map_check_no_btf, .map_mem_usage = array_map_mem_usage, .map_btf_id = &array_map_btf_ids[0], }; #ifdef CONFIG_CGROUPS static void *cgroup_fd_array_get_ptr(struct bpf_map *map, struct file *map_file /* not used */, int fd) { return cgroup_get_from_fd(fd); } static void cgroup_fd_array_put_ptr(struct bpf_map *map, void *ptr, bool need_defer) { /* cgroup_put free cgrp after a rcu grace period */ cgroup_put(ptr); } static void cgroup_fd_array_free(struct bpf_map *map) { bpf_fd_array_map_clear(map, false); fd_array_map_free(map); } const struct bpf_map_ops cgroup_array_map_ops = { .map_meta_equal = bpf_map_meta_equal, .map_alloc_check = fd_array_map_alloc_check, .map_alloc = array_map_alloc, .map_free = cgroup_fd_array_free, .map_get_next_key = array_map_get_next_key, .map_lookup_elem = fd_array_map_lookup_elem, .map_delete_elem = fd_array_map_delete_elem, .map_fd_get_ptr = cgroup_fd_array_get_ptr, .map_fd_put_ptr = cgroup_fd_array_put_ptr, .map_check_btf = map_check_no_btf, .map_mem_usage = array_map_mem_usage, .map_btf_id = &array_map_btf_ids[0], }; #endif static struct bpf_map *array_of_map_alloc(union bpf_attr *attr) { struct bpf_map *map, *inner_map_meta; inner_map_meta = bpf_map_meta_alloc(attr->inner_map_fd); if (IS_ERR(inner_map_meta)) return inner_map_meta; map = array_map_alloc(attr); if (IS_ERR(map)) { bpf_map_meta_free(inner_map_meta); return map; } map->inner_map_meta = inner_map_meta; return map; } static void array_of_map_free(struct bpf_map *map) { /* map->inner_map_meta is only accessed by syscall which * is protected by fdget/fdput. */ bpf_map_meta_free(map->inner_map_meta); bpf_fd_array_map_clear(map, false); fd_array_map_free(map); } static void *array_of_map_lookup_elem(struct bpf_map *map, void *key) { struct bpf_map **inner_map = array_map_lookup_elem(map, key); if (!inner_map) return NULL; return READ_ONCE(*inner_map); } static int array_of_map_gen_lookup(struct bpf_map *map, struct bpf_insn *insn_buf) { struct bpf_array *array = container_of(map, struct bpf_array, map); u32 elem_size = array->elem_size; struct bpf_insn *insn = insn_buf; const int ret = BPF_REG_0; const int map_ptr = BPF_REG_1; const int index = BPF_REG_2; *insn++ = BPF_ALU64_IMM(BPF_ADD, map_ptr, offsetof(struct bpf_array, value)); *insn++ = BPF_LDX_MEM(BPF_W, ret, index, 0); if (!map->bypass_spec_v1) { *insn++ = BPF_JMP_IMM(BPF_JGE, ret, map->max_entries, 6); *insn++ = BPF_ALU32_IMM(BPF_AND, ret, array->index_mask); } else { *insn++ = BPF_JMP_IMM(BPF_JGE, ret, map->max_entries, 5); } if (is_power_of_2(elem_size)) *insn++ = BPF_ALU64_IMM(BPF_LSH, ret, ilog2(elem_size)); else *insn++ = BPF_ALU64_IMM(BPF_MUL, ret, elem_size); *insn++ = BPF_ALU64_REG(BPF_ADD, ret, map_ptr); *insn++ = BPF_LDX_MEM(BPF_DW, ret, ret, 0); *insn++ = BPF_JMP_IMM(BPF_JEQ, ret, 0, 1); *insn++ = BPF_JMP_IMM(BPF_JA, 0, 0, 1); *insn++ = BPF_MOV64_IMM(ret, 0); return insn - insn_buf; } const struct bpf_map_ops array_of_maps_map_ops = { .map_alloc_check = fd_array_map_alloc_check, .map_alloc = array_of_map_alloc, .map_free = array_of_map_free, .map_get_next_key = array_map_get_next_key, .map_lookup_elem = array_of_map_lookup_elem, .map_delete_elem = fd_array_map_delete_elem, .map_fd_get_ptr = bpf_map_fd_get_ptr, .map_fd_put_ptr = bpf_map_fd_put_ptr, .map_fd_sys_lookup_elem = bpf_map_fd_sys_lookup_elem, .map_gen_lookup = array_of_map_gen_lookup, .map_lookup_batch = generic_map_lookup_batch, .map_update_batch = generic_map_update_batch, .map_check_btf = map_check_no_btf, .map_mem_usage = array_map_mem_usage, .map_btf_id = &array_map_btf_ids[0], };
13 13 13 1 1 2 1 2 51 51 44 5 4 3 1 1 48 1 51 51 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 /* * Copyright (c) 2006, 2019 Oracle and/or its affiliates. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenIB.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * */ #include <linux/kernel.h> #include <linux/in.h> #include <linux/if.h> #include <linux/netdevice.h> #include <linux/inetdevice.h> #include <linux/if_arp.h> #include <linux/delay.h> #include <linux/slab.h> #include <linux/module.h> #include <net/addrconf.h> #include "rds_single_path.h" #include "rds.h" #include "ib.h" #include "ib_mr.h" static unsigned int rds_ib_mr_1m_pool_size = RDS_MR_1M_POOL_SIZE; static unsigned int rds_ib_mr_8k_pool_size = RDS_MR_8K_POOL_SIZE; unsigned int rds_ib_retry_count = RDS_IB_DEFAULT_RETRY_COUNT; static atomic_t rds_ib_unloading; module_param(rds_ib_mr_1m_pool_size, int, 0444); MODULE_PARM_DESC(rds_ib_mr_1m_pool_size, " Max number of 1M mr per HCA"); module_param(rds_ib_mr_8k_pool_size, int, 0444); MODULE_PARM_DESC(rds_ib_mr_8k_pool_size, " Max number of 8K mr per HCA"); module_param(rds_ib_retry_count, int, 0444); MODULE_PARM_DESC(rds_ib_retry_count, " Number of hw retries before reporting an error"); /* * we have a clumsy combination of RCU and a rwsem protecting this list * because it is used both in the get_mr fast path and while blocking in * the FMR flushing path. */ DECLARE_RWSEM(rds_ib_devices_lock); struct list_head rds_ib_devices; /* NOTE: if also grabbing ibdev lock, grab this first */ DEFINE_SPINLOCK(ib_nodev_conns_lock); LIST_HEAD(ib_nodev_conns); static void rds_ib_nodev_connect(void) { struct rds_ib_connection *ic; spin_lock(&ib_nodev_conns_lock); list_for_each_entry(ic, &ib_nodev_conns, ib_node) rds_conn_connect_if_down(ic->conn); spin_unlock(&ib_nodev_conns_lock); } static void rds_ib_dev_shutdown(struct rds_ib_device *rds_ibdev) { struct rds_ib_connection *ic; unsigned long flags; spin_lock_irqsave(&rds_ibdev->spinlock, flags); list_for_each_entry(ic, &rds_ibdev->conn_list, ib_node) rds_conn_path_drop(&ic->conn->c_path[0], true); spin_unlock_irqrestore(&rds_ibdev->spinlock, flags); } /* * rds_ib_destroy_mr_pool() blocks on a few things and mrs drop references * from interrupt context so we push freing off into a work struct in krdsd. */ static void rds_ib_dev_free(struct work_struct *work) { struct rds_ib_ipaddr *i_ipaddr, *i_next; struct rds_ib_device *rds_ibdev = container_of(work, struct rds_ib_device, free_work); if (rds_ibdev->mr_8k_pool) rds_ib_destroy_mr_pool(rds_ibdev->mr_8k_pool); if (rds_ibdev->mr_1m_pool) rds_ib_destroy_mr_pool(rds_ibdev->mr_1m_pool); if (rds_ibdev->pd) ib_dealloc_pd(rds_ibdev->pd); list_for_each_entry_safe(i_ipaddr, i_next, &rds_ibdev->ipaddr_list, list) { list_del(&i_ipaddr->list); kfree(i_ipaddr); } kfree(rds_ibdev->vector_load); kfree(rds_ibdev); } void rds_ib_dev_put(struct rds_ib_device *rds_ibdev) { BUG_ON(refcount_read(&rds_ibdev->refcount) == 0); if (refcount_dec_and_test(&rds_ibdev->refcount)) queue_work(rds_wq, &rds_ibdev->free_work); } static int rds_ib_add_one(struct ib_device *device) { struct rds_ib_device *rds_ibdev; int ret; /* Only handle IB (no iWARP) devices */ if (device->node_type != RDMA_NODE_IB_CA) return -EOPNOTSUPP; /* Device must support FRWR */ if (!(device->attrs.device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS)) return -EOPNOTSUPP; rds_ibdev = kzalloc_node(sizeof(struct rds_ib_device), GFP_KERNEL, ibdev_to_node(device)); if (!rds_ibdev) return -ENOMEM; spin_lock_init(&rds_ibdev->spinlock); refcount_set(&rds_ibdev->refcount, 1); INIT_WORK(&rds_ibdev->free_work, rds_ib_dev_free); INIT_LIST_HEAD(&rds_ibdev->ipaddr_list); INIT_LIST_HEAD(&rds_ibdev->conn_list); rds_ibdev->max_wrs = device->attrs.max_qp_wr; rds_ibdev->max_sge = min(device->attrs.max_send_sge, RDS_IB_MAX_SGE); rds_ibdev->odp_capable = !!(device->attrs.kernel_cap_flags & IBK_ON_DEMAND_PAGING) && !!(device->attrs.odp_caps.per_transport_caps.rc_odp_caps & IB_ODP_SUPPORT_WRITE) && !!(device->attrs.odp_caps.per_transport_caps.rc_odp_caps & IB_ODP_SUPPORT_READ); rds_ibdev->max_1m_mrs = device->attrs.max_mr ? min_t(unsigned int, (device->attrs.max_mr / 2), rds_ib_mr_1m_pool_size) : rds_ib_mr_1m_pool_size; rds_ibdev->max_8k_mrs = device->attrs.max_mr ? min_t(unsigned int, ((device->attrs.max_mr / 2) * RDS_MR_8K_SCALE), rds_ib_mr_8k_pool_size) : rds_ib_mr_8k_pool_size; rds_ibdev->max_initiator_depth = device->attrs.max_qp_init_rd_atom; rds_ibdev->max_responder_resources = device->attrs.max_qp_rd_atom; rds_ibdev->vector_load = kcalloc(device->num_comp_vectors, sizeof(int), GFP_KERNEL); if (!rds_ibdev->vector_load) { pr_err("RDS/IB: %s failed to allocate vector memory\n", __func__); ret = -ENOMEM; goto put_dev; } rds_ibdev->dev = device; rds_ibdev->pd = ib_alloc_pd(device, 0); if (IS_ERR(rds_ibdev->pd)) { ret = PTR_ERR(rds_ibdev->pd); rds_ibdev->pd = NULL; goto put_dev; } rds_ibdev->mr_1m_pool = rds_ib_create_mr_pool(rds_ibdev, RDS_IB_MR_1M_POOL); if (IS_ERR(rds_ibdev->mr_1m_pool)) { ret = PTR_ERR(rds_ibdev->mr_1m_pool); rds_ibdev->mr_1m_pool = NULL; goto put_dev; } rds_ibdev->mr_8k_pool = rds_ib_create_mr_pool(rds_ibdev, RDS_IB_MR_8K_POOL); if (IS_ERR(rds_ibdev->mr_8k_pool)) { ret = PTR_ERR(rds_ibdev->mr_8k_pool); rds_ibdev->mr_8k_pool = NULL; goto put_dev; } rdsdebug("RDS/IB: max_mr = %d, max_wrs = %d, max_sge = %d, max_1m_mrs = %d, max_8k_mrs = %d\n", device->attrs.max_mr, rds_ibdev->max_wrs, rds_ibdev->max_sge, rds_ibdev->max_1m_mrs, rds_ibdev->max_8k_mrs); pr_info("RDS/IB: %s: added\n", device->name); down_write(&rds_ib_devices_lock); list_add_tail_rcu(&rds_ibdev->list, &rds_ib_devices); up_write(&rds_ib_devices_lock); refcount_inc(&rds_ibdev->refcount); ib_set_client_data(device, &rds_ib_client, rds_ibdev); rds_ib_nodev_connect(); return 0; put_dev: rds_ib_dev_put(rds_ibdev); return ret; } /* * New connections use this to find the device to associate with the * connection. It's not in the fast path so we're not concerned about the * performance of the IB call. (As of this writing, it uses an interrupt * blocking spinlock to serialize walking a per-device list of all registered * clients.) * * RCU is used to handle incoming connections racing with device teardown. * Rather than use a lock to serialize removal from the client_data and * getting a new reference, we use an RCU grace period. The destruction * path removes the device from client_data and then waits for all RCU * readers to finish. * * A new connection can get NULL from this if its arriving on a * device that is in the process of being removed. */ struct rds_ib_device *rds_ib_get_client_data(struct ib_device *device) { struct rds_ib_device *rds_ibdev; rcu_read_lock(); rds_ibdev = ib_get_client_data(device, &rds_ib_client); if (rds_ibdev) refcount_inc(&rds_ibdev->refcount); rcu_read_unlock(); return rds_ibdev; } /* * The IB stack is letting us know that a device is going away. This can * happen if the underlying HCA driver is removed or if PCI hotplug is removing * the pci function, for example. * * This can be called at any time and can be racing with any other RDS path. */ static void rds_ib_remove_one(struct ib_device *device, void *client_data) { struct rds_ib_device *rds_ibdev = client_data; rds_ib_dev_shutdown(rds_ibdev); /* stop connection attempts from getting a reference to this device. */ ib_set_client_data(device, &rds_ib_client, NULL); down_write(&rds_ib_devices_lock); list_del_rcu(&rds_ibdev->list); up_write(&rds_ib_devices_lock); /* * This synchronize rcu is waiting for readers of both the ib * client data and the devices list to finish before we drop * both of those references. */ synchronize_rcu(); rds_ib_dev_put(rds_ibdev); rds_ib_dev_put(rds_ibdev); } struct ib_client rds_ib_client = { .name = "rds_ib", .add = rds_ib_add_one, .remove = rds_ib_remove_one }; static int rds_ib_conn_info_visitor(struct rds_connection *conn, void *buffer) { struct rds_info_rdma_connection *iinfo = buffer; struct rds_ib_connection *ic = conn->c_transport_data; /* We will only ever look at IB transports */ if (conn->c_trans != &rds_ib_transport) return 0; if (conn->c_isv6) return 0; iinfo->src_addr = conn->c_laddr.s6_addr32[3]; iinfo->dst_addr = conn->c_faddr.s6_addr32[3]; if (ic) { iinfo->tos = conn->c_tos; iinfo->sl = ic->i_sl; } memset(&iinfo->src_gid, 0, sizeof(iinfo->src_gid)); memset(&iinfo->dst_gid, 0, sizeof(iinfo->dst_gid)); if (rds_conn_state(conn) == RDS_CONN_UP) { struct rds_ib_device *rds_ibdev; rdma_read_gids(ic->i_cm_id, (union ib_gid *)&iinfo->src_gid, (union ib_gid *)&iinfo->dst_gid); rds_ibdev = ic->rds_ibdev; iinfo->max_send_wr = ic->i_send_ring.w_nr; iinfo->max_recv_wr = ic->i_recv_ring.w_nr; iinfo->max_send_sge = rds_ibdev->max_sge; rds_ib_get_mr_info(rds_ibdev, iinfo); iinfo->cache_allocs = atomic_read(&ic->i_cache_allocs); } return 1; } #if IS_ENABLED(CONFIG_IPV6) /* IPv6 version of rds_ib_conn_info_visitor(). */ static int rds6_ib_conn_info_visitor(struct rds_connection *conn, void *buffer) { struct rds6_info_rdma_connection *iinfo6 = buffer; struct rds_ib_connection *ic = conn->c_transport_data; /* We will only ever look at IB transports */ if (conn->c_trans != &rds_ib_transport) return 0; iinfo6->src_addr = conn->c_laddr; iinfo6->dst_addr = conn->c_faddr; if (ic) { iinfo6->tos = conn->c_tos; iinfo6->sl = ic->i_sl; } memset(&iinfo6->src_gid, 0, sizeof(iinfo6->src_gid)); memset(&iinfo6->dst_gid, 0, sizeof(iinfo6->dst_gid)); if (rds_conn_state(conn) == RDS_CONN_UP) { struct rds_ib_device *rds_ibdev; rdma_read_gids(ic->i_cm_id, (union ib_gid *)&iinfo6->src_gid, (union ib_gid *)&iinfo6->dst_gid); rds_ibdev = ic->rds_ibdev; iinfo6->max_send_wr = ic->i_send_ring.w_nr; iinfo6->max_recv_wr = ic->i_recv_ring.w_nr; iinfo6->max_send_sge = rds_ibdev->max_sge; rds6_ib_get_mr_info(rds_ibdev, iinfo6); iinfo6->cache_allocs = atomic_read(&ic->i_cache_allocs); } return 1; } #endif static void rds_ib_ic_info(struct socket *sock, unsigned int len, struct rds_info_iterator *iter, struct rds_info_lengths *lens) { u64 buffer[(sizeof(struct rds_info_rdma_connection) + 7) / 8]; rds_for_each_conn_info(sock, len, iter, lens, rds_ib_conn_info_visitor, buffer, sizeof(struct rds_info_rdma_connection)); } #if IS_ENABLED(CONFIG_IPV6) /* IPv6 version of rds_ib_ic_info(). */ static void rds6_ib_ic_info(struct socket *sock, unsigned int len, struct rds_info_iterator *iter, struct rds_info_lengths *lens) { u64 buffer[(sizeof(struct rds6_info_rdma_connection) + 7) / 8]; rds_for_each_conn_info(sock, len, iter, lens, rds6_ib_conn_info_visitor, buffer, sizeof(struct rds6_info_rdma_connection)); } #endif /* * Early RDS/IB was built to only bind to an address if there is an IPoIB * device with that address set. * * If it were me, I'd advocate for something more flexible. Sending and * receiving should be device-agnostic. Transports would try and maintain * connections between peers who have messages queued. Userspace would be * allowed to influence which paths have priority. We could call userspace * asserting this policy "routing". */ static int rds_ib_laddr_check(struct net *net, const struct in6_addr *addr, __u32 scope_id) { int ret; struct rdma_cm_id *cm_id; #if IS_ENABLED(CONFIG_IPV6) struct sockaddr_in6 sin6; #endif struct sockaddr_in sin; struct sockaddr *sa; bool isv4; isv4 = ipv6_addr_v4mapped(addr); /* Create a CMA ID and try to bind it. This catches both * IB and iWARP capable NICs. */ cm_id = rdma_create_id(&init_net, rds_rdma_cm_event_handler, NULL, RDMA_PS_TCP, IB_QPT_RC); if (IS_ERR(cm_id)) return PTR_ERR(cm_id); if (isv4) { memset(&sin, 0, sizeof(sin)); sin.sin_family = AF_INET; sin.sin_addr.s_addr = addr->s6_addr32[3]; sa = (struct sockaddr *)&sin; } else { #if IS_ENABLED(CONFIG_IPV6) memset(&sin6, 0, sizeof(sin6)); sin6.sin6_family = AF_INET6; sin6.sin6_addr = *addr; sin6.sin6_scope_id = scope_id; sa = (struct sockaddr *)&sin6; /* XXX Do a special IPv6 link local address check here. The * reason is that rdma_bind_addr() always succeeds with IPv6 * link local address regardless it is indeed configured in a * system. */ if (ipv6_addr_type(addr) & IPV6_ADDR_LINKLOCAL) { struct net_device *dev; if (scope_id == 0) { ret = -EADDRNOTAVAIL; goto out; } /* Use init_net for now as RDS is not network * name space aware. */ dev = dev_get_by_index(&init_net, scope_id); if (!dev) { ret = -EADDRNOTAVAIL; goto out; } if (!ipv6_chk_addr(&init_net, addr, dev, 1)) { dev_put(dev); ret = -EADDRNOTAVAIL; goto out; } dev_put(dev); } #else ret = -EADDRNOTAVAIL; goto out; #endif } /* rdma_bind_addr will only succeed for IB & iWARP devices */ ret = rdma_bind_addr(cm_id, sa); /* due to this, we will claim to support iWARP devices unless we check node_type. */ if (ret || !cm_id->device || cm_id->device->node_type != RDMA_NODE_IB_CA) ret = -EADDRNOTAVAIL; rdsdebug("addr %pI6c%%%u ret %d node type %d\n", addr, scope_id, ret, cm_id->device ? cm_id->device->node_type : -1); out: rdma_destroy_id(cm_id); return ret; } static void rds_ib_unregister_client(void) { ib_unregister_client(&rds_ib_client); /* wait for rds_ib_dev_free() to complete */ flush_workqueue(rds_wq); } static void rds_ib_set_unloading(void) { atomic_set(&rds_ib_unloading, 1); } static bool rds_ib_is_unloading(struct rds_connection *conn) { struct rds_conn_path *cp = &conn->c_path[0]; return (test_bit(RDS_DESTROY_PENDING, &cp->cp_flags) || atomic_read(&rds_ib_unloading) != 0); } void rds_ib_exit(void) { rds_ib_set_unloading(); synchronize_rcu(); rds_info_deregister_func(RDS_INFO_IB_CONNECTIONS, rds_ib_ic_info); #if IS_ENABLED(CONFIG_IPV6) rds_info_deregister_func(RDS6_INFO_IB_CONNECTIONS, rds6_ib_ic_info); #endif rds_ib_unregister_client(); rds_ib_destroy_nodev_conns(); rds_ib_sysctl_exit(); rds_ib_recv_exit(); rds_trans_unregister(&rds_ib_transport); rds_ib_mr_exit(); } static u8 rds_ib_get_tos_map(u8 tos) { /* 1:1 user to transport map for RDMA transport. * In future, if custom map is desired, hook can export * user configurable map. */ return tos; } struct rds_transport rds_ib_transport = { .laddr_check = rds_ib_laddr_check, .xmit_path_complete = rds_ib_xmit_path_complete, .xmit = rds_ib_xmit, .xmit_rdma = rds_ib_xmit_rdma, .xmit_atomic = rds_ib_xmit_atomic, .recv_path = rds_ib_recv_path, .conn_alloc = rds_ib_conn_alloc, .conn_free = rds_ib_conn_free, .conn_path_connect = rds_ib_conn_path_connect, .conn_path_shutdown = rds_ib_conn_path_shutdown, .inc_copy_to_user = rds_ib_inc_copy_to_user, .inc_free = rds_ib_inc_free, .cm_initiate_connect = rds_ib_cm_initiate_connect, .cm_handle_connect = rds_ib_cm_handle_connect, .cm_connect_complete = rds_ib_cm_connect_complete, .stats_info_copy = rds_ib_stats_info_copy, .exit = rds_ib_exit, .get_mr = rds_ib_get_mr, .sync_mr = rds_ib_sync_mr, .free_mr = rds_ib_free_mr, .flush_mrs = rds_ib_flush_mrs, .get_tos_map = rds_ib_get_tos_map, .t_owner = THIS_MODULE, .t_name = "infiniband", .t_unloading = rds_ib_is_unloading, .t_type = RDS_TRANS_IB }; int rds_ib_init(void) { int ret; INIT_LIST_HEAD(&rds_ib_devices); ret = rds_ib_mr_init(); if (ret) goto out; ret = ib_register_client(&rds_ib_client); if (ret) goto out_mr_exit; ret = rds_ib_sysctl_init(); if (ret) goto out_ibreg; ret = rds_ib_recv_init(); if (ret) goto out_sysctl; rds_trans_register(&rds_ib_transport); rds_info_register_func(RDS_INFO_IB_CONNECTIONS, rds_ib_ic_info); #if IS_ENABLED(CONFIG_IPV6) rds_info_register_func(RDS6_INFO_IB_CONNECTIONS, rds6_ib_ic_info); #endif goto out; out_sysctl: rds_ib_sysctl_exit(); out_ibreg: rds_ib_unregister_client(); out_mr_exit: rds_ib_mr_exit(); out: return ret; } MODULE_LICENSE("GPL");
1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef ASM_X86_SERPENT_SSE2_H #define ASM_X86_SERPENT_SSE2_H #include <linux/crypto.h> #include <crypto/serpent.h> #ifdef CONFIG_X86_32 #define SERPENT_PARALLEL_BLOCKS 4 asmlinkage void __serpent_enc_blk_4way(const struct serpent_ctx *ctx, u8 *dst, const u8 *src, bool xor); asmlinkage void serpent_dec_blk_4way(const struct serpent_ctx *ctx, u8 *dst, const u8 *src); static inline void serpent_enc_blk_xway(const void *ctx, u8 *dst, const u8 *src) { __serpent_enc_blk_4way(ctx, dst, src, false); } static inline void serpent_enc_blk_xway_xor(const struct serpent_ctx *ctx, u8 *dst, const u8 *src) { __serpent_enc_blk_4way(ctx, dst, src, true); } static inline void serpent_dec_blk_xway(const void *ctx, u8 *dst, const u8 *src) { serpent_dec_blk_4way(ctx, dst, src); } #else #define SERPENT_PARALLEL_BLOCKS 8 asmlinkage void __serpent_enc_blk_8way(const struct serpent_ctx *ctx, u8 *dst, const u8 *src, bool xor); asmlinkage void serpent_dec_blk_8way(const struct serpent_ctx *ctx, u8 *dst, const u8 *src); static inline void serpent_enc_blk_xway(const void *ctx, u8 *dst, const u8 *src) { __serpent_enc_blk_8way(ctx, dst, src, false); } static inline void serpent_enc_blk_xway_xor(const struct serpent_ctx *ctx, u8 *dst, const u8 *src) { __serpent_enc_blk_8way(ctx, dst, src, true); } static inline void serpent_dec_blk_xway(const void *ctx, u8 *dst, const u8 *src) { serpent_dec_blk_8way(ctx, dst, src); } #endif #endif
21 21 3 2 2 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef IOU_NAPI_H #define IOU_NAPI_H #include <linux/kernel.h> #include <linux/io_uring.h> #include <net/busy_poll.h> #ifdef CONFIG_NET_RX_BUSY_POLL void io_napi_init(struct io_ring_ctx *ctx); void io_napi_free(struct io_ring_ctx *ctx); int io_register_napi(struct io_ring_ctx *ctx, void __user *arg); int io_unregister_napi(struct io_ring_ctx *ctx, void __user *arg); int __io_napi_add_id(struct io_ring_ctx *ctx, unsigned int napi_id); void __io_napi_busy_loop(struct io_ring_ctx *ctx, struct io_wait_queue *iowq); int io_napi_sqpoll_busy_poll(struct io_ring_ctx *ctx); static inline bool io_napi(struct io_ring_ctx *ctx) { return !list_empty(&ctx->napi_list); } static inline void io_napi_busy_loop(struct io_ring_ctx *ctx, struct io_wait_queue *iowq) { if (!io_napi(ctx)) return; __io_napi_busy_loop(ctx, iowq); } /* * io_napi_add() - Add napi id to the busy poll list * @req: pointer to io_kiocb request * * Add the napi id of the socket to the napi busy poll list and hash table. */ static inline void io_napi_add(struct io_kiocb *req) { struct io_ring_ctx *ctx = req->ctx; struct socket *sock; if (READ_ONCE(ctx->napi_track_mode) != IO_URING_NAPI_TRACKING_DYNAMIC) return; sock = sock_from_file(req->file); if (sock && sock->sk) __io_napi_add_id(ctx, READ_ONCE(sock->sk->sk_napi_id)); } #else static inline void io_napi_init(struct io_ring_ctx *ctx) { } static inline void io_napi_free(struct io_ring_ctx *ctx) { } static inline int io_register_napi(struct io_ring_ctx *ctx, void __user *arg) { return -EOPNOTSUPP; } static inline int io_unregister_napi(struct io_ring_ctx *ctx, void __user *arg) { return -EOPNOTSUPP; } static inline bool io_napi(struct io_ring_ctx *ctx) { return false; } static inline void io_napi_add(struct io_kiocb *req) { } static inline void io_napi_busy_loop(struct io_ring_ctx *ctx, struct io_wait_queue *iowq) { } static inline int io_napi_sqpoll_busy_poll(struct io_ring_ctx *ctx) { return 0; } #endif /* CONFIG_NET_RX_BUSY_POLL */ #endif
10 10 10 10 10 2 2 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 // SPDX-License-Identifier: GPL-2.0 /* * Copyright (c) 2020 Facebook * Copyright 2020 Google LLC. */ #include <linux/pid.h> #include <linux/sched.h> #include <linux/rculist.h> #include <linux/list.h> #include <linux/hash.h> #include <linux/types.h> #include <linux/spinlock.h> #include <linux/bpf.h> #include <linux/bpf_local_storage.h> #include <linux/filter.h> #include <uapi/linux/btf.h> #include <linux/btf_ids.h> #include <linux/rcupdate_trace.h> DEFINE_BPF_STORAGE_CACHE(task_cache); static DEFINE_PER_CPU(int, bpf_task_storage_busy); static void bpf_task_storage_lock(void) { cant_migrate(); this_cpu_inc(bpf_task_storage_busy); } static void bpf_task_storage_unlock(void) { this_cpu_dec(bpf_task_storage_busy); } static bool bpf_task_storage_trylock(void) { cant_migrate(); if (unlikely(this_cpu_inc_return(bpf_task_storage_busy) != 1)) { this_cpu_dec(bpf_task_storage_busy); return false; } return true; } static struct bpf_local_storage __rcu **task_storage_ptr(void *owner) { struct task_struct *task = owner; return &task->bpf_storage; } static struct bpf_local_storage_data * task_storage_lookup(struct task_struct *task, struct bpf_map *map, bool cacheit_lockit) { struct bpf_local_storage *task_storage; struct bpf_local_storage_map *smap; task_storage = rcu_dereference_check(task->bpf_storage, bpf_rcu_lock_held()); if (!task_storage) return NULL; smap = (struct bpf_local_storage_map *)map; return bpf_local_storage_lookup(task_storage, smap, cacheit_lockit); } void bpf_task_storage_free(struct task_struct *task) { struct bpf_local_storage *local_storage; migrate_disable(); rcu_read_lock(); local_storage = rcu_dereference(task->bpf_storage); if (!local_storage) goto out; bpf_task_storage_lock(); bpf_local_storage_destroy(local_storage); bpf_task_storage_unlock(); out: rcu_read_unlock(); migrate_enable(); } static void *bpf_pid_task_storage_lookup_elem(struct bpf_map *map, void *key) { struct bpf_local_storage_data *sdata; struct task_struct *task; unsigned int f_flags; struct pid *pid; int fd, err; fd = *(int *)key; pid = pidfd_get_pid(fd, &f_flags); if (IS_ERR(pid)) return ERR_CAST(pid); /* We should be in an RCU read side critical section, it should be safe * to call pid_task. */ WARN_ON_ONCE(!rcu_read_lock_held()); task = pid_task(pid, PIDTYPE_PID); if (!task) { err = -ENOENT; goto out; } bpf_task_storage_lock(); sdata = task_storage_lookup(task, map, true); bpf_task_storage_unlock(); put_pid(pid); return sdata ? sdata->data : NULL; out: put_pid(pid); return ERR_PTR(err); } static long bpf_pid_task_storage_update_elem(struct bpf_map *map, void *key, void *value, u64 map_flags) { struct bpf_local_storage_data *sdata; struct task_struct *task; unsigned int f_flags; struct pid *pid; int fd, err; if ((map_flags & BPF_F_LOCK) && btf_record_has_field(map->record, BPF_UPTR)) return -EOPNOTSUPP; fd = *(int *)key; pid = pidfd_get_pid(fd, &f_flags); if (IS_ERR(pid)) return PTR_ERR(pid); /* We should be in an RCU read side critical section, it should be safe * to call pid_task. */ WARN_ON_ONCE(!rcu_read_lock_held()); task = pid_task(pid, PIDTYPE_PID); if (!task) { err = -ENOENT; goto out; } bpf_task_storage_lock(); sdata = bpf_local_storage_update( task, (struct bpf_local_storage_map *)map, value, map_flags, true, GFP_ATOMIC); bpf_task_storage_unlock(); err = PTR_ERR_OR_ZERO(sdata); out: put_pid(pid); return err; } static int task_storage_delete(struct task_struct *task, struct bpf_map *map, bool nobusy) { struct bpf_local_storage_data *sdata; sdata = task_storage_lookup(task, map, false); if (!sdata) return -ENOENT; if (!nobusy) return -EBUSY; bpf_selem_unlink(SELEM(sdata), false); return 0; } static long bpf_pid_task_storage_delete_elem(struct bpf_map *map, void *key) { struct task_struct *task; unsigned int f_flags; struct pid *pid; int fd, err; fd = *(int *)key; pid = pidfd_get_pid(fd, &f_flags); if (IS_ERR(pid)) return PTR_ERR(pid); /* We should be in an RCU read side critical section, it should be safe * to call pid_task. */ WARN_ON_ONCE(!rcu_read_lock_held()); task = pid_task(pid, PIDTYPE_PID); if (!task) { err = -ENOENT; goto out; } bpf_task_storage_lock(); err = task_storage_delete(task, map, true); bpf_task_storage_unlock(); out: put_pid(pid); return err; } /* Called by bpf_task_storage_get*() helpers */ static void *__bpf_task_storage_get(struct bpf_map *map, struct task_struct *task, void *value, u64 flags, gfp_t gfp_flags, bool nobusy) { struct bpf_local_storage_data *sdata; sdata = task_storage_lookup(task, map, nobusy); if (sdata) return sdata->data; /* only allocate new storage, when the task is refcounted */ if (refcount_read(&task->usage) && (flags & BPF_LOCAL_STORAGE_GET_F_CREATE) && nobusy) { sdata = bpf_local_storage_update( task, (struct bpf_local_storage_map *)map, value, BPF_NOEXIST, false, gfp_flags); return IS_ERR(sdata) ? NULL : sdata->data; } return NULL; } /* *gfp_flags* is a hidden argument provided by the verifier */ BPF_CALL_5(bpf_task_storage_get_recur, struct bpf_map *, map, struct task_struct *, task, void *, value, u64, flags, gfp_t, gfp_flags) { bool nobusy; void *data; WARN_ON_ONCE(!bpf_rcu_lock_held()); if (flags & ~BPF_LOCAL_STORAGE_GET_F_CREATE || !task) return (unsigned long)NULL; nobusy = bpf_task_storage_trylock(); data = __bpf_task_storage_get(map, task, value, flags, gfp_flags, nobusy); if (nobusy) bpf_task_storage_unlock(); return (unsigned long)data; } /* *gfp_flags* is a hidden argument provided by the verifier */ BPF_CALL_5(bpf_task_storage_get, struct bpf_map *, map, struct task_struct *, task, void *, value, u64, flags, gfp_t, gfp_flags) { void *data; WARN_ON_ONCE(!bpf_rcu_lock_held()); if (flags & ~BPF_LOCAL_STORAGE_GET_F_CREATE || !task) return (unsigned long)NULL; bpf_task_storage_lock(); data = __bpf_task_storage_get(map, task, value, flags, gfp_flags, true); bpf_task_storage_unlock(); return (unsigned long)data; } BPF_CALL_2(bpf_task_storage_delete_recur, struct bpf_map *, map, struct task_struct *, task) { bool nobusy; int ret; WARN_ON_ONCE(!bpf_rcu_lock_held()); if (!task) return -EINVAL; nobusy = bpf_task_storage_trylock(); /* This helper must only be called from places where the lifetime of the task * is guaranteed. Either by being refcounted or by being protected * by an RCU read-side critical section. */ ret = task_storage_delete(task, map, nobusy); if (nobusy) bpf_task_storage_unlock(); return ret; } BPF_CALL_2(bpf_task_storage_delete, struct bpf_map *, map, struct task_struct *, task) { int ret; WARN_ON_ONCE(!bpf_rcu_lock_held()); if (!task) return -EINVAL; bpf_task_storage_lock(); /* This helper must only be called from places where the lifetime of the task * is guaranteed. Either by being refcounted or by being protected * by an RCU read-side critical section. */ ret = task_storage_delete(task, map, true); bpf_task_storage_unlock(); return ret; } static int notsupp_get_next_key(struct bpf_map *map, void *key, void *next_key) { return -ENOTSUPP; } static struct bpf_map *task_storage_map_alloc(union bpf_attr *attr) { return bpf_local_storage_map_alloc(attr, &task_cache, true); } static void task_storage_map_free(struct bpf_map *map) { bpf_local_storage_map_free(map, &task_cache, &bpf_task_storage_busy); } BTF_ID_LIST_GLOBAL_SINGLE(bpf_local_storage_map_btf_id, struct, bpf_local_storage_map) const struct bpf_map_ops task_storage_map_ops = { .map_meta_equal = bpf_map_meta_equal, .map_alloc_check = bpf_local_storage_map_alloc_check, .map_alloc = task_storage_map_alloc, .map_free = task_storage_map_free, .map_get_next_key = notsupp_get_next_key, .map_lookup_elem = bpf_pid_task_storage_lookup_elem, .map_update_elem = bpf_pid_task_storage_update_elem, .map_delete_elem = bpf_pid_task_storage_delete_elem, .map_check_btf = bpf_local_storage_map_check_btf, .map_mem_usage = bpf_local_storage_map_mem_usage, .map_btf_id = &bpf_local_storage_map_btf_id[0], .map_owner_storage_ptr = task_storage_ptr, }; const struct bpf_func_proto bpf_task_storage_get_recur_proto = { .func = bpf_task_storage_get_recur, .gpl_only = false, .ret_type = RET_PTR_TO_MAP_VALUE_OR_NULL, .arg1_type = ARG_CONST_MAP_PTR, .arg2_type = ARG_PTR_TO_BTF_ID_OR_NULL, .arg2_btf_id = &btf_tracing_ids[BTF_TRACING_TYPE_TASK], .arg3_type = ARG_PTR_TO_MAP_VALUE_OR_NULL, .arg4_type = ARG_ANYTHING, }; const struct bpf_func_proto bpf_task_storage_get_proto = { .func = bpf_task_storage_get, .gpl_only = false, .ret_type = RET_PTR_TO_MAP_VALUE_OR_NULL, .arg1_type = ARG_CONST_MAP_PTR, .arg2_type = ARG_PTR_TO_BTF_ID_OR_NULL, .arg2_btf_id = &btf_tracing_ids[BTF_TRACING_TYPE_TASK], .arg3_type = ARG_PTR_TO_MAP_VALUE_OR_NULL, .arg4_type = ARG_ANYTHING, }; const struct bpf_func_proto bpf_task_storage_delete_recur_proto = { .func = bpf_task_storage_delete_recur, .gpl_only = false, .ret_type = RET_INTEGER, .arg1_type = ARG_CONST_MAP_PTR, .arg2_type = ARG_PTR_TO_BTF_ID_OR_NULL, .arg2_btf_id = &btf_tracing_ids[BTF_TRACING_TYPE_TASK], }; const struct bpf_func_proto bpf_task_storage_delete_proto = { .func = bpf_task_storage_delete, .gpl_only = false, .ret_type = RET_INTEGER, .arg1_type = ARG_CONST_MAP_PTR, .arg2_type = ARG_PTR_TO_BTF_ID_OR_NULL, .arg2_btf_id = &btf_tracing_ids[BTF_TRACING_TYPE_TASK], };
1 1 1 2 2 2 2 2 2 2 2 1 2 2 2 6 6 6 4 4 4 3 4 3 4 4 2 2 2 2 2 2 6 6 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 // SPDX-License-Identifier: GPL-2.0-or-later /* * Driver for NXP PN533 NFC Chip - USB transport layer * * Copyright (C) 2011 Instituto Nokia de Tecnologia * Copyright (C) 2012-2013 Tieto Poland */ #include <linux/device.h> #include <linux/kernel.h> #include <linux/module.h> #include <linux/slab.h> #include <linux/usb.h> #include <linux/nfc.h> #include <linux/netdevice.h> #include <net/nfc/nfc.h> #include "pn533.h" #define VERSION "0.1" #define PN533_VENDOR_ID 0x4CC #define PN533_PRODUCT_ID 0x2533 #define SCM_VENDOR_ID 0x4E6 #define SCL3711_PRODUCT_ID 0x5591 #define SONY_VENDOR_ID 0x054c #define PASORI_PRODUCT_ID 0x02e1 #define ACS_VENDOR_ID 0x072f #define ACR122U_PRODUCT_ID 0x2200 static const struct usb_device_id pn533_usb_table[] = { { USB_DEVICE(PN533_VENDOR_ID, PN533_PRODUCT_ID), .driver_info = PN533_DEVICE_STD }, { USB_DEVICE(SCM_VENDOR_ID, SCL3711_PRODUCT_ID), .driver_info = PN533_DEVICE_STD }, { USB_DEVICE(SONY_VENDOR_ID, PASORI_PRODUCT_ID), .driver_info = PN533_DEVICE_PASORI }, { USB_DEVICE(ACS_VENDOR_ID, ACR122U_PRODUCT_ID), .driver_info = PN533_DEVICE_ACR122U }, { } }; MODULE_DEVICE_TABLE(usb, pn533_usb_table); struct pn533_usb_phy { struct usb_device *udev; struct usb_interface *interface; struct urb *out_urb; struct urb *in_urb; struct urb *ack_urb; u8 *ack_buffer; struct pn533 *priv; }; static void pn533_recv_response(struct urb *urb) { struct pn533_usb_phy *phy = urb->context; struct sk_buff *skb = NULL; if (!urb->status) { skb = alloc_skb(urb->actual_length, GFP_ATOMIC); if (!skb) { nfc_err(&phy->udev->dev, "failed to alloc memory\n"); } else { skb_put_data(skb, urb->transfer_buffer, urb->actual_length); } } pn533_recv_frame(phy->priv, skb, urb->status); } static int pn533_submit_urb_for_response(struct pn533_usb_phy *phy, gfp_t flags) { phy->in_urb->complete = pn533_recv_response; return usb_submit_urb(phy->in_urb, flags); } static void pn533_recv_ack(struct urb *urb) { struct pn533_usb_phy *phy = urb->context; struct pn533 *priv = phy->priv; struct pn533_cmd *cmd = priv->cmd; struct pn533_std_frame *in_frame; int rc; cmd->status = urb->status; switch (urb->status) { case 0: break; /* success */ case -ECONNRESET: case -ENOENT: dev_dbg(&phy->udev->dev, "The urb has been stopped (status %d)\n", urb->status); goto sched_wq; case -ESHUTDOWN: default: nfc_err(&phy->udev->dev, "Urb failure (status %d)\n", urb->status); goto sched_wq; } in_frame = phy->in_urb->transfer_buffer; if (!pn533_rx_frame_is_ack(in_frame)) { nfc_err(&phy->udev->dev, "Received an invalid ack\n"); cmd->status = -EIO; goto sched_wq; } rc = pn533_submit_urb_for_response(phy, GFP_ATOMIC); if (rc) { nfc_err(&phy->udev->dev, "usb_submit_urb failed with result %d\n", rc); cmd->status = rc; goto sched_wq; } return; sched_wq: queue_work(priv->wq, &priv->cmd_complete_work); } static int pn533_submit_urb_for_ack(struct pn533_usb_phy *phy, gfp_t flags) { phy->in_urb->complete = pn533_recv_ack; return usb_submit_urb(phy->in_urb, flags); } static int pn533_usb_send_ack(struct pn533 *dev, gfp_t flags) { struct pn533_usb_phy *phy = dev->phy; static const u8 ack[6] = {0x00, 0x00, 0xff, 0x00, 0xff, 0x00}; /* spec 7.1.1.3: Preamble, SoPC (2), ACK Code (2), Postamble */ if (!phy->ack_buffer) { phy->ack_buffer = kmemdup(ack, sizeof(ack), flags); if (!phy->ack_buffer) return -ENOMEM; } phy->ack_urb->transfer_buffer = phy->ack_buffer; phy->ack_urb->transfer_buffer_length = sizeof(ack); return usb_submit_urb(phy->ack_urb, flags); } struct pn533_out_arg { struct pn533_usb_phy *phy; struct completion done; }; static int pn533_usb_send_frame(struct pn533 *dev, struct sk_buff *out) { struct pn533_usb_phy *phy = dev->phy; struct pn533_out_arg arg; void *cntx; int rc; if (phy->priv == NULL) phy->priv = dev; phy->out_urb->transfer_buffer = out->data; phy->out_urb->transfer_buffer_length = out->len; print_hex_dump_debug("PN533 TX: ", DUMP_PREFIX_NONE, 16, 1, out->data, out->len, false); arg.phy = phy; init_completion(&arg.done); cntx = phy->out_urb->context; phy->out_urb->context = &arg; rc = usb_submit_urb(phy->out_urb, GFP_KERNEL); if (rc) return rc; wait_for_completion(&arg.done); phy->out_urb->context = cntx; if (dev->protocol_type == PN533_PROTO_REQ_RESP) { /* request for response for sent packet directly */ rc = pn533_submit_urb_for_response(phy, GFP_KERNEL); if (rc) goto error; } else if (dev->protocol_type == PN533_PROTO_REQ_ACK_RESP) { /* request for ACK if that's the case */ rc = pn533_submit_urb_for_ack(phy, GFP_KERNEL); if (rc) goto error; } return 0; error: usb_unlink_urb(phy->out_urb); return rc; } static void pn533_usb_abort_cmd(struct pn533 *dev, gfp_t flags) { struct pn533_usb_phy *phy = dev->phy; /* ACR122U does not support any command which aborts last * issued command i.e. as ACK for standard PN533. Additionally, * it behaves stange, sending broken or incorrect responses, * when we cancel urb before the chip will send response. */ if (dev->device_type == PN533_DEVICE_ACR122U) return; /* An ack will cancel the last issued command */ pn533_usb_send_ack(dev, flags); /* cancel the urb request */ usb_kill_urb(phy->in_urb); } /* ACR122 specific structs and functions */ /* ACS ACR122 pn533 frame definitions */ #define PN533_ACR122_TX_FRAME_HEADER_LEN (sizeof(struct pn533_acr122_tx_frame) \ + 2) #define PN533_ACR122_TX_FRAME_TAIL_LEN 0 #define PN533_ACR122_RX_FRAME_HEADER_LEN (sizeof(struct pn533_acr122_rx_frame) \ + 2) #define PN533_ACR122_RX_FRAME_TAIL_LEN 2 #define PN533_ACR122_FRAME_MAX_PAYLOAD_LEN PN533_STD_FRAME_MAX_PAYLOAD_LEN /* CCID messages types */ #define PN533_ACR122_PC_TO_RDR_ICCPOWERON 0x62 #define PN533_ACR122_PC_TO_RDR_ESCAPE 0x6B #define PN533_ACR122_RDR_TO_PC_ESCAPE 0x83 struct pn533_acr122_ccid_hdr { u8 type; u32 datalen; u8 slot; u8 seq; /* * 3 msg specific bytes or status, error and 1 specific * byte for reposnse msg */ u8 params[3]; } __packed; struct pn533_acr122_apdu_hdr { u8 class; u8 ins; u8 p1; u8 p2; } __packed; struct pn533_acr122_tx_frame { struct pn533_acr122_ccid_hdr ccid; struct pn533_acr122_apdu_hdr apdu; u8 datalen; u8 data[]; /* pn533 frame: TFI ... */ } __packed; struct pn533_acr122_rx_frame { struct pn533_acr122_ccid_hdr ccid; u8 data[]; /* pn533 frame : TFI ... */ } __packed; static void pn533_acr122_tx_frame_init(void *_frame, u8 cmd_code) { struct pn533_acr122_tx_frame *frame = _frame; frame->ccid.type = PN533_ACR122_PC_TO_RDR_ESCAPE; /* sizeof(apdu_hdr) + sizeof(datalen) */ frame->ccid.datalen = sizeof(frame->apdu) + 1; frame->ccid.slot = 0; frame->ccid.seq = 0; frame->ccid.params[0] = 0; frame->ccid.params[1] = 0; frame->ccid.params[2] = 0; frame->data[0] = PN533_STD_FRAME_DIR_OUT; frame->data[1] = cmd_code; frame->datalen = 2; /* data[0] + data[1] */ frame->apdu.class = 0xFF; frame->apdu.ins = 0; frame->apdu.p1 = 0; frame->apdu.p2 = 0; } static void pn533_acr122_tx_frame_finish(void *_frame) { struct pn533_acr122_tx_frame *frame = _frame; frame->ccid.datalen += frame->datalen; } static void pn533_acr122_tx_update_payload_len(void *_frame, int len) { struct pn533_acr122_tx_frame *frame = _frame; frame->datalen += len; } static bool pn533_acr122_is_rx_frame_valid(void *_frame, struct pn533 *dev) { struct pn533_acr122_rx_frame *frame = _frame; if (frame->ccid.type != 0x83) return false; if (!frame->ccid.datalen) return false; if (frame->data[frame->ccid.datalen - 2] == 0x63) return false; return true; } static int pn533_acr122_rx_frame_size(void *frame) { struct pn533_acr122_rx_frame *f = frame; /* f->ccid.datalen already includes tail length */ return sizeof(struct pn533_acr122_rx_frame) + f->ccid.datalen; } static u8 pn533_acr122_get_cmd_code(void *frame) { struct pn533_acr122_rx_frame *f = frame; return PN533_FRAME_CMD(f); } static struct pn533_frame_ops pn533_acr122_frame_ops = { .tx_frame_init = pn533_acr122_tx_frame_init, .tx_frame_finish = pn533_acr122_tx_frame_finish, .tx_update_payload_len = pn533_acr122_tx_update_payload_len, .tx_header_len = PN533_ACR122_TX_FRAME_HEADER_LEN, .tx_tail_len = PN533_ACR122_TX_FRAME_TAIL_LEN, .rx_is_frame_valid = pn533_acr122_is_rx_frame_valid, .rx_header_len = PN533_ACR122_RX_FRAME_HEADER_LEN, .rx_tail_len = PN533_ACR122_RX_FRAME_TAIL_LEN, .rx_frame_size = pn533_acr122_rx_frame_size, .max_payload_len = PN533_ACR122_FRAME_MAX_PAYLOAD_LEN, .get_cmd_code = pn533_acr122_get_cmd_code, }; struct pn533_acr122_poweron_rdr_arg { int rc; struct completion done; }; static void pn533_acr122_poweron_rdr_resp(struct urb *urb) { struct pn533_acr122_poweron_rdr_arg *arg = urb->context; print_hex_dump_debug("ACR122 RX: ", DUMP_PREFIX_NONE, 16, 1, urb->transfer_buffer, urb->transfer_buffer_length, false); arg->rc = urb->status; complete(&arg->done); } static int pn533_acr122_poweron_rdr(struct pn533_usb_phy *phy) { /* Power on th reader (CCID cmd) */ u8 cmd[10] = {PN533_ACR122_PC_TO_RDR_ICCPOWERON, 0, 0, 0, 0, 0, 0, 3, 0, 0}; char *buffer; int transferred; int rc; void *cntx; struct pn533_acr122_poweron_rdr_arg arg; buffer = kmemdup(cmd, sizeof(cmd), GFP_KERNEL); if (!buffer) return -ENOMEM; init_completion(&arg.done); cntx = phy->in_urb->context; /* backup context */ phy->in_urb->complete = pn533_acr122_poweron_rdr_resp; phy->in_urb->context = &arg; print_hex_dump_debug("ACR122 TX: ", DUMP_PREFIX_NONE, 16, 1, cmd, sizeof(cmd), false); rc = usb_bulk_msg(phy->udev, phy->out_urb->pipe, buffer, sizeof(cmd), &transferred, 5000); kfree(buffer); if (rc || (transferred != sizeof(cmd))) { nfc_err(&phy->udev->dev, "Reader power on cmd error %d\n", rc); return rc; } rc = usb_submit_urb(phy->in_urb, GFP_KERNEL); if (rc) { nfc_err(&phy->udev->dev, "Can't submit reader poweron cmd response %d\n", rc); return rc; } wait_for_completion(&arg.done); phy->in_urb->context = cntx; /* restore context */ return arg.rc; } static void pn533_out_complete(struct urb *urb) { struct pn533_out_arg *arg = urb->context; struct pn533_usb_phy *phy = arg->phy; switch (urb->status) { case 0: break; /* success */ case -ECONNRESET: case -ENOENT: dev_dbg(&phy->udev->dev, "The urb has been stopped (status %d)\n", urb->status); break; case -ESHUTDOWN: default: nfc_err(&phy->udev->dev, "Urb failure (status %d)\n", urb->status); } complete(&arg->done); } static void pn533_ack_complete(struct urb *urb) { struct pn533_usb_phy *phy = urb->context; switch (urb->status) { case 0: break; /* success */ case -ECONNRESET: case -ENOENT: dev_dbg(&phy->udev->dev, "The urb has been stopped (status %d)\n", urb->status); break; case -ESHUTDOWN: default: nfc_err(&phy->udev->dev, "Urb failure (status %d)\n", urb->status); } } static const struct pn533_phy_ops usb_phy_ops = { .send_frame = pn533_usb_send_frame, .send_ack = pn533_usb_send_ack, .abort_cmd = pn533_usb_abort_cmd, }; static int pn533_usb_probe(struct usb_interface *interface, const struct usb_device_id *id) { struct pn533 *priv; struct pn533_usb_phy *phy; struct usb_host_interface *iface_desc; struct usb_endpoint_descriptor *endpoint; int in_endpoint = 0; int out_endpoint = 0; int rc = -ENOMEM; int i; u32 protocols; enum pn533_protocol_type protocol_type = PN533_PROTO_REQ_ACK_RESP; struct pn533_frame_ops *fops = NULL; unsigned char *in_buf; int in_buf_len = PN533_EXT_FRAME_HEADER_LEN + PN533_STD_FRAME_MAX_PAYLOAD_LEN + PN533_STD_FRAME_TAIL_LEN; phy = devm_kzalloc(&interface->dev, sizeof(*phy), GFP_KERNEL); if (!phy) return -ENOMEM; in_buf = kzalloc(in_buf_len, GFP_KERNEL); if (!in_buf) return -ENOMEM; phy->udev = usb_get_dev(interface_to_usbdev(interface)); phy->interface = interface; iface_desc = interface->cur_altsetting; for (i = 0; i < iface_desc->desc.bNumEndpoints; ++i) { endpoint = &iface_desc->endpoint[i].desc; if (!in_endpoint && usb_endpoint_is_bulk_in(endpoint)) in_endpoint = endpoint->bEndpointAddress; if (!out_endpoint && usb_endpoint_is_bulk_out(endpoint)) out_endpoint = endpoint->bEndpointAddress; } if (!in_endpoint || !out_endpoint) { nfc_err(&interface->dev, "Could not find bulk-in or bulk-out endpoint\n"); rc = -ENODEV; goto error; } phy->in_urb = usb_alloc_urb(0, GFP_KERNEL); phy->out_urb = usb_alloc_urb(0, GFP_KERNEL); phy->ack_urb = usb_alloc_urb(0, GFP_KERNEL); if (!phy->in_urb || !phy->out_urb || !phy->ack_urb) goto error; usb_fill_bulk_urb(phy->in_urb, phy->udev, usb_rcvbulkpipe(phy->udev, in_endpoint), in_buf, in_buf_len, NULL, phy); usb_fill_bulk_urb(phy->out_urb, phy->udev, usb_sndbulkpipe(phy->udev, out_endpoint), NULL, 0, pn533_out_complete, phy); usb_fill_bulk_urb(phy->ack_urb, phy->udev, usb_sndbulkpipe(phy->udev, out_endpoint), NULL, 0, pn533_ack_complete, phy); switch (id->driver_info) { case PN533_DEVICE_STD: protocols = PN533_ALL_PROTOCOLS; break; case PN533_DEVICE_PASORI: protocols = PN533_NO_TYPE_B_PROTOCOLS; break; case PN533_DEVICE_ACR122U: protocols = PN533_NO_TYPE_B_PROTOCOLS; fops = &pn533_acr122_frame_ops; protocol_type = PN533_PROTO_REQ_RESP; rc = pn533_acr122_poweron_rdr(phy); if (rc < 0) { nfc_err(&interface->dev, "Couldn't poweron the reader (error %d)\n", rc); goto error; } break; default: nfc_err(&interface->dev, "Unknown device type %lu\n", id->driver_info); rc = -EINVAL; goto error; } priv = pn53x_common_init(id->driver_info, protocol_type, phy, &usb_phy_ops, fops, &phy->udev->dev); if (IS_ERR(priv)) { rc = PTR_ERR(priv); goto error; } phy->priv = priv; rc = pn533_finalize_setup(priv); if (rc) goto err_clean; usb_set_intfdata(interface, phy); rc = pn53x_register_nfc(priv, protocols, &interface->dev); if (rc) goto err_clean; return 0; err_clean: pn53x_common_clean(priv); error: usb_kill_urb(phy->in_urb); usb_kill_urb(phy->out_urb); usb_kill_urb(phy->ack_urb); usb_free_urb(phy->in_urb); usb_free_urb(phy->out_urb); usb_free_urb(phy->ack_urb); usb_put_dev(phy->udev); kfree(in_buf); kfree(phy->ack_buffer); return rc; } static void pn533_usb_disconnect(struct usb_interface *interface) { struct pn533_usb_phy *phy = usb_get_intfdata(interface); if (!phy) return; pn53x_unregister_nfc(phy->priv); pn53x_common_clean(phy->priv); usb_set_intfdata(interface, NULL); usb_kill_urb(phy->in_urb); usb_kill_urb(phy->out_urb); usb_kill_urb(phy->ack_urb); kfree(phy->in_urb->transfer_buffer); usb_free_urb(phy->in_urb); usb_free_urb(phy->out_urb); usb_free_urb(phy->ack_urb); kfree(phy->ack_buffer); nfc_info(&interface->dev, "NXP PN533 NFC device disconnected\n"); } static struct usb_driver pn533_usb_driver = { .name = "pn533_usb", .probe = pn533_usb_probe, .disconnect = pn533_usb_disconnect, .id_table = pn533_usb_table, }; module_usb_driver(pn533_usb_driver); MODULE_AUTHOR("Lauro Ramos Venancio <lauro.venancio@openbossa.org>"); MODULE_AUTHOR("Aloisio Almeida Jr <aloisio.almeida@openbossa.org>"); MODULE_AUTHOR("Waldemar Rymarkiewicz <waldemar.rymarkiewicz@tieto.com>"); MODULE_DESCRIPTION("PN533 USB driver ver " VERSION); MODULE_VERSION(VERSION); MODULE_LICENSE("GPL");
10 10 10 3 9 10 5 5 5 5 1 9 5 5 10 71 71 7 7 7 71 68 49 70 69 3 3 68 39 71 61 33 29 26 61 10 10 5 5 3 10 10 7 7 10 10 71 10 71 71 70 2 71 2 71 19 71 71 71 19 19 71 10 10 1 10 10 10 9 70 71 71 71 9 10 10 71 71 71 60 61 10 10 10 10 8 8 8 7 7 10 8 8 10 9 5 5 10 10 10 9 10 7 2 7 10 6 6 6 6 4 10 9 10 10 10 10 71 29 29 21 29 21 21 21 3 21 17 17 17 17 17 17 17 17 17 17 17 25 25 25 25 15 1 3 2 1 1 1 24 23 23 2 22 1 23 1 23 2 1 1 23 1 23 1 23 2 2 23 1 23 5 3 23 2 21 1 18 3 23 1 23 2 23 1 23 1 23 23 23 16 16 16 23 23 23 28 25 24 1 25 15 15 15 15 15 15 15 15 15 14 15 15 14 15 15 15 15 15 15 15 15 15 15 15 15 15 15 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 // SPDX-License-Identifier: GPL-2.0-or-later /* * net/sched/sch_fq.c Fair Queue Packet Scheduler (per flow pacing) * * Copyright (C) 2013-2023 Eric Dumazet <edumazet@google.com> * * Meant to be mostly used for locally generated traffic : * Fast classification depends on skb->sk being set before reaching us. * If not, (router workload), we use rxhash as fallback, with 32 bits wide hash. * All packets belonging to a socket are considered as a 'flow'. * * Flows are dynamically allocated and stored in a hash table of RB trees * They are also part of one Round Robin 'queues' (new or old flows) * * Burst avoidance (aka pacing) capability : * * Transport (eg TCP) can set in sk->sk_pacing_rate a rate, enqueue a * bunch of packets, and this packet scheduler adds delay between * packets to respect rate limitation. * * enqueue() : * - lookup one RB tree (out of 1024 or more) to find the flow. * If non existent flow, create it, add it to the tree. * Add skb to the per flow list of skb (fifo). * - Use a special fifo for high prio packets * * dequeue() : serves flows in Round Robin * Note : When a flow becomes empty, we do not immediately remove it from * rb trees, for performance reasons (its expected to send additional packets, * or SLAB cache will reuse socket for another flow) */ #include <linux/module.h> #include <linux/types.h> #include <linux/kernel.h> #include <linux/jiffies.h> #include <linux/string.h> #include <linux/in.h> #include <linux/errno.h> #include <linux/init.h> #include <linux/skbuff.h> #include <linux/slab.h> #include <linux/rbtree.h> #include <linux/hash.h> #include <linux/prefetch.h> #include <linux/vmalloc.h> #include <net/netlink.h> #include <net/pkt_sched.h> #include <net/sock.h> #include <net/tcp_states.h> #include <net/tcp.h> struct fq_skb_cb { u64 time_to_send; u8 band; }; static inline struct fq_skb_cb *fq_skb_cb(struct sk_buff *skb) { qdisc_cb_private_validate(skb, sizeof(struct fq_skb_cb)); return (struct fq_skb_cb *)qdisc_skb_cb(skb)->data; } /* * Per flow structure, dynamically allocated. * If packets have monotically increasing time_to_send, they are placed in O(1) * in linear list (head,tail), otherwise are placed in a rbtree (t_root). */ struct fq_flow { /* First cache line : used in fq_gc(), fq_enqueue(), fq_dequeue() */ struct rb_root t_root; struct sk_buff *head; /* list of skbs for this flow : first skb */ union { struct sk_buff *tail; /* last skb in the list */ unsigned long age; /* (jiffies | 1UL) when flow was emptied, for gc */ }; union { struct rb_node fq_node; /* anchor in fq_root[] trees */ /* Following field is only used for q->internal, * because q->internal is not hashed in fq_root[] */ u64 stat_fastpath_packets; }; struct sock *sk; u32 socket_hash; /* sk_hash */ int qlen; /* number of packets in flow queue */ /* Second cache line */ int credit; int band; struct fq_flow *next; /* next pointer in RR lists */ struct rb_node rate_node; /* anchor in q->delayed tree */ u64 time_next_packet; }; struct fq_flow_head { struct fq_flow *first; struct fq_flow *last; }; struct fq_perband_flows { struct fq_flow_head new_flows; struct fq_flow_head old_flows; int credit; int quantum; /* based on band nr : 576KB, 192KB, 64KB */ }; #define FQ_PRIO2BAND_CRUMB_SIZE ((TC_PRIO_MAX + 1) >> 2) struct fq_sched_data { /* Read mostly cache line */ u64 offload_horizon; u32 quantum; u32 initial_quantum; u32 flow_refill_delay; u32 flow_plimit; /* max packets per flow */ unsigned long flow_max_rate; /* optional max rate per flow */ u64 ce_threshold; u64 horizon; /* horizon in ns */ u32 orphan_mask; /* mask for orphaned skb */ u32 low_rate_threshold; struct rb_root *fq_root; u8 rate_enable; u8 fq_trees_log; u8 horizon_drop; u8 prio2band[FQ_PRIO2BAND_CRUMB_SIZE]; u32 timer_slack; /* hrtimer slack in ns */ /* Read/Write fields. */ unsigned int band_nr; /* band being serviced in fq_dequeue() */ struct fq_perband_flows band_flows[FQ_BANDS]; struct fq_flow internal; /* fastpath queue. */ struct rb_root delayed; /* for rate limited flows */ u64 time_next_delayed_flow; unsigned long unthrottle_latency_ns; u32 band_pkt_count[FQ_BANDS]; u32 flows; u32 inactive_flows; /* Flows with no packet to send. */ u32 throttled_flows; u64 stat_throttled; struct qdisc_watchdog watchdog; u64 stat_gc_flows; /* Seldom used fields. */ u64 stat_band_drops[FQ_BANDS]; u64 stat_ce_mark; u64 stat_horizon_drops; u64 stat_horizon_caps; u64 stat_flows_plimit; u64 stat_pkts_too_long; u64 stat_allocation_errors; }; /* return the i-th 2-bit value ("crumb") */ static u8 fq_prio2band(const u8 *prio2band, unsigned int prio) { return (READ_ONCE(prio2band[prio / 4]) >> (2 * (prio & 0x3))) & 0x3; } /* * f->tail and f->age share the same location. * We can use the low order bit to differentiate if this location points * to a sk_buff or contains a jiffies value, if we force this value to be odd. * This assumes f->tail low order bit must be 0 since alignof(struct sk_buff) >= 2 */ static void fq_flow_set_detached(struct fq_flow *f) { f->age = jiffies | 1UL; } static bool fq_flow_is_detached(const struct fq_flow *f) { return !!(f->age & 1UL); } /* special value to mark a throttled flow (not on old/new list) */ static struct fq_flow throttled; static bool fq_flow_is_throttled(const struct fq_flow *f) { return f->next == &throttled; } enum new_flow { NEW_FLOW, OLD_FLOW }; static void fq_flow_add_tail(struct fq_sched_data *q, struct fq_flow *flow, enum new_flow list_sel) { struct fq_perband_flows *pband = &q->band_flows[flow->band]; struct fq_flow_head *head = (list_sel == NEW_FLOW) ? &pband->new_flows : &pband->old_flows; if (head->first) head->last->next = flow; else head->first = flow; head->last = flow; flow->next = NULL; } static void fq_flow_unset_throttled(struct fq_sched_data *q, struct fq_flow *f) { rb_erase(&f->rate_node, &q->delayed); q->throttled_flows--; fq_flow_add_tail(q, f, OLD_FLOW); } static void fq_flow_set_throttled(struct fq_sched_data *q, struct fq_flow *f) { struct rb_node **p = &q->delayed.rb_node, *parent = NULL; while (*p) { struct fq_flow *aux; parent = *p; aux = rb_entry(parent, struct fq_flow, rate_node); if (f->time_next_packet >= aux->time_next_packet) p = &parent->rb_right; else p = &parent->rb_left; } rb_link_node(&f->rate_node, parent, p); rb_insert_color(&f->rate_node, &q->delayed); q->throttled_flows++; q->stat_throttled++; f->next = &throttled; if (q->time_next_delayed_flow > f->time_next_packet) q->time_next_delayed_flow = f->time_next_packet; } static struct kmem_cache *fq_flow_cachep __read_mostly; /* limit number of collected flows per round */ #define FQ_GC_MAX 8 #define FQ_GC_AGE (3*HZ) static bool fq_gc_candidate(const struct fq_flow *f) { return fq_flow_is_detached(f) && time_after(jiffies, f->age + FQ_GC_AGE); } static void fq_gc(struct fq_sched_data *q, struct rb_root *root, struct sock *sk) { struct rb_node **p, *parent; void *tofree[FQ_GC_MAX]; struct fq_flow *f; int i, fcnt = 0; p = &root->rb_node; parent = NULL; while (*p) { parent = *p; f = rb_entry(parent, struct fq_flow, fq_node); if (f->sk == sk) break; if (fq_gc_candidate(f)) { tofree[fcnt++] = f; if (fcnt == FQ_GC_MAX) break; } if (f->sk > sk) p = &parent->rb_right; else p = &parent->rb_left; } if (!fcnt) return; for (i = fcnt; i > 0; ) { f = tofree[--i]; rb_erase(&f->fq_node, root); } q->flows -= fcnt; q->inactive_flows -= fcnt; q->stat_gc_flows += fcnt; kmem_cache_free_bulk(fq_flow_cachep, fcnt, tofree); } /* Fast path can be used if : * 1) Packet tstamp is in the past, or within the pacing offload horizon. * 2) FQ qlen == 0 OR * (no flow is currently eligible for transmit, * AND fast path queue has less than 8 packets) * 3) No SO_MAX_PACING_RATE on the socket (if any). * 4) No @maxrate attribute on this qdisc, * * FQ can not use generic TCQ_F_CAN_BYPASS infrastructure. */ static bool fq_fastpath_check(const struct Qdisc *sch, struct sk_buff *skb, u64 now) { const struct fq_sched_data *q = qdisc_priv(sch); const struct sock *sk; if (fq_skb_cb(skb)->time_to_send > now + q->offload_horizon) return false; if (sch->q.qlen != 0) { /* Even if some packets are stored in this qdisc, * we can still enable fast path if all of them are * scheduled in the future (ie no flows are eligible) * or in the fast path queue. */ if (q->flows != q->inactive_flows + q->throttled_flows) return false; /* Do not allow fast path queue to explode, we want Fair Queue mode * under pressure. */ if (q->internal.qlen >= 8) return false; /* Ordering invariants fall apart if some delayed flows * are ready but we haven't serviced them, yet. */ if (q->time_next_delayed_flow <= now + q->offload_horizon) return false; } sk = skb->sk; if (sk && sk_fullsock(sk) && !sk_is_tcp(sk) && sk->sk_max_pacing_rate != ~0UL) return false; if (q->flow_max_rate != ~0UL) return false; return true; } static struct fq_flow *fq_classify(struct Qdisc *sch, struct sk_buff *skb, u64 now) { struct fq_sched_data *q = qdisc_priv(sch); struct rb_node **p, *parent; struct sock *sk = skb->sk; struct rb_root *root; struct fq_flow *f; /* SYNACK messages are attached to a TCP_NEW_SYN_RECV request socket * or a listener (SYNCOOKIE mode) * 1) request sockets are not full blown, * they do not contain sk_pacing_rate * 2) They are not part of a 'flow' yet * 3) We do not want to rate limit them (eg SYNFLOOD attack), * especially if the listener set SO_MAX_PACING_RATE * 4) We pretend they are orphaned * TCP can also associate TIME_WAIT sockets with RST or ACK packets. */ if (!sk || sk_listener_or_tw(sk)) { unsigned long hash = skb_get_hash(skb) & q->orphan_mask; /* By forcing low order bit to 1, we make sure to not * collide with a local flow (socket pointers are word aligned) */ sk = (struct sock *)((hash << 1) | 1UL); skb_orphan(skb); } else if (sk->sk_state == TCP_CLOSE) { unsigned long hash = skb_get_hash(skb) & q->orphan_mask; /* * Sockets in TCP_CLOSE are non connected. * Typical use case is UDP sockets, they can send packets * with sendto() to many different destinations. * We probably could use a generic bit advertising * non connected sockets, instead of sk_state == TCP_CLOSE, * if we care enough. */ sk = (struct sock *)((hash << 1) | 1UL); } if (fq_fastpath_check(sch, skb, now)) { q->internal.stat_fastpath_packets++; if (skb->sk == sk && q->rate_enable && READ_ONCE(sk->sk_pacing_status) != SK_PACING_FQ) smp_store_release(&sk->sk_pacing_status, SK_PACING_FQ); return &q->internal; } root = &q->fq_root[hash_ptr(sk, q->fq_trees_log)]; fq_gc(q, root, sk); p = &root->rb_node; parent = NULL; while (*p) { parent = *p; f = rb_entry(parent, struct fq_flow, fq_node); if (f->sk == sk) { /* socket might have been reallocated, so check * if its sk_hash is the same. * It not, we need to refill credit with * initial quantum */ if (unlikely(skb->sk == sk && f->socket_hash != sk->sk_hash)) { f->credit = q->initial_quantum; f->socket_hash = sk->sk_hash; if (q->rate_enable) smp_store_release(&sk->sk_pacing_status, SK_PACING_FQ); if (fq_flow_is_throttled(f)) fq_flow_unset_throttled(q, f); f->time_next_packet = 0ULL; } return f; } if (f->sk > sk) p = &parent->rb_right; else p = &parent->rb_left; } f = kmem_cache_zalloc(fq_flow_cachep, GFP_ATOMIC | __GFP_NOWARN); if (unlikely(!f)) { q->stat_allocation_errors++; return &q->internal; } /* f->t_root is already zeroed after kmem_cache_zalloc() */ fq_flow_set_detached(f); f->sk = sk; if (skb->sk == sk) { f->socket_hash = sk->sk_hash; if (q->rate_enable) smp_store_release(&sk->sk_pacing_status, SK_PACING_FQ); } f->credit = q->initial_quantum; rb_link_node(&f->fq_node, parent, p); rb_insert_color(&f->fq_node, root); q->flows++; q->inactive_flows++; return f; } static struct sk_buff *fq_peek(struct fq_flow *flow) { struct sk_buff *skb = skb_rb_first(&flow->t_root); struct sk_buff *head = flow->head; if (!skb) return head; if (!head) return skb; if (fq_skb_cb(skb)->time_to_send < fq_skb_cb(head)->time_to_send) return skb; return head; } static void fq_erase_head(struct Qdisc *sch, struct fq_flow *flow, struct sk_buff *skb) { if (skb == flow->head) { flow->head = skb->next; } else { rb_erase(&skb->rbnode, &flow->t_root); skb->dev = qdisc_dev(sch); } } /* Remove one skb from flow queue. * This skb must be the return value of prior fq_peek(). */ static void fq_dequeue_skb(struct Qdisc *sch, struct fq_flow *flow, struct sk_buff *skb) { fq_erase_head(sch, flow, skb); skb_mark_not_on_list(skb); qdisc_qstats_backlog_dec(sch, skb); sch->q.qlen--; } static void flow_queue_add(struct fq_flow *flow, struct sk_buff *skb) { struct rb_node **p, *parent; struct sk_buff *head, *aux; head = flow->head; if (!head || fq_skb_cb(skb)->time_to_send >= fq_skb_cb(flow->tail)->time_to_send) { if (!head) flow->head = skb; else flow->tail->next = skb; flow->tail = skb; skb->next = NULL; return; } p = &flow->t_root.rb_node; parent = NULL; while (*p) { parent = *p; aux = rb_to_skb(parent); if (fq_skb_cb(skb)->time_to_send >= fq_skb_cb(aux)->time_to_send) p = &parent->rb_right; else p = &parent->rb_left; } rb_link_node(&skb->rbnode, parent, p); rb_insert_color(&skb->rbnode, &flow->t_root); } static bool fq_packet_beyond_horizon(const struct sk_buff *skb, const struct fq_sched_data *q, u64 now) { return unlikely((s64)skb->tstamp > (s64)(now + q->horizon)); } #define FQDR(reason) SKB_DROP_REASON_FQ_##reason static int fq_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free) { struct fq_sched_data *q = qdisc_priv(sch); struct fq_flow *f; u64 now; u8 band; band = fq_prio2band(q->prio2band, skb->priority & TC_PRIO_MAX); if (unlikely(q->band_pkt_count[band] >= sch->limit)) { q->stat_band_drops[band]++; return qdisc_drop_reason(skb, sch, to_free, FQDR(BAND_LIMIT)); } now = ktime_get_ns(); if (!skb->tstamp) { fq_skb_cb(skb)->time_to_send = now; } else { /* Check if packet timestamp is too far in the future. */ if (fq_packet_beyond_horizon(skb, q, now)) { if (q->horizon_drop) { q->stat_horizon_drops++; return qdisc_drop_reason(skb, sch, to_free, FQDR(HORIZON_LIMIT)); } q->stat_horizon_caps++; skb->tstamp = now + q->horizon; } fq_skb_cb(skb)->time_to_send = skb->tstamp; } f = fq_classify(sch, skb, now); if (f != &q->internal) { if (unlikely(f->qlen >= q->flow_plimit)) { q->stat_flows_plimit++; return qdisc_drop_reason(skb, sch, to_free, FQDR(FLOW_LIMIT)); } if (fq_flow_is_detached(f)) { fq_flow_add_tail(q, f, NEW_FLOW); if (time_after(jiffies, f->age + q->flow_refill_delay)) f->credit = max_t(u32, f->credit, q->quantum); } f->band = band; q->band_pkt_count[band]++; fq_skb_cb(skb)->band = band; if (f->qlen == 0) q->inactive_flows--; } f->qlen++; /* Note: this overwrites f->age */ flow_queue_add(f, skb); qdisc_qstats_backlog_inc(sch, skb); sch->q.qlen++; return NET_XMIT_SUCCESS; } #undef FQDR static void fq_check_throttled(struct fq_sched_data *q, u64 now) { unsigned long sample; struct rb_node *p; if (q->time_next_delayed_flow > now + q->offload_horizon) return; /* Update unthrottle latency EWMA. * This is cheap and can help diagnosing timer/latency problems. */ sample = (unsigned long)(now - q->time_next_delayed_flow); if ((long)sample > 0) { q->unthrottle_latency_ns -= q->unthrottle_latency_ns >> 3; q->unthrottle_latency_ns += sample >> 3; } now += q->offload_horizon; q->time_next_delayed_flow = ~0ULL; while ((p = rb_first(&q->delayed)) != NULL) { struct fq_flow *f = rb_entry(p, struct fq_flow, rate_node); if (f->time_next_packet > now) { q->time_next_delayed_flow = f->time_next_packet; break; } fq_flow_unset_throttled(q, f); } } static struct fq_flow_head *fq_pband_head_select(struct fq_perband_flows *pband) { if (pband->credit <= 0) return NULL; if (pband->new_flows.first) return &pband->new_flows; return pband->old_flows.first ? &pband->old_flows : NULL; } static struct sk_buff *fq_dequeue(struct Qdisc *sch) { struct fq_sched_data *q = qdisc_priv(sch); struct fq_perband_flows *pband; struct fq_flow_head *head; struct sk_buff *skb; struct fq_flow *f; unsigned long rate; int retry; u32 plen; u64 now; if (!sch->q.qlen) return NULL; skb = fq_peek(&q->internal); if (unlikely(skb)) { q->internal.qlen--; fq_dequeue_skb(sch, &q->internal, skb); goto out; } now = ktime_get_ns(); fq_check_throttled(q, now); retry = 0; pband = &q->band_flows[q->band_nr]; begin: head = fq_pband_head_select(pband); if (!head) { while (++retry <= FQ_BANDS) { if (++q->band_nr == FQ_BANDS) q->band_nr = 0; pband = &q->band_flows[q->band_nr]; pband->credit = min(pband->credit + pband->quantum, pband->quantum); if (pband->credit > 0) goto begin; retry = 0; } if (q->time_next_delayed_flow != ~0ULL) qdisc_watchdog_schedule_range_ns(&q->watchdog, q->time_next_delayed_flow, q->timer_slack); return NULL; } f = head->first; retry = 0; if (f->credit <= 0) { f->credit += q->quantum; head->first = f->next; fq_flow_add_tail(q, f, OLD_FLOW); goto begin; } skb = fq_peek(f); if (skb) { u64 time_next_packet = max_t(u64, fq_skb_cb(skb)->time_to_send, f->time_next_packet); if (now + q->offload_horizon < time_next_packet) { head->first = f->next; f->time_next_packet = time_next_packet; fq_flow_set_throttled(q, f); goto begin; } prefetch(&skb->end); if ((s64)(now - time_next_packet - q->ce_threshold) > 0) { INET_ECN_set_ce(skb); q->stat_ce_mark++; } if (--f->qlen == 0) q->inactive_flows++; q->band_pkt_count[fq_skb_cb(skb)->band]--; fq_dequeue_skb(sch, f, skb); } else { head->first = f->next; /* force a pass through old_flows to prevent starvation */ if (head == &pband->new_flows) { fq_flow_add_tail(q, f, OLD_FLOW); } else { fq_flow_set_detached(f); } goto begin; } plen = qdisc_pkt_len(skb); f->credit -= plen; pband->credit -= plen; if (!q->rate_enable) goto out; rate = q->flow_max_rate; /* If EDT time was provided for this skb, we need to * update f->time_next_packet only if this qdisc enforces * a flow max rate. */ if (!skb->tstamp) { if (skb->sk) rate = min(READ_ONCE(skb->sk->sk_pacing_rate), rate); if (rate <= q->low_rate_threshold) { f->credit = 0; } else { plen = max(plen, q->quantum); if (f->credit > 0) goto out; } } if (rate != ~0UL) { u64 len = (u64)plen * NSEC_PER_SEC; if (likely(rate)) len = div64_ul(len, rate); /* Since socket rate can change later, * clamp the delay to 1 second. * Really, providers of too big packets should be fixed ! */ if (unlikely(len > NSEC_PER_SEC)) { len = NSEC_PER_SEC; q->stat_pkts_too_long++; } /* Account for schedule/timers drifts. * f->time_next_packet was set when prior packet was sent, * and current time (@now) can be too late by tens of us. */ if (f->time_next_packet) len -= min(len/2, now - f->time_next_packet); f->time_next_packet = now + len; } out: qdisc_bstats_update(sch, skb); return skb; } static void fq_flow_purge(struct fq_flow *flow) { struct rb_node *p = rb_first(&flow->t_root); while (p) { struct sk_buff *skb = rb_to_skb(p); p = rb_next(p); rb_erase(&skb->rbnode, &flow->t_root); rtnl_kfree_skbs(skb, skb); } rtnl_kfree_skbs(flow->head, flow->tail); flow->head = NULL; flow->qlen = 0; } static void fq_reset(struct Qdisc *sch) { struct fq_sched_data *q = qdisc_priv(sch); struct rb_root *root; struct rb_node *p; struct fq_flow *f; unsigned int idx; sch->q.qlen = 0; sch->qstats.backlog = 0; fq_flow_purge(&q->internal); if (!q->fq_root) return; for (idx = 0; idx < (1U << q->fq_trees_log); idx++) { root = &q->fq_root[idx]; while ((p = rb_first(root)) != NULL) { f = rb_entry(p, struct fq_flow, fq_node); rb_erase(p, root); fq_flow_purge(f); kmem_cache_free(fq_flow_cachep, f); } } for (idx = 0; idx < FQ_BANDS; idx++) { q->band_flows[idx].new_flows.first = NULL; q->band_flows[idx].old_flows.first = NULL; } q->delayed = RB_ROOT; q->flows = 0; q->inactive_flows = 0; q->throttled_flows = 0; } static void fq_rehash(struct fq_sched_data *q, struct rb_root *old_array, u32 old_log, struct rb_root *new_array, u32 new_log) { struct rb_node *op, **np, *parent; struct rb_root *oroot, *nroot; struct fq_flow *of, *nf; int fcnt = 0; u32 idx; for (idx = 0; idx < (1U << old_log); idx++) { oroot = &old_array[idx]; while ((op = rb_first(oroot)) != NULL) { rb_erase(op, oroot); of = rb_entry(op, struct fq_flow, fq_node); if (fq_gc_candidate(of)) { fcnt++; kmem_cache_free(fq_flow_cachep, of); continue; } nroot = &new_array[hash_ptr(of->sk, new_log)]; np = &nroot->rb_node; parent = NULL; while (*np) { parent = *np; nf = rb_entry(parent, struct fq_flow, fq_node); BUG_ON(nf->sk == of->sk); if (nf->sk > of->sk) np = &parent->rb_right; else np = &parent->rb_left; } rb_link_node(&of->fq_node, parent, np); rb_insert_color(&of->fq_node, nroot); } } q->flows -= fcnt; q->inactive_flows -= fcnt; q->stat_gc_flows += fcnt; } static void fq_free(void *addr) { kvfree(addr); } static int fq_resize(struct Qdisc *sch, u32 log) { struct fq_sched_data *q = qdisc_priv(sch); struct rb_root *array; void *old_fq_root; u32 idx; if (q->fq_root && log == q->fq_trees_log) return 0; /* If XPS was setup, we can allocate memory on right NUMA node */ array = kvmalloc_node(sizeof(struct rb_root) << log, GFP_KERNEL | __GFP_RETRY_MAYFAIL, netdev_queue_numa_node_read(sch->dev_queue)); if (!array) return -ENOMEM; for (idx = 0; idx < (1U << log); idx++) array[idx] = RB_ROOT; sch_tree_lock(sch); old_fq_root = q->fq_root; if (old_fq_root) fq_rehash(q, old_fq_root, q->fq_trees_log, array, log); q->fq_root = array; WRITE_ONCE(q->fq_trees_log, log); sch_tree_unlock(sch); fq_free(old_fq_root); return 0; } static const struct netlink_range_validation iq_range = { .max = INT_MAX, }; static const struct nla_policy fq_policy[TCA_FQ_MAX + 1] = { [TCA_FQ_UNSPEC] = { .strict_start_type = TCA_FQ_TIMER_SLACK }, [TCA_FQ_PLIMIT] = { .type = NLA_U32 }, [TCA_FQ_FLOW_PLIMIT] = { .type = NLA_U32 }, [TCA_FQ_QUANTUM] = { .type = NLA_U32 }, [TCA_FQ_INITIAL_QUANTUM] = NLA_POLICY_FULL_RANGE(NLA_U32, &iq_range), [TCA_FQ_RATE_ENABLE] = { .type = NLA_U32 }, [TCA_FQ_FLOW_DEFAULT_RATE] = { .type = NLA_U32 }, [TCA_FQ_FLOW_MAX_RATE] = { .type = NLA_U32 }, [TCA_FQ_BUCKETS_LOG] = { .type = NLA_U32 }, [TCA_FQ_FLOW_REFILL_DELAY] = { .type = NLA_U32 }, [TCA_FQ_ORPHAN_MASK] = { .type = NLA_U32 }, [TCA_FQ_LOW_RATE_THRESHOLD] = { .type = NLA_U32 }, [TCA_FQ_CE_THRESHOLD] = { .type = NLA_U32 }, [TCA_FQ_TIMER_SLACK] = { .type = NLA_U32 }, [TCA_FQ_HORIZON] = { .type = NLA_U32 }, [TCA_FQ_HORIZON_DROP] = { .type = NLA_U8 }, [TCA_FQ_PRIOMAP] = NLA_POLICY_EXACT_LEN(sizeof(struct tc_prio_qopt)), [TCA_FQ_WEIGHTS] = NLA_POLICY_EXACT_LEN(FQ_BANDS * sizeof(s32)), [TCA_FQ_OFFLOAD_HORIZON] = { .type = NLA_U32 }, }; /* compress a u8 array with all elems <= 3 to an array of 2-bit fields */ static void fq_prio2band_compress_crumb(const u8 *in, u8 *out) { const int num_elems = TC_PRIO_MAX + 1; u8 tmp[FQ_PRIO2BAND_CRUMB_SIZE]; int i; memset(tmp, 0, sizeof(tmp)); for (i = 0; i < num_elems; i++) tmp[i / 4] |= in[i] << (2 * (i & 0x3)); for (i = 0; i < FQ_PRIO2BAND_CRUMB_SIZE; i++) WRITE_ONCE(out[i], tmp[i]); } static void fq_prio2band_decompress_crumb(const u8 *in, u8 *out) { const int num_elems = TC_PRIO_MAX + 1; int i; for (i = 0; i < num_elems; i++) out[i] = fq_prio2band(in, i); } static int fq_load_weights(struct fq_sched_data *q, const struct nlattr *attr, struct netlink_ext_ack *extack) { s32 *weights = nla_data(attr); int i; for (i = 0; i < FQ_BANDS; i++) { if (weights[i] < FQ_MIN_WEIGHT) { NL_SET_ERR_MSG_FMT_MOD(extack, "Weight %d less that minimum allowed %d", weights[i], FQ_MIN_WEIGHT); return -EINVAL; } } for (i = 0; i < FQ_BANDS; i++) WRITE_ONCE(q->band_flows[i].quantum, weights[i]); return 0; } static int fq_load_priomap(struct fq_sched_data *q, const struct nlattr *attr, struct netlink_ext_ack *extack) { const struct tc_prio_qopt *map = nla_data(attr); int i; if (map->bands != FQ_BANDS) { NL_SET_ERR_MSG_MOD(extack, "FQ only supports 3 bands"); return -EINVAL; } for (i = 0; i < TC_PRIO_MAX + 1; i++) { if (map->priomap[i] >= FQ_BANDS) { NL_SET_ERR_MSG_FMT_MOD(extack, "FQ priomap field %d maps to a too high band %d", i, map->priomap[i]); return -EINVAL; } } fq_prio2band_compress_crumb(map->priomap, q->prio2band); return 0; } static int fq_change(struct Qdisc *sch, struct nlattr *opt, struct netlink_ext_ack *extack) { struct fq_sched_data *q = qdisc_priv(sch); struct nlattr *tb[TCA_FQ_MAX + 1]; int err, drop_count = 0; unsigned drop_len = 0; u32 fq_log; err = nla_parse_nested_deprecated(tb, TCA_FQ_MAX, opt, fq_policy, NULL); if (err < 0) return err; sch_tree_lock(sch); fq_log = q->fq_trees_log; if (tb[TCA_FQ_BUCKETS_LOG]) { u32 nval = nla_get_u32(tb[TCA_FQ_BUCKETS_LOG]); if (nval >= 1 && nval <= ilog2(256*1024)) fq_log = nval; else err = -EINVAL; } if (tb[TCA_FQ_PLIMIT]) WRITE_ONCE(sch->limit, nla_get_u32(tb[TCA_FQ_PLIMIT])); if (tb[TCA_FQ_FLOW_PLIMIT]) WRITE_ONCE(q->flow_plimit, nla_get_u32(tb[TCA_FQ_FLOW_PLIMIT])); if (tb[TCA_FQ_QUANTUM]) { u32 quantum = nla_get_u32(tb[TCA_FQ_QUANTUM]); if (quantum > 0 && quantum <= (1 << 20)) { WRITE_ONCE(q->quantum, quantum); } else { NL_SET_ERR_MSG_MOD(extack, "invalid quantum"); err = -EINVAL; } } if (tb[TCA_FQ_INITIAL_QUANTUM]) WRITE_ONCE(q->initial_quantum, nla_get_u32(tb[TCA_FQ_INITIAL_QUANTUM])); if (tb[TCA_FQ_FLOW_DEFAULT_RATE]) pr_warn_ratelimited("sch_fq: defrate %u ignored.\n", nla_get_u32(tb[TCA_FQ_FLOW_DEFAULT_RATE])); if (tb[TCA_FQ_FLOW_MAX_RATE]) { u32 rate = nla_get_u32(tb[TCA_FQ_FLOW_MAX_RATE]); WRITE_ONCE(q->flow_max_rate, (rate == ~0U) ? ~0UL : rate); } if (tb[TCA_FQ_LOW_RATE_THRESHOLD]) WRITE_ONCE(q->low_rate_threshold, nla_get_u32(tb[TCA_FQ_LOW_RATE_THRESHOLD])); if (tb[TCA_FQ_RATE_ENABLE]) { u32 enable = nla_get_u32(tb[TCA_FQ_RATE_ENABLE]); if (enable <= 1) WRITE_ONCE(q->rate_enable, enable); else err = -EINVAL; } if (tb[TCA_FQ_FLOW_REFILL_DELAY]) { u32 usecs_delay = nla_get_u32(tb[TCA_FQ_FLOW_REFILL_DELAY]) ; WRITE_ONCE(q->flow_refill_delay, usecs_to_jiffies(usecs_delay)); } if (!err && tb[TCA_FQ_PRIOMAP]) err = fq_load_priomap(q, tb[TCA_FQ_PRIOMAP], extack); if (!err && tb[TCA_FQ_WEIGHTS]) err = fq_load_weights(q, tb[TCA_FQ_WEIGHTS], extack); if (tb[TCA_FQ_ORPHAN_MASK]) WRITE_ONCE(q->orphan_mask, nla_get_u32(tb[TCA_FQ_ORPHAN_MASK])); if (tb[TCA_FQ_CE_THRESHOLD]) WRITE_ONCE(q->ce_threshold, (u64)NSEC_PER_USEC * nla_get_u32(tb[TCA_FQ_CE_THRESHOLD])); if (tb[TCA_FQ_TIMER_SLACK]) WRITE_ONCE(q->timer_slack, nla_get_u32(tb[TCA_FQ_TIMER_SLACK])); if (tb[TCA_FQ_HORIZON]) WRITE_ONCE(q->horizon, (u64)NSEC_PER_USEC * nla_get_u32(tb[TCA_FQ_HORIZON])); if (tb[TCA_FQ_HORIZON_DROP]) WRITE_ONCE(q->horizon_drop, nla_get_u8(tb[TCA_FQ_HORIZON_DROP])); if (tb[TCA_FQ_OFFLOAD_HORIZON]) { u64 offload_horizon = (u64)NSEC_PER_USEC * nla_get_u32(tb[TCA_FQ_OFFLOAD_HORIZON]); if (offload_horizon <= qdisc_dev(sch)->max_pacing_offload_horizon) { WRITE_ONCE(q->offload_horizon, offload_horizon); } else { NL_SET_ERR_MSG_MOD(extack, "invalid offload_horizon"); err = -EINVAL; } } if (!err) { sch_tree_unlock(sch); err = fq_resize(sch, fq_log); sch_tree_lock(sch); } while (sch->q.qlen > sch->limit) { struct sk_buff *skb = fq_dequeue(sch); if (!skb) break; drop_len += qdisc_pkt_len(skb); rtnl_kfree_skbs(skb, skb); drop_count++; } qdisc_tree_reduce_backlog(sch, drop_count, drop_len); sch_tree_unlock(sch); return err; } static void fq_destroy(struct Qdisc *sch) { struct fq_sched_data *q = qdisc_priv(sch); fq_reset(sch); fq_free(q->fq_root); qdisc_watchdog_cancel(&q->watchdog); } static int fq_init(struct Qdisc *sch, struct nlattr *opt, struct netlink_ext_ack *extack) { struct fq_sched_data *q = qdisc_priv(sch); int i, err; sch->limit = 10000; q->flow_plimit = 100; q->quantum = 2 * psched_mtu(qdisc_dev(sch)); q->initial_quantum = 10 * psched_mtu(qdisc_dev(sch)); q->flow_refill_delay = msecs_to_jiffies(40); q->flow_max_rate = ~0UL; q->time_next_delayed_flow = ~0ULL; q->rate_enable = 1; for (i = 0; i < FQ_BANDS; i++) { q->band_flows[i].new_flows.first = NULL; q->band_flows[i].old_flows.first = NULL; } q->band_flows[0].quantum = 9 << 16; q->band_flows[1].quantum = 3 << 16; q->band_flows[2].quantum = 1 << 16; q->delayed = RB_ROOT; q->fq_root = NULL; q->fq_trees_log = ilog2(1024); q->orphan_mask = 1024 - 1; q->low_rate_threshold = 550000 / 8; q->timer_slack = 10 * NSEC_PER_USEC; /* 10 usec of hrtimer slack */ q->horizon = 10ULL * NSEC_PER_SEC; /* 10 seconds */ q->horizon_drop = 1; /* by default, drop packets beyond horizon */ /* Default ce_threshold of 4294 seconds */ q->ce_threshold = (u64)NSEC_PER_USEC * ~0U; fq_prio2band_compress_crumb(sch_default_prio2band, q->prio2band); qdisc_watchdog_init_clockid(&q->watchdog, sch, CLOCK_MONOTONIC); if (opt) err = fq_change(sch, opt, extack); else err = fq_resize(sch, q->fq_trees_log); return err; } static int fq_dump(struct Qdisc *sch, struct sk_buff *skb) { struct fq_sched_data *q = qdisc_priv(sch); struct tc_prio_qopt prio = { .bands = FQ_BANDS, }; struct nlattr *opts; u64 offload_horizon; u64 ce_threshold; s32 weights[3]; u64 horizon; opts = nla_nest_start_noflag(skb, TCA_OPTIONS); if (opts == NULL) goto nla_put_failure; /* TCA_FQ_FLOW_DEFAULT_RATE is not used anymore */ ce_threshold = READ_ONCE(q->ce_threshold); do_div(ce_threshold, NSEC_PER_USEC); horizon = READ_ONCE(q->horizon); do_div(horizon, NSEC_PER_USEC); offload_horizon = READ_ONCE(q->offload_horizon); do_div(offload_horizon, NSEC_PER_USEC); if (nla_put_u32(skb, TCA_FQ_PLIMIT, READ_ONCE(sch->limit)) || nla_put_u32(skb, TCA_FQ_FLOW_PLIMIT, READ_ONCE(q->flow_plimit)) || nla_put_u32(skb, TCA_FQ_QUANTUM, READ_ONCE(q->quantum)) || nla_put_u32(skb, TCA_FQ_INITIAL_QUANTUM, READ_ONCE(q->initial_quantum)) || nla_put_u32(skb, TCA_FQ_RATE_ENABLE, READ_ONCE(q->rate_enable)) || nla_put_u32(skb, TCA_FQ_FLOW_MAX_RATE, min_t(unsigned long, READ_ONCE(q->flow_max_rate), ~0U)) || nla_put_u32(skb, TCA_FQ_FLOW_REFILL_DELAY, jiffies_to_usecs(READ_ONCE(q->flow_refill_delay))) || nla_put_u32(skb, TCA_FQ_ORPHAN_MASK, READ_ONCE(q->orphan_mask)) || nla_put_u32(skb, TCA_FQ_LOW_RATE_THRESHOLD, READ_ONCE(q->low_rate_threshold)) || nla_put_u32(skb, TCA_FQ_CE_THRESHOLD, (u32)ce_threshold) || nla_put_u32(skb, TCA_FQ_BUCKETS_LOG, READ_ONCE(q->fq_trees_log)) || nla_put_u32(skb, TCA_FQ_TIMER_SLACK, READ_ONCE(q->timer_slack)) || nla_put_u32(skb, TCA_FQ_HORIZON, (u32)horizon) || nla_put_u32(skb, TCA_FQ_OFFLOAD_HORIZON, (u32)offload_horizon) || nla_put_u8(skb, TCA_FQ_HORIZON_DROP, READ_ONCE(q->horizon_drop))) goto nla_put_failure; fq_prio2band_decompress_crumb(q->prio2band, prio.priomap); if (nla_put(skb, TCA_FQ_PRIOMAP, sizeof(prio), &prio)) goto nla_put_failure; weights[0] = READ_ONCE(q->band_flows[0].quantum); weights[1] = READ_ONCE(q->band_flows[1].quantum); weights[2] = READ_ONCE(q->band_flows[2].quantum); if (nla_put(skb, TCA_FQ_WEIGHTS, sizeof(weights), &weights)) goto nla_put_failure; return nla_nest_end(skb, opts); nla_put_failure: return -1; } static int fq_dump_stats(struct Qdisc *sch, struct gnet_dump *d) { struct fq_sched_data *q = qdisc_priv(sch); struct tc_fq_qd_stats st; int i; st.pad = 0; sch_tree_lock(sch); st.gc_flows = q->stat_gc_flows; st.highprio_packets = 0; st.fastpath_packets = q->internal.stat_fastpath_packets; st.tcp_retrans = 0; st.throttled = q->stat_throttled; st.flows_plimit = q->stat_flows_plimit; st.pkts_too_long = q->stat_pkts_too_long; st.allocation_errors = q->stat_allocation_errors; st.time_next_delayed_flow = q->time_next_delayed_flow + q->timer_slack - ktime_get_ns(); st.flows = q->flows; st.inactive_flows = q->inactive_flows; st.throttled_flows = q->throttled_flows; st.unthrottle_latency_ns = min_t(unsigned long, q->unthrottle_latency_ns, ~0U); st.ce_mark = q->stat_ce_mark; st.horizon_drops = q->stat_horizon_drops; st.horizon_caps = q->stat_horizon_caps; for (i = 0; i < FQ_BANDS; i++) { st.band_drops[i] = q->stat_band_drops[i]; st.band_pkt_count[i] = q->band_pkt_count[i]; } sch_tree_unlock(sch); return gnet_stats_copy_app(d, &st, sizeof(st)); } static struct Qdisc_ops fq_qdisc_ops __read_mostly = { .id = "fq", .priv_size = sizeof(struct fq_sched_data), .enqueue = fq_enqueue, .dequeue = fq_dequeue, .peek = qdisc_peek_dequeued, .init = fq_init, .reset = fq_reset, .destroy = fq_destroy, .change = fq_change, .dump = fq_dump, .dump_stats = fq_dump_stats, .owner = THIS_MODULE, }; MODULE_ALIAS_NET_SCH("fq"); static int __init fq_module_init(void) { int ret; fq_flow_cachep = kmem_cache_create("fq_flow_cache", sizeof(struct fq_flow), 0, SLAB_HWCACHE_ALIGN, NULL); if (!fq_flow_cachep) return -ENOMEM; ret = register_qdisc(&fq_qdisc_ops); if (ret) kmem_cache_destroy(fq_flow_cachep); return ret; } static void __exit fq_module_exit(void) { unregister_qdisc(&fq_qdisc_ops); kmem_cache_destroy(fq_flow_cachep); } module_init(fq_module_init) module_exit(fq_module_exit) MODULE_AUTHOR("Eric Dumazet"); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("Fair Queue Packet Scheduler");
5 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 // SPDX-License-Identifier: GPL-2.0 /* * * Copyright (C) 2019-2021 Paragon Software GmbH, All rights reserved. * * This code builds two trees of free clusters extents. * Trees are sorted by start of extent and by length of extent. * NTFS_MAX_WND_EXTENTS defines the maximum number of elements in trees. * In extreme case code reads on-disk bitmap to find free clusters. * */ #include <linux/buffer_head.h> #include <linux/fs.h> #include <linux/kernel.h> #include "ntfs.h" #include "ntfs_fs.h" /* * Maximum number of extents in tree. */ #define NTFS_MAX_WND_EXTENTS (32u * 1024u) struct rb_node_key { struct rb_node node; size_t key; }; struct e_node { struct rb_node_key start; /* Tree sorted by start. */ struct rb_node_key count; /* Tree sorted by len. */ }; static int wnd_rescan(struct wnd_bitmap *wnd); static struct buffer_head *wnd_map(struct wnd_bitmap *wnd, size_t iw); static bool wnd_is_free_hlp(struct wnd_bitmap *wnd, size_t bit, size_t bits); static struct kmem_cache *ntfs_enode_cachep; int __init ntfs3_init_bitmap(void) { ntfs_enode_cachep = kmem_cache_create("ntfs3_enode_cache", sizeof(struct e_node), 0, SLAB_RECLAIM_ACCOUNT, NULL); return ntfs_enode_cachep ? 0 : -ENOMEM; } void ntfs3_exit_bitmap(void) { kmem_cache_destroy(ntfs_enode_cachep); } /* * wnd_scan * * b_pos + b_len - biggest fragment. * Scan range [wpos wbits) window @buf. * * Return: -1 if not found. */ static size_t wnd_scan(const void *buf, size_t wbit, u32 wpos, u32 wend, size_t to_alloc, size_t *prev_tail, size_t *b_pos, size_t *b_len) { while (wpos < wend) { size_t free_len; u32 free_bits, end; u32 used = find_next_zero_bit_le(buf, wend, wpos); if (used >= wend) { if (*b_len < *prev_tail) { *b_pos = wbit - *prev_tail; *b_len = *prev_tail; } *prev_tail = 0; return -1; } if (used > wpos) { wpos = used; if (*b_len < *prev_tail) { *b_pos = wbit - *prev_tail; *b_len = *prev_tail; } *prev_tail = 0; } /* * Now we have a fragment [wpos, wend) staring with 0. */ end = wpos + to_alloc - *prev_tail; free_bits = find_next_bit_le(buf, min(end, wend), wpos); free_len = *prev_tail + free_bits - wpos; if (*b_len < free_len) { *b_pos = wbit + wpos - *prev_tail; *b_len = free_len; } if (free_len >= to_alloc) return wbit + wpos - *prev_tail; if (free_bits >= wend) { *prev_tail += free_bits - wpos; return -1; } wpos = free_bits + 1; *prev_tail = 0; } return -1; } /* * wnd_close - Frees all resources. */ void wnd_close(struct wnd_bitmap *wnd) { struct rb_node *node, *next; kvfree(wnd->free_bits); wnd->free_bits = NULL; run_close(&wnd->run); node = rb_first(&wnd->start_tree); while (node) { next = rb_next(node); rb_erase(node, &wnd->start_tree); kmem_cache_free(ntfs_enode_cachep, rb_entry(node, struct e_node, start.node)); node = next; } } static struct rb_node *rb_lookup(struct rb_root *root, size_t v) { struct rb_node **p = &root->rb_node; struct rb_node *r = NULL; while (*p) { struct rb_node_key *k; k = rb_entry(*p, struct rb_node_key, node); if (v < k->key) { p = &(*p)->rb_left; } else if (v > k->key) { r = &k->node; p = &(*p)->rb_right; } else { return &k->node; } } return r; } /* * rb_insert_count - Helper function to insert special kind of 'count' tree. */ static inline bool rb_insert_count(struct rb_root *root, struct e_node *e) { struct rb_node **p = &root->rb_node; struct rb_node *parent = NULL; size_t e_ckey = e->count.key; size_t e_skey = e->start.key; while (*p) { struct e_node *k = rb_entry(parent = *p, struct e_node, count.node); if (e_ckey > k->count.key) { p = &(*p)->rb_left; } else if (e_ckey < k->count.key) { p = &(*p)->rb_right; } else if (e_skey < k->start.key) { p = &(*p)->rb_left; } else if (e_skey > k->start.key) { p = &(*p)->rb_right; } else { WARN_ON(1); return false; } } rb_link_node(&e->count.node, parent, p); rb_insert_color(&e->count.node, root); return true; } /* * rb_insert_start - Helper function to insert special kind of 'count' tree. */ static inline bool rb_insert_start(struct rb_root *root, struct e_node *e) { struct rb_node **p = &root->rb_node; struct rb_node *parent = NULL; size_t e_skey = e->start.key; while (*p) { struct e_node *k; parent = *p; k = rb_entry(parent, struct e_node, start.node); if (e_skey < k->start.key) { p = &(*p)->rb_left; } else if (e_skey > k->start.key) { p = &(*p)->rb_right; } else { WARN_ON(1); return false; } } rb_link_node(&e->start.node, parent, p); rb_insert_color(&e->start.node, root); return true; } /* * wnd_add_free_ext - Adds a new extent of free space. * @build: 1 when building tree. */ static void wnd_add_free_ext(struct wnd_bitmap *wnd, size_t bit, size_t len, bool build) { struct e_node *e, *e0 = NULL; size_t ib, end_in = bit + len; struct rb_node *n; if (build) { /* Use extent_min to filter too short extents. */ if (wnd->count >= NTFS_MAX_WND_EXTENTS && len <= wnd->extent_min) { wnd->uptodated = -1; return; } } else { /* Try to find extent before 'bit'. */ n = rb_lookup(&wnd->start_tree, bit); if (!n) { n = rb_first(&wnd->start_tree); } else { e = rb_entry(n, struct e_node, start.node); n = rb_next(n); if (e->start.key + e->count.key == bit) { /* Remove left. */ bit = e->start.key; len += e->count.key; rb_erase(&e->start.node, &wnd->start_tree); rb_erase(&e->count.node, &wnd->count_tree); wnd->count -= 1; e0 = e; } } while (n) { size_t next_end; e = rb_entry(n, struct e_node, start.node); next_end = e->start.key + e->count.key; if (e->start.key > end_in) break; /* Remove right. */ n = rb_next(n); len += next_end - end_in; end_in = next_end; rb_erase(&e->start.node, &wnd->start_tree); rb_erase(&e->count.node, &wnd->count_tree); wnd->count -= 1; if (!e0) e0 = e; else kmem_cache_free(ntfs_enode_cachep, e); } if (wnd->uptodated != 1) { /* Check bits before 'bit'. */ ib = wnd->zone_bit == wnd->zone_end || bit < wnd->zone_end ? 0 : wnd->zone_end; while (bit > ib && wnd_is_free_hlp(wnd, bit - 1, 1)) { bit -= 1; len += 1; } /* Check bits after 'end_in'. */ ib = wnd->zone_bit == wnd->zone_end || end_in > wnd->zone_bit ? wnd->nbits : wnd->zone_bit; while (end_in < ib && wnd_is_free_hlp(wnd, end_in, 1)) { end_in += 1; len += 1; } } } /* Insert new fragment. */ if (wnd->count >= NTFS_MAX_WND_EXTENTS) { if (e0) kmem_cache_free(ntfs_enode_cachep, e0); wnd->uptodated = -1; /* Compare with smallest fragment. */ n = rb_last(&wnd->count_tree); e = rb_entry(n, struct e_node, count.node); if (len <= e->count.key) goto out; /* Do not insert small fragments. */ if (build) { struct e_node *e2; n = rb_prev(n); e2 = rb_entry(n, struct e_node, count.node); /* Smallest fragment will be 'e2->count.key'. */ wnd->extent_min = e2->count.key; } /* Replace smallest fragment by new one. */ rb_erase(&e->start.node, &wnd->start_tree); rb_erase(&e->count.node, &wnd->count_tree); wnd->count -= 1; } else { e = e0 ? e0 : kmem_cache_alloc(ntfs_enode_cachep, GFP_ATOMIC); if (!e) { wnd->uptodated = -1; goto out; } if (build && len <= wnd->extent_min) wnd->extent_min = len; } e->start.key = bit; e->count.key = len; if (len > wnd->extent_max) wnd->extent_max = len; rb_insert_start(&wnd->start_tree, e); rb_insert_count(&wnd->count_tree, e); wnd->count += 1; out:; } /* * wnd_remove_free_ext - Remove a run from the cached free space. */ static void wnd_remove_free_ext(struct wnd_bitmap *wnd, size_t bit, size_t len) { struct rb_node *n, *n3; struct e_node *e, *e3; size_t end_in = bit + len; size_t end3, end, new_key, new_len, max_new_len; /* Try to find extent before 'bit'. */ n = rb_lookup(&wnd->start_tree, bit); if (!n) return; e = rb_entry(n, struct e_node, start.node); end = e->start.key + e->count.key; new_key = new_len = 0; len = e->count.key; /* Range [bit,end_in) must be inside 'e' or outside 'e' and 'n'. */ if (e->start.key > bit) ; else if (end_in <= end) { /* Range [bit,end_in) inside 'e'. */ new_key = end_in; new_len = end - end_in; len = bit - e->start.key; } else if (bit > end) { bool bmax = false; n3 = rb_next(n); while (n3) { e3 = rb_entry(n3, struct e_node, start.node); if (e3->start.key >= end_in) break; if (e3->count.key == wnd->extent_max) bmax = true; end3 = e3->start.key + e3->count.key; if (end3 > end_in) { e3->start.key = end_in; rb_erase(&e3->count.node, &wnd->count_tree); e3->count.key = end3 - end_in; rb_insert_count(&wnd->count_tree, e3); break; } n3 = rb_next(n3); rb_erase(&e3->start.node, &wnd->start_tree); rb_erase(&e3->count.node, &wnd->count_tree); wnd->count -= 1; kmem_cache_free(ntfs_enode_cachep, e3); } if (!bmax) return; n3 = rb_first(&wnd->count_tree); wnd->extent_max = n3 ? rb_entry(n3, struct e_node, count.node)->count.key : 0; return; } if (e->count.key != wnd->extent_max) { ; } else if (rb_prev(&e->count.node)) { ; } else { n3 = rb_next(&e->count.node); max_new_len = max(len, new_len); if (!n3) { wnd->extent_max = max_new_len; } else { e3 = rb_entry(n3, struct e_node, count.node); wnd->extent_max = max(e3->count.key, max_new_len); } } if (!len) { if (new_len) { e->start.key = new_key; rb_erase(&e->count.node, &wnd->count_tree); e->count.key = new_len; rb_insert_count(&wnd->count_tree, e); } else { rb_erase(&e->start.node, &wnd->start_tree); rb_erase(&e->count.node, &wnd->count_tree); wnd->count -= 1; kmem_cache_free(ntfs_enode_cachep, e); } goto out; } rb_erase(&e->count.node, &wnd->count_tree); e->count.key = len; rb_insert_count(&wnd->count_tree, e); if (!new_len) goto out; if (wnd->count >= NTFS_MAX_WND_EXTENTS) { wnd->uptodated = -1; /* Get minimal extent. */ e = rb_entry(rb_last(&wnd->count_tree), struct e_node, count.node); if (e->count.key > new_len) goto out; /* Replace minimum. */ rb_erase(&e->start.node, &wnd->start_tree); rb_erase(&e->count.node, &wnd->count_tree); wnd->count -= 1; } else { e = kmem_cache_alloc(ntfs_enode_cachep, GFP_ATOMIC); if (!e) wnd->uptodated = -1; } if (e) { e->start.key = new_key; e->count.key = new_len; rb_insert_start(&wnd->start_tree, e); rb_insert_count(&wnd->count_tree, e); wnd->count += 1; } out: if (!wnd->count && 1 != wnd->uptodated) wnd_rescan(wnd); } /* * wnd_rescan - Scan all bitmap. Used while initialization. */ static int wnd_rescan(struct wnd_bitmap *wnd) { int err = 0; size_t prev_tail = 0; struct super_block *sb = wnd->sb; struct ntfs_sb_info *sbi = sb->s_fs_info; u64 lbo, len = 0; u32 blocksize = sb->s_blocksize; u8 cluster_bits = sbi->cluster_bits; u32 wbits = 8 * sb->s_blocksize; u32 used, frb; size_t wpos, wbit, iw, vbo; struct buffer_head *bh = NULL; CLST lcn, clen; wnd->uptodated = 0; wnd->extent_max = 0; wnd->extent_min = MINUS_ONE_T; wnd->total_zeroes = 0; vbo = 0; for (iw = 0; iw < wnd->nwnd; iw++) { if (iw + 1 == wnd->nwnd) wbits = wnd->bits_last; if (wnd->inited) { if (!wnd->free_bits[iw]) { /* All ones. */ if (prev_tail) { wnd_add_free_ext(wnd, vbo * 8 - prev_tail, prev_tail, true); prev_tail = 0; } goto next_wnd; } if (wbits == wnd->free_bits[iw]) { /* All zeroes. */ prev_tail += wbits; wnd->total_zeroes += wbits; goto next_wnd; } } if (!len) { u32 off = vbo & sbi->cluster_mask; if (!run_lookup_entry(&wnd->run, vbo >> cluster_bits, &lcn, &clen, NULL)) { err = -ENOENT; goto out; } lbo = ((u64)lcn << cluster_bits) + off; len = ((u64)clen << cluster_bits) - off; } bh = ntfs_bread(sb, lbo >> sb->s_blocksize_bits); if (!bh) { err = -EIO; goto out; } used = ntfs_bitmap_weight_le(bh->b_data, wbits); if (used < wbits) { frb = wbits - used; wnd->free_bits[iw] = frb; wnd->total_zeroes += frb; } wpos = 0; wbit = vbo * 8; if (wbit + wbits > wnd->nbits) wbits = wnd->nbits - wbit; do { used = find_next_zero_bit_le(bh->b_data, wbits, wpos); if (used > wpos && prev_tail) { wnd_add_free_ext(wnd, wbit + wpos - prev_tail, prev_tail, true); prev_tail = 0; } wpos = used; if (wpos >= wbits) { /* No free blocks. */ prev_tail = 0; break; } frb = find_next_bit_le(bh->b_data, wbits, wpos); if (frb >= wbits) { /* Keep last free block. */ prev_tail += frb - wpos; break; } wnd_add_free_ext(wnd, wbit + wpos - prev_tail, frb + prev_tail - wpos, true); /* Skip free block and first '1'. */ wpos = frb + 1; /* Reset previous tail. */ prev_tail = 0; } while (wpos < wbits); next_wnd: if (bh) put_bh(bh); bh = NULL; vbo += blocksize; if (len) { len -= blocksize; lbo += blocksize; } } /* Add last block. */ if (prev_tail) wnd_add_free_ext(wnd, wnd->nbits - prev_tail, prev_tail, true); /* * Before init cycle wnd->uptodated was 0. * If any errors or limits occurs while initialization then * wnd->uptodated will be -1. * If 'uptodated' is still 0 then Tree is really updated. */ if (!wnd->uptodated) wnd->uptodated = 1; if (wnd->zone_bit != wnd->zone_end) { size_t zlen = wnd->zone_end - wnd->zone_bit; wnd->zone_end = wnd->zone_bit; wnd_zone_set(wnd, wnd->zone_bit, zlen); } out: return err; } int wnd_init(struct wnd_bitmap *wnd, struct super_block *sb, size_t nbits) { int err; u32 blocksize = sb->s_blocksize; u32 wbits = blocksize * 8; init_rwsem(&wnd->rw_lock); wnd->sb = sb; wnd->nbits = nbits; wnd->total_zeroes = nbits; wnd->extent_max = MINUS_ONE_T; wnd->zone_bit = wnd->zone_end = 0; wnd->nwnd = bytes_to_block(sb, ntfs3_bitmap_size(nbits)); wnd->bits_last = nbits & (wbits - 1); if (!wnd->bits_last) wnd->bits_last = wbits; wnd->free_bits = kvmalloc_array(wnd->nwnd, sizeof(u16), GFP_KERNEL | __GFP_ZERO); if (!wnd->free_bits) return -ENOMEM; err = wnd_rescan(wnd); if (err) return err; wnd->inited = true; return 0; } /* * wnd_map - Call sb_bread for requested window. */ static struct buffer_head *wnd_map(struct wnd_bitmap *wnd, size_t iw) { size_t vbo; CLST lcn, clen; struct super_block *sb = wnd->sb; struct ntfs_sb_info *sbi; struct buffer_head *bh; u64 lbo; sbi = sb->s_fs_info; vbo = (u64)iw << sb->s_blocksize_bits; if (!run_lookup_entry(&wnd->run, vbo >> sbi->cluster_bits, &lcn, &clen, NULL)) { return ERR_PTR(-ENOENT); } lbo = ((u64)lcn << sbi->cluster_bits) + (vbo & sbi->cluster_mask); bh = ntfs_bread(wnd->sb, lbo >> sb->s_blocksize_bits); if (!bh) return ERR_PTR(-EIO); return bh; } /* * wnd_set_free - Mark the bits range from bit to bit + bits as free. */ int wnd_set_free(struct wnd_bitmap *wnd, size_t bit, size_t bits) { int err = 0; struct super_block *sb = wnd->sb; u32 wbits = 8 * sb->s_blocksize; size_t iw = bit >> (sb->s_blocksize_bits + 3); u32 wbit = bit & (wbits - 1); struct buffer_head *bh; u32 op; for (; iw < wnd->nwnd && bits; iw++, bit += op, bits -= op, wbit = 0) { if (iw + 1 == wnd->nwnd) wbits = wnd->bits_last; op = min_t(u32, wbits - wbit, bits); bh = wnd_map(wnd, iw); if (IS_ERR(bh)) { err = PTR_ERR(bh); break; } lock_buffer(bh); ntfs_bitmap_clear_le(bh->b_data, wbit, op); wnd->free_bits[iw] += op; wnd->total_zeroes += op; set_buffer_uptodate(bh); mark_buffer_dirty(bh); unlock_buffer(bh); put_bh(bh); wnd_add_free_ext(wnd, bit, op, false); } return err; } /* * wnd_set_used - Mark the bits range from bit to bit + bits as used. */ int wnd_set_used(struct wnd_bitmap *wnd, size_t bit, size_t bits) { int err = 0; struct super_block *sb = wnd->sb; size_t iw = bit >> (sb->s_blocksize_bits + 3); u32 wbits = 8 * sb->s_blocksize; u32 wbit = bit & (wbits - 1); struct buffer_head *bh; u32 op; for (; iw < wnd->nwnd && bits; iw++, bit += op, bits -= op, wbit = 0) { if (unlikely(iw + 1 == wnd->nwnd)) wbits = wnd->bits_last; op = min_t(u32, wbits - wbit, bits); bh = wnd_map(wnd, iw); if (IS_ERR(bh)) { err = PTR_ERR(bh); break; } lock_buffer(bh); ntfs_bitmap_set_le(bh->b_data, wbit, op); wnd->free_bits[iw] -= op; wnd->total_zeroes -= op; set_buffer_uptodate(bh); mark_buffer_dirty(bh); unlock_buffer(bh); put_bh(bh); if (!RB_EMPTY_ROOT(&wnd->start_tree)) wnd_remove_free_ext(wnd, bit, op); } return err; } /* * wnd_set_used_safe - Mark the bits range from bit to bit + bits as used. * * Unlikely wnd_set_used/wnd_set_free this function is not full trusted. * It scans every bit in bitmap and marks free bit as used. * @done - how many bits were marked as used. * * NOTE: normally *done should be 0. */ int wnd_set_used_safe(struct wnd_bitmap *wnd, size_t bit, size_t bits, size_t *done) { size_t i, from = 0, len = 0; int err = 0; *done = 0; for (i = 0; i < bits; i++) { if (wnd_is_free(wnd, bit + i, 1)) { if (!len) from = bit + i; len += 1; } else if (len) { err = wnd_set_used(wnd, from, len); *done += len; len = 0; if (err) break; } } if (len) { /* last fragment. */ err = wnd_set_used(wnd, from, len); *done += len; } return err; } /* * wnd_is_free_hlp * * Return: True if all clusters [bit, bit+bits) are free (bitmap only). */ static bool wnd_is_free_hlp(struct wnd_bitmap *wnd, size_t bit, size_t bits) { struct super_block *sb = wnd->sb; size_t iw = bit >> (sb->s_blocksize_bits + 3); u32 wbits = 8 * sb->s_blocksize; u32 wbit = bit & (wbits - 1); u32 op; for (; iw < wnd->nwnd && bits; iw++, bits -= op, wbit = 0) { if (unlikely(iw + 1 == wnd->nwnd)) wbits = wnd->bits_last; op = min_t(u32, wbits - wbit, bits); if (wbits != wnd->free_bits[iw]) { bool ret; struct buffer_head *bh = wnd_map(wnd, iw); if (IS_ERR(bh)) return false; ret = are_bits_clear(bh->b_data, wbit, op); put_bh(bh); if (!ret) return false; } } return true; } /* * wnd_is_free * * Return: True if all clusters [bit, bit+bits) are free. */ bool wnd_is_free(struct wnd_bitmap *wnd, size_t bit, size_t bits) { bool ret; struct rb_node *n; size_t end; struct e_node *e; if (RB_EMPTY_ROOT(&wnd->start_tree)) goto use_wnd; n = rb_lookup(&wnd->start_tree, bit); if (!n) goto use_wnd; e = rb_entry(n, struct e_node, start.node); end = e->start.key + e->count.key; if (bit < end && bit + bits <= end) return true; use_wnd: ret = wnd_is_free_hlp(wnd, bit, bits); return ret; } /* * wnd_is_used * * Return: True if all clusters [bit, bit+bits) are used. */ bool wnd_is_used(struct wnd_bitmap *wnd, size_t bit, size_t bits) { bool ret = false; struct super_block *sb = wnd->sb; size_t iw = bit >> (sb->s_blocksize_bits + 3); u32 wbits = 8 * sb->s_blocksize; u32 wbit = bit & (wbits - 1); u32 op; size_t end; struct rb_node *n; struct e_node *e; if (RB_EMPTY_ROOT(&wnd->start_tree)) goto use_wnd; end = bit + bits; n = rb_lookup(&wnd->start_tree, end - 1); if (!n) goto use_wnd; e = rb_entry(n, struct e_node, start.node); if (e->start.key + e->count.key > bit) return false; use_wnd: for (; iw < wnd->nwnd && bits; iw++, bits -= op, wbit = 0) { if (unlikely(iw + 1 == wnd->nwnd)) wbits = wnd->bits_last; op = min_t(u32, wbits - wbit, bits); if (wnd->free_bits[iw]) { bool ret; struct buffer_head *bh = wnd_map(wnd, iw); if (IS_ERR(bh)) goto out; ret = are_bits_set(bh->b_data, wbit, op); put_bh(bh); if (!ret) goto out; } } ret = true; out: return ret; } /* * wnd_find - Look for free space. * * - flags - BITMAP_FIND_XXX flags * * Return: 0 if not found. */ size_t wnd_find(struct wnd_bitmap *wnd, size_t to_alloc, size_t hint, size_t flags, size_t *allocated) { struct super_block *sb; u32 wbits, wpos, wzbit, wzend; size_t fnd, max_alloc, b_len, b_pos; size_t iw, prev_tail, nwnd, wbit, ebit, zbit, zend; size_t to_alloc0 = to_alloc; const struct e_node *e; const struct rb_node *pr, *cr; u8 log2_bits; bool fbits_valid; struct buffer_head *bh; /* Fast checking for available free space. */ if (flags & BITMAP_FIND_FULL) { size_t zeroes = wnd_zeroes(wnd); zeroes -= wnd->zone_end - wnd->zone_bit; if (zeroes < to_alloc0) goto no_space; if (to_alloc0 > wnd->extent_max) goto no_space; } else { if (to_alloc > wnd->extent_max) to_alloc = wnd->extent_max; } if (wnd->zone_bit <= hint && hint < wnd->zone_end) hint = wnd->zone_end; max_alloc = wnd->nbits; b_len = b_pos = 0; if (hint >= max_alloc) hint = 0; if (RB_EMPTY_ROOT(&wnd->start_tree)) { if (wnd->uptodated == 1) { /* Extents tree is updated -> No free space. */ goto no_space; } goto scan_bitmap; } e = NULL; if (!hint) goto allocate_biggest; /* Use hint: Enumerate extents by start >= hint. */ pr = NULL; cr = wnd->start_tree.rb_node; for (;;) { e = rb_entry(cr, struct e_node, start.node); if (e->start.key == hint) break; if (e->start.key < hint) { pr = cr; cr = cr->rb_right; if (!cr) break; continue; } cr = cr->rb_left; if (!cr) { e = pr ? rb_entry(pr, struct e_node, start.node) : NULL; break; } } if (!e) goto allocate_biggest; if (e->start.key + e->count.key > hint) { /* We have found extension with 'hint' inside. */ size_t len = e->start.key + e->count.key - hint; if (len >= to_alloc && hint + to_alloc <= max_alloc) { fnd = hint; goto found; } if (!(flags & BITMAP_FIND_FULL)) { if (len > to_alloc) len = to_alloc; if (hint + len <= max_alloc) { fnd = hint; to_alloc = len; goto found; } } } allocate_biggest: /* Allocate from biggest free extent. */ e = rb_entry(rb_first(&wnd->count_tree), struct e_node, count.node); if (e->count.key != wnd->extent_max) wnd->extent_max = e->count.key; if (e->count.key < max_alloc) { if (e->count.key >= to_alloc) { ; } else if (flags & BITMAP_FIND_FULL) { if (e->count.key < to_alloc0) { /* Biggest free block is less then requested. */ goto no_space; } to_alloc = e->count.key; } else if (-1 != wnd->uptodated) { to_alloc = e->count.key; } else { /* Check if we can use more bits. */ size_t op, max_check; struct rb_root start_tree; memcpy(&start_tree, &wnd->start_tree, sizeof(struct rb_root)); memset(&wnd->start_tree, 0, sizeof(struct rb_root)); max_check = e->start.key + to_alloc; if (max_check > max_alloc) max_check = max_alloc; for (op = e->start.key + e->count.key; op < max_check; op++) { if (!wnd_is_free(wnd, op, 1)) break; } memcpy(&wnd->start_tree, &start_tree, sizeof(struct rb_root)); to_alloc = op - e->start.key; } /* Prepare to return. */ fnd = e->start.key; if (e->start.key + to_alloc > max_alloc) to_alloc = max_alloc - e->start.key; goto found; } if (wnd->uptodated == 1) { /* Extents tree is updated -> no free space. */ goto no_space; } b_len = e->count.key; b_pos = e->start.key; scan_bitmap: sb = wnd->sb; log2_bits = sb->s_blocksize_bits + 3; /* At most two ranges [hint, max_alloc) + [0, hint). */ Again: /* TODO: Optimize request for case nbits > wbits. */ iw = hint >> log2_bits; wbits = sb->s_blocksize * 8; wpos = hint & (wbits - 1); prev_tail = 0; fbits_valid = true; if (max_alloc == wnd->nbits) { nwnd = wnd->nwnd; } else { size_t t = max_alloc + wbits - 1; nwnd = likely(t > max_alloc) ? (t >> log2_bits) : wnd->nwnd; } /* Enumerate all windows. */ for (; iw < nwnd; iw++) { wbit = iw << log2_bits; if (!wnd->free_bits[iw]) { if (prev_tail > b_len) { b_pos = wbit - prev_tail; b_len = prev_tail; } /* Skip full used window. */ prev_tail = 0; wpos = 0; continue; } if (unlikely(iw + 1 == nwnd)) { if (max_alloc == wnd->nbits) { wbits = wnd->bits_last; } else { size_t t = max_alloc & (wbits - 1); if (t) { wbits = t; fbits_valid = false; } } } if (wnd->zone_end > wnd->zone_bit) { ebit = wbit + wbits; zbit = max(wnd->zone_bit, wbit); zend = min(wnd->zone_end, ebit); /* Here we have a window [wbit, ebit) and zone [zbit, zend). */ if (zend <= zbit) { /* Zone does not overlap window. */ } else { wzbit = zbit - wbit; wzend = zend - wbit; /* Zone overlaps window. */ if (wnd->free_bits[iw] == wzend - wzbit) { prev_tail = 0; wpos = 0; continue; } /* Scan two ranges window: [wbit, zbit) and [zend, ebit). */ bh = wnd_map(wnd, iw); if (IS_ERR(bh)) { /* TODO: Error */ prev_tail = 0; wpos = 0; continue; } /* Scan range [wbit, zbit). */ if (wpos < wzbit) { /* Scan range [wpos, zbit). */ fnd = wnd_scan(bh->b_data, wbit, wpos, wzbit, to_alloc, &prev_tail, &b_pos, &b_len); if (fnd != MINUS_ONE_T) { put_bh(bh); goto found; } } prev_tail = 0; /* Scan range [zend, ebit). */ if (wzend < wbits) { fnd = wnd_scan(bh->b_data, wbit, max(wzend, wpos), wbits, to_alloc, &prev_tail, &b_pos, &b_len); if (fnd != MINUS_ONE_T) { put_bh(bh); goto found; } } wpos = 0; put_bh(bh); continue; } } /* Current window does not overlap zone. */ if (!wpos && fbits_valid && wnd->free_bits[iw] == wbits) { /* Window is empty. */ if (prev_tail + wbits >= to_alloc) { fnd = wbit + wpos - prev_tail; goto found; } /* Increase 'prev_tail' and process next window. */ prev_tail += wbits; wpos = 0; continue; } /* Read window. */ bh = wnd_map(wnd, iw); if (IS_ERR(bh)) { // TODO: Error. prev_tail = 0; wpos = 0; continue; } /* Scan range [wpos, eBits). */ fnd = wnd_scan(bh->b_data, wbit, wpos, wbits, to_alloc, &prev_tail, &b_pos, &b_len); put_bh(bh); if (fnd != MINUS_ONE_T) goto found; } if (b_len < prev_tail) { /* The last fragment. */ b_len = prev_tail; b_pos = max_alloc - prev_tail; } if (hint) { /* * We have scanned range [hint max_alloc). * Prepare to scan range [0 hint + to_alloc). */ size_t nextmax = hint + to_alloc; if (likely(nextmax >= hint) && nextmax < max_alloc) max_alloc = nextmax; hint = 0; goto Again; } if (!b_len) goto no_space; wnd->extent_max = b_len; if (flags & BITMAP_FIND_FULL) goto no_space; fnd = b_pos; to_alloc = b_len; found: if (flags & BITMAP_FIND_MARK_AS_USED) { /* TODO: Optimize remove extent (pass 'e'?). */ if (wnd_set_used(wnd, fnd, to_alloc)) goto no_space; } else if (wnd->extent_max != MINUS_ONE_T && to_alloc > wnd->extent_max) { wnd->extent_max = to_alloc; } *allocated = fnd; return to_alloc; no_space: return 0; } /* * wnd_extend - Extend bitmap ($MFT bitmap). */ int wnd_extend(struct wnd_bitmap *wnd, size_t new_bits) { int err; struct super_block *sb = wnd->sb; struct ntfs_sb_info *sbi = sb->s_fs_info; u32 blocksize = sb->s_blocksize; u32 wbits = blocksize * 8; u32 b0, new_last; size_t bits, iw, new_wnd; size_t old_bits = wnd->nbits; u16 *new_free; if (new_bits <= old_bits) return -EINVAL; /* Align to 8 byte boundary. */ new_wnd = bytes_to_block(sb, ntfs3_bitmap_size(new_bits)); new_last = new_bits & (wbits - 1); if (!new_last) new_last = wbits; if (new_wnd != wnd->nwnd) { new_free = kmalloc_array(new_wnd, sizeof(u16), GFP_NOFS); if (!new_free) return -ENOMEM; memcpy(new_free, wnd->free_bits, wnd->nwnd * sizeof(short)); memset(new_free + wnd->nwnd, 0, (new_wnd - wnd->nwnd) * sizeof(short)); kvfree(wnd->free_bits); wnd->free_bits = new_free; } /* Zero bits [old_bits,new_bits). */ bits = new_bits - old_bits; b0 = old_bits & (wbits - 1); for (iw = old_bits >> (sb->s_blocksize_bits + 3); bits; iw += 1) { u32 op; size_t frb; u64 vbo, lbo, bytes; struct buffer_head *bh; if (iw + 1 == new_wnd) wbits = new_last; op = b0 + bits > wbits ? wbits - b0 : bits; vbo = (u64)iw * blocksize; err = ntfs_vbo_to_lbo(sbi, &wnd->run, vbo, &lbo, &bytes); if (err) return err; bh = ntfs_bread(sb, lbo >> sb->s_blocksize_bits); if (!bh) return -EIO; lock_buffer(bh); ntfs_bitmap_clear_le(bh->b_data, b0, blocksize * 8 - b0); frb = wbits - ntfs_bitmap_weight_le(bh->b_data, wbits); wnd->total_zeroes += frb - wnd->free_bits[iw]; wnd->free_bits[iw] = frb; set_buffer_uptodate(bh); mark_buffer_dirty(bh); unlock_buffer(bh); /* err = sync_dirty_buffer(bh); */ b0 = 0; bits -= op; } wnd->nbits = new_bits; wnd->nwnd = new_wnd; wnd->bits_last = new_last; wnd_add_free_ext(wnd, old_bits, new_bits - old_bits, false); return 0; } void wnd_zone_set(struct wnd_bitmap *wnd, size_t lcn, size_t len) { size_t zlen = wnd->zone_end - wnd->zone_bit; if (zlen) wnd_add_free_ext(wnd, wnd->zone_bit, zlen, false); if (!RB_EMPTY_ROOT(&wnd->start_tree) && len) wnd_remove_free_ext(wnd, lcn, len); wnd->zone_bit = lcn; wnd->zone_end = lcn + len; } int ntfs_trim_fs(struct ntfs_sb_info *sbi, struct fstrim_range *range) { int err = 0; struct super_block *sb = sbi->sb; struct wnd_bitmap *wnd = &sbi->used.bitmap; u32 wbits = 8 * sb->s_blocksize; CLST len = 0, lcn = 0, done = 0; CLST minlen = bytes_to_cluster(sbi, range->minlen); CLST lcn_from = bytes_to_cluster(sbi, range->start); size_t iw = lcn_from >> (sb->s_blocksize_bits + 3); u32 wbit = lcn_from & (wbits - 1); CLST lcn_to; if (!minlen) minlen = 1; if (range->len == (u64)-1) lcn_to = wnd->nbits; else lcn_to = bytes_to_cluster(sbi, range->start + range->len); down_read_nested(&wnd->rw_lock, BITMAP_MUTEX_CLUSTERS); for (; iw < wnd->nwnd; iw++, wbit = 0) { CLST lcn_wnd = iw * wbits; struct buffer_head *bh; if (lcn_wnd > lcn_to) break; if (!wnd->free_bits[iw]) continue; if (iw + 1 == wnd->nwnd) wbits = wnd->bits_last; if (lcn_wnd + wbits > lcn_to) wbits = lcn_to - lcn_wnd; bh = wnd_map(wnd, iw); if (IS_ERR(bh)) { err = PTR_ERR(bh); break; } for (; wbit < wbits; wbit++) { if (!test_bit_le(wbit, bh->b_data)) { if (!len) lcn = lcn_wnd + wbit; len += 1; continue; } if (len >= minlen) { err = ntfs_discard(sbi, lcn, len); if (err) goto out; done += len; } len = 0; } put_bh(bh); } /* Process the last fragment. */ if (len >= minlen) { err = ntfs_discard(sbi, lcn, len); if (err) goto out; done += len; } out: range->len = (u64)done << sbi->cluster_bits; up_read(&wnd->rw_lock); return err; } #if BITS_PER_LONG == 64 typedef __le64 bitmap_ulong; #define cpu_to_ul(x) cpu_to_le64(x) #define ul_to_cpu(x) le64_to_cpu(x) #else typedef __le32 bitmap_ulong; #define cpu_to_ul(x) cpu_to_le32(x) #define ul_to_cpu(x) le32_to_cpu(x) #endif void ntfs_bitmap_set_le(void *map, unsigned int start, int len) { bitmap_ulong *p = (bitmap_ulong *)map + BIT_WORD(start); const unsigned int size = start + len; int bits_to_set = BITS_PER_LONG - (start % BITS_PER_LONG); bitmap_ulong mask_to_set = cpu_to_ul(BITMAP_FIRST_WORD_MASK(start)); while (len - bits_to_set >= 0) { *p |= mask_to_set; len -= bits_to_set; bits_to_set = BITS_PER_LONG; mask_to_set = cpu_to_ul(~0UL); p++; } if (len) { mask_to_set &= cpu_to_ul(BITMAP_LAST_WORD_MASK(size)); *p |= mask_to_set; } } void ntfs_bitmap_clear_le(void *map, unsigned int start, int len) { bitmap_ulong *p = (bitmap_ulong *)map + BIT_WORD(start); const unsigned int size = start + len; int bits_to_clear = BITS_PER_LONG - (start % BITS_PER_LONG); bitmap_ulong mask_to_clear = cpu_to_ul(BITMAP_FIRST_WORD_MASK(start)); while (len - bits_to_clear >= 0) { *p &= ~mask_to_clear; len -= bits_to_clear; bits_to_clear = BITS_PER_LONG; mask_to_clear = cpu_to_ul(~0UL); p++; } if (len) { mask_to_clear &= cpu_to_ul(BITMAP_LAST_WORD_MASK(size)); *p &= ~mask_to_clear; } } unsigned int ntfs_bitmap_weight_le(const void *bitmap, int bits) { const ulong *bmp = bitmap; unsigned int k, lim = bits / BITS_PER_LONG; unsigned int w = 0; for (k = 0; k < lim; k++) w += hweight_long(bmp[k]); if (bits % BITS_PER_LONG) { w += hweight_long(ul_to_cpu(((bitmap_ulong *)bitmap)[k]) & BITMAP_LAST_WORD_MASK(bits)); } return w; }
7 1 6 7 6 6 6 6 6 6 6 6 6 6 6 6 6 6 3 3 2 2 2 3 3 1 1 2 1 1 4 1 3 2 1 4 2 4 2 2 2 1 2 1 1 2 2 1 1 2 1 2 1 4 3 2 4 1 1 1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 // SPDX-License-Identifier: GPL-2.0-only /* * vivid-sdr-cap.c - software defined radio support functions. * * Copyright 2014 Cisco Systems, Inc. and/or its affiliates. All rights reserved. */ #include <linux/errno.h> #include <linux/kernel.h> #include <linux/delay.h> #include <linux/kthread.h> #include <linux/freezer.h> #include <linux/math64.h> #include <linux/videodev2.h> #include <linux/v4l2-dv-timings.h> #include <media/v4l2-common.h> #include <media/v4l2-event.h> #include <media/v4l2-dv-timings.h> #include <linux/fixp-arith.h> #include <linux/jiffies.h> #include "vivid-core.h" #include "vivid-ctrls.h" #include "vivid-sdr-cap.h" /* stream formats */ struct vivid_format { u32 pixelformat; u32 buffersize; }; /* format descriptions for capture and preview */ static const struct vivid_format formats[] = { { .pixelformat = V4L2_SDR_FMT_CU8, .buffersize = SDR_CAP_SAMPLES_PER_BUF * 2, }, { .pixelformat = V4L2_SDR_FMT_CS8, .buffersize = SDR_CAP_SAMPLES_PER_BUF * 2, }, }; static const struct v4l2_frequency_band bands_adc[] = { { .tuner = 0, .type = V4L2_TUNER_ADC, .index = 0, .capability = V4L2_TUNER_CAP_1HZ | V4L2_TUNER_CAP_FREQ_BANDS, .rangelow = 300000, .rangehigh = 300000, }, { .tuner = 0, .type = V4L2_TUNER_ADC, .index = 1, .capability = V4L2_TUNER_CAP_1HZ | V4L2_TUNER_CAP_FREQ_BANDS, .rangelow = 900001, .rangehigh = 2800000, }, { .tuner = 0, .type = V4L2_TUNER_ADC, .index = 2, .capability = V4L2_TUNER_CAP_1HZ | V4L2_TUNER_CAP_FREQ_BANDS, .rangelow = 3200000, .rangehigh = 3200000, }, }; /* ADC band midpoints */ #define BAND_ADC_0 ((bands_adc[0].rangehigh + bands_adc[1].rangelow) / 2) #define BAND_ADC_1 ((bands_adc[1].rangehigh + bands_adc[2].rangelow) / 2) static const struct v4l2_frequency_band bands_fm[] = { { .tuner = 1, .type = V4L2_TUNER_RF, .index = 0, .capability = V4L2_TUNER_CAP_1HZ | V4L2_TUNER_CAP_FREQ_BANDS, .rangelow = 50000000, .rangehigh = 2000000000, }, }; static void vivid_thread_sdr_cap_tick(struct vivid_dev *dev) { struct vivid_buffer *sdr_cap_buf = NULL; dprintk(dev, 1, "SDR Capture Thread Tick\n"); /* Drop a certain percentage of buffers. */ if (dev->perc_dropped_buffers && get_random_u32_below(100) < dev->perc_dropped_buffers) return; spin_lock(&dev->slock); if (!list_empty(&dev->sdr_cap_active)) { sdr_cap_buf = list_entry(dev->sdr_cap_active.next, struct vivid_buffer, list); list_del(&sdr_cap_buf->list); } spin_unlock(&dev->slock); if (sdr_cap_buf) { sdr_cap_buf->vb.sequence = dev->sdr_cap_with_seq_wrap_count; v4l2_ctrl_request_setup(sdr_cap_buf->vb.vb2_buf.req_obj.req, &dev->ctrl_hdl_sdr_cap); v4l2_ctrl_request_complete(sdr_cap_buf->vb.vb2_buf.req_obj.req, &dev->ctrl_hdl_sdr_cap); vivid_sdr_cap_process(dev, sdr_cap_buf); sdr_cap_buf->vb.vb2_buf.timestamp = ktime_get_ns() + dev->time_wrap_offset; vb2_buffer_done(&sdr_cap_buf->vb.vb2_buf, dev->dqbuf_error ? VB2_BUF_STATE_ERROR : VB2_BUF_STATE_DONE); dev->dqbuf_error = false; } } static int vivid_thread_sdr_cap(void *data) { struct vivid_dev *dev = data; u64 samples_since_start; u64 buffers_since_start; u64 next_jiffies_since_start; unsigned long jiffies_since_start; unsigned long cur_jiffies; unsigned wait_jiffies; dprintk(dev, 1, "SDR Capture Thread Start\n"); set_freezable(); /* Resets frame counters */ dev->sdr_cap_seq_offset = 0; dev->sdr_cap_seq_count = 0; dev->jiffies_sdr_cap = jiffies; dev->sdr_cap_seq_resync = false; if (dev->time_wrap) dev->time_wrap_offset = dev->time_wrap - ktime_get_ns(); else dev->time_wrap_offset = 0; for (;;) { try_to_freeze(); if (kthread_should_stop()) break; if (!mutex_trylock(&dev->mutex)) { schedule(); continue; } cur_jiffies = jiffies; if (dev->sdr_cap_seq_resync) { dev->jiffies_sdr_cap = cur_jiffies; dev->sdr_cap_seq_offset = dev->sdr_cap_seq_count + 1; dev->sdr_cap_seq_count = 0; dev->sdr_cap_seq_resync = false; } /* Calculate the number of jiffies since we started streaming */ jiffies_since_start = cur_jiffies - dev->jiffies_sdr_cap; /* Get the number of buffers streamed since the start */ buffers_since_start = (u64)jiffies_since_start * dev->sdr_adc_freq + (HZ * SDR_CAP_SAMPLES_PER_BUF) / 2; do_div(buffers_since_start, HZ * SDR_CAP_SAMPLES_PER_BUF); /* * After more than 0xf0000000 (rounded down to a multiple of * 'jiffies-per-day' to ease jiffies_to_msecs calculation) * jiffies have passed since we started streaming reset the * counters and keep track of the sequence offset. */ if (jiffies_since_start > JIFFIES_RESYNC) { dev->jiffies_sdr_cap = cur_jiffies; dev->sdr_cap_seq_offset = buffers_since_start; buffers_since_start = 0; } dev->sdr_cap_seq_count = buffers_since_start + dev->sdr_cap_seq_offset; dev->sdr_cap_with_seq_wrap_count = dev->sdr_cap_seq_count - dev->sdr_cap_seq_start; vivid_thread_sdr_cap_tick(dev); mutex_unlock(&dev->mutex); /* * Calculate the number of samples streamed since we started, * not including the current buffer. */ samples_since_start = buffers_since_start * SDR_CAP_SAMPLES_PER_BUF; /* And the number of jiffies since we started */ jiffies_since_start = jiffies - dev->jiffies_sdr_cap; /* Increase by the number of samples in one buffer */ samples_since_start += SDR_CAP_SAMPLES_PER_BUF; /* * Calculate when that next buffer is supposed to start * in jiffies since we started streaming. */ next_jiffies_since_start = samples_since_start * HZ + dev->sdr_adc_freq / 2; do_div(next_jiffies_since_start, dev->sdr_adc_freq); /* If it is in the past, then just schedule asap */ if (next_jiffies_since_start < jiffies_since_start) next_jiffies_since_start = jiffies_since_start; wait_jiffies = next_jiffies_since_start - jiffies_since_start; while (time_is_after_jiffies(cur_jiffies + wait_jiffies) && !kthread_should_stop()) schedule(); } dprintk(dev, 1, "SDR Capture Thread End\n"); return 0; } static int sdr_cap_queue_setup(struct vb2_queue *vq, unsigned *nbuffers, unsigned *nplanes, unsigned sizes[], struct device *alloc_devs[]) { /* 2 = max 16-bit sample returned */ u32 size = SDR_CAP_SAMPLES_PER_BUF * 2; if (*nplanes) return sizes[0] < size ? -EINVAL : 0; *nplanes = 1; sizes[0] = size; return 0; } static int sdr_cap_buf_prepare(struct vb2_buffer *vb) { struct vivid_dev *dev = vb2_get_drv_priv(vb->vb2_queue); unsigned size = SDR_CAP_SAMPLES_PER_BUF * 2; dprintk(dev, 1, "%s\n", __func__); if (dev->buf_prepare_error) { /* * Error injection: test what happens if buf_prepare() returns * an error. */ dev->buf_prepare_error = false; return -EINVAL; } if (vb2_plane_size(vb, 0) < size) { dprintk(dev, 1, "%s data will not fit into plane (%lu < %u)\n", __func__, vb2_plane_size(vb, 0), size); return -EINVAL; } vb2_set_plane_payload(vb, 0, size); return 0; } static void sdr_cap_buf_queue(struct vb2_buffer *vb) { struct vb2_v4l2_buffer *vbuf = to_vb2_v4l2_buffer(vb); struct vivid_dev *dev = vb2_get_drv_priv(vb->vb2_queue); struct vivid_buffer *buf = container_of(vbuf, struct vivid_buffer, vb); dprintk(dev, 1, "%s\n", __func__); spin_lock(&dev->slock); list_add_tail(&buf->list, &dev->sdr_cap_active); spin_unlock(&dev->slock); } static int sdr_cap_start_streaming(struct vb2_queue *vq, unsigned count) { struct vivid_dev *dev = vb2_get_drv_priv(vq); int err = 0; dprintk(dev, 1, "%s\n", __func__); dev->sdr_cap_seq_start = dev->seq_wrap * 128; if (dev->start_streaming_error) { dev->start_streaming_error = false; err = -EINVAL; } else if (dev->kthread_sdr_cap == NULL) { dev->kthread_sdr_cap = kthread_run(vivid_thread_sdr_cap, dev, "%s-sdr-cap", dev->v4l2_dev.name); if (IS_ERR(dev->kthread_sdr_cap)) { v4l2_err(&dev->v4l2_dev, "kernel_thread() failed\n"); err = PTR_ERR(dev->kthread_sdr_cap); dev->kthread_sdr_cap = NULL; } } if (err) { struct vivid_buffer *buf, *tmp; list_for_each_entry_safe(buf, tmp, &dev->sdr_cap_active, list) { list_del(&buf->list); vb2_buffer_done(&buf->vb.vb2_buf, VB2_BUF_STATE_QUEUED); } } return err; } /* abort streaming and wait for last buffer */ static void sdr_cap_stop_streaming(struct vb2_queue *vq) { struct vivid_dev *dev = vb2_get_drv_priv(vq); if (dev->kthread_sdr_cap == NULL) return; while (!list_empty(&dev->sdr_cap_active)) { struct vivid_buffer *buf; buf = list_entry(dev->sdr_cap_active.next, struct vivid_buffer, list); list_del(&buf->list); v4l2_ctrl_request_complete(buf->vb.vb2_buf.req_obj.req, &dev->ctrl_hdl_sdr_cap); vb2_buffer_done(&buf->vb.vb2_buf, VB2_BUF_STATE_ERROR); } /* shutdown control thread */ kthread_stop(dev->kthread_sdr_cap); dev->kthread_sdr_cap = NULL; } static void sdr_cap_buf_request_complete(struct vb2_buffer *vb) { struct vivid_dev *dev = vb2_get_drv_priv(vb->vb2_queue); v4l2_ctrl_request_complete(vb->req_obj.req, &dev->ctrl_hdl_sdr_cap); } const struct vb2_ops vivid_sdr_cap_qops = { .queue_setup = sdr_cap_queue_setup, .buf_prepare = sdr_cap_buf_prepare, .buf_queue = sdr_cap_buf_queue, .start_streaming = sdr_cap_start_streaming, .stop_streaming = sdr_cap_stop_streaming, .buf_request_complete = sdr_cap_buf_request_complete, }; int vivid_sdr_enum_freq_bands(struct file *file, void *fh, struct v4l2_frequency_band *band) { switch (band->tuner) { case 0: if (band->index >= ARRAY_SIZE(bands_adc)) return -EINVAL; *band = bands_adc[band->index]; return 0; case 1: if (band->index >= ARRAY_SIZE(bands_fm)) return -EINVAL; *band = bands_fm[band->index]; return 0; default: return -EINVAL; } } int vivid_sdr_g_frequency(struct file *file, void *fh, struct v4l2_frequency *vf) { struct vivid_dev *dev = video_drvdata(file); switch (vf->tuner) { case 0: vf->frequency = dev->sdr_adc_freq; vf->type = V4L2_TUNER_ADC; return 0; case 1: vf->frequency = dev->sdr_fm_freq; vf->type = V4L2_TUNER_RF; return 0; default: return -EINVAL; } } int vivid_sdr_s_frequency(struct file *file, void *fh, const struct v4l2_frequency *vf) { struct vivid_dev *dev = video_drvdata(file); unsigned freq = vf->frequency; unsigned band; switch (vf->tuner) { case 0: if (vf->type != V4L2_TUNER_ADC) return -EINVAL; if (freq < BAND_ADC_0) band = 0; else if (freq < BAND_ADC_1) band = 1; else band = 2; freq = clamp_t(unsigned, freq, bands_adc[band].rangelow, bands_adc[band].rangehigh); if (vb2_is_streaming(&dev->vb_sdr_cap_q) && freq != dev->sdr_adc_freq) { /* resync the thread's timings */ dev->sdr_cap_seq_resync = true; } dev->sdr_adc_freq = freq; return 0; case 1: if (vf->type != V4L2_TUNER_RF) return -EINVAL; dev->sdr_fm_freq = clamp_t(unsigned, freq, bands_fm[0].rangelow, bands_fm[0].rangehigh); return 0; default: return -EINVAL; } } int vivid_sdr_g_tuner(struct file *file, void *fh, struct v4l2_tuner *vt) { switch (vt->index) { case 0: strscpy(vt->name, "ADC", sizeof(vt->name)); vt->type = V4L2_TUNER_ADC; vt->capability = V4L2_TUNER_CAP_1HZ | V4L2_TUNER_CAP_FREQ_BANDS; vt->rangelow = bands_adc[0].rangelow; vt->rangehigh = bands_adc[2].rangehigh; return 0; case 1: strscpy(vt->name, "RF", sizeof(vt->name)); vt->type = V4L2_TUNER_RF; vt->capability = V4L2_TUNER_CAP_1HZ | V4L2_TUNER_CAP_FREQ_BANDS; vt->rangelow = bands_fm[0].rangelow; vt->rangehigh = bands_fm[0].rangehigh; return 0; default: return -EINVAL; } } int vivid_sdr_s_tuner(struct file *file, void *fh, const struct v4l2_tuner *vt) { if (vt->index > 1) return -EINVAL; return 0; } int vidioc_enum_fmt_sdr_cap(struct file *file, void *fh, struct v4l2_fmtdesc *f) { if (f->index >= ARRAY_SIZE(formats)) return -EINVAL; f->pixelformat = formats[f->index].pixelformat; return 0; } int vidioc_g_fmt_sdr_cap(struct file *file, void *fh, struct v4l2_format *f) { struct vivid_dev *dev = video_drvdata(file); f->fmt.sdr.pixelformat = dev->sdr_pixelformat; f->fmt.sdr.buffersize = dev->sdr_buffersize; return 0; } int vidioc_s_fmt_sdr_cap(struct file *file, void *fh, struct v4l2_format *f) { struct vivid_dev *dev = video_drvdata(file); struct vb2_queue *q = &dev->vb_sdr_cap_q; int i; if (vb2_is_busy(q)) return -EBUSY; for (i = 0; i < ARRAY_SIZE(formats); i++) { if (formats[i].pixelformat == f->fmt.sdr.pixelformat) { dev->sdr_pixelformat = formats[i].pixelformat; dev->sdr_buffersize = formats[i].buffersize; f->fmt.sdr.buffersize = formats[i].buffersize; return 0; } } dev->sdr_pixelformat = formats[0].pixelformat; dev->sdr_buffersize = formats[0].buffersize; f->fmt.sdr.pixelformat = formats[0].pixelformat; f->fmt.sdr.buffersize = formats[0].buffersize; return 0; } int vidioc_try_fmt_sdr_cap(struct file *file, void *fh, struct v4l2_format *f) { int i; for (i = 0; i < ARRAY_SIZE(formats); i++) { if (formats[i].pixelformat == f->fmt.sdr.pixelformat) { f->fmt.sdr.buffersize = formats[i].buffersize; return 0; } } f->fmt.sdr.pixelformat = formats[0].pixelformat; f->fmt.sdr.buffersize = formats[0].buffersize; return 0; } #define FIXP_N (15) #define FIXP_FRAC (1 << FIXP_N) #define FIXP_2PI ((int)(2 * 3.141592653589 * FIXP_FRAC)) #define M_100000PI (3.14159 * 100000) void vivid_sdr_cap_process(struct vivid_dev *dev, struct vivid_buffer *buf) { u8 *vbuf = vb2_plane_vaddr(&buf->vb.vb2_buf, 0); unsigned long i; unsigned long plane_size = vb2_plane_size(&buf->vb.vb2_buf, 0); s64 s64tmp; s32 src_phase_step; s32 mod_phase_step; s32 fixp_i; s32 fixp_q; /* calculate phase step */ #define BEEP_FREQ 1000 /* 1kHz beep */ src_phase_step = DIV_ROUND_CLOSEST(FIXP_2PI * BEEP_FREQ, dev->sdr_adc_freq); for (i = 0; i < plane_size; i += 2) { mod_phase_step = fixp_cos32_rad(dev->sdr_fixp_src_phase, FIXP_2PI) >> (31 - FIXP_N); dev->sdr_fixp_src_phase += src_phase_step; s64tmp = (s64) mod_phase_step * dev->sdr_fm_deviation; dev->sdr_fixp_mod_phase += div_s64(s64tmp, M_100000PI); /* * Transfer phase angle to [0, 2xPI] in order to avoid variable * overflow and make it suitable for cosine implementation * used, which does not support negative angles. */ dev->sdr_fixp_src_phase %= FIXP_2PI; dev->sdr_fixp_mod_phase %= FIXP_2PI; if (dev->sdr_fixp_mod_phase < 0) dev->sdr_fixp_mod_phase += FIXP_2PI; fixp_i = fixp_cos32_rad(dev->sdr_fixp_mod_phase, FIXP_2PI); fixp_q = fixp_sin32_rad(dev->sdr_fixp_mod_phase, FIXP_2PI); /* Normalize fraction values represented with 32 bit precision * to fixed point representation with FIXP_N bits */ fixp_i >>= (31 - FIXP_N); fixp_q >>= (31 - FIXP_N); switch (dev->sdr_pixelformat) { case V4L2_SDR_FMT_CU8: /* convert 'fixp float' to u8 [0, +255] */ /* u8 = X * 127.5 + 127.5; X is float [-1.0, +1.0] */ fixp_i = fixp_i * 1275 + FIXP_FRAC * 1275; fixp_q = fixp_q * 1275 + FIXP_FRAC * 1275; *vbuf++ = DIV_ROUND_CLOSEST(fixp_i, FIXP_FRAC * 10); *vbuf++ = DIV_ROUND_CLOSEST(fixp_q, FIXP_FRAC * 10); break; case V4L2_SDR_FMT_CS8: /* convert 'fixp float' to s8 [-128, +127] */ /* s8 = X * 127.5 - 0.5; X is float [-1.0, +1.0] */ fixp_i = fixp_i * 1275 - FIXP_FRAC * 5; fixp_q = fixp_q * 1275 - FIXP_FRAC * 5; *vbuf++ = DIV_ROUND_CLOSEST(fixp_i, FIXP_FRAC * 10); *vbuf++ = DIV_ROUND_CLOSEST(fixp_q, FIXP_FRAC * 10); break; default: break; } } }
1 1 1 1 1 1 1 1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 // SPDX-License-Identifier: GPL-2.0-only /* * Hauppauge HD PVR USB driver - video 4 linux 2 interface * * Copyright (C) 2008 Janne Grunau (j@jannau.net) */ #include <linux/kernel.h> #include <linux/errno.h> #include <linux/init.h> #include <linux/slab.h> #include <linux/module.h> #include <linux/usb.h> #include <linux/mutex.h> #include <linux/videodev2.h> #include <media/v4l2-common.h> #include "hdpvr.h" int hdpvr_config_call(struct hdpvr_device *dev, uint value, u8 valbuf) { int ret; char request_type = 0x38, snd_request = 0x01; mutex_lock(&dev->usbc_mutex); dev->usbc_buf[0] = valbuf; ret = usb_control_msg(dev->udev, usb_sndctrlpipe(dev->udev, 0), snd_request, 0x00 | request_type, value, CTRL_DEFAULT_INDEX, dev->usbc_buf, 1, 10000); mutex_unlock(&dev->usbc_mutex); v4l2_dbg(MSG_INFO, hdpvr_debug, &dev->v4l2_dev, "config call request for value 0x%x returned %d\n", value, ret); return ret < 0 ? ret : 0; } int get_video_info(struct hdpvr_device *dev, struct hdpvr_video_info *vidinf) { int ret; vidinf->valid = false; mutex_lock(&dev->usbc_mutex); ret = usb_control_msg(dev->udev, usb_rcvctrlpipe(dev->udev, 0), 0x81, 0x80 | 0x38, 0x1400, 0x0003, dev->usbc_buf, 5, 1000); #ifdef HDPVR_DEBUG if (hdpvr_debug & MSG_INFO) v4l2_dbg(MSG_INFO, hdpvr_debug, &dev->v4l2_dev, "get video info returned: %d, %5ph\n", ret, dev->usbc_buf); #endif mutex_unlock(&dev->usbc_mutex); if (ret < 0) return ret; vidinf->width = dev->usbc_buf[1] << 8 | dev->usbc_buf[0]; vidinf->height = dev->usbc_buf[3] << 8 | dev->usbc_buf[2]; vidinf->fps = dev->usbc_buf[4]; vidinf->valid = vidinf->width && vidinf->height && vidinf->fps; return 0; } int get_input_lines_info(struct hdpvr_device *dev) { int ret, lines; mutex_lock(&dev->usbc_mutex); ret = usb_control_msg(dev->udev, usb_rcvctrlpipe(dev->udev, 0), 0x81, 0x80 | 0x38, 0x1800, 0x0003, dev->usbc_buf, 3, 1000); #ifdef HDPVR_DEBUG if (hdpvr_debug & MSG_INFO) v4l2_dbg(MSG_INFO, hdpvr_debug, &dev->v4l2_dev, "get input lines info returned: %d, %3ph\n", ret, dev->usbc_buf); #else (void)ret; /* suppress compiler warning */ #endif lines = dev->usbc_buf[1] << 8 | dev->usbc_buf[0]; mutex_unlock(&dev->usbc_mutex); return lines; } int hdpvr_set_bitrate(struct hdpvr_device *dev) { int ret; mutex_lock(&dev->usbc_mutex); memset(dev->usbc_buf, 0, 4); dev->usbc_buf[0] = dev->options.bitrate; dev->usbc_buf[2] = dev->options.peak_bitrate; ret = usb_control_msg(dev->udev, usb_sndctrlpipe(dev->udev, 0), 0x01, 0x38, CTRL_BITRATE_VALUE, CTRL_DEFAULT_INDEX, dev->usbc_buf, 4, 1000); mutex_unlock(&dev->usbc_mutex); return ret; } int hdpvr_set_audio(struct hdpvr_device *dev, u8 input, enum v4l2_mpeg_audio_encoding codec) { int ret = 0; if (dev->flags & HDPVR_FLAG_AC3_CAP) { mutex_lock(&dev->usbc_mutex); memset(dev->usbc_buf, 0, 2); dev->usbc_buf[0] = input; if (codec == V4L2_MPEG_AUDIO_ENCODING_AAC) dev->usbc_buf[1] = 0; else if (codec == V4L2_MPEG_AUDIO_ENCODING_AC3) dev->usbc_buf[1] = 1; else { mutex_unlock(&dev->usbc_mutex); v4l2_err(&dev->v4l2_dev, "invalid audio codec %d\n", codec); ret = -EINVAL; goto error; } ret = usb_control_msg(dev->udev, usb_sndctrlpipe(dev->udev, 0), 0x01, 0x38, CTRL_AUDIO_INPUT_VALUE, CTRL_DEFAULT_INDEX, dev->usbc_buf, 2, 1000); mutex_unlock(&dev->usbc_mutex); if (ret == 2) ret = 0; } else ret = hdpvr_config_call(dev, CTRL_AUDIO_INPUT_VALUE, input); error: return ret; } int hdpvr_set_options(struct hdpvr_device *dev) { hdpvr_config_call(dev, CTRL_VIDEO_STD_TYPE, dev->options.video_std); hdpvr_config_call(dev, CTRL_VIDEO_INPUT_VALUE, dev->options.video_input+1); hdpvr_set_audio(dev, dev->options.audio_input+1, dev->options.audio_codec); hdpvr_set_bitrate(dev); hdpvr_config_call(dev, CTRL_BITRATE_MODE_VALUE, dev->options.bitrate_mode); hdpvr_config_call(dev, CTRL_GOP_MODE_VALUE, dev->options.gop_mode); hdpvr_config_call(dev, CTRL_BRIGHTNESS, dev->options.brightness); hdpvr_config_call(dev, CTRL_CONTRAST, dev->options.contrast); hdpvr_config_call(dev, CTRL_HUE, dev->options.hue); hdpvr_config_call(dev, CTRL_SATURATION, dev->options.saturation); hdpvr_config_call(dev, CTRL_SHARPNESS, dev->options.sharpness); return 0; }
15 12 7 8 10 3 3 1 7 11 12 12 6 8 11 15 15 7 8 5 8 5 5 5 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 // SPDX-License-Identifier: GPL-2.0-only /* * vMTRR implementation * * Copyright (C) 2006 Qumranet, Inc. * Copyright 2010 Red Hat, Inc. and/or its affiliates. * Copyright(C) 2015 Intel Corporation. * * Authors: * Yaniv Kamay <yaniv@qumranet.com> * Avi Kivity <avi@qumranet.com> * Marcelo Tosatti <mtosatti@redhat.com> * Paolo Bonzini <pbonzini@redhat.com> * Xiao Guangrong <guangrong.xiao@linux.intel.com> */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/kvm_host.h> #include <asm/mtrr.h> #include "cpuid.h" #include "x86.h" static u64 *find_mtrr(struct kvm_vcpu *vcpu, unsigned int msr) { int index; switch (msr) { case MTRRphysBase_MSR(0) ... MTRRphysMask_MSR(KVM_NR_VAR_MTRR - 1): index = msr - MTRRphysBase_MSR(0); return &vcpu->arch.mtrr_state.var[index]; case MSR_MTRRfix64K_00000: return &vcpu->arch.mtrr_state.fixed_64k; case MSR_MTRRfix16K_80000: case MSR_MTRRfix16K_A0000: index = msr - MSR_MTRRfix16K_80000; return &vcpu->arch.mtrr_state.fixed_16k[index]; case MSR_MTRRfix4K_C0000: case MSR_MTRRfix4K_C8000: case MSR_MTRRfix4K_D0000: case MSR_MTRRfix4K_D8000: case MSR_MTRRfix4K_E0000: case MSR_MTRRfix4K_E8000: case MSR_MTRRfix4K_F0000: case MSR_MTRRfix4K_F8000: index = msr - MSR_MTRRfix4K_C0000; return &vcpu->arch.mtrr_state.fixed_4k[index]; case MSR_MTRRdefType: return &vcpu->arch.mtrr_state.deftype; default: break; } return NULL; } static bool valid_mtrr_type(unsigned t) { return t < 8 && (1 << t) & 0x73; /* 0, 1, 4, 5, 6 */ } static bool kvm_mtrr_valid(struct kvm_vcpu *vcpu, u32 msr, u64 data) { int i; u64 mask; if (msr == MSR_MTRRdefType) { if (data & ~0xcff) return false; return valid_mtrr_type(data & 0xff); } else if (msr >= MSR_MTRRfix64K_00000 && msr <= MSR_MTRRfix4K_F8000) { for (i = 0; i < 8 ; i++) if (!valid_mtrr_type((data >> (i * 8)) & 0xff)) return false; return true; } /* variable MTRRs */ if (WARN_ON_ONCE(!(msr >= MTRRphysBase_MSR(0) && msr <= MTRRphysMask_MSR(KVM_NR_VAR_MTRR - 1)))) return false; mask = kvm_vcpu_reserved_gpa_bits_raw(vcpu); if ((msr & 1) == 0) { /* MTRR base */ if (!valid_mtrr_type(data & 0xff)) return false; mask |= 0xf00; } else { /* MTRR mask */ mask |= 0x7ff; } return (data & mask) == 0; } int kvm_mtrr_set_msr(struct kvm_vcpu *vcpu, u32 msr, u64 data) { u64 *mtrr; mtrr = find_mtrr(vcpu, msr); if (!mtrr) return 1; if (!kvm_mtrr_valid(vcpu, msr, data)) return 1; *mtrr = data; return 0; } int kvm_mtrr_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) { u64 *mtrr; /* MSR_MTRRcap is a readonly MSR. */ if (msr == MSR_MTRRcap) { /* * SMRR = 0 * WC = 1 * FIX = 1 * VCNT = KVM_NR_VAR_MTRR */ *pdata = 0x500 | KVM_NR_VAR_MTRR; return 0; } mtrr = find_mtrr(vcpu, msr); if (!mtrr) return 1; *pdata = *mtrr; return 0; }
13 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef __KVM_IO_APIC_H #define __KVM_IO_APIC_H #include <linux/kvm_host.h> #include <kvm/iodev.h> #include "irq.h" struct kvm; struct kvm_vcpu; #define IOAPIC_NUM_PINS KVM_IOAPIC_NUM_PINS #define MAX_NR_RESERVED_IOAPIC_PINS KVM_MAX_IRQ_ROUTES #define IOAPIC_VERSION_ID 0x11 /* IOAPIC version */ #define IOAPIC_EDGE_TRIG 0 #define IOAPIC_LEVEL_TRIG 1 #define IOAPIC_DEFAULT_BASE_ADDRESS 0xfec00000 #define IOAPIC_MEM_LENGTH 0x100 /* Direct registers. */ #define IOAPIC_REG_SELECT 0x00 #define IOAPIC_REG_WINDOW 0x10 /* Indirect registers. */ #define IOAPIC_REG_APIC_ID 0x00 /* x86 IOAPIC only */ #define IOAPIC_REG_VERSION 0x01 #define IOAPIC_REG_ARB_ID 0x02 /* x86 IOAPIC only */ /*ioapic delivery mode*/ #define IOAPIC_FIXED 0x0 #define IOAPIC_LOWEST_PRIORITY 0x1 #define IOAPIC_PMI 0x2 #define IOAPIC_NMI 0x4 #define IOAPIC_INIT 0x5 #define IOAPIC_EXTINT 0x7 #define RTC_GSI 8 struct dest_map { /* vcpu bitmap where IRQ has been sent */ DECLARE_BITMAP(map, KVM_MAX_VCPU_IDS); /* * Vector sent to a given vcpu, only valid when * the vcpu's bit in map is set */ u8 vectors[KVM_MAX_VCPU_IDS]; }; struct rtc_status { int pending_eoi; struct dest_map dest_map; }; union kvm_ioapic_redirect_entry { u64 bits; struct { u8 vector; u8 delivery_mode:3; u8 dest_mode:1; u8 delivery_status:1; u8 polarity:1; u8 remote_irr:1; u8 trig_mode:1; u8 mask:1; u8 reserve:7; u8 reserved[4]; u8 dest_id; } fields; }; struct kvm_ioapic { u64 base_address; u32 ioregsel; u32 id; u32 irr; u32 pad; union kvm_ioapic_redirect_entry redirtbl[IOAPIC_NUM_PINS]; unsigned long irq_states[IOAPIC_NUM_PINS]; struct kvm_io_device dev; struct kvm *kvm; spinlock_t lock; struct rtc_status rtc_status; struct delayed_work eoi_inject; u32 irq_eoi[IOAPIC_NUM_PINS]; u32 irr_delivered; }; #ifdef DEBUG #define ASSERT(x) \ do { \ if (!(x)) { \ printk(KERN_EMERG "assertion failed %s: %d: %s\n", \ __FILE__, __LINE__, #x); \ BUG(); \ } \ } while (0) #else #define ASSERT(x) do { } while (0) #endif static inline int ioapic_in_kernel(struct kvm *kvm) { return irqchip_kernel(kvm); } void kvm_rtc_eoi_tracking_restore_one(struct kvm_vcpu *vcpu); void kvm_ioapic_update_eoi(struct kvm_vcpu *vcpu, int vector, int trigger_mode); int kvm_ioapic_init(struct kvm *kvm); void kvm_ioapic_destroy(struct kvm *kvm); int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int irq_source_id, int level, bool line_status); void kvm_ioapic_clear_all(struct kvm_ioapic *ioapic, int irq_source_id); void kvm_get_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state); void kvm_set_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state); void kvm_ioapic_scan_entry(struct kvm_vcpu *vcpu, ulong *ioapic_handled_vectors); void kvm_scan_ioapic_routes(struct kvm_vcpu *vcpu, ulong *ioapic_handled_vectors); #endif
1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 // SPDX-License-Identifier: GPL-2.0-or-later /* * SPCA506 chip based cameras function * M Xhaard 15/04/2004 based on different work Mark Taylor and others * and my own snoopy file on a pv-321c donate by a german compagny * "Firma Frank Gmbh" from Saarbruecken * * V4L2 by Jean-Francois Moine <http://moinejf.free.fr> */ #define MODULE_NAME "spca506" #include "gspca.h" MODULE_AUTHOR("Michel Xhaard <mxhaard@users.sourceforge.net>"); MODULE_DESCRIPTION("GSPCA/SPCA506 USB Camera Driver"); MODULE_LICENSE("GPL"); /* specific webcam descriptor */ struct sd { struct gspca_dev gspca_dev; /* !! must be the first item */ char norme; char channel; }; static const struct v4l2_pix_format vga_mode[] = { {160, 120, V4L2_PIX_FMT_SPCA505, V4L2_FIELD_NONE, .bytesperline = 160, .sizeimage = 160 * 120 * 3 / 2, .colorspace = V4L2_COLORSPACE_SRGB, .priv = 5}, {176, 144, V4L2_PIX_FMT_SPCA505, V4L2_FIELD_NONE, .bytesperline = 176, .sizeimage = 176 * 144 * 3 / 2, .colorspace = V4L2_COLORSPACE_SRGB, .priv = 4}, {320, 240, V4L2_PIX_FMT_SPCA505, V4L2_FIELD_NONE, .bytesperline = 320, .sizeimage = 320 * 240 * 3 / 2, .colorspace = V4L2_COLORSPACE_SRGB, .priv = 2}, {352, 288, V4L2_PIX_FMT_SPCA505, V4L2_FIELD_NONE, .bytesperline = 352, .sizeimage = 352 * 288 * 3 / 2, .colorspace = V4L2_COLORSPACE_SRGB, .priv = 1}, {640, 480, V4L2_PIX_FMT_SPCA505, V4L2_FIELD_NONE, .bytesperline = 640, .sizeimage = 640 * 480 * 3 / 2, .colorspace = V4L2_COLORSPACE_SRGB, .priv = 0}, }; #define SPCA50X_OFFSET_DATA 10 #define SAA7113_bright 0x0a /* defaults 0x80 */ #define SAA7113_contrast 0x0b /* defaults 0x47 */ #define SAA7113_saturation 0x0c /* defaults 0x40 */ #define SAA7113_hue 0x0d /* defaults 0x00 */ #define SAA7113_I2C_BASE_WRITE 0x4a /* read 'len' bytes to gspca_dev->usb_buf */ static void reg_r(struct gspca_dev *gspca_dev, __u16 req, __u16 index, __u16 length) { usb_control_msg(gspca_dev->dev, usb_rcvctrlpipe(gspca_dev->dev, 0), req, USB_DIR_IN | USB_TYPE_VENDOR | USB_RECIP_DEVICE, 0, /* value */ index, gspca_dev->usb_buf, length, 500); } static void reg_w(struct usb_device *dev, __u16 req, __u16 value, __u16 index) { usb_control_msg(dev, usb_sndctrlpipe(dev, 0), req, USB_DIR_OUT | USB_TYPE_VENDOR | USB_RECIP_DEVICE, value, index, NULL, 0, 500); } static void spca506_Initi2c(struct gspca_dev *gspca_dev) { reg_w(gspca_dev->dev, 0x07, SAA7113_I2C_BASE_WRITE, 0x0004); } static void spca506_WriteI2c(struct gspca_dev *gspca_dev, __u16 valeur, __u16 reg) { int retry = 60; reg_w(gspca_dev->dev, 0x07, reg, 0x0001); reg_w(gspca_dev->dev, 0x07, valeur, 0x0000); while (retry--) { reg_r(gspca_dev, 0x07, 0x0003, 2); if ((gspca_dev->usb_buf[0] | gspca_dev->usb_buf[1]) == 0x00) break; } } static void spca506_SetNormeInput(struct gspca_dev *gspca_dev, __u16 norme, __u16 channel) { struct sd *sd = (struct sd *) gspca_dev; /* fixme: check if channel == 0..3 and 6..9 (8 values) */ __u8 setbit0 = 0x00; __u8 setbit1 = 0x00; __u8 videomask = 0x00; gspca_dbg(gspca_dev, D_STREAM, "** Open Set Norme **\n"); spca506_Initi2c(gspca_dev); /* NTSC bit0 -> 1(525 l) PAL SECAM bit0 -> 0 (625 l) */ /* Composite channel bit1 -> 1 S-video bit 1 -> 0 */ /* and exclude SAA7113 reserved channel set default 0 otherwise */ if (norme & V4L2_STD_NTSC) setbit0 = 0x01; if (channel == 4 || channel == 5 || channel > 9) channel = 0; if (channel < 4) setbit1 = 0x02; videomask = (0x48 | setbit0 | setbit1); reg_w(gspca_dev->dev, 0x08, videomask, 0x0000); spca506_WriteI2c(gspca_dev, (0xc0 | (channel & 0x0F)), 0x02); if (norme & V4L2_STD_NTSC) spca506_WriteI2c(gspca_dev, 0x33, 0x0e); /* Chrominance Control NTSC N */ else if (norme & V4L2_STD_SECAM) spca506_WriteI2c(gspca_dev, 0x53, 0x0e); /* Chrominance Control SECAM */ else spca506_WriteI2c(gspca_dev, 0x03, 0x0e); /* Chrominance Control PAL BGHIV */ sd->norme = norme; sd->channel = channel; gspca_dbg(gspca_dev, D_STREAM, "Set Video Byte to 0x%2x\n", videomask); gspca_dbg(gspca_dev, D_STREAM, "Set Norme: %08x Channel %d", norme, channel); } static void spca506_GetNormeInput(struct gspca_dev *gspca_dev, __u16 *norme, __u16 *channel) { struct sd *sd = (struct sd *) gspca_dev; /* Read the register is not so good value change so we use your own copy in spca50x struct */ *norme = sd->norme; *channel = sd->channel; gspca_dbg(gspca_dev, D_STREAM, "Get Norme: %d Channel %d\n", *norme, *channel); } static void spca506_Setsize(struct gspca_dev *gspca_dev, __u16 code, __u16 xmult, __u16 ymult) { struct usb_device *dev = gspca_dev->dev; gspca_dbg(gspca_dev, D_STREAM, "** SetSize **\n"); reg_w(dev, 0x04, (0x18 | (code & 0x07)), 0x0000); /* Soft snap 0x40 Hard 0x41 */ reg_w(dev, 0x04, 0x41, 0x0001); reg_w(dev, 0x04, 0x00, 0x0002); /* reserved */ reg_w(dev, 0x04, 0x00, 0x0003); /* reserved */ reg_w(dev, 0x04, 0x00, 0x0004); /* reserved */ reg_w(dev, 0x04, 0x01, 0x0005); /* reserced */ reg_w(dev, 0x04, xmult, 0x0006); /* reserved */ reg_w(dev, 0x04, ymult, 0x0007); /* compression 1 */ reg_w(dev, 0x04, 0x00, 0x0008); /* T=64 -> 2 */ reg_w(dev, 0x04, 0x00, 0x0009); /* threshold2D */ reg_w(dev, 0x04, 0x21, 0x000a); /* quantization */ reg_w(dev, 0x04, 0x00, 0x000b); } /* this function is called at probe time */ static int sd_config(struct gspca_dev *gspca_dev, const struct usb_device_id *id) { struct cam *cam; cam = &gspca_dev->cam; cam->cam_mode = vga_mode; cam->nmodes = ARRAY_SIZE(vga_mode); return 0; } /* this function is called at probe and resume time */ static int sd_init(struct gspca_dev *gspca_dev) { struct usb_device *dev = gspca_dev->dev; reg_w(dev, 0x03, 0x00, 0x0004); reg_w(dev, 0x03, 0xFF, 0x0003); reg_w(dev, 0x03, 0x00, 0x0000); reg_w(dev, 0x03, 0x1c, 0x0001); reg_w(dev, 0x03, 0x18, 0x0001); /* Init on PAL and composite input0 */ spca506_SetNormeInput(gspca_dev, 0, 0); reg_w(dev, 0x03, 0x1c, 0x0001); reg_w(dev, 0x03, 0x18, 0x0001); reg_w(dev, 0x05, 0x00, 0x0000); reg_w(dev, 0x05, 0xef, 0x0001); reg_w(dev, 0x05, 0x00, 0x00c1); reg_w(dev, 0x05, 0x00, 0x00c2); reg_w(dev, 0x06, 0x18, 0x0002); reg_w(dev, 0x06, 0xf5, 0x0011); reg_w(dev, 0x06, 0x02, 0x0012); reg_w(dev, 0x06, 0xfb, 0x0013); reg_w(dev, 0x06, 0x00, 0x0014); reg_w(dev, 0x06, 0xa4, 0x0051); reg_w(dev, 0x06, 0x40, 0x0052); reg_w(dev, 0x06, 0x71, 0x0053); reg_w(dev, 0x06, 0x40, 0x0054); /************************************************/ reg_w(dev, 0x03, 0x00, 0x0004); reg_w(dev, 0x03, 0x00, 0x0003); reg_w(dev, 0x03, 0x00, 0x0004); reg_w(dev, 0x03, 0xFF, 0x0003); reg_w(dev, 0x02, 0x00, 0x0000); reg_w(dev, 0x03, 0x60, 0x0000); reg_w(dev, 0x03, 0x18, 0x0001); /* for a better reading mx :) */ /*sdca506_WriteI2c(value,register) */ spca506_Initi2c(gspca_dev); spca506_WriteI2c(gspca_dev, 0x08, 0x01); spca506_WriteI2c(gspca_dev, 0xc0, 0x02); /* input composite video */ spca506_WriteI2c(gspca_dev, 0x33, 0x03); spca506_WriteI2c(gspca_dev, 0x00, 0x04); spca506_WriteI2c(gspca_dev, 0x00, 0x05); spca506_WriteI2c(gspca_dev, 0x0d, 0x06); spca506_WriteI2c(gspca_dev, 0xf0, 0x07); spca506_WriteI2c(gspca_dev, 0x98, 0x08); spca506_WriteI2c(gspca_dev, 0x03, 0x09); spca506_WriteI2c(gspca_dev, 0x80, 0x0a); spca506_WriteI2c(gspca_dev, 0x47, 0x0b); spca506_WriteI2c(gspca_dev, 0x48, 0x0c); spca506_WriteI2c(gspca_dev, 0x00, 0x0d); spca506_WriteI2c(gspca_dev, 0x03, 0x0e); /* Chroma Pal adjust */ spca506_WriteI2c(gspca_dev, 0x2a, 0x0f); spca506_WriteI2c(gspca_dev, 0x00, 0x10); spca506_WriteI2c(gspca_dev, 0x0c, 0x11); spca506_WriteI2c(gspca_dev, 0xb8, 0x12); spca506_WriteI2c(gspca_dev, 0x01, 0x13); spca506_WriteI2c(gspca_dev, 0x00, 0x14); spca506_WriteI2c(gspca_dev, 0x00, 0x15); spca506_WriteI2c(gspca_dev, 0x00, 0x16); spca506_WriteI2c(gspca_dev, 0x00, 0x17); spca506_WriteI2c(gspca_dev, 0x00, 0x18); spca506_WriteI2c(gspca_dev, 0x00, 0x19); spca506_WriteI2c(gspca_dev, 0x00, 0x1a); spca506_WriteI2c(gspca_dev, 0x00, 0x1b); spca506_WriteI2c(gspca_dev, 0x00, 0x1c); spca506_WriteI2c(gspca_dev, 0x00, 0x1d); spca506_WriteI2c(gspca_dev, 0x00, 0x1e); spca506_WriteI2c(gspca_dev, 0xa1, 0x1f); spca506_WriteI2c(gspca_dev, 0x02, 0x40); spca506_WriteI2c(gspca_dev, 0xff, 0x41); spca506_WriteI2c(gspca_dev, 0xff, 0x42); spca506_WriteI2c(gspca_dev, 0xff, 0x43); spca506_WriteI2c(gspca_dev, 0xff, 0x44); spca506_WriteI2c(gspca_dev, 0xff, 0x45); spca506_WriteI2c(gspca_dev, 0xff, 0x46); spca506_WriteI2c(gspca_dev, 0xff, 0x47); spca506_WriteI2c(gspca_dev, 0xff, 0x48); spca506_WriteI2c(gspca_dev, 0xff, 0x49); spca506_WriteI2c(gspca_dev, 0xff, 0x4a); spca506_WriteI2c(gspca_dev, 0xff, 0x4b); spca506_WriteI2c(gspca_dev, 0xff, 0x4c); spca506_WriteI2c(gspca_dev, 0xff, 0x4d); spca506_WriteI2c(gspca_dev, 0xff, 0x4e); spca506_WriteI2c(gspca_dev, 0xff, 0x4f); spca506_WriteI2c(gspca_dev, 0xff, 0x50); spca506_WriteI2c(gspca_dev, 0xff, 0x51); spca506_WriteI2c(gspca_dev, 0xff, 0x52); spca506_WriteI2c(gspca_dev, 0xff, 0x53); spca506_WriteI2c(gspca_dev, 0xff, 0x54); spca506_WriteI2c(gspca_dev, 0xff, 0x55); spca506_WriteI2c(gspca_dev, 0xff, 0x56); spca506_WriteI2c(gspca_dev, 0xff, 0x57); spca506_WriteI2c(gspca_dev, 0x00, 0x58); spca506_WriteI2c(gspca_dev, 0x54, 0x59); spca506_WriteI2c(gspca_dev, 0x07, 0x5a); spca506_WriteI2c(gspca_dev, 0x83, 0x5b); spca506_WriteI2c(gspca_dev, 0x00, 0x5c); spca506_WriteI2c(gspca_dev, 0x00, 0x5d); spca506_WriteI2c(gspca_dev, 0x00, 0x5e); spca506_WriteI2c(gspca_dev, 0x00, 0x5f); spca506_WriteI2c(gspca_dev, 0x00, 0x60); spca506_WriteI2c(gspca_dev, 0x05, 0x61); spca506_WriteI2c(gspca_dev, 0x9f, 0x62); gspca_dbg(gspca_dev, D_STREAM, "** Close Init *\n"); return 0; } static int sd_start(struct gspca_dev *gspca_dev) { struct usb_device *dev = gspca_dev->dev; __u16 norme; __u16 channel; /**************************************/ reg_w(dev, 0x03, 0x00, 0x0004); reg_w(dev, 0x03, 0x00, 0x0003); reg_w(dev, 0x03, 0x00, 0x0004); reg_w(dev, 0x03, 0xFF, 0x0003); reg_w(dev, 0x02, 0x00, 0x0000); reg_w(dev, 0x03, 0x60, 0x0000); reg_w(dev, 0x03, 0x18, 0x0001); /*sdca506_WriteI2c(value,register) */ spca506_Initi2c(gspca_dev); spca506_WriteI2c(gspca_dev, 0x08, 0x01); /* Increment Delay */ /* spca506_WriteI2c(gspca_dev, 0xc0, 0x02); * Analog Input Control 1 */ spca506_WriteI2c(gspca_dev, 0x33, 0x03); /* Analog Input Control 2 */ spca506_WriteI2c(gspca_dev, 0x00, 0x04); /* Analog Input Control 3 */ spca506_WriteI2c(gspca_dev, 0x00, 0x05); /* Analog Input Control 4 */ spca506_WriteI2c(gspca_dev, 0x0d, 0x06); /* Horizontal Sync Start 0xe9-0x0d */ spca506_WriteI2c(gspca_dev, 0xf0, 0x07); /* Horizontal Sync Stop 0x0d-0xf0 */ spca506_WriteI2c(gspca_dev, 0x98, 0x08); /* Sync Control */ /* Defaults value */ spca506_WriteI2c(gspca_dev, 0x03, 0x09); /* Luminance Control */ spca506_WriteI2c(gspca_dev, 0x80, 0x0a); /* Luminance Brightness */ spca506_WriteI2c(gspca_dev, 0x47, 0x0b); /* Luminance Contrast */ spca506_WriteI2c(gspca_dev, 0x48, 0x0c); /* Chrominance Saturation */ spca506_WriteI2c(gspca_dev, 0x00, 0x0d); /* Chrominance Hue Control */ spca506_WriteI2c(gspca_dev, 0x2a, 0x0f); /* Chrominance Gain Control */ /**************************************/ spca506_WriteI2c(gspca_dev, 0x00, 0x10); /* Format/Delay Control */ spca506_WriteI2c(gspca_dev, 0x0c, 0x11); /* Output Control 1 */ spca506_WriteI2c(gspca_dev, 0xb8, 0x12); /* Output Control 2 */ spca506_WriteI2c(gspca_dev, 0x01, 0x13); /* Output Control 3 */ spca506_WriteI2c(gspca_dev, 0x00, 0x14); /* reserved */ spca506_WriteI2c(gspca_dev, 0x00, 0x15); /* VGATE START */ spca506_WriteI2c(gspca_dev, 0x00, 0x16); /* VGATE STOP */ spca506_WriteI2c(gspca_dev, 0x00, 0x17); /* VGATE Control (MSB) */ spca506_WriteI2c(gspca_dev, 0x00, 0x18); spca506_WriteI2c(gspca_dev, 0x00, 0x19); spca506_WriteI2c(gspca_dev, 0x00, 0x1a); spca506_WriteI2c(gspca_dev, 0x00, 0x1b); spca506_WriteI2c(gspca_dev, 0x00, 0x1c); spca506_WriteI2c(gspca_dev, 0x00, 0x1d); spca506_WriteI2c(gspca_dev, 0x00, 0x1e); spca506_WriteI2c(gspca_dev, 0xa1, 0x1f); spca506_WriteI2c(gspca_dev, 0x02, 0x40); spca506_WriteI2c(gspca_dev, 0xff, 0x41); spca506_WriteI2c(gspca_dev, 0xff, 0x42); spca506_WriteI2c(gspca_dev, 0xff, 0x43); spca506_WriteI2c(gspca_dev, 0xff, 0x44); spca506_WriteI2c(gspca_dev, 0xff, 0x45); spca506_WriteI2c(gspca_dev, 0xff, 0x46); spca506_WriteI2c(gspca_dev, 0xff, 0x47); spca506_WriteI2c(gspca_dev, 0xff, 0x48); spca506_WriteI2c(gspca_dev, 0xff, 0x49); spca506_WriteI2c(gspca_dev, 0xff, 0x4a); spca506_WriteI2c(gspca_dev, 0xff, 0x4b); spca506_WriteI2c(gspca_dev, 0xff, 0x4c); spca506_WriteI2c(gspca_dev, 0xff, 0x4d); spca506_WriteI2c(gspca_dev, 0xff, 0x4e); spca506_WriteI2c(gspca_dev, 0xff, 0x4f); spca506_WriteI2c(gspca_dev, 0xff, 0x50); spca506_WriteI2c(gspca_dev, 0xff, 0x51); spca506_WriteI2c(gspca_dev, 0xff, 0x52); spca506_WriteI2c(gspca_dev, 0xff, 0x53); spca506_WriteI2c(gspca_dev, 0xff, 0x54); spca506_WriteI2c(gspca_dev, 0xff, 0x55); spca506_WriteI2c(gspca_dev, 0xff, 0x56); spca506_WriteI2c(gspca_dev, 0xff, 0x57); spca506_WriteI2c(gspca_dev, 0x00, 0x58); spca506_WriteI2c(gspca_dev, 0x54, 0x59); spca506_WriteI2c(gspca_dev, 0x07, 0x5a); spca506_WriteI2c(gspca_dev, 0x83, 0x5b); spca506_WriteI2c(gspca_dev, 0x00, 0x5c); spca506_WriteI2c(gspca_dev, 0x00, 0x5d); spca506_WriteI2c(gspca_dev, 0x00, 0x5e); spca506_WriteI2c(gspca_dev, 0x00, 0x5f); spca506_WriteI2c(gspca_dev, 0x00, 0x60); spca506_WriteI2c(gspca_dev, 0x05, 0x61); spca506_WriteI2c(gspca_dev, 0x9f, 0x62); /**************************************/ reg_w(dev, 0x05, 0x00, 0x0003); reg_w(dev, 0x05, 0x00, 0x0004); reg_w(dev, 0x03, 0x10, 0x0001); reg_w(dev, 0x03, 0x78, 0x0000); switch (gspca_dev->cam.cam_mode[(int) gspca_dev->curr_mode].priv) { case 0: spca506_Setsize(gspca_dev, 0, 0x10, 0x10); break; case 1: spca506_Setsize(gspca_dev, 1, 0x1a, 0x1a); break; case 2: spca506_Setsize(gspca_dev, 2, 0x1c, 0x1c); break; case 4: spca506_Setsize(gspca_dev, 4, 0x34, 0x34); break; default: /* case 5: */ spca506_Setsize(gspca_dev, 5, 0x40, 0x40); break; } /* compress setting and size */ /* set i2c luma */ reg_w(dev, 0x02, 0x01, 0x0000); reg_w(dev, 0x03, 0x12, 0x0000); reg_r(gspca_dev, 0x04, 0x0001, 2); gspca_dbg(gspca_dev, D_STREAM, "webcam started\n"); spca506_GetNormeInput(gspca_dev, &norme, &channel); spca506_SetNormeInput(gspca_dev, norme, channel); return 0; } static void sd_stopN(struct gspca_dev *gspca_dev) { struct usb_device *dev = gspca_dev->dev; reg_w(dev, 0x02, 0x00, 0x0000); reg_w(dev, 0x03, 0x00, 0x0004); reg_w(dev, 0x03, 0x00, 0x0003); } static void sd_pkt_scan(struct gspca_dev *gspca_dev, u8 *data, /* isoc packet */ int len) /* iso packet length */ { switch (data[0]) { case 0: /* start of frame */ gspca_frame_add(gspca_dev, LAST_PACKET, NULL, 0); data += SPCA50X_OFFSET_DATA; len -= SPCA50X_OFFSET_DATA; gspca_frame_add(gspca_dev, FIRST_PACKET, data, len); break; case 0xff: /* drop */ /* gspca_dev->last_packet_type = DISCARD_PACKET; */ break; default: data += 1; len -= 1; gspca_frame_add(gspca_dev, INTER_PACKET, data, len); break; } } static void setbrightness(struct gspca_dev *gspca_dev, s32 val) { spca506_Initi2c(gspca_dev); spca506_WriteI2c(gspca_dev, val, SAA7113_bright); spca506_WriteI2c(gspca_dev, 0x01, 0x09); } static void setcontrast(struct gspca_dev *gspca_dev, s32 val) { spca506_Initi2c(gspca_dev); spca506_WriteI2c(gspca_dev, val, SAA7113_contrast); spca506_WriteI2c(gspca_dev, 0x01, 0x09); } static void setcolors(struct gspca_dev *gspca_dev, s32 val) { spca506_Initi2c(gspca_dev); spca506_WriteI2c(gspca_dev, val, SAA7113_saturation); spca506_WriteI2c(gspca_dev, 0x01, 0x09); } static void sethue(struct gspca_dev *gspca_dev, s32 val) { spca506_Initi2c(gspca_dev); spca506_WriteI2c(gspca_dev, val, SAA7113_hue); spca506_WriteI2c(gspca_dev, 0x01, 0x09); } static int sd_s_ctrl(struct v4l2_ctrl *ctrl) { struct gspca_dev *gspca_dev = container_of(ctrl->handler, struct gspca_dev, ctrl_handler); gspca_dev->usb_err = 0; if (!gspca_dev->streaming) return 0; switch (ctrl->id) { case V4L2_CID_BRIGHTNESS: setbrightness(gspca_dev, ctrl->val); break; case V4L2_CID_CONTRAST: setcontrast(gspca_dev, ctrl->val); break; case V4L2_CID_SATURATION: setcolors(gspca_dev, ctrl->val); break; case V4L2_CID_HUE: sethue(gspca_dev, ctrl->val); break; } return gspca_dev->usb_err; } static const struct v4l2_ctrl_ops sd_ctrl_ops = { .s_ctrl = sd_s_ctrl, }; static int sd_init_controls(struct gspca_dev *gspca_dev) { struct v4l2_ctrl_handler *hdl = &gspca_dev->ctrl_handler; gspca_dev->vdev.ctrl_handler = hdl; v4l2_ctrl_handler_init(hdl, 4); v4l2_ctrl_new_std(hdl, &sd_ctrl_ops, V4L2_CID_BRIGHTNESS, 0, 255, 1, 128); v4l2_ctrl_new_std(hdl, &sd_ctrl_ops, V4L2_CID_CONTRAST, 0, 255, 1, 0x47); v4l2_ctrl_new_std(hdl, &sd_ctrl_ops, V4L2_CID_SATURATION, 0, 255, 1, 0x40); v4l2_ctrl_new_std(hdl, &sd_ctrl_ops, V4L2_CID_HUE, 0, 255, 1, 0); if (hdl->error) { pr_err("Could not initialize controls\n"); return hdl->error; } return 0; } /* sub-driver description */ static const struct sd_desc sd_desc = { .name = MODULE_NAME, .config = sd_config, .init = sd_init, .init_controls = sd_init_controls, .start = sd_start, .stopN = sd_stopN, .pkt_scan = sd_pkt_scan, }; /* -- module initialisation -- */ static const struct usb_device_id device_table[] = { {USB_DEVICE(0x06e1, 0xa190)}, /* {USB_DEVICE(0x0733, 0x0430)}, FIXME: may be IntelPCCameraPro BRIDGE_SPCA505 */ {USB_DEVICE(0x0734, 0x043b)}, {USB_DEVICE(0x99fa, 0x8988)}, {} }; MODULE_DEVICE_TABLE(usb, device_table); /* -- device connect -- */ static int sd_probe(struct usb_interface *intf, const struct usb_device_id *id) { return gspca_dev_probe(intf, id, &sd_desc, sizeof(struct sd), THIS_MODULE); } static struct usb_driver sd_driver = { .name = MODULE_NAME, .id_table = device_table, .probe = sd_probe, .disconnect = gspca_disconnect, #ifdef CONFIG_PM .suspend = gspca_suspend, .resume = gspca_resume, .reset_resume = gspca_resume, #endif }; module_usb_driver(sd_driver);
323 175 176 176 176 72 72 72 72 37 60 47 60 8 8 2 8 4 8 3 8 151 14 6 150 57 57 150 67 18 18 18 2 2 67 7 67 67 67 138 152 23 37 22 38 11 11 11 25 13 25 25 25 25 25 25 13 9 25 25 37 37 37 37 37 19 19 19 7 37 37 36 37 4 4 4 37 7 7 20 16 20 3 16 16 1 1 7 20 7 38 38 38 37 20 20 16 15 15 15 15 3 3 3 15 7 6 6 6 3 3 2 2 3 2 45 4 4 4 4 4 4 2 4 2 2 2 47 45 4 45 1 1 1 1 1 1 1 1 7 7 7 7 16 16 15 15 16 16 15 15 12 1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 // SPDX-License-Identifier: GPL-2.0-or-later /* SCTP kernel implementation * (C) Copyright IBM Corp. 2001, 2004 * Copyright (c) 1999-2000 Cisco, Inc. * Copyright (c) 1999-2001 Motorola, Inc. * Copyright (c) 2001 Intel Corp. * Copyright (c) 2001 Nokia, Inc. * Copyright (c) 2001 La Monte H.P. Yarroll * * This abstraction carries sctp events to the ULP (sockets). * * Please send any bug reports or fixes you make to the * email address(es): * lksctp developers <linux-sctp@vger.kernel.org> * * Written or modified by: * Jon Grimm <jgrimm@us.ibm.com> * La Monte H.P. Yarroll <piggy@acm.org> * Sridhar Samudrala <sri@us.ibm.com> */ #include <linux/slab.h> #include <linux/types.h> #include <linux/skbuff.h> #include <net/sock.h> #include <net/busy_poll.h> #include <net/sctp/structs.h> #include <net/sctp/sctp.h> #include <net/sctp/sm.h> /* Forward declarations for internal helpers. */ static struct sctp_ulpevent *sctp_ulpq_reasm(struct sctp_ulpq *ulpq, struct sctp_ulpevent *); static struct sctp_ulpevent *sctp_ulpq_order(struct sctp_ulpq *, struct sctp_ulpevent *); static void sctp_ulpq_reasm_drain(struct sctp_ulpq *ulpq); /* 1st Level Abstractions */ /* Initialize a ULP queue from a block of memory. */ void sctp_ulpq_init(struct sctp_ulpq *ulpq, struct sctp_association *asoc) { memset(ulpq, 0, sizeof(struct sctp_ulpq)); ulpq->asoc = asoc; skb_queue_head_init(&ulpq->reasm); skb_queue_head_init(&ulpq->reasm_uo); skb_queue_head_init(&ulpq->lobby); ulpq->pd_mode = 0; } /* Flush the reassembly and ordering queues. */ void sctp_ulpq_flush(struct sctp_ulpq *ulpq) { struct sk_buff *skb; struct sctp_ulpevent *event; while ((skb = __skb_dequeue(&ulpq->lobby)) != NULL) { event = sctp_skb2event(skb); sctp_ulpevent_free(event); } while ((skb = __skb_dequeue(&ulpq->reasm)) != NULL) { event = sctp_skb2event(skb); sctp_ulpevent_free(event); } while ((skb = __skb_dequeue(&ulpq->reasm_uo)) != NULL) { event = sctp_skb2event(skb); sctp_ulpevent_free(event); } } /* Dispose of a ulpqueue. */ void sctp_ulpq_free(struct sctp_ulpq *ulpq) { sctp_ulpq_flush(ulpq); } /* Process an incoming DATA chunk. */ int sctp_ulpq_tail_data(struct sctp_ulpq *ulpq, struct sctp_chunk *chunk, gfp_t gfp) { struct sk_buff_head temp; struct sctp_ulpevent *event; int event_eor = 0; /* Create an event from the incoming chunk. */ event = sctp_ulpevent_make_rcvmsg(chunk->asoc, chunk, gfp); if (!event) return -ENOMEM; event->ssn = ntohs(chunk->subh.data_hdr->ssn); event->ppid = chunk->subh.data_hdr->ppid; /* Do reassembly if needed. */ event = sctp_ulpq_reasm(ulpq, event); /* Do ordering if needed. */ if (event) { /* Create a temporary list to collect chunks on. */ skb_queue_head_init(&temp); __skb_queue_tail(&temp, sctp_event2skb(event)); if (event->msg_flags & MSG_EOR) event = sctp_ulpq_order(ulpq, event); } /* Send event to the ULP. 'event' is the sctp_ulpevent for * very first SKB on the 'temp' list. */ if (event) { event_eor = (event->msg_flags & MSG_EOR) ? 1 : 0; sctp_ulpq_tail_event(ulpq, &temp); } return event_eor; } /* Add a new event for propagation to the ULP. */ /* Clear the partial delivery mode for this socket. Note: This * assumes that no association is currently in partial delivery mode. */ int sctp_clear_pd(struct sock *sk, struct sctp_association *asoc) { struct sctp_sock *sp = sctp_sk(sk); if (atomic_dec_and_test(&sp->pd_mode)) { /* This means there are no other associations in PD, so * we can go ahead and clear out the lobby in one shot */ if (!skb_queue_empty(&sp->pd_lobby)) { skb_queue_splice_tail_init(&sp->pd_lobby, &sk->sk_receive_queue); return 1; } } else { /* There are other associations in PD, so we only need to * pull stuff out of the lobby that belongs to the * associations that is exiting PD (all of its notifications * are posted here). */ if (!skb_queue_empty(&sp->pd_lobby) && asoc) { struct sk_buff *skb, *tmp; struct sctp_ulpevent *event; sctp_skb_for_each(skb, &sp->pd_lobby, tmp) { event = sctp_skb2event(skb); if (event->asoc == asoc) { __skb_unlink(skb, &sp->pd_lobby); __skb_queue_tail(&sk->sk_receive_queue, skb); } } } } return 0; } /* Set the pd_mode on the socket and ulpq */ static void sctp_ulpq_set_pd(struct sctp_ulpq *ulpq) { struct sctp_sock *sp = sctp_sk(ulpq->asoc->base.sk); atomic_inc(&sp->pd_mode); ulpq->pd_mode = 1; } /* Clear the pd_mode and restart any pending messages waiting for delivery. */ static int sctp_ulpq_clear_pd(struct sctp_ulpq *ulpq) { ulpq->pd_mode = 0; sctp_ulpq_reasm_drain(ulpq); return sctp_clear_pd(ulpq->asoc->base.sk, ulpq->asoc); } int sctp_ulpq_tail_event(struct sctp_ulpq *ulpq, struct sk_buff_head *skb_list) { struct sock *sk = ulpq->asoc->base.sk; struct sctp_sock *sp = sctp_sk(sk); struct sctp_ulpevent *event; struct sk_buff_head *queue; struct sk_buff *skb; int clear_pd = 0; skb = __skb_peek(skb_list); event = sctp_skb2event(skb); /* If the socket is just going to throw this away, do not * even try to deliver it. */ if (sk->sk_shutdown & RCV_SHUTDOWN && (sk->sk_shutdown & SEND_SHUTDOWN || !sctp_ulpevent_is_notification(event))) goto out_free; if (!sctp_ulpevent_is_notification(event)) { sk_mark_napi_id(sk, skb); sk_incoming_cpu_update(sk); } /* Check if the user wishes to receive this event. */ if (!sctp_ulpevent_is_enabled(event, ulpq->asoc->subscribe)) goto out_free; /* If we are in partial delivery mode, post to the lobby until * partial delivery is cleared, unless, of course _this_ is * the association the cause of the partial delivery. */ if (atomic_read(&sp->pd_mode) == 0) { queue = &sk->sk_receive_queue; } else { if (ulpq->pd_mode) { /* If the association is in partial delivery, we * need to finish delivering the partially processed * packet before passing any other data. This is * because we don't truly support stream interleaving. */ if ((event->msg_flags & MSG_NOTIFICATION) || (SCTP_DATA_NOT_FRAG == (event->msg_flags & SCTP_DATA_FRAG_MASK))) queue = &sp->pd_lobby; else { clear_pd = event->msg_flags & MSG_EOR; queue = &sk->sk_receive_queue; } } else { /* * If fragment interleave is enabled, we * can queue this to the receive queue instead * of the lobby. */ if (sp->frag_interleave) queue = &sk->sk_receive_queue; else queue = &sp->pd_lobby; } } skb_queue_splice_tail_init(skb_list, queue); /* Did we just complete partial delivery and need to get * rolling again? Move pending data to the receive * queue. */ if (clear_pd) sctp_ulpq_clear_pd(ulpq); if (queue == &sk->sk_receive_queue && !sp->data_ready_signalled) { if (!sock_owned_by_user(sk)) sp->data_ready_signalled = 1; sk->sk_data_ready(sk); } return 1; out_free: sctp_queue_purge_ulpevents(skb_list); return 0; } /* 2nd Level Abstractions */ /* Helper function to store chunks that need to be reassembled. */ static void sctp_ulpq_store_reasm(struct sctp_ulpq *ulpq, struct sctp_ulpevent *event) { struct sk_buff *pos; struct sctp_ulpevent *cevent; __u32 tsn, ctsn; tsn = event->tsn; /* See if it belongs at the end. */ pos = skb_peek_tail(&ulpq->reasm); if (!pos) { __skb_queue_tail(&ulpq->reasm, sctp_event2skb(event)); return; } /* Short circuit just dropping it at the end. */ cevent = sctp_skb2event(pos); ctsn = cevent->tsn; if (TSN_lt(ctsn, tsn)) { __skb_queue_tail(&ulpq->reasm, sctp_event2skb(event)); return; } /* Find the right place in this list. We store them by TSN. */ skb_queue_walk(&ulpq->reasm, pos) { cevent = sctp_skb2event(pos); ctsn = cevent->tsn; if (TSN_lt(tsn, ctsn)) break; } /* Insert before pos. */ __skb_queue_before(&ulpq->reasm, pos, sctp_event2skb(event)); } /* Helper function to return an event corresponding to the reassembled * datagram. * This routine creates a re-assembled skb given the first and last skb's * as stored in the reassembly queue. The skb's may be non-linear if the sctp * payload was fragmented on the way and ip had to reassemble them. * We add the rest of skb's to the first skb's fraglist. */ struct sctp_ulpevent *sctp_make_reassembled_event(struct net *net, struct sk_buff_head *queue, struct sk_buff *f_frag, struct sk_buff *l_frag) { struct sk_buff *pos; struct sk_buff *new = NULL; struct sctp_ulpevent *event; struct sk_buff *pnext, *last; struct sk_buff *list = skb_shinfo(f_frag)->frag_list; /* Store the pointer to the 2nd skb */ if (f_frag == l_frag) pos = NULL; else pos = f_frag->next; /* Get the last skb in the f_frag's frag_list if present. */ for (last = list; list; last = list, list = list->next) ; /* Add the list of remaining fragments to the first fragments * frag_list. */ if (last) last->next = pos; else { if (skb_cloned(f_frag)) { /* This is a cloned skb, we can't just modify * the frag_list. We need a new skb to do that. * Instead of calling skb_unshare(), we'll do it * ourselves since we need to delay the free. */ new = skb_copy(f_frag, GFP_ATOMIC); if (!new) return NULL; /* try again later */ sctp_skb_set_owner_r(new, f_frag->sk); skb_shinfo(new)->frag_list = pos; } else skb_shinfo(f_frag)->frag_list = pos; } /* Remove the first fragment from the reassembly queue. */ __skb_unlink(f_frag, queue); /* if we did unshare, then free the old skb and re-assign */ if (new) { kfree_skb(f_frag); f_frag = new; } while (pos) { pnext = pos->next; /* Update the len and data_len fields of the first fragment. */ f_frag->len += pos->len; f_frag->data_len += pos->len; /* Remove the fragment from the reassembly queue. */ __skb_unlink(pos, queue); /* Break if we have reached the last fragment. */ if (pos == l_frag) break; pos->next = pnext; pos = pnext; } event = sctp_skb2event(f_frag); SCTP_INC_STATS(net, SCTP_MIB_REASMUSRMSGS); return event; } /* Helper function to check if an incoming chunk has filled up the last * missing fragment in a SCTP datagram and return the corresponding event. */ static struct sctp_ulpevent *sctp_ulpq_retrieve_reassembled(struct sctp_ulpq *ulpq) { struct sk_buff *pos; struct sctp_ulpevent *cevent; struct sk_buff *first_frag = NULL; __u32 ctsn, next_tsn; struct sctp_ulpevent *retval = NULL; struct sk_buff *pd_first = NULL; struct sk_buff *pd_last = NULL; size_t pd_len = 0; struct sctp_association *asoc; u32 pd_point; /* Initialized to 0 just to avoid compiler warning message. Will * never be used with this value. It is referenced only after it * is set when we find the first fragment of a message. */ next_tsn = 0; /* The chunks are held in the reasm queue sorted by TSN. * Walk through the queue sequentially and look for a sequence of * fragmented chunks that complete a datagram. * 'first_frag' and next_tsn are reset when we find a chunk which * is the first fragment of a datagram. Once these 2 fields are set * we expect to find the remaining middle fragments and the last * fragment in order. If not, first_frag is reset to NULL and we * start the next pass when we find another first fragment. * * There is a potential to do partial delivery if user sets * SCTP_PARTIAL_DELIVERY_POINT option. Lets count some things here * to see if can do PD. */ skb_queue_walk(&ulpq->reasm, pos) { cevent = sctp_skb2event(pos); ctsn = cevent->tsn; switch (cevent->msg_flags & SCTP_DATA_FRAG_MASK) { case SCTP_DATA_FIRST_FRAG: /* If this "FIRST_FRAG" is the first * element in the queue, then count it towards * possible PD. */ if (skb_queue_is_first(&ulpq->reasm, pos)) { pd_first = pos; pd_last = pos; pd_len = pos->len; } else { pd_first = NULL; pd_last = NULL; pd_len = 0; } first_frag = pos; next_tsn = ctsn + 1; break; case SCTP_DATA_MIDDLE_FRAG: if ((first_frag) && (ctsn == next_tsn)) { next_tsn++; if (pd_first) { pd_last = pos; pd_len += pos->len; } } else first_frag = NULL; break; case SCTP_DATA_LAST_FRAG: if (first_frag && (ctsn == next_tsn)) goto found; else first_frag = NULL; break; } } asoc = ulpq->asoc; if (pd_first) { /* Make sure we can enter partial deliver. * We can trigger partial delivery only if framgent * interleave is set, or the socket is not already * in partial delivery. */ if (!sctp_sk(asoc->base.sk)->frag_interleave && atomic_read(&sctp_sk(asoc->base.sk)->pd_mode)) goto done; cevent = sctp_skb2event(pd_first); pd_point = sctp_sk(asoc->base.sk)->pd_point; if (pd_point && pd_point <= pd_len) { retval = sctp_make_reassembled_event(asoc->base.net, &ulpq->reasm, pd_first, pd_last); if (retval) sctp_ulpq_set_pd(ulpq); } } done: return retval; found: retval = sctp_make_reassembled_event(ulpq->asoc->base.net, &ulpq->reasm, first_frag, pos); if (retval) retval->msg_flags |= MSG_EOR; goto done; } /* Retrieve the next set of fragments of a partial message. */ static struct sctp_ulpevent *sctp_ulpq_retrieve_partial(struct sctp_ulpq *ulpq) { struct sk_buff *pos, *last_frag, *first_frag; struct sctp_ulpevent *cevent; __u32 ctsn, next_tsn; int is_last; struct sctp_ulpevent *retval; /* The chunks are held in the reasm queue sorted by TSN. * Walk through the queue sequentially and look for the first * sequence of fragmented chunks. */ if (skb_queue_empty(&ulpq->reasm)) return NULL; last_frag = first_frag = NULL; retval = NULL; next_tsn = 0; is_last = 0; skb_queue_walk(&ulpq->reasm, pos) { cevent = sctp_skb2event(pos); ctsn = cevent->tsn; switch (cevent->msg_flags & SCTP_DATA_FRAG_MASK) { case SCTP_DATA_FIRST_FRAG: if (!first_frag) return NULL; goto done; case SCTP_DATA_MIDDLE_FRAG: if (!first_frag) { first_frag = pos; next_tsn = ctsn + 1; last_frag = pos; } else if (next_tsn == ctsn) { next_tsn++; last_frag = pos; } else goto done; break; case SCTP_DATA_LAST_FRAG: if (!first_frag) first_frag = pos; else if (ctsn != next_tsn) goto done; last_frag = pos; is_last = 1; goto done; default: return NULL; } } /* We have the reassembled event. There is no need to look * further. */ done: retval = sctp_make_reassembled_event(ulpq->asoc->base.net, &ulpq->reasm, first_frag, last_frag); if (retval && is_last) retval->msg_flags |= MSG_EOR; return retval; } /* Helper function to reassemble chunks. Hold chunks on the reasm queue that * need reassembling. */ static struct sctp_ulpevent *sctp_ulpq_reasm(struct sctp_ulpq *ulpq, struct sctp_ulpevent *event) { struct sctp_ulpevent *retval = NULL; /* Check if this is part of a fragmented message. */ if (SCTP_DATA_NOT_FRAG == (event->msg_flags & SCTP_DATA_FRAG_MASK)) { event->msg_flags |= MSG_EOR; return event; } sctp_ulpq_store_reasm(ulpq, event); if (!ulpq->pd_mode) retval = sctp_ulpq_retrieve_reassembled(ulpq); else { __u32 ctsn, ctsnap; /* Do not even bother unless this is the next tsn to * be delivered. */ ctsn = event->tsn; ctsnap = sctp_tsnmap_get_ctsn(&ulpq->asoc->peer.tsn_map); if (TSN_lte(ctsn, ctsnap)) retval = sctp_ulpq_retrieve_partial(ulpq); } return retval; } /* Retrieve the first part (sequential fragments) for partial delivery. */ static struct sctp_ulpevent *sctp_ulpq_retrieve_first(struct sctp_ulpq *ulpq) { struct sk_buff *pos, *last_frag, *first_frag; struct sctp_ulpevent *cevent; __u32 ctsn, next_tsn; struct sctp_ulpevent *retval; /* The chunks are held in the reasm queue sorted by TSN. * Walk through the queue sequentially and look for a sequence of * fragmented chunks that start a datagram. */ if (skb_queue_empty(&ulpq->reasm)) return NULL; last_frag = first_frag = NULL; retval = NULL; next_tsn = 0; skb_queue_walk(&ulpq->reasm, pos) { cevent = sctp_skb2event(pos); ctsn = cevent->tsn; switch (cevent->msg_flags & SCTP_DATA_FRAG_MASK) { case SCTP_DATA_FIRST_FRAG: if (!first_frag) { first_frag = pos; next_tsn = ctsn + 1; last_frag = pos; } else goto done; break; case SCTP_DATA_MIDDLE_FRAG: if (!first_frag) return NULL; if (ctsn == next_tsn) { next_tsn++; last_frag = pos; } else goto done; break; case SCTP_DATA_LAST_FRAG: if (!first_frag) return NULL; else goto done; break; default: return NULL; } } /* We have the reassembled event. There is no need to look * further. */ done: retval = sctp_make_reassembled_event(ulpq->asoc->base.net, &ulpq->reasm, first_frag, last_frag); return retval; } /* * Flush out stale fragments from the reassembly queue when processing * a Forward TSN. * * RFC 3758, Section 3.6 * * After receiving and processing a FORWARD TSN, the data receiver MUST * take cautions in updating its re-assembly queue. The receiver MUST * remove any partially reassembled message, which is still missing one * or more TSNs earlier than or equal to the new cumulative TSN point. * In the event that the receiver has invoked the partial delivery API, * a notification SHOULD also be generated to inform the upper layer API * that the message being partially delivered will NOT be completed. */ void sctp_ulpq_reasm_flushtsn(struct sctp_ulpq *ulpq, __u32 fwd_tsn) { struct sk_buff *pos, *tmp; struct sctp_ulpevent *event; __u32 tsn; if (skb_queue_empty(&ulpq->reasm)) return; skb_queue_walk_safe(&ulpq->reasm, pos, tmp) { event = sctp_skb2event(pos); tsn = event->tsn; /* Since the entire message must be abandoned by the * sender (item A3 in Section 3.5, RFC 3758), we can * free all fragments on the list that are less then * or equal to ctsn_point */ if (TSN_lte(tsn, fwd_tsn)) { __skb_unlink(pos, &ulpq->reasm); sctp_ulpevent_free(event); } else break; } } /* * Drain the reassembly queue. If we just cleared parted delivery, it * is possible that the reassembly queue will contain already reassembled * messages. Retrieve any such messages and give them to the user. */ static void sctp_ulpq_reasm_drain(struct sctp_ulpq *ulpq) { struct sctp_ulpevent *event = NULL; if (skb_queue_empty(&ulpq->reasm)) return; while ((event = sctp_ulpq_retrieve_reassembled(ulpq)) != NULL) { struct sk_buff_head temp; skb_queue_head_init(&temp); __skb_queue_tail(&temp, sctp_event2skb(event)); /* Do ordering if needed. */ if (event->msg_flags & MSG_EOR) event = sctp_ulpq_order(ulpq, event); /* Send event to the ULP. 'event' is the * sctp_ulpevent for very first SKB on the temp' list. */ if (event) sctp_ulpq_tail_event(ulpq, &temp); } } /* Helper function to gather skbs that have possibly become * ordered by an incoming chunk. */ static void sctp_ulpq_retrieve_ordered(struct sctp_ulpq *ulpq, struct sctp_ulpevent *event) { struct sk_buff_head *event_list; struct sk_buff *pos, *tmp; struct sctp_ulpevent *cevent; struct sctp_stream *stream; __u16 sid, csid, cssn; sid = event->stream; stream = &ulpq->asoc->stream; event_list = (struct sk_buff_head *) sctp_event2skb(event)->prev; /* We are holding the chunks by stream, by SSN. */ sctp_skb_for_each(pos, &ulpq->lobby, tmp) { cevent = (struct sctp_ulpevent *) pos->cb; csid = cevent->stream; cssn = cevent->ssn; /* Have we gone too far? */ if (csid > sid) break; /* Have we not gone far enough? */ if (csid < sid) continue; if (cssn != sctp_ssn_peek(stream, in, sid)) break; /* Found it, so mark in the stream. */ sctp_ssn_next(stream, in, sid); __skb_unlink(pos, &ulpq->lobby); /* Attach all gathered skbs to the event. */ __skb_queue_tail(event_list, pos); } } /* Helper function to store chunks needing ordering. */ static void sctp_ulpq_store_ordered(struct sctp_ulpq *ulpq, struct sctp_ulpevent *event) { struct sk_buff *pos; struct sctp_ulpevent *cevent; __u16 sid, csid; __u16 ssn, cssn; pos = skb_peek_tail(&ulpq->lobby); if (!pos) { __skb_queue_tail(&ulpq->lobby, sctp_event2skb(event)); return; } sid = event->stream; ssn = event->ssn; cevent = (struct sctp_ulpevent *) pos->cb; csid = cevent->stream; cssn = cevent->ssn; if (sid > csid) { __skb_queue_tail(&ulpq->lobby, sctp_event2skb(event)); return; } if ((sid == csid) && SSN_lt(cssn, ssn)) { __skb_queue_tail(&ulpq->lobby, sctp_event2skb(event)); return; } /* Find the right place in this list. We store them by * stream ID and then by SSN. */ skb_queue_walk(&ulpq->lobby, pos) { cevent = (struct sctp_ulpevent *) pos->cb; csid = cevent->stream; cssn = cevent->ssn; if (csid > sid) break; if (csid == sid && SSN_lt(ssn, cssn)) break; } /* Insert before pos. */ __skb_queue_before(&ulpq->lobby, pos, sctp_event2skb(event)); } static struct sctp_ulpevent *sctp_ulpq_order(struct sctp_ulpq *ulpq, struct sctp_ulpevent *event) { __u16 sid, ssn; struct sctp_stream *stream; /* Check if this message needs ordering. */ if (event->msg_flags & SCTP_DATA_UNORDERED) return event; /* Note: The stream ID must be verified before this routine. */ sid = event->stream; ssn = event->ssn; stream = &ulpq->asoc->stream; /* Is this the expected SSN for this stream ID? */ if (ssn != sctp_ssn_peek(stream, in, sid)) { /* We've received something out of order, so find where it * needs to be placed. We order by stream and then by SSN. */ sctp_ulpq_store_ordered(ulpq, event); return NULL; } /* Mark that the next chunk has been found. */ sctp_ssn_next(stream, in, sid); /* Go find any other chunks that were waiting for * ordering. */ sctp_ulpq_retrieve_ordered(ulpq, event); return event; } /* Helper function to gather skbs that have possibly become * ordered by forward tsn skipping their dependencies. */ static void sctp_ulpq_reap_ordered(struct sctp_ulpq *ulpq, __u16 sid) { struct sk_buff *pos, *tmp; struct sctp_ulpevent *cevent; struct sctp_ulpevent *event; struct sctp_stream *stream; struct sk_buff_head temp; struct sk_buff_head *lobby = &ulpq->lobby; __u16 csid, cssn; stream = &ulpq->asoc->stream; /* We are holding the chunks by stream, by SSN. */ skb_queue_head_init(&temp); event = NULL; sctp_skb_for_each(pos, lobby, tmp) { cevent = (struct sctp_ulpevent *) pos->cb; csid = cevent->stream; cssn = cevent->ssn; /* Have we gone too far? */ if (csid > sid) break; /* Have we not gone far enough? */ if (csid < sid) continue; /* see if this ssn has been marked by skipping */ if (!SSN_lt(cssn, sctp_ssn_peek(stream, in, csid))) break; __skb_unlink(pos, lobby); if (!event) /* Create a temporary list to collect chunks on. */ event = sctp_skb2event(pos); /* Attach all gathered skbs to the event. */ __skb_queue_tail(&temp, pos); } /* If we didn't reap any data, see if the next expected SSN * is next on the queue and if so, use that. */ if (event == NULL && pos != (struct sk_buff *)lobby) { cevent = (struct sctp_ulpevent *) pos->cb; csid = cevent->stream; cssn = cevent->ssn; if (csid == sid && cssn == sctp_ssn_peek(stream, in, csid)) { sctp_ssn_next(stream, in, csid); __skb_unlink(pos, lobby); __skb_queue_tail(&temp, pos); event = sctp_skb2event(pos); } } /* Send event to the ULP. 'event' is the sctp_ulpevent for * very first SKB on the 'temp' list. */ if (event) { /* see if we have more ordered that we can deliver */ sctp_ulpq_retrieve_ordered(ulpq, event); sctp_ulpq_tail_event(ulpq, &temp); } } /* Skip over an SSN. This is used during the processing of * Forwared TSN chunk to skip over the abandoned ordered data */ void sctp_ulpq_skip(struct sctp_ulpq *ulpq, __u16 sid, __u16 ssn) { struct sctp_stream *stream; /* Note: The stream ID must be verified before this routine. */ stream = &ulpq->asoc->stream; /* Is this an old SSN? If so ignore. */ if (SSN_lt(ssn, sctp_ssn_peek(stream, in, sid))) return; /* Mark that we are no longer expecting this SSN or lower. */ sctp_ssn_skip(stream, in, sid, ssn); /* Go find any other chunks that were waiting for * ordering and deliver them if needed. */ sctp_ulpq_reap_ordered(ulpq, sid); } __u16 sctp_ulpq_renege_list(struct sctp_ulpq *ulpq, struct sk_buff_head *list, __u16 needed) { __u16 freed = 0; __u32 tsn, last_tsn; struct sk_buff *skb, *flist, *last; struct sctp_ulpevent *event; struct sctp_tsnmap *tsnmap; tsnmap = &ulpq->asoc->peer.tsn_map; while ((skb = skb_peek_tail(list)) != NULL) { event = sctp_skb2event(skb); tsn = event->tsn; /* Don't renege below the Cumulative TSN ACK Point. */ if (TSN_lte(tsn, sctp_tsnmap_get_ctsn(tsnmap))) break; /* Events in ordering queue may have multiple fragments * corresponding to additional TSNs. Sum the total * freed space; find the last TSN. */ freed += skb_headlen(skb); flist = skb_shinfo(skb)->frag_list; for (last = flist; flist; flist = flist->next) { last = flist; freed += skb_headlen(last); } if (last) last_tsn = sctp_skb2event(last)->tsn; else last_tsn = tsn; /* Unlink the event, then renege all applicable TSNs. */ __skb_unlink(skb, list); sctp_ulpevent_free(event); while (TSN_lte(tsn, last_tsn)) { sctp_tsnmap_renege(tsnmap, tsn); tsn++; } if (freed >= needed) return freed; } return freed; } /* Renege 'needed' bytes from the ordering queue. */ static __u16 sctp_ulpq_renege_order(struct sctp_ulpq *ulpq, __u16 needed) { return sctp_ulpq_renege_list(ulpq, &ulpq->lobby, needed); } /* Renege 'needed' bytes from the reassembly queue. */ static __u16 sctp_ulpq_renege_frags(struct sctp_ulpq *ulpq, __u16 needed) { return sctp_ulpq_renege_list(ulpq, &ulpq->reasm, needed); } /* Partial deliver the first message as there is pressure on rwnd. */ void sctp_ulpq_partial_delivery(struct sctp_ulpq *ulpq, gfp_t gfp) { struct sctp_ulpevent *event; struct sctp_association *asoc; struct sctp_sock *sp; __u32 ctsn; struct sk_buff *skb; asoc = ulpq->asoc; sp = sctp_sk(asoc->base.sk); /* If the association is already in Partial Delivery mode * we have nothing to do. */ if (ulpq->pd_mode) return; /* Data must be at or below the Cumulative TSN ACK Point to * start partial delivery. */ skb = skb_peek(&asoc->ulpq.reasm); if (skb != NULL) { ctsn = sctp_skb2event(skb)->tsn; if (!TSN_lte(ctsn, sctp_tsnmap_get_ctsn(&asoc->peer.tsn_map))) return; } /* If the user enabled fragment interleave socket option, * multiple associations can enter partial delivery. * Otherwise, we can only enter partial delivery if the * socket is not in partial deliver mode. */ if (sp->frag_interleave || atomic_read(&sp->pd_mode) == 0) { /* Is partial delivery possible? */ event = sctp_ulpq_retrieve_first(ulpq); /* Send event to the ULP. */ if (event) { struct sk_buff_head temp; skb_queue_head_init(&temp); __skb_queue_tail(&temp, sctp_event2skb(event)); sctp_ulpq_tail_event(ulpq, &temp); sctp_ulpq_set_pd(ulpq); return; } } } /* Renege some packets to make room for an incoming chunk. */ void sctp_ulpq_renege(struct sctp_ulpq *ulpq, struct sctp_chunk *chunk, gfp_t gfp) { struct sctp_association *asoc = ulpq->asoc; __u32 freed = 0; __u16 needed; needed = ntohs(chunk->chunk_hdr->length) - sizeof(struct sctp_data_chunk); if (skb_queue_empty(&asoc->base.sk->sk_receive_queue)) { freed = sctp_ulpq_renege_order(ulpq, needed); if (freed < needed) freed += sctp_ulpq_renege_frags(ulpq, needed - freed); } /* If able to free enough room, accept this chunk. */ if (sk_rmem_schedule(asoc->base.sk, chunk->skb, needed) && freed >= needed) { int retval = sctp_ulpq_tail_data(ulpq, chunk, gfp); /* * Enter partial delivery if chunk has not been * delivered; otherwise, drain the reassembly queue. */ if (retval <= 0) sctp_ulpq_partial_delivery(ulpq, gfp); else if (retval == 1) sctp_ulpq_reasm_drain(ulpq); } } /* Notify the application if an association is aborted and in * partial delivery mode. Send up any pending received messages. */ void sctp_ulpq_abort_pd(struct sctp_ulpq *ulpq, gfp_t gfp) { struct sctp_ulpevent *ev = NULL; struct sctp_sock *sp; struct sock *sk; if (!ulpq->pd_mode) return; sk = ulpq->asoc->base.sk; sp = sctp_sk(sk); if (sctp_ulpevent_type_enabled(ulpq->asoc->subscribe, SCTP_PARTIAL_DELIVERY_EVENT)) ev = sctp_ulpevent_make_pdapi(ulpq->asoc, SCTP_PARTIAL_DELIVERY_ABORTED, 0, 0, 0, gfp); if (ev) __skb_queue_tail(&sk->sk_receive_queue, sctp_event2skb(ev)); /* If there is data waiting, send it up the socket now. */ if ((sctp_ulpq_clear_pd(ulpq) || ev) && !sp->data_ready_signalled) { sp->data_ready_signalled = 1; sk->sk_data_ready(sk); } }
4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 /* * Copyright (c) 2006, 2018 Oracle and/or its affiliates. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenIB.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * */ #include <linux/kernel.h> #include <linux/slab.h> #include <linux/rculist.h> #include <linux/llist.h> #include "rds_single_path.h" #include "ib_mr.h" #include "rds.h" struct workqueue_struct *rds_ib_mr_wq; static void rds_ib_odp_mr_worker(struct work_struct *work); static struct rds_ib_device *rds_ib_get_device(__be32 ipaddr) { struct rds_ib_device *rds_ibdev; struct rds_ib_ipaddr *i_ipaddr; rcu_read_lock(); list_for_each_entry_rcu(rds_ibdev, &rds_ib_devices, list) { list_for_each_entry_rcu(i_ipaddr, &rds_ibdev->ipaddr_list, list) { if (i_ipaddr->ipaddr == ipaddr) { refcount_inc(&rds_ibdev->refcount); rcu_read_unlock(); return rds_ibdev; } } } rcu_read_unlock(); return NULL; } static int rds_ib_add_ipaddr(struct rds_ib_device *rds_ibdev, __be32 ipaddr) { struct rds_ib_ipaddr *i_ipaddr; i_ipaddr = kmalloc(sizeof *i_ipaddr, GFP_KERNEL); if (!i_ipaddr) return -ENOMEM; i_ipaddr->ipaddr = ipaddr; spin_lock_irq(&rds_ibdev->spinlock); list_add_tail_rcu(&i_ipaddr->list, &rds_ibdev->ipaddr_list); spin_unlock_irq(&rds_ibdev->spinlock); return 0; } static void rds_ib_remove_ipaddr(struct rds_ib_device *rds_ibdev, __be32 ipaddr) { struct rds_ib_ipaddr *i_ipaddr; struct rds_ib_ipaddr *to_free = NULL; spin_lock_irq(&rds_ibdev->spinlock); list_for_each_entry_rcu(i_ipaddr, &rds_ibdev->ipaddr_list, list) { if (i_ipaddr->ipaddr == ipaddr) { list_del_rcu(&i_ipaddr->list); to_free = i_ipaddr; break; } } spin_unlock_irq(&rds_ibdev->spinlock); if (to_free) kfree_rcu(to_free, rcu); } int rds_ib_update_ipaddr(struct rds_ib_device *rds_ibdev, struct in6_addr *ipaddr) { struct rds_ib_device *rds_ibdev_old; rds_ibdev_old = rds_ib_get_device(ipaddr->s6_addr32[3]); if (!rds_ibdev_old) return rds_ib_add_ipaddr(rds_ibdev, ipaddr->s6_addr32[3]); if (rds_ibdev_old != rds_ibdev) { rds_ib_remove_ipaddr(rds_ibdev_old, ipaddr->s6_addr32[3]); rds_ib_dev_put(rds_ibdev_old); return rds_ib_add_ipaddr(rds_ibdev, ipaddr->s6_addr32[3]); } rds_ib_dev_put(rds_ibdev_old); return 0; } void rds_ib_add_conn(struct rds_ib_device *rds_ibdev, struct rds_connection *conn) { struct rds_ib_connection *ic = conn->c_transport_data; /* conn was previously on the nodev_conns_list */ spin_lock_irq(&ib_nodev_conns_lock); BUG_ON(list_empty(&ib_nodev_conns)); BUG_ON(list_empty(&ic->ib_node)); list_del(&ic->ib_node); spin_lock(&rds_ibdev->spinlock); list_add_tail(&ic->ib_node, &rds_ibdev->conn_list); spin_unlock(&rds_ibdev->spinlock); spin_unlock_irq(&ib_nodev_conns_lock); ic->rds_ibdev = rds_ibdev; refcount_inc(&rds_ibdev->refcount); } void rds_ib_remove_conn(struct rds_ib_device *rds_ibdev, struct rds_connection *conn) { struct rds_ib_connection *ic = conn->c_transport_data; /* place conn on nodev_conns_list */ spin_lock(&ib_nodev_conns_lock); spin_lock_irq(&rds_ibdev->spinlock); BUG_ON(list_empty(&ic->ib_node)); list_del(&ic->ib_node); spin_unlock_irq(&rds_ibdev->spinlock); list_add_tail(&ic->ib_node, &ib_nodev_conns); spin_unlock(&ib_nodev_conns_lock); ic->rds_ibdev = NULL; rds_ib_dev_put(rds_ibdev); } void rds_ib_destroy_nodev_conns(void) { struct rds_ib_connection *ic, *_ic; LIST_HEAD(tmp_list); /* avoid calling conn_destroy with irqs off */ spin_lock_irq(&ib_nodev_conns_lock); list_splice(&ib_nodev_conns, &tmp_list); spin_unlock_irq(&ib_nodev_conns_lock); list_for_each_entry_safe(ic, _ic, &tmp_list, ib_node) rds_conn_destroy(ic->conn); } void rds_ib_get_mr_info(struct rds_ib_device *rds_ibdev, struct rds_info_rdma_connection *iinfo) { struct rds_ib_mr_pool *pool_1m = rds_ibdev->mr_1m_pool; iinfo->rdma_mr_max = pool_1m->max_items; iinfo->rdma_mr_size = pool_1m->max_pages; } #if IS_ENABLED(CONFIG_IPV6) void rds6_ib_get_mr_info(struct rds_ib_device *rds_ibdev, struct rds6_info_rdma_connection *iinfo6) { struct rds_ib_mr_pool *pool_1m = rds_ibdev->mr_1m_pool; iinfo6->rdma_mr_max = pool_1m->max_items; iinfo6->rdma_mr_size = pool_1m->max_pages; } #endif struct rds_ib_mr *rds_ib_reuse_mr(struct rds_ib_mr_pool *pool) { struct rds_ib_mr *ibmr = NULL; struct llist_node *ret; unsigned long flags; spin_lock_irqsave(&pool->clean_lock, flags); ret = llist_del_first(&pool->clean_list); spin_unlock_irqrestore(&pool->clean_lock, flags); if (ret) { ibmr = llist_entry(ret, struct rds_ib_mr, llnode); if (pool->pool_type == RDS_IB_MR_8K_POOL) rds_ib_stats_inc(s_ib_rdma_mr_8k_reused); else rds_ib_stats_inc(s_ib_rdma_mr_1m_reused); } return ibmr; } void rds_ib_sync_mr(void *trans_private, int direction) { struct rds_ib_mr *ibmr = trans_private; struct rds_ib_device *rds_ibdev = ibmr->device; if (ibmr->odp) return; switch (direction) { case DMA_FROM_DEVICE: ib_dma_sync_sg_for_cpu(rds_ibdev->dev, ibmr->sg, ibmr->sg_dma_len, DMA_BIDIRECTIONAL); break; case DMA_TO_DEVICE: ib_dma_sync_sg_for_device(rds_ibdev->dev, ibmr->sg, ibmr->sg_dma_len, DMA_BIDIRECTIONAL); break; } } void __rds_ib_teardown_mr(struct rds_ib_mr *ibmr) { struct rds_ib_device *rds_ibdev = ibmr->device; if (ibmr->sg_dma_len) { ib_dma_unmap_sg(rds_ibdev->dev, ibmr->sg, ibmr->sg_len, DMA_BIDIRECTIONAL); ibmr->sg_dma_len = 0; } /* Release the s/g list */ if (ibmr->sg_len) { unsigned int i; for (i = 0; i < ibmr->sg_len; ++i) { struct page *page = sg_page(&ibmr->sg[i]); /* FIXME we need a way to tell a r/w MR * from a r/o MR */ WARN_ON(!page->mapping && irqs_disabled()); set_page_dirty(page); put_page(page); } kfree(ibmr->sg); ibmr->sg = NULL; ibmr->sg_len = 0; } } void rds_ib_teardown_mr(struct rds_ib_mr *ibmr) { unsigned int pinned = ibmr->sg_len; __rds_ib_teardown_mr(ibmr); if (pinned) { struct rds_ib_mr_pool *pool = ibmr->pool; atomic_sub(pinned, &pool->free_pinned); } } static inline unsigned int rds_ib_flush_goal(struct rds_ib_mr_pool *pool, int free_all) { unsigned int item_count; item_count = atomic_read(&pool->item_count); if (free_all) return item_count; return 0; } /* * given an llist of mrs, put them all into the list_head for more processing */ static unsigned int llist_append_to_list(struct llist_head *llist, struct list_head *list) { struct rds_ib_mr *ibmr; struct llist_node *node; struct llist_node *next; unsigned int count = 0; node = llist_del_all(llist); while (node) { next = node->next; ibmr = llist_entry(node, struct rds_ib_mr, llnode); list_add_tail(&ibmr->unmap_list, list); node = next; count++; } return count; } /* * this takes a list head of mrs and turns it into linked llist nodes * of clusters. Each cluster has linked llist nodes of * MR_CLUSTER_SIZE mrs that are ready for reuse. */ static void list_to_llist_nodes(struct list_head *list, struct llist_node **nodes_head, struct llist_node **nodes_tail) { struct rds_ib_mr *ibmr; struct llist_node *cur = NULL; struct llist_node **next = nodes_head; list_for_each_entry(ibmr, list, unmap_list) { cur = &ibmr->llnode; *next = cur; next = &cur->next; } *next = NULL; *nodes_tail = cur; } /* * Flush our pool of MRs. * At a minimum, all currently unused MRs are unmapped. * If the number of MRs allocated exceeds the limit, we also try * to free as many MRs as needed to get back to this limit. */ int rds_ib_flush_mr_pool(struct rds_ib_mr_pool *pool, int free_all, struct rds_ib_mr **ibmr_ret) { struct rds_ib_mr *ibmr; struct llist_node *clean_nodes; struct llist_node *clean_tail; LIST_HEAD(unmap_list); unsigned long unpinned = 0; unsigned int nfreed = 0, dirty_to_clean = 0, free_goal; if (pool->pool_type == RDS_IB_MR_8K_POOL) rds_ib_stats_inc(s_ib_rdma_mr_8k_pool_flush); else rds_ib_stats_inc(s_ib_rdma_mr_1m_pool_flush); if (ibmr_ret) { DEFINE_WAIT(wait); while (!mutex_trylock(&pool->flush_lock)) { ibmr = rds_ib_reuse_mr(pool); if (ibmr) { *ibmr_ret = ibmr; finish_wait(&pool->flush_wait, &wait); goto out_nolock; } prepare_to_wait(&pool->flush_wait, &wait, TASK_UNINTERRUPTIBLE); if (llist_empty(&pool->clean_list)) schedule(); ibmr = rds_ib_reuse_mr(pool); if (ibmr) { *ibmr_ret = ibmr; finish_wait(&pool->flush_wait, &wait); goto out_nolock; } } finish_wait(&pool->flush_wait, &wait); } else mutex_lock(&pool->flush_lock); if (ibmr_ret) { ibmr = rds_ib_reuse_mr(pool); if (ibmr) { *ibmr_ret = ibmr; goto out; } } /* Get the list of all MRs to be dropped. Ordering matters - * we want to put drop_list ahead of free_list. */ dirty_to_clean = llist_append_to_list(&pool->drop_list, &unmap_list); dirty_to_clean += llist_append_to_list(&pool->free_list, &unmap_list); if (free_all) { unsigned long flags; spin_lock_irqsave(&pool->clean_lock, flags); llist_append_to_list(&pool->clean_list, &unmap_list); spin_unlock_irqrestore(&pool->clean_lock, flags); } free_goal = rds_ib_flush_goal(pool, free_all); if (list_empty(&unmap_list)) goto out; rds_ib_unreg_frmr(&unmap_list, &nfreed, &unpinned, free_goal); if (!list_empty(&unmap_list)) { unsigned long flags; list_to_llist_nodes(&unmap_list, &clean_nodes, &clean_tail); if (ibmr_ret) { *ibmr_ret = llist_entry(clean_nodes, struct rds_ib_mr, llnode); clean_nodes = clean_nodes->next; } /* more than one entry in llist nodes */ if (clean_nodes) { spin_lock_irqsave(&pool->clean_lock, flags); llist_add_batch(clean_nodes, clean_tail, &pool->clean_list); spin_unlock_irqrestore(&pool->clean_lock, flags); } } atomic_sub(unpinned, &pool->free_pinned); atomic_sub(dirty_to_clean, &pool->dirty_count); atomic_sub(nfreed, &pool->item_count); out: mutex_unlock(&pool->flush_lock); if (waitqueue_active(&pool->flush_wait)) wake_up(&pool->flush_wait); out_nolock: return 0; } struct rds_ib_mr *rds_ib_try_reuse_ibmr(struct rds_ib_mr_pool *pool) { struct rds_ib_mr *ibmr = NULL; int iter = 0; while (1) { ibmr = rds_ib_reuse_mr(pool); if (ibmr) return ibmr; if (atomic_inc_return(&pool->item_count) <= pool->max_items) break; atomic_dec(&pool->item_count); if (++iter > 2) { if (pool->pool_type == RDS_IB_MR_8K_POOL) rds_ib_stats_inc(s_ib_rdma_mr_8k_pool_depleted); else rds_ib_stats_inc(s_ib_rdma_mr_1m_pool_depleted); break; } /* We do have some empty MRs. Flush them out. */ if (pool->pool_type == RDS_IB_MR_8K_POOL) rds_ib_stats_inc(s_ib_rdma_mr_8k_pool_wait); else rds_ib_stats_inc(s_ib_rdma_mr_1m_pool_wait); rds_ib_flush_mr_pool(pool, 0, &ibmr); if (ibmr) return ibmr; } return NULL; } static void rds_ib_mr_pool_flush_worker(struct work_struct *work) { struct rds_ib_mr_pool *pool = container_of(work, struct rds_ib_mr_pool, flush_worker.work); rds_ib_flush_mr_pool(pool, 0, NULL); } void rds_ib_free_mr(void *trans_private, int invalidate) { struct rds_ib_mr *ibmr = trans_private; struct rds_ib_mr_pool *pool = ibmr->pool; struct rds_ib_device *rds_ibdev = ibmr->device; rdsdebug("RDS/IB: free_mr nents %u\n", ibmr->sg_len); if (ibmr->odp) { /* A MR created and marked as use_once. We use delayed work, * because there is a change that we are in interrupt and can't * call to ib_dereg_mr() directly. */ INIT_DELAYED_WORK(&ibmr->work, rds_ib_odp_mr_worker); queue_delayed_work(rds_ib_mr_wq, &ibmr->work, 0); return; } /* Return it to the pool's free list */ rds_ib_free_frmr_list(ibmr); atomic_add(ibmr->sg_len, &pool->free_pinned); atomic_inc(&pool->dirty_count); /* If we've pinned too many pages, request a flush */ if (atomic_read(&pool->free_pinned) >= pool->max_free_pinned || atomic_read(&pool->dirty_count) >= pool->max_items / 5) queue_delayed_work(rds_ib_mr_wq, &pool->flush_worker, 10); if (invalidate) { if (likely(!in_interrupt())) { rds_ib_flush_mr_pool(pool, 0, NULL); } else { /* We get here if the user created a MR marked * as use_once and invalidate at the same time. */ queue_delayed_work(rds_ib_mr_wq, &pool->flush_worker, 10); } } rds_ib_dev_put(rds_ibdev); } void rds_ib_flush_mrs(void) { struct rds_ib_device *rds_ibdev; down_read(&rds_ib_devices_lock); list_for_each_entry(rds_ibdev, &rds_ib_devices, list) { if (rds_ibdev->mr_8k_pool) rds_ib_flush_mr_pool(rds_ibdev->mr_8k_pool, 0, NULL); if (rds_ibdev->mr_1m_pool) rds_ib_flush_mr_pool(rds_ibdev->mr_1m_pool, 0, NULL); } up_read(&rds_ib_devices_lock); } u32 rds_ib_get_lkey(void *trans_private) { struct rds_ib_mr *ibmr = trans_private; return ibmr->u.mr->lkey; } void *rds_ib_get_mr(struct scatterlist *sg, unsigned long nents, struct rds_sock *rs, u32 *key_ret, struct rds_connection *conn, u64 start, u64 length, int need_odp) { struct rds_ib_device *rds_ibdev; struct rds_ib_mr *ibmr = NULL; struct rds_ib_connection *ic = NULL; int ret; rds_ibdev = rds_ib_get_device(rs->rs_bound_addr.s6_addr32[3]); if (!rds_ibdev) { ret = -ENODEV; goto out; } if (need_odp == ODP_ZEROBASED || need_odp == ODP_VIRTUAL) { u64 virt_addr = need_odp == ODP_ZEROBASED ? 0 : start; int access_flags = (IB_ACCESS_LOCAL_WRITE | IB_ACCESS_REMOTE_READ | IB_ACCESS_REMOTE_WRITE | IB_ACCESS_REMOTE_ATOMIC | IB_ACCESS_ON_DEMAND); struct ib_sge sge = {}; struct ib_mr *ib_mr; if (!rds_ibdev->odp_capable) { ret = -EOPNOTSUPP; goto out; } ib_mr = ib_reg_user_mr(rds_ibdev->pd, start, length, virt_addr, access_flags); if (IS_ERR(ib_mr)) { rdsdebug("rds_ib_get_user_mr returned %d\n", IS_ERR(ib_mr)); ret = PTR_ERR(ib_mr); goto out; } if (key_ret) *key_ret = ib_mr->rkey; ibmr = kzalloc(sizeof(*ibmr), GFP_KERNEL); if (!ibmr) { ib_dereg_mr(ib_mr); ret = -ENOMEM; goto out; } ibmr->u.mr = ib_mr; ibmr->odp = 1; sge.addr = virt_addr; sge.length = length; sge.lkey = ib_mr->lkey; ib_advise_mr(rds_ibdev->pd, IB_UVERBS_ADVISE_MR_ADVICE_PREFETCH_WRITE, IB_UVERBS_ADVISE_MR_FLAG_FLUSH, &sge, 1); return ibmr; } if (conn) ic = conn->c_transport_data; if (!rds_ibdev->mr_8k_pool || !rds_ibdev->mr_1m_pool) { ret = -ENODEV; goto out; } ibmr = rds_ib_reg_frmr(rds_ibdev, ic, sg, nents, key_ret); if (IS_ERR(ibmr)) { ret = PTR_ERR(ibmr); pr_warn("RDS/IB: rds_ib_get_mr failed (errno=%d)\n", ret); } else { return ibmr; } out: if (rds_ibdev) rds_ib_dev_put(rds_ibdev); return ERR_PTR(ret); } void rds_ib_destroy_mr_pool(struct rds_ib_mr_pool *pool) { cancel_delayed_work_sync(&pool->flush_worker); rds_ib_flush_mr_pool(pool, 1, NULL); WARN_ON(atomic_read(&pool->item_count)); WARN_ON(atomic_read(&pool->free_pinned)); kfree(pool); } struct rds_ib_mr_pool *rds_ib_create_mr_pool(struct rds_ib_device *rds_ibdev, int pool_type) { struct rds_ib_mr_pool *pool; pool = kzalloc(sizeof(*pool), GFP_KERNEL); if (!pool) return ERR_PTR(-ENOMEM); pool->pool_type = pool_type; init_llist_head(&pool->free_list); init_llist_head(&pool->drop_list); init_llist_head(&pool->clean_list); spin_lock_init(&pool->clean_lock); mutex_init(&pool->flush_lock); init_waitqueue_head(&pool->flush_wait); INIT_DELAYED_WORK(&pool->flush_worker, rds_ib_mr_pool_flush_worker); if (pool_type == RDS_IB_MR_1M_POOL) { /* +1 allows for unaligned MRs */ pool->max_pages = RDS_MR_1M_MSG_SIZE + 1; pool->max_items = rds_ibdev->max_1m_mrs; } else { /* pool_type == RDS_IB_MR_8K_POOL */ pool->max_pages = RDS_MR_8K_MSG_SIZE + 1; pool->max_items = rds_ibdev->max_8k_mrs; } pool->max_free_pinned = pool->max_items * pool->max_pages / 4; pool->max_items_soft = rds_ibdev->max_mrs * 3 / 4; return pool; } int rds_ib_mr_init(void) { rds_ib_mr_wq = alloc_workqueue("rds_mr_flushd", WQ_MEM_RECLAIM, 0); if (!rds_ib_mr_wq) return -ENOMEM; return 0; } /* By the time this is called all the IB devices should have been torn down and * had their pools freed. As each pool is freed its work struct is waited on, * so the pool flushing work queue should be idle by the time we get here. */ void rds_ib_mr_exit(void) { destroy_workqueue(rds_ib_mr_wq); } static void rds_ib_odp_mr_worker(struct work_struct *work) { struct rds_ib_mr *ibmr; ibmr = container_of(work, struct rds_ib_mr, work.work); ib_dereg_mr(ibmr->u.mr); kfree(ibmr); }
163 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 // SPDX-License-Identifier: GPL-2.0 #define CREATE_TRACE_POINTS #include <trace/events/mmap_lock.h> #include <linux/mm.h> #include <linux/cgroup.h> #include <linux/memcontrol.h> #include <linux/mmap_lock.h> #include <linux/mutex.h> #include <linux/percpu.h> #include <linux/rcupdate.h> #include <linux/smp.h> #include <linux/trace_events.h> #include <linux/local_lock.h> EXPORT_TRACEPOINT_SYMBOL(mmap_lock_start_locking); EXPORT_TRACEPOINT_SYMBOL(mmap_lock_acquire_returned); EXPORT_TRACEPOINT_SYMBOL(mmap_lock_released); #ifdef CONFIG_TRACING /* * Trace calls must be in a separate file, as otherwise there's a circular * dependency between linux/mmap_lock.h and trace/events/mmap_lock.h. */ void __mmap_lock_do_trace_start_locking(struct mm_struct *mm, bool write) { trace_mmap_lock_start_locking(mm, write); } EXPORT_SYMBOL(__mmap_lock_do_trace_start_locking); void __mmap_lock_do_trace_acquire_returned(struct mm_struct *mm, bool write, bool success) { trace_mmap_lock_acquire_returned(mm, write, success); } EXPORT_SYMBOL(__mmap_lock_do_trace_acquire_returned); void __mmap_lock_do_trace_released(struct mm_struct *mm, bool write) { trace_mmap_lock_released(mm, write); } EXPORT_SYMBOL(__mmap_lock_do_trace_released); #endif /* CONFIG_TRACING */
3 26 2 23 1706 82 1733 1732 1733 8 1731 211 1734 1705 1736 2 1734 82 81 1626 26 1622 1589 151 1734 2 1732 1729 1731 2 1737 1733 1731 1737 1732 1734 1737 1733 1731 1731 11 11 1707 1695 45 11 34 34 45 45 45 73 73 6 73 73 73 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 // SPDX-License-Identifier: GPL-2.0 /* * drivers/usb/core/generic.c - generic driver for USB devices (not interfaces) * * (C) Copyright 2005 Greg Kroah-Hartman <gregkh@suse.de> * * based on drivers/usb/usb.c which had the following copyrights: * (C) Copyright Linus Torvalds 1999 * (C) Copyright Johannes Erdfelt 1999-2001 * (C) Copyright Andreas Gal 1999 * (C) Copyright Gregory P. Smith 1999 * (C) Copyright Deti Fliegl 1999 (new USB architecture) * (C) Copyright Randy Dunlap 2000 * (C) Copyright David Brownell 2000-2004 * (C) Copyright Yggdrasil Computing, Inc. 2000 * (usb_device_id matching changes by Adam J. Richter) * (C) Copyright Greg Kroah-Hartman 2002-2003 * * Released under the GPLv2 only. */ #include <linux/usb.h> #include <linux/usb/hcd.h> #include <linux/string_choices.h> #include <uapi/linux/usb/audio.h> #include "usb.h" static int is_rndis(struct usb_interface_descriptor *desc) { return desc->bInterfaceClass == USB_CLASS_COMM && desc->bInterfaceSubClass == 2 && desc->bInterfaceProtocol == 0xff; } static int is_activesync(struct usb_interface_descriptor *desc) { return desc->bInterfaceClass == USB_CLASS_MISC && desc->bInterfaceSubClass == 1 && desc->bInterfaceProtocol == 1; } static bool is_audio(struct usb_interface_descriptor *desc) { return desc->bInterfaceClass == USB_CLASS_AUDIO; } static bool is_uac3_config(struct usb_interface_descriptor *desc) { return desc->bInterfaceProtocol == UAC_VERSION_3; } int usb_choose_configuration(struct usb_device *udev) { int i; int num_configs; int insufficient_power = 0; struct usb_host_config *c, *best; struct usb_device_driver *udriver; /* * If a USB device (not an interface) doesn't have a driver then the * kernel has no business trying to select or install a configuration * for it. */ if (!udev->dev.driver) return -1; udriver = to_usb_device_driver(udev->dev.driver); if (usb_device_is_owned(udev)) return 0; if (udriver->choose_configuration) { i = udriver->choose_configuration(udev); if (i >= 0) return i; } best = NULL; c = udev->config; num_configs = udev->descriptor.bNumConfigurations; for (i = 0; i < num_configs; (i++, c++)) { struct usb_interface_descriptor *desc = NULL; /* It's possible that a config has no interfaces! */ if (c->desc.bNumInterfaces > 0) desc = &c->intf_cache[0]->altsetting->desc; /* * HP's USB bus-powered keyboard has only one configuration * and it claims to be self-powered; other devices may have * similar errors in their descriptors. If the next test * were allowed to execute, such configurations would always * be rejected and the devices would not work as expected. * In the meantime, we run the risk of selecting a config * that requires external power at a time when that power * isn't available. It seems to be the lesser of two evils. * * Bugzilla #6448 reports a device that appears to crash * when it receives a GET_DEVICE_STATUS request! We don't * have any other way to tell whether a device is self-powered, * but since we don't use that information anywhere but here, * the call has been removed. * * Maybe the GET_DEVICE_STATUS call and the test below can * be reinstated when device firmwares become more reliable. * Don't hold your breath. */ #if 0 /* Rule out self-powered configs for a bus-powered device */ if (bus_powered && (c->desc.bmAttributes & USB_CONFIG_ATT_SELFPOWER)) continue; #endif /* * The next test may not be as effective as it should be. * Some hubs have errors in their descriptor, claiming * to be self-powered when they are really bus-powered. * We will overestimate the amount of current such hubs * make available for each port. * * This is a fairly benign sort of failure. It won't * cause us to reject configurations that we should have * accepted. */ /* Rule out configs that draw too much bus current */ if (usb_get_max_power(udev, c) > udev->bus_mA) { insufficient_power++; continue; } /* * Select first configuration as default for audio so that * devices that don't comply with UAC3 protocol are supported. * But, still iterate through other configurations and * select UAC3 compliant config if present. */ if (desc && is_audio(desc)) { /* Always prefer the first found UAC3 config */ if (is_uac3_config(desc)) { best = c; break; } /* If there is no UAC3 config, prefer the first config */ else if (i == 0) best = c; /* Unconditional continue, because the rest of the code * in the loop is irrelevant for audio devices, and * because it can reassign best, which for audio devices * we don't want. */ continue; } /* When the first config's first interface is one of Microsoft's * pet nonstandard Ethernet-over-USB protocols, ignore it unless * this kernel has enabled the necessary host side driver. * But: Don't ignore it if it's the only config. */ if (i == 0 && num_configs > 1 && desc && (is_rndis(desc) || is_activesync(desc))) { #if !defined(CONFIG_USB_NET_RNDIS_HOST) && !defined(CONFIG_USB_NET_RNDIS_HOST_MODULE) continue; #else best = c; #endif } /* From the remaining configs, choose the first one whose * first interface is for a non-vendor-specific class. * Reason: Linux is more likely to have a class driver * than a vendor-specific driver. */ else if (udev->descriptor.bDeviceClass != USB_CLASS_VENDOR_SPEC && (desc && desc->bInterfaceClass != USB_CLASS_VENDOR_SPEC)) { best = c; break; } /* If all the remaining configs are vendor-specific, * choose the first one. */ else if (!best) best = c; } if (insufficient_power > 0) dev_info(&udev->dev, "rejected %d configuration%s " "due to insufficient available bus power\n", insufficient_power, str_plural(insufficient_power)); if (best) { i = best->desc.bConfigurationValue; dev_dbg(&udev->dev, "configuration #%d chosen from %d choice%s\n", i, num_configs, str_plural(num_configs)); } else { i = -1; dev_warn(&udev->dev, "no configuration chosen from %d choice%s\n", num_configs, str_plural(num_configs)); } return i; } EXPORT_SYMBOL_GPL(usb_choose_configuration); static int __check_for_non_generic_match(struct device_driver *drv, void *data) { struct usb_device *udev = data; struct usb_device_driver *udrv; if (!is_usb_device_driver(drv)) return 0; udrv = to_usb_device_driver(drv); if (udrv == &usb_generic_driver) return 0; return usb_driver_applicable(udev, udrv); } static bool usb_generic_driver_match(struct usb_device *udev) { if (udev->use_generic_driver) return true; /* * If any other driver wants the device, leave the device to this other * driver. */ if (bus_for_each_drv(&usb_bus_type, NULL, udev, __check_for_non_generic_match)) return false; return true; } int usb_generic_driver_probe(struct usb_device *udev) { int err, c; /* Choose and set the configuration. This registers the interfaces * with the driver core and lets interface drivers bind to them. */ if (udev->authorized == 0) dev_err(&udev->dev, "Device is not authorized for usage\n"); else { c = usb_choose_configuration(udev); if (c >= 0) { err = usb_set_configuration(udev, c); if (err && err != -ENODEV) { dev_err(&udev->dev, "can't set config #%d, error %d\n", c, err); /* This need not be fatal. The user can try to * set other configurations. */ } } } /* USB device state == configured ... usable */ usb_notify_add_device(udev); return 0; } void usb_generic_driver_disconnect(struct usb_device *udev) { usb_notify_remove_device(udev); /* if this is only an unbind, not a physical disconnect, then * unconfigure the device */ if (udev->actconfig) usb_set_configuration(udev, -1); } #ifdef CONFIG_PM int usb_generic_driver_suspend(struct usb_device *udev, pm_message_t msg) { int rc; /* Normal USB devices suspend through their upstream port. * Root hubs don't have upstream ports to suspend, * so we have to shut down their downstream HC-to-USB * interfaces manually by doing a bus (or "global") suspend. */ if (!udev->parent) rc = hcd_bus_suspend(udev, msg); /* * Non-root USB2 devices don't need to do anything for FREEZE * or PRETHAW. USB3 devices don't support global suspend and * needs to be selectively suspended. */ else if ((msg.event == PM_EVENT_FREEZE || msg.event == PM_EVENT_PRETHAW) && (udev->speed < USB_SPEED_SUPER)) rc = 0; else rc = usb_port_suspend(udev, msg); if (rc == 0) usbfs_notify_suspend(udev); return rc; } int usb_generic_driver_resume(struct usb_device *udev, pm_message_t msg) { int rc; /* Normal USB devices resume/reset through their upstream port. * Root hubs don't have upstream ports to resume or reset, * so we have to start up their downstream HC-to-USB * interfaces manually by doing a bus (or "global") resume. */ if (!udev->parent) rc = hcd_bus_resume(udev, msg); else rc = usb_port_resume(udev, msg); if (rc == 0) usbfs_notify_resume(udev); return rc; } #endif /* CONFIG_PM */ struct usb_device_driver usb_generic_driver = { .name = "usb", .match = usb_generic_driver_match, .probe = usb_generic_driver_probe, .disconnect = usb_generic_driver_disconnect, #ifdef CONFIG_PM .suspend = usb_generic_driver_suspend, .resume = usb_generic_driver_resume, #endif .supports_autosuspend = 1, };
46 46 36 3 5 2 52 3 8 6 52 52 6 1 3 8 129 12 2 129 52 2 18 2 2 2 2 136 138 137 136 117 116 1 2 1 1 19 132 8 7 6 5 4 3 3 2 5 4 3 3 4 4 9 4 133 12 129 131 130 129 9 9 1 1 1 1 9 5 3 3 3 3 37 37 37 3 37 38 37 37 36 38 20 20 3 3 17 17 3 3 3 3 3 3 3 3 3 3 62 62 61 61 2 1 43 6 5 5 5 1 4 4 1 1 3 3 1 8 6 1 50 50 50 50 21 35 34 34 3 3 32 3 31 32 34 35 21 21 35 35 8 8 8 3 3 3 3 3 3 20 20 20 20 18 3 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 // SPDX-License-Identifier: GPL-2.0 /* * This file contains helper code to handle channel * settings and keeping track of what is possible at * any point in time. * * Copyright 2009 Johannes Berg <johannes@sipsolutions.net> * Copyright 2013-2014 Intel Mobile Communications GmbH * Copyright 2018-2024 Intel Corporation */ #include <linux/export.h> #include <linux/bitfield.h> #include <net/cfg80211.h> #include "core.h" #include "rdev-ops.h" static bool cfg80211_valid_60g_freq(u32 freq) { return freq >= 58320 && freq <= 70200; } void cfg80211_chandef_create(struct cfg80211_chan_def *chandef, struct ieee80211_channel *chan, enum nl80211_channel_type chan_type) { if (WARN_ON(!chan)) return; *chandef = (struct cfg80211_chan_def) { .chan = chan, .freq1_offset = chan->freq_offset, }; switch (chan_type) { case NL80211_CHAN_NO_HT: chandef->width = NL80211_CHAN_WIDTH_20_NOHT; chandef->center_freq1 = chan->center_freq; break; case NL80211_CHAN_HT20: chandef->width = NL80211_CHAN_WIDTH_20; chandef->center_freq1 = chan->center_freq; break; case NL80211_CHAN_HT40PLUS: chandef->width = NL80211_CHAN_WIDTH_40; chandef->center_freq1 = chan->center_freq + 10; break; case NL80211_CHAN_HT40MINUS: chandef->width = NL80211_CHAN_WIDTH_40; chandef->center_freq1 = chan->center_freq - 10; break; default: WARN_ON(1); } } EXPORT_SYMBOL(cfg80211_chandef_create); static int cfg80211_chandef_get_width(const struct cfg80211_chan_def *c) { return nl80211_chan_width_to_mhz(c->width); } static u32 cfg80211_get_start_freq(const struct cfg80211_chan_def *chandef, u32 cf) { u32 start_freq, center_freq, bandwidth; center_freq = MHZ_TO_KHZ((cf == 1) ? chandef->center_freq1 : chandef->center_freq2); bandwidth = MHZ_TO_KHZ(cfg80211_chandef_get_width(chandef)); if (bandwidth <= MHZ_TO_KHZ(20)) start_freq = center_freq; else start_freq = center_freq - bandwidth / 2 + MHZ_TO_KHZ(10); return start_freq; } static u32 cfg80211_get_end_freq(const struct cfg80211_chan_def *chandef, u32 cf) { u32 end_freq, center_freq, bandwidth; center_freq = MHZ_TO_KHZ((cf == 1) ? chandef->center_freq1 : chandef->center_freq2); bandwidth = MHZ_TO_KHZ(cfg80211_chandef_get_width(chandef)); if (bandwidth <= MHZ_TO_KHZ(20)) end_freq = center_freq; else end_freq = center_freq + bandwidth / 2 - MHZ_TO_KHZ(10); return end_freq; } #define for_each_subchan(chandef, freq, cf) \ for (u32 punctured = chandef->punctured, \ cf = 1, freq = cfg80211_get_start_freq(chandef, cf); \ freq <= cfg80211_get_end_freq(chandef, cf); \ freq += MHZ_TO_KHZ(20), \ ((cf == 1 && chandef->center_freq2 != 0 && \ freq > cfg80211_get_end_freq(chandef, cf)) ? \ (cf++, freq = cfg80211_get_start_freq(chandef, cf), \ punctured = 0) : (punctured >>= 1))) \ if (!(punctured & 1)) struct cfg80211_per_bw_puncturing_values { u8 len; const u16 *valid_values; }; static const u16 puncturing_values_80mhz[] = { 0x8, 0x4, 0x2, 0x1 }; static const u16 puncturing_values_160mhz[] = { 0x80, 0x40, 0x20, 0x10, 0x8, 0x4, 0x2, 0x1, 0xc0, 0x30, 0xc, 0x3 }; static const u16 puncturing_values_320mhz[] = { 0xc000, 0x3000, 0xc00, 0x300, 0xc0, 0x30, 0xc, 0x3, 0xf000, 0xf00, 0xf0, 0xf, 0xfc00, 0xf300, 0xf0c0, 0xf030, 0xf00c, 0xf003, 0xc00f, 0x300f, 0xc0f, 0x30f, 0xcf, 0x3f }; #define CFG80211_PER_BW_VALID_PUNCTURING_VALUES(_bw) \ { \ .len = ARRAY_SIZE(puncturing_values_ ## _bw ## mhz), \ .valid_values = puncturing_values_ ## _bw ## mhz \ } static const struct cfg80211_per_bw_puncturing_values per_bw_puncturing[] = { CFG80211_PER_BW_VALID_PUNCTURING_VALUES(80), CFG80211_PER_BW_VALID_PUNCTURING_VALUES(160), CFG80211_PER_BW_VALID_PUNCTURING_VALUES(320) }; static bool valid_puncturing_bitmap(const struct cfg80211_chan_def *chandef) { u32 idx, i, start_freq, primary_center = chandef->chan->center_freq; switch (chandef->width) { case NL80211_CHAN_WIDTH_80: idx = 0; start_freq = chandef->center_freq1 - 40; break; case NL80211_CHAN_WIDTH_160: idx = 1; start_freq = chandef->center_freq1 - 80; break; case NL80211_CHAN_WIDTH_320: idx = 2; start_freq = chandef->center_freq1 - 160; break; default: return chandef->punctured == 0; } if (!chandef->punctured) return true; /* check if primary channel is punctured */ if (chandef->punctured & (u16)BIT((primary_center - start_freq) / 20)) return false; for (i = 0; i < per_bw_puncturing[idx].len; i++) { if (per_bw_puncturing[idx].valid_values[i] == chandef->punctured) return true; } return false; } static bool cfg80211_edmg_chandef_valid(const struct cfg80211_chan_def *chandef) { int max_contiguous = 0; int num_of_enabled = 0; int contiguous = 0; int i; if (!chandef->edmg.channels || !chandef->edmg.bw_config) return false; if (!cfg80211_valid_60g_freq(chandef->chan->center_freq)) return false; for (i = 0; i < 6; i++) { if (chandef->edmg.channels & BIT(i)) { contiguous++; num_of_enabled++; } else { contiguous = 0; } max_contiguous = max(contiguous, max_contiguous); } /* basic verification of edmg configuration according to * IEEE P802.11ay/D4.0 section 9.4.2.251 */ /* check bw_config against contiguous edmg channels */ switch (chandef->edmg.bw_config) { case IEEE80211_EDMG_BW_CONFIG_4: case IEEE80211_EDMG_BW_CONFIG_8: case IEEE80211_EDMG_BW_CONFIG_12: if (max_contiguous < 1) return false; break; case IEEE80211_EDMG_BW_CONFIG_5: case IEEE80211_EDMG_BW_CONFIG_9: case IEEE80211_EDMG_BW_CONFIG_13: if (max_contiguous < 2) return false; break; case IEEE80211_EDMG_BW_CONFIG_6: case IEEE80211_EDMG_BW_CONFIG_10: case IEEE80211_EDMG_BW_CONFIG_14: if (max_contiguous < 3) return false; break; case IEEE80211_EDMG_BW_CONFIG_7: case IEEE80211_EDMG_BW_CONFIG_11: case IEEE80211_EDMG_BW_CONFIG_15: if (max_contiguous < 4) return false; break; default: return false; } /* check bw_config against aggregated (non contiguous) edmg channels */ switch (chandef->edmg.bw_config) { case IEEE80211_EDMG_BW_CONFIG_4: case IEEE80211_EDMG_BW_CONFIG_5: case IEEE80211_EDMG_BW_CONFIG_6: case IEEE80211_EDMG_BW_CONFIG_7: break; case IEEE80211_EDMG_BW_CONFIG_8: case IEEE80211_EDMG_BW_CONFIG_9: case IEEE80211_EDMG_BW_CONFIG_10: case IEEE80211_EDMG_BW_CONFIG_11: if (num_of_enabled < 2) return false; break; case IEEE80211_EDMG_BW_CONFIG_12: case IEEE80211_EDMG_BW_CONFIG_13: case IEEE80211_EDMG_BW_CONFIG_14: case IEEE80211_EDMG_BW_CONFIG_15: if (num_of_enabled < 4 || max_contiguous < 2) return false; break; default: return false; } return true; } int nl80211_chan_width_to_mhz(enum nl80211_chan_width chan_width) { int mhz; switch (chan_width) { case NL80211_CHAN_WIDTH_1: mhz = 1; break; case NL80211_CHAN_WIDTH_2: mhz = 2; break; case NL80211_CHAN_WIDTH_4: mhz = 4; break; case NL80211_CHAN_WIDTH_8: mhz = 8; break; case NL80211_CHAN_WIDTH_16: mhz = 16; break; case NL80211_CHAN_WIDTH_5: mhz = 5; break; case NL80211_CHAN_WIDTH_10: mhz = 10; break; case NL80211_CHAN_WIDTH_20: case NL80211_CHAN_WIDTH_20_NOHT: mhz = 20; break; case NL80211_CHAN_WIDTH_40: mhz = 40; break; case NL80211_CHAN_WIDTH_80P80: case NL80211_CHAN_WIDTH_80: mhz = 80; break; case NL80211_CHAN_WIDTH_160: mhz = 160; break; case NL80211_CHAN_WIDTH_320: mhz = 320; break; default: WARN_ON_ONCE(1); return -1; } return mhz; } EXPORT_SYMBOL(nl80211_chan_width_to_mhz); static bool cfg80211_valid_center_freq(u32 center, enum nl80211_chan_width width) { int bw; int step; /* We only do strict verification on 6 GHz */ if (center < 5955 || center > 7115) return true; bw = nl80211_chan_width_to_mhz(width); if (bw < 0) return false; /* Validate that the channels bw is entirely within the 6 GHz band */ if (center - bw / 2 < 5945 || center + bw / 2 > 7125) return false; /* With 320 MHz the permitted channels overlap */ if (bw == 320) step = 160; else step = bw; /* * Valid channels are packed from lowest frequency towards higher ones. * So test that the lower frequency aligns with one of these steps. */ return (center - bw / 2 - 5945) % step == 0; } bool cfg80211_chandef_valid(const struct cfg80211_chan_def *chandef) { u32 control_freq, oper_freq; int oper_width, control_width; if (!chandef->chan) return false; if (chandef->freq1_offset >= 1000) return false; control_freq = chandef->chan->center_freq; switch (chandef->width) { case NL80211_CHAN_WIDTH_5: case NL80211_CHAN_WIDTH_10: case NL80211_CHAN_WIDTH_20: case NL80211_CHAN_WIDTH_20_NOHT: if (ieee80211_chandef_to_khz(chandef) != ieee80211_channel_to_khz(chandef->chan)) return false; if (chandef->center_freq2) return false; break; case NL80211_CHAN_WIDTH_1: case NL80211_CHAN_WIDTH_2: case NL80211_CHAN_WIDTH_4: case NL80211_CHAN_WIDTH_8: case NL80211_CHAN_WIDTH_16: if (chandef->chan->band != NL80211_BAND_S1GHZ) return false; control_freq = ieee80211_channel_to_khz(chandef->chan); oper_freq = ieee80211_chandef_to_khz(chandef); control_width = nl80211_chan_width_to_mhz( ieee80211_s1g_channel_width( chandef->chan)); oper_width = cfg80211_chandef_get_width(chandef); if (oper_width < 0 || control_width < 0) return false; if (chandef->center_freq2) return false; if (control_freq + MHZ_TO_KHZ(control_width) / 2 > oper_freq + MHZ_TO_KHZ(oper_width) / 2) return false; if (control_freq - MHZ_TO_KHZ(control_width) / 2 < oper_freq - MHZ_TO_KHZ(oper_width) / 2) return false; break; case NL80211_CHAN_WIDTH_80P80: if (!chandef->center_freq2) return false; /* adjacent is not allowed -- that's a 160 MHz channel */ if (chandef->center_freq1 - chandef->center_freq2 == 80 || chandef->center_freq2 - chandef->center_freq1 == 80) return false; break; default: if (chandef->center_freq2) return false; break; } switch (chandef->width) { case NL80211_CHAN_WIDTH_5: case NL80211_CHAN_WIDTH_10: case NL80211_CHAN_WIDTH_20: case NL80211_CHAN_WIDTH_20_NOHT: case NL80211_CHAN_WIDTH_1: case NL80211_CHAN_WIDTH_2: case NL80211_CHAN_WIDTH_4: case NL80211_CHAN_WIDTH_8: case NL80211_CHAN_WIDTH_16: /* all checked above */ break; case NL80211_CHAN_WIDTH_320: if (chandef->center_freq1 == control_freq + 150 || chandef->center_freq1 == control_freq + 130 || chandef->center_freq1 == control_freq + 110 || chandef->center_freq1 == control_freq + 90 || chandef->center_freq1 == control_freq - 90 || chandef->center_freq1 == control_freq - 110 || chandef->center_freq1 == control_freq - 130 || chandef->center_freq1 == control_freq - 150) break; fallthrough; case NL80211_CHAN_WIDTH_160: if (chandef->center_freq1 == control_freq + 70 || chandef->center_freq1 == control_freq + 50 || chandef->center_freq1 == control_freq - 50 || chandef->center_freq1 == control_freq - 70) break; fallthrough; case NL80211_CHAN_WIDTH_80P80: case NL80211_CHAN_WIDTH_80: if (chandef->center_freq1 == control_freq + 30 || chandef->center_freq1 == control_freq - 30) break; fallthrough; case NL80211_CHAN_WIDTH_40: if (chandef->center_freq1 == control_freq + 10 || chandef->center_freq1 == control_freq - 10) break; fallthrough; default: return false; } if (!cfg80211_valid_center_freq(chandef->center_freq1, chandef->width)) return false; if (chandef->width == NL80211_CHAN_WIDTH_80P80 && !cfg80211_valid_center_freq(chandef->center_freq2, chandef->width)) return false; /* channel 14 is only for IEEE 802.11b */ if (chandef->center_freq1 == 2484 && chandef->width != NL80211_CHAN_WIDTH_20_NOHT) return false; if (cfg80211_chandef_is_edmg(chandef) && !cfg80211_edmg_chandef_valid(chandef)) return false; return valid_puncturing_bitmap(chandef); } EXPORT_SYMBOL(cfg80211_chandef_valid); int cfg80211_chandef_primary(const struct cfg80211_chan_def *c, enum nl80211_chan_width primary_chan_width, u16 *punctured) { int pri_width = nl80211_chan_width_to_mhz(primary_chan_width); int width = cfg80211_chandef_get_width(c); u32 control = c->chan->center_freq; u32 center = c->center_freq1; u16 _punct = 0; if (WARN_ON_ONCE(pri_width < 0 || width < 0)) return -1; /* not intended to be called this way, can't determine */ if (WARN_ON_ONCE(pri_width > width)) return -1; if (!punctured) punctured = &_punct; *punctured = c->punctured; while (width > pri_width) { unsigned int bits_to_drop = width / 20 / 2; if (control > center) { center += width / 4; *punctured >>= bits_to_drop; } else { center -= width / 4; *punctured &= (1 << bits_to_drop) - 1; } width /= 2; } return center; } EXPORT_SYMBOL(cfg80211_chandef_primary); static const struct cfg80211_chan_def * check_chandef_primary_compat(const struct cfg80211_chan_def *c1, const struct cfg80211_chan_def *c2, enum nl80211_chan_width primary_chan_width) { u16 punct_c1 = 0, punct_c2 = 0; /* check primary is compatible -> error if not */ if (cfg80211_chandef_primary(c1, primary_chan_width, &punct_c1) != cfg80211_chandef_primary(c2, primary_chan_width, &punct_c2)) return ERR_PTR(-EINVAL); if (punct_c1 != punct_c2) return ERR_PTR(-EINVAL); /* assumes c1 is smaller width, if that was just checked -> done */ if (c1->width == primary_chan_width) return c2; /* otherwise continue checking the next width */ return NULL; } static const struct cfg80211_chan_def * _cfg80211_chandef_compatible(const struct cfg80211_chan_def *c1, const struct cfg80211_chan_def *c2) { const struct cfg80211_chan_def *ret; /* If they are identical, return */ if (cfg80211_chandef_identical(c1, c2)) return c2; /* otherwise, must have same control channel */ if (c1->chan != c2->chan) return NULL; /* * If they have the same width, but aren't identical, * then they can't be compatible. */ if (c1->width == c2->width) return NULL; /* * can't be compatible if one of them is 5/10 MHz or S1G * but they don't have the same width. */ #define NARROW_OR_S1G(width) ((width) == NL80211_CHAN_WIDTH_5 || \ (width) == NL80211_CHAN_WIDTH_10 || \ (width) == NL80211_CHAN_WIDTH_1 || \ (width) == NL80211_CHAN_WIDTH_2 || \ (width) == NL80211_CHAN_WIDTH_4 || \ (width) == NL80211_CHAN_WIDTH_8 || \ (width) == NL80211_CHAN_WIDTH_16) if (NARROW_OR_S1G(c1->width) || NARROW_OR_S1G(c2->width)) return NULL; /* * Make sure that c1 is always the narrower one, so that later * we either return NULL or c2 and don't have to check both * directions. */ if (c1->width > c2->width) swap(c1, c2); /* * No further checks needed if the "narrower" one is only 20 MHz. * Here "narrower" includes being a 20 MHz non-HT channel vs. a * 20 MHz HT (or later) one. */ if (c1->width <= NL80211_CHAN_WIDTH_20) return c2; ret = check_chandef_primary_compat(c1, c2, NL80211_CHAN_WIDTH_40); if (ret) return ret; ret = check_chandef_primary_compat(c1, c2, NL80211_CHAN_WIDTH_80); if (ret) return ret; /* * If c1 is 80+80, then c2 is 160 or higher, but that cannot * match. If c2 was also 80+80 it was already either accepted * or rejected above (identical or not, respectively.) */ if (c1->width == NL80211_CHAN_WIDTH_80P80) return NULL; ret = check_chandef_primary_compat(c1, c2, NL80211_CHAN_WIDTH_160); if (ret) return ret; /* * Getting here would mean they're both wider than 160, have the * same primary 160, but are not identical - this cannot happen * since they must be 320 (no wider chandefs exist, at least yet.) */ WARN_ON_ONCE(1); return NULL; } const struct cfg80211_chan_def * cfg80211_chandef_compatible(const struct cfg80211_chan_def *c1, const struct cfg80211_chan_def *c2) { const struct cfg80211_chan_def *ret; ret = _cfg80211_chandef_compatible(c1, c2); if (IS_ERR(ret)) return NULL; return ret; } EXPORT_SYMBOL(cfg80211_chandef_compatible); void cfg80211_set_dfs_state(struct wiphy *wiphy, const struct cfg80211_chan_def *chandef, enum nl80211_dfs_state dfs_state) { struct ieee80211_channel *c; int width; if (WARN_ON(!cfg80211_chandef_valid(chandef))) return; width = cfg80211_chandef_get_width(chandef); if (width < 0) return; for_each_subchan(chandef, freq, cf) { c = ieee80211_get_channel_khz(wiphy, freq); if (!c || !(c->flags & IEEE80211_CHAN_RADAR)) continue; c->dfs_state = dfs_state; c->dfs_state_entered = jiffies; } } static bool cfg80211_dfs_permissive_check_wdev(struct cfg80211_registered_device *rdev, enum nl80211_iftype iftype, struct wireless_dev *wdev, struct ieee80211_channel *chan) { unsigned int link_id; for_each_valid_link(wdev, link_id) { struct ieee80211_channel *other_chan = NULL; struct cfg80211_chan_def chandef = {}; int ret; /* In order to avoid daisy chaining only allow BSS STA */ if (wdev->iftype != NL80211_IFTYPE_STATION || !wdev->links[link_id].client.current_bss) continue; other_chan = wdev->links[link_id].client.current_bss->pub.channel; if (!other_chan) continue; if (chan == other_chan) return true; /* continue if we can't get the channel */ ret = rdev_get_channel(rdev, wdev, link_id, &chandef); if (ret) continue; if (cfg80211_is_sub_chan(&chandef, chan, false)) return true; } return false; } /* * Check if P2P GO is allowed to operate on a DFS channel */ static bool cfg80211_dfs_permissive_chan(struct wiphy *wiphy, enum nl80211_iftype iftype, struct ieee80211_channel *chan) { struct wireless_dev *wdev; struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); lockdep_assert_held(&rdev->wiphy.mtx); if (!wiphy_ext_feature_isset(&rdev->wiphy, NL80211_EXT_FEATURE_DFS_CONCURRENT) || !(chan->flags & IEEE80211_CHAN_DFS_CONCURRENT)) return false; /* only valid for P2P GO */ if (iftype != NL80211_IFTYPE_P2P_GO) return false; /* * Allow only if there's a concurrent BSS */ list_for_each_entry(wdev, &rdev->wiphy.wdev_list, list) { bool ret = cfg80211_dfs_permissive_check_wdev(rdev, iftype, wdev, chan); if (ret) return ret; } return false; } static int cfg80211_get_chans_dfs_required(struct wiphy *wiphy, const struct cfg80211_chan_def *chandef, enum nl80211_iftype iftype) { struct ieee80211_channel *c; for_each_subchan(chandef, freq, cf) { c = ieee80211_get_channel_khz(wiphy, freq); if (!c) return -EINVAL; if (c->flags & IEEE80211_CHAN_RADAR && !cfg80211_dfs_permissive_chan(wiphy, iftype, c)) return 1; } return 0; } int cfg80211_chandef_dfs_required(struct wiphy *wiphy, const struct cfg80211_chan_def *chandef, enum nl80211_iftype iftype) { int width; int ret; if (WARN_ON(!cfg80211_chandef_valid(chandef))) return -EINVAL; switch (iftype) { case NL80211_IFTYPE_ADHOC: case NL80211_IFTYPE_AP: case NL80211_IFTYPE_P2P_GO: case NL80211_IFTYPE_MESH_POINT: width = cfg80211_chandef_get_width(chandef); if (width < 0) return -EINVAL; ret = cfg80211_get_chans_dfs_required(wiphy, chandef, iftype); return (ret > 0) ? BIT(chandef->width) : ret; break; case NL80211_IFTYPE_STATION: case NL80211_IFTYPE_OCB: case NL80211_IFTYPE_P2P_CLIENT: case NL80211_IFTYPE_MONITOR: case NL80211_IFTYPE_AP_VLAN: case NL80211_IFTYPE_P2P_DEVICE: case NL80211_IFTYPE_NAN: break; case NL80211_IFTYPE_WDS: case NL80211_IFTYPE_UNSPECIFIED: case NUM_NL80211_IFTYPES: WARN_ON(1); } return 0; } EXPORT_SYMBOL(cfg80211_chandef_dfs_required); bool cfg80211_chandef_dfs_usable(struct wiphy *wiphy, const struct cfg80211_chan_def *chandef) { struct ieee80211_channel *c; int width, count = 0; if (WARN_ON(!cfg80211_chandef_valid(chandef))) return false; width = cfg80211_chandef_get_width(chandef); if (width < 0) return false; /* * Check entire range of channels for the bandwidth. * Check all channels are DFS channels (DFS_USABLE or * DFS_AVAILABLE). Return number of usable channels * (require CAC). Allow DFS and non-DFS channel mix. */ for_each_subchan(chandef, freq, cf) { c = ieee80211_get_channel_khz(wiphy, freq); if (!c) return false; if (c->flags & IEEE80211_CHAN_DISABLED) return false; if (c->flags & IEEE80211_CHAN_RADAR) { if (c->dfs_state == NL80211_DFS_UNAVAILABLE) return false; if (c->dfs_state == NL80211_DFS_USABLE) count++; } } return count > 0; } EXPORT_SYMBOL(cfg80211_chandef_dfs_usable); /* * Checks if center frequency of chan falls with in the bandwidth * range of chandef. */ bool cfg80211_is_sub_chan(struct cfg80211_chan_def *chandef, struct ieee80211_channel *chan, bool primary_only) { int width; u32 freq; if (!chandef->chan) return false; if (chandef->chan->center_freq == chan->center_freq) return true; if (primary_only) return false; width = cfg80211_chandef_get_width(chandef); if (width <= 20) return false; for (freq = chandef->center_freq1 - width / 2 + 10; freq <= chandef->center_freq1 + width / 2 - 10; freq += 20) { if (chan->center_freq == freq) return true; } if (!chandef->center_freq2) return false; for (freq = chandef->center_freq2 - width / 2 + 10; freq <= chandef->center_freq2 + width / 2 - 10; freq += 20) { if (chan->center_freq == freq) return true; } return false; } bool cfg80211_beaconing_iface_active(struct wireless_dev *wdev) { unsigned int link; lockdep_assert_wiphy(wdev->wiphy); switch (wdev->iftype) { case NL80211_IFTYPE_AP: case NL80211_IFTYPE_P2P_GO: for_each_valid_link(wdev, link) { if (wdev->links[link].ap.beacon_interval) return true; } break; case NL80211_IFTYPE_ADHOC: if (wdev->u.ibss.ssid_len) return true; break; case NL80211_IFTYPE_MESH_POINT: if (wdev->u.mesh.id_len) return true; break; case NL80211_IFTYPE_STATION: case NL80211_IFTYPE_OCB: case NL80211_IFTYPE_P2P_CLIENT: case NL80211_IFTYPE_MONITOR: case NL80211_IFTYPE_AP_VLAN: case NL80211_IFTYPE_P2P_DEVICE: /* Can NAN type be considered as beaconing interface? */ case NL80211_IFTYPE_NAN: break; case NL80211_IFTYPE_UNSPECIFIED: case NL80211_IFTYPE_WDS: case NUM_NL80211_IFTYPES: WARN_ON(1); } return false; } bool cfg80211_wdev_on_sub_chan(struct wireless_dev *wdev, struct ieee80211_channel *chan, bool primary_only) { unsigned int link; switch (wdev->iftype) { case NL80211_IFTYPE_AP: case NL80211_IFTYPE_P2P_GO: for_each_valid_link(wdev, link) { if (cfg80211_is_sub_chan(&wdev->links[link].ap.chandef, chan, primary_only)) return true; } break; case NL80211_IFTYPE_ADHOC: return cfg80211_is_sub_chan(&wdev->u.ibss.chandef, chan, primary_only); case NL80211_IFTYPE_MESH_POINT: return cfg80211_is_sub_chan(&wdev->u.mesh.chandef, chan, primary_only); default: break; } return false; } static bool cfg80211_is_wiphy_oper_chan(struct wiphy *wiphy, struct ieee80211_channel *chan) { struct wireless_dev *wdev; lockdep_assert_wiphy(wiphy); list_for_each_entry(wdev, &wiphy->wdev_list, list) { if (!cfg80211_beaconing_iface_active(wdev)) continue; if (cfg80211_wdev_on_sub_chan(wdev, chan, false)) return true; } return false; } static bool cfg80211_offchan_chain_is_active(struct cfg80211_registered_device *rdev, struct ieee80211_channel *channel) { if (!rdev->background_radar_wdev) return false; if (!cfg80211_chandef_valid(&rdev->background_radar_chandef)) return false; return cfg80211_is_sub_chan(&rdev->background_radar_chandef, channel, false); } bool cfg80211_any_wiphy_oper_chan(struct wiphy *wiphy, struct ieee80211_channel *chan) { struct cfg80211_registered_device *rdev; ASSERT_RTNL(); if (!(chan->flags & IEEE80211_CHAN_RADAR)) return false; for_each_rdev(rdev) { bool found; if (!reg_dfs_domain_same(wiphy, &rdev->wiphy)) continue; guard(wiphy)(&rdev->wiphy); found = cfg80211_is_wiphy_oper_chan(&rdev->wiphy, chan) || cfg80211_offchan_chain_is_active(rdev, chan); if (found) return true; } return false; } static bool cfg80211_chandef_dfs_available(struct wiphy *wiphy, const struct cfg80211_chan_def *chandef) { struct ieee80211_channel *c; int width; bool dfs_offload; if (WARN_ON(!cfg80211_chandef_valid(chandef))) return false; width = cfg80211_chandef_get_width(chandef); if (width < 0) return false; dfs_offload = wiphy_ext_feature_isset(wiphy, NL80211_EXT_FEATURE_DFS_OFFLOAD); /* * Check entire range of channels for the bandwidth. * If any channel in between is disabled or has not * had gone through CAC return false */ for_each_subchan(chandef, freq, cf) { c = ieee80211_get_channel_khz(wiphy, freq); if (!c) return false; if (c->flags & IEEE80211_CHAN_DISABLED) return false; if ((c->flags & IEEE80211_CHAN_RADAR) && (c->dfs_state != NL80211_DFS_AVAILABLE) && !(c->dfs_state == NL80211_DFS_USABLE && dfs_offload)) return false; } return true; } unsigned int cfg80211_chandef_dfs_cac_time(struct wiphy *wiphy, const struct cfg80211_chan_def *chandef) { struct ieee80211_channel *c; int width; unsigned int t1 = 0, t2 = 0; if (WARN_ON(!cfg80211_chandef_valid(chandef))) return 0; width = cfg80211_chandef_get_width(chandef); if (width < 0) return 0; for_each_subchan(chandef, freq, cf) { c = ieee80211_get_channel_khz(wiphy, freq); if (!c || (c->flags & IEEE80211_CHAN_DISABLED)) { if (cf == 1) t1 = INT_MAX; else t2 = INT_MAX; continue; } if (!(c->flags & IEEE80211_CHAN_RADAR)) continue; if (cf == 1 && c->dfs_cac_ms > t1) t1 = c->dfs_cac_ms; if (cf == 2 && c->dfs_cac_ms > t2) t2 = c->dfs_cac_ms; } if (t1 == INT_MAX && t2 == INT_MAX) return 0; if (t1 == INT_MAX) return t2; if (t2 == INT_MAX) return t1; return max(t1, t2); } EXPORT_SYMBOL(cfg80211_chandef_dfs_cac_time); /* check if the operating channels are valid and supported */ static bool cfg80211_edmg_usable(struct wiphy *wiphy, u8 edmg_channels, enum ieee80211_edmg_bw_config edmg_bw_config, int primary_channel, struct ieee80211_edmg *edmg_cap) { struct ieee80211_channel *chan; int i, freq; int channels_counter = 0; if (!edmg_channels && !edmg_bw_config) return true; if ((!edmg_channels && edmg_bw_config) || (edmg_channels && !edmg_bw_config)) return false; if (!(edmg_channels & BIT(primary_channel - 1))) return false; /* 60GHz channels 1..6 */ for (i = 0; i < 6; i++) { if (!(edmg_channels & BIT(i))) continue; if (!(edmg_cap->channels & BIT(i))) return false; channels_counter++; freq = ieee80211_channel_to_frequency(i + 1, NL80211_BAND_60GHZ); chan = ieee80211_get_channel(wiphy, freq); if (!chan || chan->flags & IEEE80211_CHAN_DISABLED) return false; } /* IEEE802.11 allows max 4 channels */ if (channels_counter > 4) return false; /* check bw_config is a subset of what driver supports * (see IEEE P802.11ay/D4.0 section 9.4.2.251, Table 13) */ if ((edmg_bw_config % 4) > (edmg_cap->bw_config % 4)) return false; if (edmg_bw_config > edmg_cap->bw_config) return false; return true; } bool _cfg80211_chandef_usable(struct wiphy *wiphy, const struct cfg80211_chan_def *chandef, u32 prohibited_flags, u32 permitting_flags) { struct ieee80211_sta_ht_cap *ht_cap; struct ieee80211_sta_vht_cap *vht_cap; struct ieee80211_edmg *edmg_cap; u32 width, control_freq, cap; bool ext_nss_cap, support_80_80 = false, support_320 = false; const struct ieee80211_sband_iftype_data *iftd; struct ieee80211_supported_band *sband; struct ieee80211_channel *c; int i; if (WARN_ON(!cfg80211_chandef_valid(chandef))) return false; ht_cap = &wiphy->bands[chandef->chan->band]->ht_cap; vht_cap = &wiphy->bands[chandef->chan->band]->vht_cap; edmg_cap = &wiphy->bands[chandef->chan->band]->edmg_cap; ext_nss_cap = __le16_to_cpu(vht_cap->vht_mcs.tx_highest) & IEEE80211_VHT_EXT_NSS_BW_CAPABLE; if (edmg_cap->channels && !cfg80211_edmg_usable(wiphy, chandef->edmg.channels, chandef->edmg.bw_config, chandef->chan->hw_value, edmg_cap)) return false; control_freq = chandef->chan->center_freq; switch (chandef->width) { case NL80211_CHAN_WIDTH_1: width = 1; break; case NL80211_CHAN_WIDTH_2: width = 2; break; case NL80211_CHAN_WIDTH_4: width = 4; break; case NL80211_CHAN_WIDTH_8: width = 8; break; case NL80211_CHAN_WIDTH_16: width = 16; break; case NL80211_CHAN_WIDTH_5: width = 5; break; case NL80211_CHAN_WIDTH_10: prohibited_flags |= IEEE80211_CHAN_NO_10MHZ; width = 10; break; case NL80211_CHAN_WIDTH_20: if (!ht_cap->ht_supported && chandef->chan->band != NL80211_BAND_6GHZ) return false; fallthrough; case NL80211_CHAN_WIDTH_20_NOHT: prohibited_flags |= IEEE80211_CHAN_NO_20MHZ; width = 20; break; case NL80211_CHAN_WIDTH_40: width = 40; if (chandef->chan->band == NL80211_BAND_6GHZ) break; if (!ht_cap->ht_supported) return false; if (!(ht_cap->cap & IEEE80211_HT_CAP_SUP_WIDTH_20_40) || ht_cap->cap & IEEE80211_HT_CAP_40MHZ_INTOLERANT) return false; if (chandef->center_freq1 < control_freq && chandef->chan->flags & IEEE80211_CHAN_NO_HT40MINUS) return false; if (chandef->center_freq1 > control_freq && chandef->chan->flags & IEEE80211_CHAN_NO_HT40PLUS) return false; break; case NL80211_CHAN_WIDTH_80P80: cap = vht_cap->cap; support_80_80 = (cap & IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160_80PLUS80MHZ) || (cap & IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160MHZ && cap & IEEE80211_VHT_CAP_EXT_NSS_BW_MASK) || (ext_nss_cap && u32_get_bits(cap, IEEE80211_VHT_CAP_EXT_NSS_BW_MASK) > 1); if (chandef->chan->band != NL80211_BAND_6GHZ && !support_80_80) return false; fallthrough; case NL80211_CHAN_WIDTH_80: prohibited_flags |= IEEE80211_CHAN_NO_80MHZ; width = 80; if (chandef->chan->band == NL80211_BAND_6GHZ) break; if (!vht_cap->vht_supported) return false; break; case NL80211_CHAN_WIDTH_160: prohibited_flags |= IEEE80211_CHAN_NO_160MHZ; width = 160; if (chandef->chan->band == NL80211_BAND_6GHZ) break; if (!vht_cap->vht_supported) return false; cap = vht_cap->cap & IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_MASK; if (cap != IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160MHZ && cap != IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160_80PLUS80MHZ && !(ext_nss_cap && (vht_cap->cap & IEEE80211_VHT_CAP_EXT_NSS_BW_MASK))) return false; break; case NL80211_CHAN_WIDTH_320: prohibited_flags |= IEEE80211_CHAN_NO_320MHZ; width = 320; if (chandef->chan->band != NL80211_BAND_6GHZ) return false; sband = wiphy->bands[NL80211_BAND_6GHZ]; if (!sband) return false; for_each_sband_iftype_data(sband, i, iftd) { if (!iftd->eht_cap.has_eht) continue; if (iftd->eht_cap.eht_cap_elem.phy_cap_info[0] & IEEE80211_EHT_PHY_CAP0_320MHZ_IN_6GHZ) { support_320 = true; break; } } if (!support_320) return false; break; default: WARN_ON_ONCE(1); return false; } /* * TODO: What if there are only certain 80/160/80+80 MHz channels * allowed by the driver, or only certain combinations? * For 40 MHz the driver can set the NO_HT40 flags, but for * 80/160 MHz and in particular 80+80 MHz this isn't really * feasible and we only have NO_80MHZ/NO_160MHZ so far but * no way to cover 80+80 MHz or more complex restrictions. * Note that such restrictions also need to be advertised to * userspace, for example for P2P channel selection. */ if (width > 20) prohibited_flags |= IEEE80211_CHAN_NO_OFDM; /* 5 and 10 MHz are only defined for the OFDM PHY */ if (width < 20) prohibited_flags |= IEEE80211_CHAN_NO_OFDM; for_each_subchan(chandef, freq, cf) { c = ieee80211_get_channel_khz(wiphy, freq); if (!c) return false; if (c->flags & permitting_flags) continue; if (c->flags & prohibited_flags) return false; } return true; } bool cfg80211_chandef_usable(struct wiphy *wiphy, const struct cfg80211_chan_def *chandef, u32 prohibited_flags) { return _cfg80211_chandef_usable(wiphy, chandef, prohibited_flags, 0); } EXPORT_SYMBOL(cfg80211_chandef_usable); static bool cfg80211_ir_permissive_check_wdev(enum nl80211_iftype iftype, struct wireless_dev *wdev, struct ieee80211_channel *chan) { struct ieee80211_channel *other_chan = NULL; unsigned int link_id; int r1, r2; for_each_valid_link(wdev, link_id) { if (wdev->iftype == NL80211_IFTYPE_STATION && wdev->links[link_id].client.current_bss) other_chan = wdev->links[link_id].client.current_bss->pub.channel; /* * If a GO already operates on the same GO_CONCURRENT channel, * this one (maybe the same one) can beacon as well. We allow * the operation even if the station we relied on with * GO_CONCURRENT is disconnected now. But then we must make sure * we're not outdoor on an indoor-only channel. */ if (iftype == NL80211_IFTYPE_P2P_GO && wdev->iftype == NL80211_IFTYPE_P2P_GO && wdev->links[link_id].ap.beacon_interval && !(chan->flags & IEEE80211_CHAN_INDOOR_ONLY)) other_chan = wdev->links[link_id].ap.chandef.chan; if (!other_chan) continue; if (chan == other_chan) return true; if (chan->band != NL80211_BAND_5GHZ && chan->band != NL80211_BAND_6GHZ) continue; r1 = cfg80211_get_unii(chan->center_freq); r2 = cfg80211_get_unii(other_chan->center_freq); if (r1 != -EINVAL && r1 == r2) { /* * At some locations channels 149-165 are considered a * bundle, but at other locations, e.g., Indonesia, * channels 149-161 are considered a bundle while * channel 165 is left out and considered to be in a * different bundle. Thus, in case that there is a * station interface connected to an AP on channel 165, * it is assumed that channels 149-161 are allowed for * GO operations. However, having a station interface * connected to an AP on channels 149-161, does not * allow GO operation on channel 165. */ if (chan->center_freq == 5825 && other_chan->center_freq != 5825) continue; return true; } } return false; } /* * Check if the channel can be used under permissive conditions mandated by * some regulatory bodies, i.e., the channel is marked with * IEEE80211_CHAN_IR_CONCURRENT and there is an additional station interface * associated to an AP on the same channel or on the same UNII band * (assuming that the AP is an authorized master). * In addition allow operation on a channel on which indoor operation is * allowed, iff we are currently operating in an indoor environment. */ static bool cfg80211_ir_permissive_chan(struct wiphy *wiphy, enum nl80211_iftype iftype, struct ieee80211_channel *chan) { struct wireless_dev *wdev; struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); lockdep_assert_held(&rdev->wiphy.mtx); if (!IS_ENABLED(CONFIG_CFG80211_REG_RELAX_NO_IR) || !(wiphy->regulatory_flags & REGULATORY_ENABLE_RELAX_NO_IR)) return false; /* only valid for GO and TDLS off-channel (station/p2p-CL) */ if (iftype != NL80211_IFTYPE_P2P_GO && iftype != NL80211_IFTYPE_STATION && iftype != NL80211_IFTYPE_P2P_CLIENT) return false; if (regulatory_indoor_allowed() && (chan->flags & IEEE80211_CHAN_INDOOR_ONLY)) return true; if (!(chan->flags & IEEE80211_CHAN_IR_CONCURRENT)) return false; /* * Generally, it is possible to rely on another device/driver to allow * the IR concurrent relaxation, however, since the device can further * enforce the relaxation (by doing a similar verifications as this), * and thus fail the GO instantiation, consider only the interfaces of * the current registered device. */ list_for_each_entry(wdev, &rdev->wiphy.wdev_list, list) { bool ret; ret = cfg80211_ir_permissive_check_wdev(iftype, wdev, chan); if (ret) return ret; } return false; } static bool _cfg80211_reg_can_beacon(struct wiphy *wiphy, struct cfg80211_chan_def *chandef, enum nl80211_iftype iftype, u32 prohibited_flags, u32 permitting_flags) { bool res, check_radar; int dfs_required; trace_cfg80211_reg_can_beacon(wiphy, chandef, iftype, prohibited_flags, permitting_flags); if (!_cfg80211_chandef_usable(wiphy, chandef, IEEE80211_CHAN_DISABLED, 0)) return false; dfs_required = cfg80211_chandef_dfs_required(wiphy, chandef, iftype); check_radar = dfs_required != 0; if (dfs_required > 0 && cfg80211_chandef_dfs_available(wiphy, chandef)) { /* We can skip IEEE80211_CHAN_NO_IR if chandef dfs available */ prohibited_flags &= ~IEEE80211_CHAN_NO_IR; check_radar = false; } if (check_radar && !_cfg80211_chandef_usable(wiphy, chandef, IEEE80211_CHAN_RADAR, 0)) return false; res = _cfg80211_chandef_usable(wiphy, chandef, prohibited_flags, permitting_flags); trace_cfg80211_return_bool(res); return res; } bool cfg80211_reg_check_beaconing(struct wiphy *wiphy, struct cfg80211_chan_def *chandef, struct cfg80211_beaconing_check_config *cfg) { struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); u32 permitting_flags = 0; bool check_no_ir = true; /* * Under certain conditions suggested by some regulatory bodies a * GO/STA can IR on channels marked with IEEE80211_NO_IR. Set this flag * only if such relaxations are not enabled and the conditions are not * met. */ if (cfg->relax) { lockdep_assert_held(&rdev->wiphy.mtx); check_no_ir = !cfg80211_ir_permissive_chan(wiphy, cfg->iftype, chandef->chan); } if (cfg->reg_power == IEEE80211_REG_VLP_AP) permitting_flags |= IEEE80211_CHAN_ALLOW_6GHZ_VLP_AP; return _cfg80211_reg_can_beacon(wiphy, chandef, cfg->iftype, check_no_ir ? IEEE80211_CHAN_NO_IR : 0, permitting_flags); } EXPORT_SYMBOL(cfg80211_reg_check_beaconing); int cfg80211_set_monitor_channel(struct cfg80211_registered_device *rdev, struct net_device *dev, struct cfg80211_chan_def *chandef) { if (!rdev->ops->set_monitor_channel) return -EOPNOTSUPP; if (!cfg80211_has_monitors_only(rdev)) return -EBUSY; return rdev_set_monitor_channel(rdev, dev, chandef); } bool cfg80211_any_usable_channels(struct wiphy *wiphy, unsigned long sband_mask, u32 prohibited_flags) { int idx; prohibited_flags |= IEEE80211_CHAN_DISABLED; for_each_set_bit(idx, &sband_mask, NUM_NL80211_BANDS) { struct ieee80211_supported_band *sband = wiphy->bands[idx]; int chanidx; if (!sband) continue; for (chanidx = 0; chanidx < sband->n_channels; chanidx++) { struct ieee80211_channel *chan; chan = &sband->channels[chanidx]; if (chan->flags & prohibited_flags) continue; return true; } } return false; } EXPORT_SYMBOL(cfg80211_any_usable_channels); struct cfg80211_chan_def *wdev_chandef(struct wireless_dev *wdev, unsigned int link_id) { lockdep_assert_wiphy(wdev->wiphy); WARN_ON(wdev->valid_links && !(wdev->valid_links & BIT(link_id))); WARN_ON(!wdev->valid_links && link_id > 0); switch (wdev->iftype) { case NL80211_IFTYPE_MESH_POINT: return &wdev->u.mesh.chandef; case NL80211_IFTYPE_ADHOC: return &wdev->u.ibss.chandef; case NL80211_IFTYPE_OCB: return &wdev->u.ocb.chandef; case NL80211_IFTYPE_AP: case NL80211_IFTYPE_P2P_GO: return &wdev->links[link_id].ap.chandef; default: return NULL; } } EXPORT_SYMBOL(wdev_chandef);
18 18 8 8 8 2 7 6 6 18 9 1 8 1 7 9 1 6 5 5 5 1 1 1 1 9 16 16 16 16 11 1 1 1 16 9 3 9 9 10 10 16 16 15 15 15 5 4 7 8 9 14 2 14 14 14 14 14 15 16 9 8 7 7 7 7 7 7 9 2 2 3 2 1 2 1 2 4 3 3 7 15 15 9 8 7 7 14 14 14 14 14 13 7 13 13 6 6 13 7 13 15 2 6 5 6 5 3 5 4 4 2 4 1 4 1 5 5 7 2 2 2 4 3 35 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 // SPDX-License-Identifier: GPL-2.0-or-later /* L2TPv3 IP encapsulation support * * Copyright (c) 2008,2009,2010 Katalix Systems Ltd */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <asm/ioctls.h> #include <linux/icmp.h> #include <linux/module.h> #include <linux/skbuff.h> #include <linux/random.h> #include <linux/socket.h> #include <linux/l2tp.h> #include <linux/in.h> #include <net/sock.h> #include <net/ip.h> #include <net/icmp.h> #include <net/udp.h> #include <net/inet_common.h> #include <net/tcp_states.h> #include <net/protocol.h> #include <net/xfrm.h> #include <net/net_namespace.h> #include <net/netns/generic.h> #include "l2tp_core.h" /* per-net private data for this module */ static unsigned int l2tp_ip_net_id; struct l2tp_ip_net { rwlock_t l2tp_ip_lock; struct hlist_head l2tp_ip_table; struct hlist_head l2tp_ip_bind_table; }; struct l2tp_ip_sock { /* inet_sock has to be the first member of l2tp_ip_sock */ struct inet_sock inet; u32 conn_id; u32 peer_conn_id; }; static struct l2tp_ip_sock *l2tp_ip_sk(const struct sock *sk) { return (struct l2tp_ip_sock *)sk; } static struct l2tp_ip_net *l2tp_ip_pernet(const struct net *net) { return net_generic(net, l2tp_ip_net_id); } static struct sock *__l2tp_ip_bind_lookup(const struct net *net, __be32 laddr, __be32 raddr, int dif, u32 tunnel_id) { struct l2tp_ip_net *pn = l2tp_ip_pernet(net); struct sock *sk; sk_for_each_bound(sk, &pn->l2tp_ip_bind_table) { const struct l2tp_ip_sock *l2tp = l2tp_ip_sk(sk); const struct inet_sock *inet = inet_sk(sk); int bound_dev_if; if (!net_eq(sock_net(sk), net)) continue; bound_dev_if = READ_ONCE(sk->sk_bound_dev_if); if (bound_dev_if && dif && bound_dev_if != dif) continue; if (inet->inet_rcv_saddr && laddr && inet->inet_rcv_saddr != laddr) continue; if (inet->inet_daddr && raddr && inet->inet_daddr != raddr) continue; if (l2tp->conn_id != tunnel_id) continue; goto found; } sk = NULL; found: return sk; } /* When processing receive frames, there are two cases to * consider. Data frames consist of a non-zero session-id and an * optional cookie. Control frames consist of a regular L2TP header * preceded by 32-bits of zeros. * * L2TPv3 Session Header Over IP * * 0 1 2 3 * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ * | Session ID | * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ * | Cookie (optional, maximum 64 bits)... * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ * | * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ * * L2TPv3 Control Message Header Over IP * * 0 1 2 3 * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ * | (32 bits of zeros) | * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ * |T|L|x|x|S|x|x|x|x|x|x|x| Ver | Length | * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ * | Control Connection ID | * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ * | Ns | Nr | * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ * * All control frames are passed to userspace. */ static int l2tp_ip_recv(struct sk_buff *skb) { struct net *net = dev_net(skb->dev); struct l2tp_ip_net *pn; struct sock *sk; u32 session_id; u32 tunnel_id; unsigned char *ptr, *optr; struct l2tp_session *session; struct l2tp_tunnel *tunnel = NULL; struct iphdr *iph; pn = l2tp_ip_pernet(net); if (!pskb_may_pull(skb, 4)) goto discard; /* Point to L2TP header */ optr = skb->data; ptr = skb->data; session_id = ntohl(*((__be32 *)ptr)); ptr += 4; /* RFC3931: L2TP/IP packets have the first 4 bytes containing * the session_id. If it is 0, the packet is a L2TP control * frame and the session_id value can be discarded. */ if (session_id == 0) { __skb_pull(skb, 4); goto pass_up; } /* Ok, this is a data packet. Lookup the session. */ session = l2tp_v3_session_get(net, NULL, session_id); if (!session) goto discard; tunnel = session->tunnel; if (!tunnel) goto discard_sess; if (l2tp_v3_ensure_opt_in_linear(session, skb, &ptr, &optr)) goto discard_sess; l2tp_recv_common(session, skb, ptr, optr, 0, skb->len); l2tp_session_put(session); return 0; pass_up: /* Get the tunnel_id from the L2TP header */ if (!pskb_may_pull(skb, 12)) goto discard; if ((skb->data[0] & 0xc0) != 0xc0) goto discard; tunnel_id = ntohl(*(__be32 *)&skb->data[4]); iph = (struct iphdr *)skb_network_header(skb); read_lock_bh(&pn->l2tp_ip_lock); sk = __l2tp_ip_bind_lookup(net, iph->daddr, iph->saddr, inet_iif(skb), tunnel_id); if (!sk) { read_unlock_bh(&pn->l2tp_ip_lock); goto discard; } sock_hold(sk); read_unlock_bh(&pn->l2tp_ip_lock); if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb)) goto discard_put; nf_reset_ct(skb); return sk_receive_skb(sk, skb, 1); discard_sess: l2tp_session_put(session); goto discard; discard_put: sock_put(sk); discard: kfree_skb(skb); return 0; } static int l2tp_ip_hash(struct sock *sk) { struct l2tp_ip_net *pn = l2tp_ip_pernet(sock_net(sk)); if (sk_unhashed(sk)) { write_lock_bh(&pn->l2tp_ip_lock); sk_add_node(sk, &pn->l2tp_ip_table); write_unlock_bh(&pn->l2tp_ip_lock); } return 0; } static void l2tp_ip_unhash(struct sock *sk) { struct l2tp_ip_net *pn = l2tp_ip_pernet(sock_net(sk)); if (sk_unhashed(sk)) return; write_lock_bh(&pn->l2tp_ip_lock); sk_del_node_init(sk); write_unlock_bh(&pn->l2tp_ip_lock); } static int l2tp_ip_open(struct sock *sk) { /* Prevent autobind. We don't have ports. */ inet_sk(sk)->inet_num = IPPROTO_L2TP; l2tp_ip_hash(sk); return 0; } static void l2tp_ip_close(struct sock *sk, long timeout) { struct l2tp_ip_net *pn = l2tp_ip_pernet(sock_net(sk)); write_lock_bh(&pn->l2tp_ip_lock); hlist_del_init(&sk->sk_bind_node); sk_del_node_init(sk); write_unlock_bh(&pn->l2tp_ip_lock); sk_common_release(sk); } static void l2tp_ip_destroy_sock(struct sock *sk) { struct l2tp_tunnel *tunnel; __skb_queue_purge(&sk->sk_write_queue); tunnel = l2tp_sk_to_tunnel(sk); if (tunnel) { l2tp_tunnel_delete(tunnel); l2tp_tunnel_put(tunnel); } } static int l2tp_ip_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len) { struct inet_sock *inet = inet_sk(sk); struct sockaddr_l2tpip *addr = (struct sockaddr_l2tpip *)uaddr; struct net *net = sock_net(sk); struct l2tp_ip_net *pn; int ret; int chk_addr_ret; if (addr_len < sizeof(struct sockaddr_l2tpip)) return -EINVAL; if (addr->l2tp_family != AF_INET) return -EINVAL; lock_sock(sk); ret = -EINVAL; if (!sock_flag(sk, SOCK_ZAPPED)) goto out; if (sk->sk_state != TCP_CLOSE) goto out; chk_addr_ret = inet_addr_type(net, addr->l2tp_addr.s_addr); ret = -EADDRNOTAVAIL; if (addr->l2tp_addr.s_addr && chk_addr_ret != RTN_LOCAL && chk_addr_ret != RTN_MULTICAST && chk_addr_ret != RTN_BROADCAST) goto out; if (addr->l2tp_addr.s_addr) { inet->inet_rcv_saddr = addr->l2tp_addr.s_addr; inet->inet_saddr = addr->l2tp_addr.s_addr; } if (chk_addr_ret == RTN_MULTICAST || chk_addr_ret == RTN_BROADCAST) inet->inet_saddr = 0; /* Use device */ pn = l2tp_ip_pernet(net); write_lock_bh(&pn->l2tp_ip_lock); if (__l2tp_ip_bind_lookup(net, addr->l2tp_addr.s_addr, 0, sk->sk_bound_dev_if, addr->l2tp_conn_id)) { write_unlock_bh(&pn->l2tp_ip_lock); ret = -EADDRINUSE; goto out; } sk_dst_reset(sk); l2tp_ip_sk(sk)->conn_id = addr->l2tp_conn_id; sk_add_bind_node(sk, &pn->l2tp_ip_bind_table); sk_del_node_init(sk); write_unlock_bh(&pn->l2tp_ip_lock); ret = 0; sock_reset_flag(sk, SOCK_ZAPPED); out: release_sock(sk); return ret; } static int l2tp_ip_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) { struct sockaddr_l2tpip *lsa = (struct sockaddr_l2tpip *)uaddr; struct l2tp_ip_net *pn = l2tp_ip_pernet(sock_net(sk)); int rc; if (addr_len < sizeof(*lsa)) return -EINVAL; if (ipv4_is_multicast(lsa->l2tp_addr.s_addr)) return -EINVAL; lock_sock(sk); /* Must bind first - autobinding does not work */ if (sock_flag(sk, SOCK_ZAPPED)) { rc = -EINVAL; goto out_sk; } rc = __ip4_datagram_connect(sk, uaddr, addr_len); if (rc < 0) goto out_sk; l2tp_ip_sk(sk)->peer_conn_id = lsa->l2tp_conn_id; write_lock_bh(&pn->l2tp_ip_lock); hlist_del_init(&sk->sk_bind_node); sk_add_bind_node(sk, &pn->l2tp_ip_bind_table); write_unlock_bh(&pn->l2tp_ip_lock); out_sk: release_sock(sk); return rc; } static int l2tp_ip_disconnect(struct sock *sk, int flags) { if (sock_flag(sk, SOCK_ZAPPED)) return 0; return __udp_disconnect(sk, flags); } static int l2tp_ip_getname(struct socket *sock, struct sockaddr *uaddr, int peer) { struct sock *sk = sock->sk; struct inet_sock *inet = inet_sk(sk); struct l2tp_ip_sock *lsk = l2tp_ip_sk(sk); struct sockaddr_l2tpip *lsa = (struct sockaddr_l2tpip *)uaddr; memset(lsa, 0, sizeof(*lsa)); lsa->l2tp_family = AF_INET; if (peer) { if (!inet->inet_dport) return -ENOTCONN; lsa->l2tp_conn_id = lsk->peer_conn_id; lsa->l2tp_addr.s_addr = inet->inet_daddr; } else { __be32 addr = inet->inet_rcv_saddr; if (!addr) addr = inet->inet_saddr; lsa->l2tp_conn_id = lsk->conn_id; lsa->l2tp_addr.s_addr = addr; } return sizeof(*lsa); } static int l2tp_ip_backlog_recv(struct sock *sk, struct sk_buff *skb) { int rc; /* Charge it to the socket, dropping if the queue is full. */ rc = sock_queue_rcv_skb(sk, skb); if (rc < 0) goto drop; return 0; drop: IP_INC_STATS(sock_net(sk), IPSTATS_MIB_INDISCARDS); kfree_skb(skb); return 0; } /* Userspace will call sendmsg() on the tunnel socket to send L2TP * control frames. */ static int l2tp_ip_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) { struct sk_buff *skb; int rc; struct inet_sock *inet = inet_sk(sk); struct rtable *rt = NULL; int connected = 0; __be32 daddr; lock_sock(sk); rc = -ENOTCONN; if (sock_flag(sk, SOCK_DEAD)) goto out; /* Get and verify the address. */ if (msg->msg_name) { DECLARE_SOCKADDR(struct sockaddr_l2tpip *, lip, msg->msg_name); rc = -EINVAL; if (msg->msg_namelen < sizeof(*lip)) goto out; if (lip->l2tp_family != AF_INET) { rc = -EAFNOSUPPORT; if (lip->l2tp_family != AF_UNSPEC) goto out; } daddr = lip->l2tp_addr.s_addr; } else { rc = -EDESTADDRREQ; if (sk->sk_state != TCP_ESTABLISHED) goto out; connected = 1; } /* Allocate a socket buffer */ rc = -ENOMEM; skb = sock_wmalloc(sk, 2 + NET_SKB_PAD + sizeof(struct iphdr) + 4 + len, 0, GFP_KERNEL); if (!skb) goto error; /* Reserve space for headers, putting IP header on 4-byte boundary. */ skb_reserve(skb, 2 + NET_SKB_PAD); skb_reset_network_header(skb); skb_reserve(skb, sizeof(struct iphdr)); skb_reset_transport_header(skb); /* Insert 0 session_id */ *((__be32 *)skb_put(skb, 4)) = 0; /* Copy user data into skb */ rc = memcpy_from_msg(skb_put(skb, len), msg, len); if (rc < 0) { kfree_skb(skb); goto error; } if (connected) rt = dst_rtable(__sk_dst_check(sk, 0)); rcu_read_lock(); if (!rt) { struct flowi4 *fl4 = &inet->cork.fl.u.ip4; inet_sk_init_flowi4(inet, fl4); /* Overwrite ->daddr if msg->msg_name was provided */ if (!connected) fl4->daddr = daddr; /* If this fails, retransmit mechanism of transport layer will * keep trying until route appears or the connection times * itself out. */ rt = ip_route_output_flow(sock_net(sk), fl4, sk); if (IS_ERR(rt)) goto no_route; if (connected) { sk_setup_caps(sk, &rt->dst); } else { skb_dst_set(skb, &rt->dst); goto xmit; } } /* We don't need to clone dst here, it is guaranteed to not disappear. * __dev_xmit_skb() might force a refcount if needed. */ skb_dst_set_noref(skb, &rt->dst); xmit: /* Queue the packet to IP for output */ rc = ip_queue_xmit(sk, skb, &inet->cork.fl); rcu_read_unlock(); error: if (rc >= 0) rc = len; out: release_sock(sk); return rc; no_route: rcu_read_unlock(); IP_INC_STATS(sock_net(sk), IPSTATS_MIB_OUTNOROUTES); kfree_skb(skb); rc = -EHOSTUNREACH; goto out; } static int l2tp_ip_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int flags, int *addr_len) { struct inet_sock *inet = inet_sk(sk); size_t copied = 0; int err = -EOPNOTSUPP; DECLARE_SOCKADDR(struct sockaddr_in *, sin, msg->msg_name); struct sk_buff *skb; if (flags & MSG_OOB) goto out; skb = skb_recv_datagram(sk, flags, &err); if (!skb) goto out; copied = skb->len; if (len < copied) { msg->msg_flags |= MSG_TRUNC; copied = len; } err = skb_copy_datagram_msg(skb, 0, msg, copied); if (err) goto done; sock_recv_timestamp(msg, sk, skb); /* Copy the address. */ if (sin) { sin->sin_family = AF_INET; sin->sin_addr.s_addr = ip_hdr(skb)->saddr; sin->sin_port = 0; memset(&sin->sin_zero, 0, sizeof(sin->sin_zero)); *addr_len = sizeof(*sin); } if (inet_cmsg_flags(inet)) ip_cmsg_recv(msg, skb); if (flags & MSG_TRUNC) copied = skb->len; done: skb_free_datagram(sk, skb); out: return err ? err : copied; } int l2tp_ioctl(struct sock *sk, int cmd, int *karg) { struct sk_buff *skb; switch (cmd) { case SIOCOUTQ: *karg = sk_wmem_alloc_get(sk); break; case SIOCINQ: spin_lock_bh(&sk->sk_receive_queue.lock); skb = skb_peek(&sk->sk_receive_queue); *karg = skb ? skb->len : 0; spin_unlock_bh(&sk->sk_receive_queue.lock); break; default: return -ENOIOCTLCMD; } return 0; } EXPORT_SYMBOL_GPL(l2tp_ioctl); static struct proto l2tp_ip_prot = { .name = "L2TP/IP", .owner = THIS_MODULE, .init = l2tp_ip_open, .close = l2tp_ip_close, .bind = l2tp_ip_bind, .connect = l2tp_ip_connect, .disconnect = l2tp_ip_disconnect, .ioctl = l2tp_ioctl, .destroy = l2tp_ip_destroy_sock, .setsockopt = ip_setsockopt, .getsockopt = ip_getsockopt, .sendmsg = l2tp_ip_sendmsg, .recvmsg = l2tp_ip_recvmsg, .backlog_rcv = l2tp_ip_backlog_recv, .hash = l2tp_ip_hash, .unhash = l2tp_ip_unhash, .obj_size = sizeof(struct l2tp_ip_sock), }; static const struct proto_ops l2tp_ip_ops = { .family = PF_INET, .owner = THIS_MODULE, .release = inet_release, .bind = inet_bind, .connect = inet_dgram_connect, .socketpair = sock_no_socketpair, .accept = sock_no_accept, .getname = l2tp_ip_getname, .poll = datagram_poll, .ioctl = inet_ioctl, .gettstamp = sock_gettstamp, .listen = sock_no_listen, .shutdown = inet_shutdown, .setsockopt = sock_common_setsockopt, .getsockopt = sock_common_getsockopt, .sendmsg = inet_sendmsg, .recvmsg = sock_common_recvmsg, .mmap = sock_no_mmap, }; static struct inet_protosw l2tp_ip_protosw = { .type = SOCK_DGRAM, .protocol = IPPROTO_L2TP, .prot = &l2tp_ip_prot, .ops = &l2tp_ip_ops, }; static struct net_protocol l2tp_ip_protocol __read_mostly = { .handler = l2tp_ip_recv, }; static __net_init int l2tp_ip_init_net(struct net *net) { struct l2tp_ip_net *pn = net_generic(net, l2tp_ip_net_id); rwlock_init(&pn->l2tp_ip_lock); INIT_HLIST_HEAD(&pn->l2tp_ip_table); INIT_HLIST_HEAD(&pn->l2tp_ip_bind_table); return 0; } static __net_exit void l2tp_ip_exit_net(struct net *net) { struct l2tp_ip_net *pn = l2tp_ip_pernet(net); write_lock_bh(&pn->l2tp_ip_lock); WARN_ON_ONCE(hlist_count_nodes(&pn->l2tp_ip_table) != 0); WARN_ON_ONCE(hlist_count_nodes(&pn->l2tp_ip_bind_table) != 0); write_unlock_bh(&pn->l2tp_ip_lock); } static struct pernet_operations l2tp_ip_net_ops = { .init = l2tp_ip_init_net, .exit = l2tp_ip_exit_net, .id = &l2tp_ip_net_id, .size = sizeof(struct l2tp_ip_net), }; static int __init l2tp_ip_init(void) { int err; pr_info("L2TP IP encapsulation support (L2TPv3)\n"); err = register_pernet_device(&l2tp_ip_net_ops); if (err) goto out; err = proto_register(&l2tp_ip_prot, 1); if (err != 0) goto out1; err = inet_add_protocol(&l2tp_ip_protocol, IPPROTO_L2TP); if (err) goto out2; inet_register_protosw(&l2tp_ip_protosw); return 0; out2: proto_unregister(&l2tp_ip_prot); out1: unregister_pernet_device(&l2tp_ip_net_ops); out: return err; } static void __exit l2tp_ip_exit(void) { inet_unregister_protosw(&l2tp_ip_protosw); inet_del_protocol(&l2tp_ip_protocol, IPPROTO_L2TP); proto_unregister(&l2tp_ip_prot); unregister_pernet_device(&l2tp_ip_net_ops); } module_init(l2tp_ip_init); module_exit(l2tp_ip_exit); MODULE_LICENSE("GPL"); MODULE_AUTHOR("James Chapman <jchapman@katalix.com>"); MODULE_DESCRIPTION("L2TP over IP"); MODULE_VERSION("1.0"); /* Use the values of SOCK_DGRAM (2) as type and IPPROTO_L2TP (115) as protocol, * because __stringify doesn't like enums */ MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_INET, 115, 2); MODULE_ALIAS_NET_PF_PROTO(PF_INET, 115);
30 8 8 11 11 11 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 /* SPDX-License-Identifier: GPL-2.0-or-later */ /* mpi-inline.h - Internal to the Multi Precision Integers * Copyright (C) 1994, 1996, 1998, 1999 Free Software Foundation, Inc. * * This file is part of GnuPG. * * Note: This code is heavily based on the GNU MP Library. * Actually it's the same code with only minor changes in the * way the data is stored; this is to support the abstraction * of an optional secure memory allocation which may be used * to avoid revealing of sensitive data due to paging etc. * The GNU MP Library itself is published under the LGPL; * however I decided to publish this code under the plain GPL. */ #ifndef G10_MPI_INLINE_H #define G10_MPI_INLINE_H #ifndef G10_MPI_INLINE_DECL #define G10_MPI_INLINE_DECL static inline #endif G10_MPI_INLINE_DECL mpi_limb_t mpihelp_add_1(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr, mpi_size_t s1_size, mpi_limb_t s2_limb) { mpi_limb_t x; x = *s1_ptr++; s2_limb += x; *res_ptr++ = s2_limb; if (s2_limb < x) { /* sum is less than the left operand: handle carry */ while (--s1_size) { x = *s1_ptr++ + 1; /* add carry */ *res_ptr++ = x; /* and store */ if (x) /* not 0 (no overflow): we can stop */ goto leave; } return 1; /* return carry (size of s1 to small) */ } leave: if (res_ptr != s1_ptr) { /* not the same variable */ mpi_size_t i; /* copy the rest */ for (i = 0; i < s1_size - 1; i++) res_ptr[i] = s1_ptr[i]; } return 0; /* no carry */ } G10_MPI_INLINE_DECL mpi_limb_t mpihelp_add(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr, mpi_size_t s1_size, mpi_ptr_t s2_ptr, mpi_size_t s2_size) { mpi_limb_t cy = 0; if (s2_size) cy = mpihelp_add_n(res_ptr, s1_ptr, s2_ptr, s2_size); if (s1_size - s2_size) cy = mpihelp_add_1(res_ptr + s2_size, s1_ptr + s2_size, s1_size - s2_size, cy); return cy; } G10_MPI_INLINE_DECL mpi_limb_t mpihelp_sub_1(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr, mpi_size_t s1_size, mpi_limb_t s2_limb) { mpi_limb_t x; x = *s1_ptr++; s2_limb = x - s2_limb; *res_ptr++ = s2_limb; if (s2_limb > x) { while (--s1_size) { x = *s1_ptr++; *res_ptr++ = x - 1; if (x) goto leave; } return 1; } leave: if (res_ptr != s1_ptr) { mpi_size_t i; for (i = 0; i < s1_size - 1; i++) res_ptr[i] = s1_ptr[i]; } return 0; } G10_MPI_INLINE_DECL mpi_limb_t mpihelp_sub(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr, mpi_size_t s1_size, mpi_ptr_t s2_ptr, mpi_size_t s2_size) { mpi_limb_t cy = 0; if (s2_size) cy = mpihelp_sub_n(res_ptr, s1_ptr, s2_ptr, s2_size); if (s1_size - s2_size) cy = mpihelp_sub_1(res_ptr + s2_size, s1_ptr + s2_size, s1_size - s2_size, cy); return cy; } #endif /*G10_MPI_INLINE_H */
120 170 170 170 124 124 124 124 124 124 127 127 7 127 127 120 120 120 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 // SPDX-License-Identifier: GPL-2.0 #include <linux/swap_cgroup.h> #include <linux/vmalloc.h> #include <linux/mm.h> #include <linux/swapops.h> /* depends on mm.h include */ static DEFINE_MUTEX(swap_cgroup_mutex); /* Pack two cgroup id (short) of two entries in one swap_cgroup (atomic_t) */ #define ID_PER_SC (sizeof(struct swap_cgroup) / sizeof(unsigned short)) #define ID_SHIFT (BITS_PER_TYPE(unsigned short)) #define ID_MASK (BIT(ID_SHIFT) - 1) struct swap_cgroup { atomic_t ids; }; struct swap_cgroup_ctrl { struct swap_cgroup *map; }; static struct swap_cgroup_ctrl swap_cgroup_ctrl[MAX_SWAPFILES]; static unsigned short __swap_cgroup_id_lookup(struct swap_cgroup *map, pgoff_t offset) { unsigned int shift = (offset % ID_PER_SC) * ID_SHIFT; unsigned int old_ids = atomic_read(&map[offset / ID_PER_SC].ids); BUILD_BUG_ON(!is_power_of_2(ID_PER_SC)); BUILD_BUG_ON(sizeof(struct swap_cgroup) != sizeof(atomic_t)); return (old_ids >> shift) & ID_MASK; } static unsigned short __swap_cgroup_id_xchg(struct swap_cgroup *map, pgoff_t offset, unsigned short new_id) { unsigned short old_id; struct swap_cgroup *sc = &map[offset / ID_PER_SC]; unsigned int shift = (offset % ID_PER_SC) * ID_SHIFT; unsigned int new_ids, old_ids = atomic_read(&sc->ids); do { old_id = (old_ids >> shift) & ID_MASK; new_ids = (old_ids & ~(ID_MASK << shift)); new_ids |= ((unsigned int)new_id) << shift; } while (!atomic_try_cmpxchg(&sc->ids, &old_ids, new_ids)); return old_id; } /** * swap_cgroup_record - record mem_cgroup for a set of swap entries. * These entries must belong to one single folio, and that folio * must be being charged for swap space (swap out), and these * entries must not have been charged * * @folio: the folio that the swap entry belongs to * @ent: the first swap entry to be recorded */ void swap_cgroup_record(struct folio *folio, swp_entry_t ent) { unsigned int nr_ents = folio_nr_pages(folio); struct swap_cgroup *map; pgoff_t offset, end; unsigned short old; offset = swp_offset(ent); end = offset + nr_ents; map = swap_cgroup_ctrl[swp_type(ent)].map; do { old = __swap_cgroup_id_xchg(map, offset, mem_cgroup_id(folio_memcg(folio))); VM_BUG_ON(old); } while (++offset != end); } /** * swap_cgroup_clear - clear mem_cgroup for a set of swap entries. * These entries must be being uncharged from swap. They either * belongs to one single folio in the swap cache (swap in for * cgroup v1), or no longer have any users (slot freeing). * * @ent: the first swap entry to be recorded into * @nr_ents: number of swap entries to be recorded * * Returns the existing old value. */ unsigned short swap_cgroup_clear(swp_entry_t ent, unsigned int nr_ents) { pgoff_t offset = swp_offset(ent); pgoff_t end = offset + nr_ents; struct swap_cgroup *map; unsigned short old, iter = 0; offset = swp_offset(ent); end = offset + nr_ents; map = swap_cgroup_ctrl[swp_type(ent)].map; do { old = __swap_cgroup_id_xchg(map, offset, 0); if (!iter) iter = old; VM_BUG_ON(iter != old); } while (++offset != end); return old; } /** * lookup_swap_cgroup_id - lookup mem_cgroup id tied to swap entry * @ent: swap entry to be looked up. * * Returns ID of mem_cgroup at success. 0 at failure. (0 is invalid ID) */ unsigned short lookup_swap_cgroup_id(swp_entry_t ent) { struct swap_cgroup_ctrl *ctrl; if (mem_cgroup_disabled()) return 0; ctrl = &swap_cgroup_ctrl[swp_type(ent)]; return __swap_cgroup_id_lookup(ctrl->map, swp_offset(ent)); } int swap_cgroup_swapon(int type, unsigned long max_pages) { struct swap_cgroup *map; struct swap_cgroup_ctrl *ctrl; if (mem_cgroup_disabled()) return 0; BUILD_BUG_ON(sizeof(unsigned short) * ID_PER_SC != sizeof(struct swap_cgroup)); map = vzalloc(DIV_ROUND_UP(max_pages, ID_PER_SC) * sizeof(struct swap_cgroup)); if (!map) goto nomem; ctrl = &swap_cgroup_ctrl[type]; mutex_lock(&swap_cgroup_mutex); ctrl->map = map; mutex_unlock(&swap_cgroup_mutex); return 0; nomem: pr_info("couldn't allocate enough memory for swap_cgroup\n"); pr_info("swap_cgroup can be disabled by swapaccount=0 boot option\n"); return -ENOMEM; } void swap_cgroup_swapoff(int type) { struct swap_cgroup *map; struct swap_cgroup_ctrl *ctrl; if (mem_cgroup_disabled()) return; mutex_lock(&swap_cgroup_mutex); ctrl = &swap_cgroup_ctrl[type]; map = ctrl->map; ctrl->map = NULL; mutex_unlock(&swap_cgroup_mutex); vfree(map); }
12 34 9 33 24 2 31 32 28 31 31 70 70 63 63 2854 13 72 72 61 55 55 61 13 2 1 12 13 12 12 11 10 12 14 9 3 3 3 2 1 3 9 6 5 5 5 5 4 4 5 7 4 4 4 2 4 1 2 1 7 6 6 2 4 3 2 2 1 23 3 2 1 7 4 4 1 265 1 11 6 3 1 2 1 35 20 19 11 40 40 2 7 7 6 6 2 5 7 38 17 23 262 24 2 23 4 271 269 265 4 263 75 75 75 7 9 72 75 67 67 67 64 17 16 2 17 17 17 48 70 69 65 65 64 65 4 3 3 4 70 6 10 6 50 14 13 6 6 5 6 7 5 5 7 14 50 174 2 32 33 175 76 11 72 1 8 7 66 72 49 49 2 2 213 174 49 84 84 84 84 47 47 83 84 84 876 84 84 84 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 // SPDX-License-Identifier: GPL-2.0 /* * linux/fs/fcntl.c * * Copyright (C) 1991, 1992 Linus Torvalds */ #include <linux/syscalls.h> #include <linux/init.h> #include <linux/mm.h> #include <linux/sched/task.h> #include <linux/fs.h> #include <linux/filelock.h> #include <linux/file.h> #include <linux/capability.h> #include <linux/dnotify.h> #include <linux/slab.h> #include <linux/module.h> #include <linux/pipe_fs_i.h> #include <linux/security.h> #include <linux/ptrace.h> #include <linux/signal.h> #include <linux/rcupdate.h> #include <linux/pid_namespace.h> #include <linux/user_namespace.h> #include <linux/memfd.h> #include <linux/compat.h> #include <linux/mount.h> #include <linux/rw_hint.h> #include <linux/poll.h> #include <asm/siginfo.h> #include <linux/uaccess.h> #include "internal.h" #define SETFL_MASK (O_APPEND | O_NONBLOCK | O_NDELAY | O_DIRECT | O_NOATIME) static int setfl(int fd, struct file * filp, unsigned int arg) { struct inode * inode = file_inode(filp); int error = 0; /* * O_APPEND cannot be cleared if the file is marked as append-only * and the file is open for write. */ if (((arg ^ filp->f_flags) & O_APPEND) && IS_APPEND(inode)) return -EPERM; /* O_NOATIME can only be set by the owner or superuser */ if ((arg & O_NOATIME) && !(filp->f_flags & O_NOATIME)) if (!inode_owner_or_capable(file_mnt_idmap(filp), inode)) return -EPERM; /* required for strict SunOS emulation */ if (O_NONBLOCK != O_NDELAY) if (arg & O_NDELAY) arg |= O_NONBLOCK; /* Pipe packetized mode is controlled by O_DIRECT flag */ if (!S_ISFIFO(inode->i_mode) && (arg & O_DIRECT) && !(filp->f_mode & FMODE_CAN_ODIRECT)) return -EINVAL; if (filp->f_op->check_flags) error = filp->f_op->check_flags(arg); if (error) return error; /* * ->fasync() is responsible for setting the FASYNC bit. */ if (((arg ^ filp->f_flags) & FASYNC) && filp->f_op->fasync) { error = filp->f_op->fasync(fd, filp, (arg & FASYNC) != 0); if (error < 0) goto out; if (error > 0) error = 0; } spin_lock(&filp->f_lock); filp->f_flags = (arg & SETFL_MASK) | (filp->f_flags & ~SETFL_MASK); filp->f_iocb_flags = iocb_flags(filp); spin_unlock(&filp->f_lock); out: return error; } /* * Allocate an file->f_owner struct if it doesn't exist, handling racing * allocations correctly. */ int file_f_owner_allocate(struct file *file) { struct fown_struct *f_owner; f_owner = file_f_owner(file); if (f_owner) return 0; f_owner = kzalloc(sizeof(struct fown_struct), GFP_KERNEL); if (!f_owner) return -ENOMEM; rwlock_init(&f_owner->lock); f_owner->file = file; /* If someone else raced us, drop our allocation. */ if (unlikely(cmpxchg(&file->f_owner, NULL, f_owner))) kfree(f_owner); return 0; } EXPORT_SYMBOL(file_f_owner_allocate); void file_f_owner_release(struct file *file) { struct fown_struct *f_owner; f_owner = file_f_owner(file); if (f_owner) { put_pid(f_owner->pid); kfree(f_owner); } } void __f_setown(struct file *filp, struct pid *pid, enum pid_type type, int force) { struct fown_struct *f_owner; f_owner = file_f_owner(filp); if (WARN_ON_ONCE(!f_owner)) return; write_lock_irq(&f_owner->lock); if (force || !f_owner->pid) { put_pid(f_owner->pid); f_owner->pid = get_pid(pid); f_owner->pid_type = type; if (pid) { const struct cred *cred = current_cred(); security_file_set_fowner(filp); f_owner->uid = cred->uid; f_owner->euid = cred->euid; } } write_unlock_irq(&f_owner->lock); } EXPORT_SYMBOL(__f_setown); int f_setown(struct file *filp, int who, int force) { enum pid_type type; struct pid *pid = NULL; int ret = 0; might_sleep(); type = PIDTYPE_TGID; if (who < 0) { /* avoid overflow below */ if (who == INT_MIN) return -EINVAL; type = PIDTYPE_PGID; who = -who; } ret = file_f_owner_allocate(filp); if (ret) return ret; rcu_read_lock(); if (who) { pid = find_vpid(who); if (!pid) ret = -ESRCH; } if (!ret) __f_setown(filp, pid, type, force); rcu_read_unlock(); return ret; } EXPORT_SYMBOL(f_setown); void f_delown(struct file *filp) { __f_setown(filp, NULL, PIDTYPE_TGID, 1); } pid_t f_getown(struct file *filp) { pid_t pid = 0; struct fown_struct *f_owner; f_owner = file_f_owner(filp); if (!f_owner) return pid; read_lock_irq(&f_owner->lock); rcu_read_lock(); if (pid_task(f_owner->pid, f_owner->pid_type)) { pid = pid_vnr(f_owner->pid); if (f_owner->pid_type == PIDTYPE_PGID) pid = -pid; } rcu_read_unlock(); read_unlock_irq(&f_owner->lock); return pid; } static int f_setown_ex(struct file *filp, unsigned long arg) { struct f_owner_ex __user *owner_p = (void __user *)arg; struct f_owner_ex owner; struct pid *pid; int type; int ret; ret = copy_from_user(&owner, owner_p, sizeof(owner)); if (ret) return -EFAULT; switch (owner.type) { case F_OWNER_TID: type = PIDTYPE_PID; break; case F_OWNER_PID: type = PIDTYPE_TGID; break; case F_OWNER_PGRP: type = PIDTYPE_PGID; break; default: return -EINVAL; } ret = file_f_owner_allocate(filp); if (ret) return ret; rcu_read_lock(); pid = find_vpid(owner.pid); if (owner.pid && !pid) ret = -ESRCH; else __f_setown(filp, pid, type, 1); rcu_read_unlock(); return ret; } static int f_getown_ex(struct file *filp, unsigned long arg) { struct f_owner_ex __user *owner_p = (void __user *)arg; struct f_owner_ex owner = {}; int ret = 0; struct fown_struct *f_owner; enum pid_type pid_type = PIDTYPE_PID; f_owner = file_f_owner(filp); if (f_owner) { read_lock_irq(&f_owner->lock); rcu_read_lock(); if (pid_task(f_owner->pid, f_owner->pid_type)) owner.pid = pid_vnr(f_owner->pid); rcu_read_unlock(); pid_type = f_owner->pid_type; } switch (pid_type) { case PIDTYPE_PID: owner.type = F_OWNER_TID; break; case PIDTYPE_TGID: owner.type = F_OWNER_PID; break; case PIDTYPE_PGID: owner.type = F_OWNER_PGRP; break; default: WARN_ON(1); ret = -EINVAL; break; } if (f_owner) read_unlock_irq(&f_owner->lock); if (!ret) { ret = copy_to_user(owner_p, &owner, sizeof(owner)); if (ret) ret = -EFAULT; } return ret; } #ifdef CONFIG_CHECKPOINT_RESTORE static int f_getowner_uids(struct file *filp, unsigned long arg) { struct user_namespace *user_ns = current_user_ns(); struct fown_struct *f_owner; uid_t __user *dst = (void __user *)arg; uid_t src[2] = {0, 0}; int err; f_owner = file_f_owner(filp); if (f_owner) { read_lock_irq(&f_owner->lock); src[0] = from_kuid(user_ns, f_owner->uid); src[1] = from_kuid(user_ns, f_owner->euid); read_unlock_irq(&f_owner->lock); } err = put_user(src[0], &dst[0]); err |= put_user(src[1], &dst[1]); return err; } #else static int f_getowner_uids(struct file *filp, unsigned long arg) { return -EINVAL; } #endif static bool rw_hint_valid(u64 hint) { BUILD_BUG_ON(WRITE_LIFE_NOT_SET != RWH_WRITE_LIFE_NOT_SET); BUILD_BUG_ON(WRITE_LIFE_NONE != RWH_WRITE_LIFE_NONE); BUILD_BUG_ON(WRITE_LIFE_SHORT != RWH_WRITE_LIFE_SHORT); BUILD_BUG_ON(WRITE_LIFE_MEDIUM != RWH_WRITE_LIFE_MEDIUM); BUILD_BUG_ON(WRITE_LIFE_LONG != RWH_WRITE_LIFE_LONG); BUILD_BUG_ON(WRITE_LIFE_EXTREME != RWH_WRITE_LIFE_EXTREME); switch (hint) { case RWH_WRITE_LIFE_NOT_SET: case RWH_WRITE_LIFE_NONE: case RWH_WRITE_LIFE_SHORT: case RWH_WRITE_LIFE_MEDIUM: case RWH_WRITE_LIFE_LONG: case RWH_WRITE_LIFE_EXTREME: return true; default: return false; } } static long fcntl_get_rw_hint(struct file *file, unsigned int cmd, unsigned long arg) { struct inode *inode = file_inode(file); u64 __user *argp = (u64 __user *)arg; u64 hint = READ_ONCE(inode->i_write_hint); if (copy_to_user(argp, &hint, sizeof(*argp))) return -EFAULT; return 0; } static long fcntl_set_rw_hint(struct file *file, unsigned int cmd, unsigned long arg) { struct inode *inode = file_inode(file); u64 __user *argp = (u64 __user *)arg; u64 hint; if (!inode_owner_or_capable(file_mnt_idmap(file), inode)) return -EPERM; if (copy_from_user(&hint, argp, sizeof(hint))) return -EFAULT; if (!rw_hint_valid(hint)) return -EINVAL; WRITE_ONCE(inode->i_write_hint, hint); /* * file->f_mapping->host may differ from inode. As an example, * blkdev_open() modifies file->f_mapping. */ if (file->f_mapping->host != inode) WRITE_ONCE(file->f_mapping->host->i_write_hint, hint); return 0; } /* Is the file descriptor a dup of the file? */ static long f_dupfd_query(int fd, struct file *filp) { CLASS(fd_raw, f)(fd); if (fd_empty(f)) return -EBADF; /* * We can do the 'fdput()' immediately, as the only thing that * matters is the pointer value which isn't changed by the fdput. * * Technically we didn't need a ref at all, and 'fdget()' was * overkill, but given our lockless file pointer lookup, the * alternatives are complicated. */ return fd_file(f) == filp; } /* Let the caller figure out whether a given file was just created. */ static long f_created_query(const struct file *filp) { return !!(filp->f_mode & FMODE_CREATED); } static int f_owner_sig(struct file *filp, int signum, bool setsig) { int ret = 0; struct fown_struct *f_owner; might_sleep(); if (setsig) { if (!valid_signal(signum)) return -EINVAL; ret = file_f_owner_allocate(filp); if (ret) return ret; } f_owner = file_f_owner(filp); if (setsig) f_owner->signum = signum; else if (f_owner) ret = f_owner->signum; return ret; } static long do_fcntl(int fd, unsigned int cmd, unsigned long arg, struct file *filp) { void __user *argp = (void __user *)arg; int argi = (int)arg; struct flock flock; long err = -EINVAL; switch (cmd) { case F_CREATED_QUERY: err = f_created_query(filp); break; case F_DUPFD: err = f_dupfd(argi, filp, 0); break; case F_DUPFD_CLOEXEC: err = f_dupfd(argi, filp, O_CLOEXEC); break; case F_DUPFD_QUERY: err = f_dupfd_query(argi, filp); break; case F_GETFD: err = get_close_on_exec(fd) ? FD_CLOEXEC : 0; break; case F_SETFD: err = 0; set_close_on_exec(fd, argi & FD_CLOEXEC); break; case F_GETFL: err = filp->f_flags; break; case F_SETFL: err = setfl(fd, filp, argi); break; #if BITS_PER_LONG != 32 /* 32-bit arches must use fcntl64() */ case F_OFD_GETLK: #endif case F_GETLK: if (copy_from_user(&flock, argp, sizeof(flock))) return -EFAULT; err = fcntl_getlk(filp, cmd, &flock); if (!err && copy_to_user(argp, &flock, sizeof(flock))) return -EFAULT; break; #if BITS_PER_LONG != 32 /* 32-bit arches must use fcntl64() */ case F_OFD_SETLK: case F_OFD_SETLKW: fallthrough; #endif case F_SETLK: case F_SETLKW: if (copy_from_user(&flock, argp, sizeof(flock))) return -EFAULT; err = fcntl_setlk(fd, filp, cmd, &flock); break; case F_GETOWN: /* * XXX If f_owner is a process group, the * negative return value will get converted * into an error. Oops. If we keep the * current syscall conventions, the only way * to fix this will be in libc. */ err = f_getown(filp); force_successful_syscall_return(); break; case F_SETOWN: err = f_setown(filp, argi, 1); break; case F_GETOWN_EX: err = f_getown_ex(filp, arg); break; case F_SETOWN_EX: err = f_setown_ex(filp, arg); break; case F_GETOWNER_UIDS: err = f_getowner_uids(filp, arg); break; case F_GETSIG: err = f_owner_sig(filp, 0, false); break; case F_SETSIG: err = f_owner_sig(filp, argi, true); break; case F_GETLEASE: err = fcntl_getlease(filp); break; case F_SETLEASE: err = fcntl_setlease(fd, filp, argi); break; case F_NOTIFY: err = fcntl_dirnotify(fd, filp, argi); break; case F_SETPIPE_SZ: case F_GETPIPE_SZ: err = pipe_fcntl(filp, cmd, argi); break; case F_ADD_SEALS: case F_GET_SEALS: err = memfd_fcntl(filp, cmd, argi); break; case F_GET_RW_HINT: err = fcntl_get_rw_hint(filp, cmd, arg); break; case F_SET_RW_HINT: err = fcntl_set_rw_hint(filp, cmd, arg); break; default: break; } return err; } static int check_fcntl_cmd(unsigned cmd) { switch (cmd) { case F_CREATED_QUERY: case F_DUPFD: case F_DUPFD_CLOEXEC: case F_DUPFD_QUERY: case F_GETFD: case F_SETFD: case F_GETFL: return 1; } return 0; } SYSCALL_DEFINE3(fcntl, unsigned int, fd, unsigned int, cmd, unsigned long, arg) { CLASS(fd_raw, f)(fd); long err; if (fd_empty(f)) return -EBADF; if (unlikely(fd_file(f)->f_mode & FMODE_PATH)) { if (!check_fcntl_cmd(cmd)) return -EBADF; } err = security_file_fcntl(fd_file(f), cmd, arg); if (!err) err = do_fcntl(fd, cmd, arg, fd_file(f)); return err; } #if BITS_PER_LONG == 32 SYSCALL_DEFINE3(fcntl64, unsigned int, fd, unsigned int, cmd, unsigned long, arg) { void __user *argp = (void __user *)arg; CLASS(fd_raw, f)(fd); struct flock64 flock; long err; if (fd_empty(f)) return -EBADF; if (unlikely(fd_file(f)->f_mode & FMODE_PATH)) { if (!check_fcntl_cmd(cmd)) return -EBADF; } err = security_file_fcntl(fd_file(f), cmd, arg); if (err) return err; switch (cmd) { case F_GETLK64: case F_OFD_GETLK: err = -EFAULT; if (copy_from_user(&flock, argp, sizeof(flock))) break; err = fcntl_getlk64(fd_file(f), cmd, &flock); if (!err && copy_to_user(argp, &flock, sizeof(flock))) err = -EFAULT; break; case F_SETLK64: case F_SETLKW64: case F_OFD_SETLK: case F_OFD_SETLKW: err = -EFAULT; if (copy_from_user(&flock, argp, sizeof(flock))) break; err = fcntl_setlk64(fd, fd_file(f), cmd, &flock); break; default: err = do_fcntl(fd, cmd, arg, fd_file(f)); break; } return err; } #endif #ifdef CONFIG_COMPAT /* careful - don't use anywhere else */ #define copy_flock_fields(dst, src) \ (dst)->l_type = (src)->l_type; \ (dst)->l_whence = (src)->l_whence; \ (dst)->l_start = (src)->l_start; \ (dst)->l_len = (src)->l_len; \ (dst)->l_pid = (src)->l_pid; static int get_compat_flock(struct flock *kfl, const struct compat_flock __user *ufl) { struct compat_flock fl; if (copy_from_user(&fl, ufl, sizeof(struct compat_flock))) return -EFAULT; copy_flock_fields(kfl, &fl); return 0; } static int get_compat_flock64(struct flock *kfl, const struct compat_flock64 __user *ufl) { struct compat_flock64 fl; if (copy_from_user(&fl, ufl, sizeof(struct compat_flock64))) return -EFAULT; copy_flock_fields(kfl, &fl); return 0; } static int put_compat_flock(const struct flock *kfl, struct compat_flock __user *ufl) { struct compat_flock fl; memset(&fl, 0, sizeof(struct compat_flock)); copy_flock_fields(&fl, kfl); if (copy_to_user(ufl, &fl, sizeof(struct compat_flock))) return -EFAULT; return 0; } static int put_compat_flock64(const struct flock *kfl, struct compat_flock64 __user *ufl) { struct compat_flock64 fl; BUILD_BUG_ON(sizeof(kfl->l_start) > sizeof(ufl->l_start)); BUILD_BUG_ON(sizeof(kfl->l_len) > sizeof(ufl->l_len)); memset(&fl, 0, sizeof(struct compat_flock64)); copy_flock_fields(&fl, kfl); if (copy_to_user(ufl, &fl, sizeof(struct compat_flock64))) return -EFAULT; return 0; } #undef copy_flock_fields static unsigned int convert_fcntl_cmd(unsigned int cmd) { switch (cmd) { case F_GETLK64: return F_GETLK; case F_SETLK64: return F_SETLK; case F_SETLKW64: return F_SETLKW; } return cmd; } /* * GETLK was successful and we need to return the data, but it needs to fit in * the compat structure. * l_start shouldn't be too big, unless the original start + end is greater than * COMPAT_OFF_T_MAX, in which case the app was asking for trouble, so we return * -EOVERFLOW in that case. l_len could be too big, in which case we just * truncate it, and only allow the app to see that part of the conflicting lock * that might make sense to it anyway */ static int fixup_compat_flock(struct flock *flock) { if (flock->l_start > COMPAT_OFF_T_MAX) return -EOVERFLOW; if (flock->l_len > COMPAT_OFF_T_MAX) flock->l_len = COMPAT_OFF_T_MAX; return 0; } static long do_compat_fcntl64(unsigned int fd, unsigned int cmd, compat_ulong_t arg) { CLASS(fd_raw, f)(fd); struct flock flock; long err; if (fd_empty(f)) return -EBADF; if (unlikely(fd_file(f)->f_mode & FMODE_PATH)) { if (!check_fcntl_cmd(cmd)) return -EBADF; } err = security_file_fcntl(fd_file(f), cmd, arg); if (err) return err; switch (cmd) { case F_GETLK: err = get_compat_flock(&flock, compat_ptr(arg)); if (err) break; err = fcntl_getlk(fd_file(f), convert_fcntl_cmd(cmd), &flock); if (err) break; err = fixup_compat_flock(&flock); if (!err) err = put_compat_flock(&flock, compat_ptr(arg)); break; case F_GETLK64: case F_OFD_GETLK: err = get_compat_flock64(&flock, compat_ptr(arg)); if (err) break; err = fcntl_getlk(fd_file(f), convert_fcntl_cmd(cmd), &flock); if (!err) err = put_compat_flock64(&flock, compat_ptr(arg)); break; case F_SETLK: case F_SETLKW: err = get_compat_flock(&flock, compat_ptr(arg)); if (err) break; err = fcntl_setlk(fd, fd_file(f), convert_fcntl_cmd(cmd), &flock); break; case F_SETLK64: case F_SETLKW64: case F_OFD_SETLK: case F_OFD_SETLKW: err = get_compat_flock64(&flock, compat_ptr(arg)); if (err) break; err = fcntl_setlk(fd, fd_file(f), convert_fcntl_cmd(cmd), &flock); break; default: err = do_fcntl(fd, cmd, arg, fd_file(f)); break; } return err; } COMPAT_SYSCALL_DEFINE3(fcntl64, unsigned int, fd, unsigned int, cmd, compat_ulong_t, arg) { return do_compat_fcntl64(fd, cmd, arg); } COMPAT_SYSCALL_DEFINE3(fcntl, unsigned int, fd, unsigned int, cmd, compat_ulong_t, arg) { switch (cmd) { case F_GETLK64: case F_SETLK64: case F_SETLKW64: case F_OFD_GETLK: case F_OFD_SETLK: case F_OFD_SETLKW: return -EINVAL; } return do_compat_fcntl64(fd, cmd, arg); } #endif /* Table to convert sigio signal codes into poll band bitmaps */ static const __poll_t band_table[NSIGPOLL] = { EPOLLIN | EPOLLRDNORM, /* POLL_IN */ EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND, /* POLL_OUT */ EPOLLIN | EPOLLRDNORM | EPOLLMSG, /* POLL_MSG */ EPOLLERR, /* POLL_ERR */ EPOLLPRI | EPOLLRDBAND, /* POLL_PRI */ EPOLLHUP | EPOLLERR /* POLL_HUP */ }; static inline int sigio_perm(struct task_struct *p, struct fown_struct *fown, int sig) { const struct cred *cred; int ret; rcu_read_lock(); cred = __task_cred(p); ret = ((uid_eq(fown->euid, GLOBAL_ROOT_UID) || uid_eq(fown->euid, cred->suid) || uid_eq(fown->euid, cred->uid) || uid_eq(fown->uid, cred->suid) || uid_eq(fown->uid, cred->uid)) && !security_file_send_sigiotask(p, fown, sig)); rcu_read_unlock(); return ret; } static void send_sigio_to_task(struct task_struct *p, struct fown_struct *fown, int fd, int reason, enum pid_type type) { /* * F_SETSIG can change ->signum lockless in parallel, make * sure we read it once and use the same value throughout. */ int signum = READ_ONCE(fown->signum); if (!sigio_perm(p, fown, signum)) return; switch (signum) { default: { kernel_siginfo_t si; /* Queue a rt signal with the appropriate fd as its value. We use SI_SIGIO as the source, not SI_KERNEL, since kernel signals always get delivered even if we can't queue. Failure to queue in this case _should_ be reported; we fall back to SIGIO in that case. --sct */ clear_siginfo(&si); si.si_signo = signum; si.si_errno = 0; si.si_code = reason; /* * Posix definies POLL_IN and friends to be signal * specific si_codes for SIG_POLL. Linux extended * these si_codes to other signals in a way that is * ambiguous if other signals also have signal * specific si_codes. In that case use SI_SIGIO instead * to remove the ambiguity. */ if ((signum != SIGPOLL) && sig_specific_sicodes(signum)) si.si_code = SI_SIGIO; /* Make sure we are called with one of the POLL_* reasons, otherwise we could leak kernel stack into userspace. */ BUG_ON((reason < POLL_IN) || ((reason - POLL_IN) >= NSIGPOLL)); if (reason - POLL_IN >= NSIGPOLL) si.si_band = ~0L; else si.si_band = mangle_poll(band_table[reason - POLL_IN]); si.si_fd = fd; if (!do_send_sig_info(signum, &si, p, type)) break; } fallthrough; /* fall back on the old plain SIGIO signal */ case 0: do_send_sig_info(SIGIO, SEND_SIG_PRIV, p, type); } } void send_sigio(struct fown_struct *fown, int fd, int band) { struct task_struct *p; enum pid_type type; unsigned long flags; struct pid *pid; read_lock_irqsave(&fown->lock, flags); type = fown->pid_type; pid = fown->pid; if (!pid) goto out_unlock_fown; if (type <= PIDTYPE_TGID) { rcu_read_lock(); p = pid_task(pid, PIDTYPE_PID); if (p) send_sigio_to_task(p, fown, fd, band, type); rcu_read_unlock(); } else { read_lock(&tasklist_lock); do_each_pid_task(pid, type, p) { send_sigio_to_task(p, fown, fd, band, type); } while_each_pid_task(pid, type, p); read_unlock(&tasklist_lock); } out_unlock_fown: read_unlock_irqrestore(&fown->lock, flags); } static void send_sigurg_to_task(struct task_struct *p, struct fown_struct *fown, enum pid_type type) { if (sigio_perm(p, fown, SIGURG)) do_send_sig_info(SIGURG, SEND_SIG_PRIV, p, type); } int send_sigurg(struct file *file) { struct fown_struct *fown; struct task_struct *p; enum pid_type type; struct pid *pid; unsigned long flags; int ret = 0; fown = file_f_owner(file); if (!fown) return 0; read_lock_irqsave(&fown->lock, flags); type = fown->pid_type; pid = fown->pid; if (!pid) goto out_unlock_fown; ret = 1; if (type <= PIDTYPE_TGID) { rcu_read_lock(); p = pid_task(pid, PIDTYPE_PID); if (p) send_sigurg_to_task(p, fown, type); rcu_read_unlock(); } else { read_lock(&tasklist_lock); do_each_pid_task(pid, type, p) { send_sigurg_to_task(p, fown, type); } while_each_pid_task(pid, type, p); read_unlock(&tasklist_lock); } out_unlock_fown: read_unlock_irqrestore(&fown->lock, flags); return ret; } static DEFINE_SPINLOCK(fasync_lock); static struct kmem_cache *fasync_cache __ro_after_init; /* * Remove a fasync entry. If successfully removed, return * positive and clear the FASYNC flag. If no entry exists, * do nothing and return 0. * * NOTE! It is very important that the FASYNC flag always * match the state "is the filp on a fasync list". * */ int fasync_remove_entry(struct file *filp, struct fasync_struct **fapp) { struct fasync_struct *fa, **fp; int result = 0; spin_lock(&filp->f_lock); spin_lock(&fasync_lock); for (fp = fapp; (fa = *fp) != NULL; fp = &fa->fa_next) { if (fa->fa_file != filp) continue; write_lock_irq(&fa->fa_lock); fa->fa_file = NULL; write_unlock_irq(&fa->fa_lock); *fp = fa->fa_next; kfree_rcu(fa, fa_rcu); filp->f_flags &= ~FASYNC; result = 1; break; } spin_unlock(&fasync_lock); spin_unlock(&filp->f_lock); return result; } struct fasync_struct *fasync_alloc(void) { return kmem_cache_alloc(fasync_cache, GFP_KERNEL); } /* * NOTE! This can be used only for unused fasync entries: * entries that actually got inserted on the fasync list * need to be released by rcu - see fasync_remove_entry. */ void fasync_free(struct fasync_struct *new) { kmem_cache_free(fasync_cache, new); } /* * Insert a new entry into the fasync list. Return the pointer to the * old one if we didn't use the new one. * * NOTE! It is very important that the FASYNC flag always * match the state "is the filp on a fasync list". */ struct fasync_struct *fasync_insert_entry(int fd, struct file *filp, struct fasync_struct **fapp, struct fasync_struct *new) { struct fasync_struct *fa, **fp; spin_lock(&filp->f_lock); spin_lock(&fasync_lock); for (fp = fapp; (fa = *fp) != NULL; fp = &fa->fa_next) { if (fa->fa_file != filp) continue; write_lock_irq(&fa->fa_lock); fa->fa_fd = fd; write_unlock_irq(&fa->fa_lock); goto out; } rwlock_init(&new->fa_lock); new->magic = FASYNC_MAGIC; new->fa_file = filp; new->fa_fd = fd; new->fa_next = *fapp; rcu_assign_pointer(*fapp, new); filp->f_flags |= FASYNC; out: spin_unlock(&fasync_lock); spin_unlock(&filp->f_lock); return fa; } /* * Add a fasync entry. Return negative on error, positive if * added, and zero if did nothing but change an existing one. */ static int fasync_add_entry(int fd, struct file *filp, struct fasync_struct **fapp) { struct fasync_struct *new; new = fasync_alloc(); if (!new) return -ENOMEM; /* * fasync_insert_entry() returns the old (update) entry if * it existed. * * So free the (unused) new entry and return 0 to let the * caller know that we didn't add any new fasync entries. */ if (fasync_insert_entry(fd, filp, fapp, new)) { fasync_free(new); return 0; } return 1; } /* * fasync_helper() is used by almost all character device drivers * to set up the fasync queue, and for regular files by the file * lease code. It returns negative on error, 0 if it did no changes * and positive if it added/deleted the entry. */ int fasync_helper(int fd, struct file * filp, int on, struct fasync_struct **fapp) { if (!on) return fasync_remove_entry(filp, fapp); return fasync_add_entry(fd, filp, fapp); } EXPORT_SYMBOL(fasync_helper); /* * rcu_read_lock() is held */ static void kill_fasync_rcu(struct fasync_struct *fa, int sig, int band) { while (fa) { struct fown_struct *fown; unsigned long flags; if (fa->magic != FASYNC_MAGIC) { printk(KERN_ERR "kill_fasync: bad magic number in " "fasync_struct!\n"); return; } read_lock_irqsave(&fa->fa_lock, flags); if (fa->fa_file) { fown = file_f_owner(fa->fa_file); if (!fown) goto next; /* Don't send SIGURG to processes which have not set a queued signum: SIGURG has its own default signalling mechanism. */ if (!(sig == SIGURG && fown->signum == 0)) send_sigio(fown, fa->fa_fd, band); } next: read_unlock_irqrestore(&fa->fa_lock, flags); fa = rcu_dereference(fa->fa_next); } } void kill_fasync(struct fasync_struct **fp, int sig, int band) { /* First a quick test without locking: usually * the list is empty. */ if (*fp) { rcu_read_lock(); kill_fasync_rcu(rcu_dereference(*fp), sig, band); rcu_read_unlock(); } } EXPORT_SYMBOL(kill_fasync); static int __init fcntl_init(void) { /* * Please add new bits here to ensure allocation uniqueness. * Exceptions: O_NONBLOCK is a two bit define on parisc; O_NDELAY * is defined as O_NONBLOCK on some platforms and not on others. */ BUILD_BUG_ON(20 - 1 /* for O_RDONLY being 0 */ != HWEIGHT32( (VALID_OPEN_FLAGS & ~(O_NONBLOCK | O_NDELAY)) | __FMODE_EXEC)); fasync_cache = kmem_cache_create("fasync_cache", sizeof(struct fasync_struct), 0, SLAB_PANIC | SLAB_ACCOUNT, NULL); return 0; } module_init(fcntl_init)
415 414 415 439 437 388 438 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 // SPDX-License-Identifier: GPL-2.0 #include "alloc_cache.h" void io_alloc_cache_free(struct io_alloc_cache *cache, void (*free)(const void *)) { void *entry; if (!cache->entries) return; while ((entry = io_alloc_cache_get(cache)) != NULL) free(entry); kvfree(cache->entries); cache->entries = NULL; } /* returns false if the cache was initialized properly */ bool io_alloc_cache_init(struct io_alloc_cache *cache, unsigned max_nr, unsigned int size, unsigned int init_bytes) { cache->entries = kvmalloc_array(max_nr, sizeof(void *), GFP_KERNEL); if (!cache->entries) return true; cache->nr_cached = 0; cache->max_cached = max_nr; cache->elem_size = size; cache->init_clear = init_bytes; return false; } void *io_cache_alloc_new(struct io_alloc_cache *cache, gfp_t gfp) { void *obj; obj = kmalloc(cache->elem_size, gfp); if (obj && cache->init_clear) memset(obj, 0, cache->init_clear); return obj; }
8 8 8 8 8 8 6 8 8 4 8 8 8 8 8 8 4 4 4 8 8 8 8 8 5 1 1 1 5 4 5 5 5 8 8 6 2 8 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 // SPDX-License-Identifier: GPL-2.0+ OR BSD-2-Clause /* * Streebog hash function as specified by GOST R 34.11-2012 and * described at https://tools.ietf.org/html/rfc6986 * * Copyright (c) 2013 Alexey Degtyarev <alexey@renatasystems.org> * Copyright (c) 2018 Vitaly Chikunov <vt@altlinux.org> * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the Free * Software Foundation; either version 2 of the License, or (at your option) * any later version. */ #include <crypto/internal/hash.h> #include <linux/module.h> #include <linux/crypto.h> #include <crypto/streebog.h> static const struct streebog_uint512 buffer0 = { { 0, 0, 0, 0, 0, 0, 0, 0 } }; static const struct streebog_uint512 buffer512 = { { cpu_to_le64(0x200), 0, 0, 0, 0, 0, 0, 0 } }; static const struct streebog_uint512 C[12] = { { { cpu_to_le64(0xdd806559f2a64507ULL), cpu_to_le64(0x05767436cc744d23ULL), cpu_to_le64(0xa2422a08a460d315ULL), cpu_to_le64(0x4b7ce09192676901ULL), cpu_to_le64(0x714eb88d7585c4fcULL), cpu_to_le64(0x2f6a76432e45d016ULL), cpu_to_le64(0xebcb2f81c0657c1fULL), cpu_to_le64(0xb1085bda1ecadae9ULL) } }, { { cpu_to_le64(0xe679047021b19bb7ULL), cpu_to_le64(0x55dda21bd7cbcd56ULL), cpu_to_le64(0x5cb561c2db0aa7caULL), cpu_to_le64(0x9ab5176b12d69958ULL), cpu_to_le64(0x61d55e0f16b50131ULL), cpu_to_le64(0xf3feea720a232b98ULL), cpu_to_le64(0x4fe39d460f70b5d7ULL), cpu_to_le64(0x6fa3b58aa99d2f1aULL) } }, { { cpu_to_le64(0x991e96f50aba0ab2ULL), cpu_to_le64(0xc2b6f443867adb31ULL), cpu_to_le64(0xc1c93a376062db09ULL), cpu_to_le64(0xd3e20fe490359eb1ULL), cpu_to_le64(0xf2ea7514b1297b7bULL), cpu_to_le64(0x06f15e5f529c1f8bULL), cpu_to_le64(0x0a39fc286a3d8435ULL), cpu_to_le64(0xf574dcac2bce2fc7ULL) } }, { { cpu_to_le64(0x220cbebc84e3d12eULL), cpu_to_le64(0x3453eaa193e837f1ULL), cpu_to_le64(0xd8b71333935203beULL), cpu_to_le64(0xa9d72c82ed03d675ULL), cpu_to_le64(0x9d721cad685e353fULL), cpu_to_le64(0x488e857e335c3c7dULL), cpu_to_le64(0xf948e1a05d71e4ddULL), cpu_to_le64(0xef1fdfb3e81566d2ULL) } }, { { cpu_to_le64(0x601758fd7c6cfe57ULL), cpu_to_le64(0x7a56a27ea9ea63f5ULL), cpu_to_le64(0xdfff00b723271a16ULL), cpu_to_le64(0xbfcd1747253af5a3ULL), cpu_to_le64(0x359e35d7800fffbdULL), cpu_to_le64(0x7f151c1f1686104aULL), cpu_to_le64(0x9a3f410c6ca92363ULL), cpu_to_le64(0x4bea6bacad474799ULL) } }, { { cpu_to_le64(0xfa68407a46647d6eULL), cpu_to_le64(0xbf71c57236904f35ULL), cpu_to_le64(0x0af21f66c2bec6b6ULL), cpu_to_le64(0xcffaa6b71c9ab7b4ULL), cpu_to_le64(0x187f9ab49af08ec6ULL), cpu_to_le64(0x2d66c4f95142a46cULL), cpu_to_le64(0x6fa4c33b7a3039c0ULL), cpu_to_le64(0xae4faeae1d3ad3d9ULL) } }, { { cpu_to_le64(0x8886564d3a14d493ULL), cpu_to_le64(0x3517454ca23c4af3ULL), cpu_to_le64(0x06476983284a0504ULL), cpu_to_le64(0x0992abc52d822c37ULL), cpu_to_le64(0xd3473e33197a93c9ULL), cpu_to_le64(0x399ec6c7e6bf87c9ULL), cpu_to_le64(0x51ac86febf240954ULL), cpu_to_le64(0xf4c70e16eeaac5ecULL) } }, { { cpu_to_le64(0xa47f0dd4bf02e71eULL), cpu_to_le64(0x36acc2355951a8d9ULL), cpu_to_le64(0x69d18d2bd1a5c42fULL), cpu_to_le64(0xf4892bcb929b0690ULL), cpu_to_le64(0x89b4443b4ddbc49aULL), cpu_to_le64(0x4eb7f8719c36de1eULL), cpu_to_le64(0x03e7aa020c6e4141ULL), cpu_to_le64(0x9b1f5b424d93c9a7ULL) } }, { { cpu_to_le64(0x7261445183235adbULL), cpu_to_le64(0x0e38dc92cb1f2a60ULL), cpu_to_le64(0x7b2b8a9aa6079c54ULL), cpu_to_le64(0x800a440bdbb2ceb1ULL), cpu_to_le64(0x3cd955b7e00d0984ULL), cpu_to_le64(0x3a7d3a1b25894224ULL), cpu_to_le64(0x944c9ad8ec165fdeULL), cpu_to_le64(0x378f5a541631229bULL) } }, { { cpu_to_le64(0x74b4c7fb98459cedULL), cpu_to_le64(0x3698fad1153bb6c3ULL), cpu_to_le64(0x7a1e6c303b7652f4ULL), cpu_to_le64(0x9fe76702af69334bULL), cpu_to_le64(0x1fffe18a1b336103ULL), cpu_to_le64(0x8941e71cff8a78dbULL), cpu_to_le64(0x382ae548b2e4f3f3ULL), cpu_to_le64(0xabbedea680056f52ULL) } }, { { cpu_to_le64(0x6bcaa4cd81f32d1bULL), cpu_to_le64(0xdea2594ac06fd85dULL), cpu_to_le64(0xefbacd1d7d476e98ULL), cpu_to_le64(0x8a1d71efea48b9caULL), cpu_to_le64(0x2001802114846679ULL), cpu_to_le64(0xd8fa6bbbebab0761ULL), cpu_to_le64(0x3002c6cd635afe94ULL), cpu_to_le64(0x7bcd9ed0efc889fbULL) } }, { { cpu_to_le64(0x48bc924af11bd720ULL), cpu_to_le64(0xfaf417d5d9b21b99ULL), cpu_to_le64(0xe71da4aa88e12852ULL), cpu_to_le64(0x5d80ef9d1891cc86ULL), cpu_to_le64(0xf82012d430219f9bULL), cpu_to_le64(0xcda43c32bcdf1d77ULL), cpu_to_le64(0xd21380b00449b17aULL), cpu_to_le64(0x378ee767f11631baULL) } } }; static const unsigned long long Ax[8][256] = { { 0xd01f715b5c7ef8e6ULL, 0x16fa240980778325ULL, 0xa8a42e857ee049c8ULL, 0x6ac1068fa186465bULL, 0x6e417bd7a2e9320bULL, 0x665c8167a437daabULL, 0x7666681aa89617f6ULL, 0x4b959163700bdcf5ULL, 0xf14be6b78df36248ULL, 0xc585bd689a625cffULL, 0x9557d7fca67d82cbULL, 0x89f0b969af6dd366ULL, 0xb0833d48749f6c35ULL, 0xa1998c23b1ecbc7cULL, 0x8d70c431ac02a736ULL, 0xd6dfbc2fd0a8b69eULL, 0x37aeb3e551fa198bULL, 0x0b7d128a40b5cf9cULL, 0x5a8f2008b5780cbcULL, 0xedec882284e333e5ULL, 0xd25fc177d3c7c2ceULL, 0x5e0f5d50b61778ecULL, 0x1d873683c0c24cb9ULL, 0xad040bcbb45d208cULL, 0x2f89a0285b853c76ULL, 0x5732fff6791b8d58ULL, 0x3e9311439ef6ec3fULL, 0xc9183a809fd3c00fULL, 0x83adf3f5260a01eeULL, 0xa6791941f4e8ef10ULL, 0x103ae97d0ca1cd5dULL, 0x2ce948121dee1b4aULL, 0x39738421dbf2bf53ULL, 0x093da2a6cf0cf5b4ULL, 0xcd9847d89cbcb45fULL, 0xf9561c078b2d8ae8ULL, 0x9c6a755a6971777fULL, 0xbc1ebaa0712ef0c5ULL, 0x72e61542abf963a6ULL, 0x78bb5fde229eb12eULL, 0x14ba94250fceb90dULL, 0x844d6697630e5282ULL, 0x98ea08026a1e032fULL, 0xf06bbea144217f5cULL, 0xdb6263d11ccb377aULL, 0x641c314b2b8ee083ULL, 0x320e96ab9b4770cfULL, 0x1ee7deb986a96b85ULL, 0xe96cf57a878c47b5ULL, 0xfdd6615f8842feb8ULL, 0xc83862965601dd1bULL, 0x2ea9f83e92572162ULL, 0xf876441142ff97fcULL, 0xeb2c455608357d9dULL, 0x5612a7e0b0c9904cULL, 0x6c01cbfb2d500823ULL, 0x4548a6a7fa037a2dULL, 0xabc4c6bf388b6ef4ULL, 0xbade77d4fdf8bebdULL, 0x799b07c8eb4cac3aULL, 0x0c9d87e805b19cf0ULL, 0xcb588aac106afa27ULL, 0xea0c1d40c1e76089ULL, 0x2869354a1e816f1aULL, 0xff96d17307fbc490ULL, 0x9f0a9d602f1a5043ULL, 0x96373fc6e016a5f7ULL, 0x5292dab8b3a6e41cULL, 0x9b8ae0382c752413ULL, 0x4f15ec3b7364a8a5ULL, 0x3fb349555724f12bULL, 0xc7c50d4415db66d7ULL, 0x92b7429ee379d1a7ULL, 0xd37f99611a15dfdaULL, 0x231427c05e34a086ULL, 0xa439a96d7b51d538ULL, 0xb403401077f01865ULL, 0xdda2aea5901d7902ULL, 0x0a5d4a9c8967d288ULL, 0xc265280adf660f93ULL, 0x8bb0094520d4e94eULL, 0x2a29856691385532ULL, 0x42a833c5bf072941ULL, 0x73c64d54622b7eb2ULL, 0x07e095624504536cULL, 0x8a905153e906f45aULL, 0x6f6123c16b3b2f1fULL, 0xc6e55552dc097bc3ULL, 0x4468feb133d16739ULL, 0xe211e7f0c7398829ULL, 0xa2f96419f7879b40ULL, 0x19074bdbc3ad38e9ULL, 0xf4ebc3f9474e0b0cULL, 0x43886bd376d53455ULL, 0xd8028beb5aa01046ULL, 0x51f23282f5cdc320ULL, 0xe7b1c2be0d84e16dULL, 0x081dfab006dee8a0ULL, 0x3b33340d544b857bULL, 0x7f5bcabc679ae242ULL, 0x0edd37c48a08a6d8ULL, 0x81ed43d9a9b33bc6ULL, 0xb1a3655ebd4d7121ULL, 0x69a1eeb5e7ed6167ULL, 0xf6ab73d5c8f73124ULL, 0x1a67a3e185c61fd5ULL, 0x2dc91004d43c065eULL, 0x0240b02c8fb93a28ULL, 0x90f7f2b26cc0eb8fULL, 0x3cd3a16f114fd617ULL, 0xaae49ea9f15973e0ULL, 0x06c0cd748cd64e78ULL, 0xda423bc7d5192a6eULL, 0xc345701c16b41287ULL, 0x6d2193ede4821537ULL, 0xfcf639494190e3acULL, 0x7c3b228621f1c57eULL, 0xfb16ac2b0494b0c0ULL, 0xbf7e529a3745d7f9ULL, 0x6881b6a32e3f7c73ULL, 0xca78d2bad9b8e733ULL, 0xbbfe2fc2342aa3a9ULL, 0x0dbddffecc6381e4ULL, 0x70a6a56e2440598eULL, 0xe4d12a844befc651ULL, 0x8c509c2765d0ba22ULL, 0xee8c6018c28814d9ULL, 0x17da7c1f49a59e31ULL, 0x609c4c1328e194d3ULL, 0xb3e3d57232f44b09ULL, 0x91d7aaa4a512f69bULL, 0x0ffd6fd243dabbccULL, 0x50d26a943c1fde34ULL, 0x6be15e9968545b4fULL, 0x94778fea6faf9fdfULL, 0x2b09dd7058ea4826ULL, 0x677cd9716de5c7bfULL, 0x49d5214fffb2e6ddULL, 0x0360e83a466b273cULL, 0x1fc786af4f7b7691ULL, 0xa0b9d435783ea168ULL, 0xd49f0c035f118cb6ULL, 0x01205816c9d21d14ULL, 0xac2453dd7d8f3d98ULL, 0x545217cc3f70aa64ULL, 0x26b4028e9489c9c2ULL, 0xdec2469fd6765e3eULL, 0x04807d58036f7450ULL, 0xe5f17292823ddb45ULL, 0xf30b569b024a5860ULL, 0x62dcfc3fa758aefbULL, 0xe84cad6c4e5e5aa1ULL, 0xccb81fce556ea94bULL, 0x53b282ae7a74f908ULL, 0x1b47fbf74c1402c1ULL, 0x368eebf39828049fULL, 0x7afbeff2ad278b06ULL, 0xbe5e0a8cfe97caedULL, 0xcfd8f7f413058e77ULL, 0xf78b2bc301252c30ULL, 0x4d555c17fcdd928dULL, 0x5f2f05467fc565f8ULL, 0x24f4b2a21b30f3eaULL, 0x860dd6bbecb768aaULL, 0x4c750401350f8f99ULL, 0x0000000000000000ULL, 0xecccd0344d312ef1ULL, 0xb5231806be220571ULL, 0xc105c030990d28afULL, 0x653c695de25cfd97ULL, 0x159acc33c61ca419ULL, 0xb89ec7f872418495ULL, 0xa9847693b73254dcULL, 0x58cf90243ac13694ULL, 0x59efc832f3132b80ULL, 0x5c4fed7c39ae42c4ULL, 0x828dabe3efd81cfaULL, 0xd13f294d95ace5f2ULL, 0x7d1b7a90e823d86aULL, 0xb643f03cf849224dULL, 0x3df3f979d89dcb03ULL, 0x7426d836272f2ddeULL, 0xdfe21e891fa4432aULL, 0x3a136c1b9d99986fULL, 0xfa36f43dcd46add4ULL, 0xc025982650df35bbULL, 0x856d3e81aadc4f96ULL, 0xc4a5e57e53b041ebULL, 0x4708168b75ba4005ULL, 0xaf44bbe73be41aa4ULL, 0x971767d029c4b8e3ULL, 0xb9be9feebb939981ULL, 0x215497ecd18d9aaeULL, 0x316e7e91dd2c57f3ULL, 0xcef8afe2dad79363ULL, 0x3853dc371220a247ULL, 0x35ee03c9de4323a3ULL, 0xe6919aa8c456fc79ULL, 0xe05157dc4880b201ULL, 0x7bdbb7e464f59612ULL, 0x127a59518318f775ULL, 0x332ecebd52956ddbULL, 0x8f30741d23bb9d1eULL, 0xd922d3fd93720d52ULL, 0x7746300c61440ae2ULL, 0x25d4eab4d2e2eefeULL, 0x75068020eefd30caULL, 0x135a01474acaea61ULL, 0x304e268714fe4ae7ULL, 0xa519f17bb283c82cULL, 0xdc82f6b359cf6416ULL, 0x5baf781e7caa11a8ULL, 0xb2c38d64fb26561dULL, 0x34ce5bdf17913eb7ULL, 0x5d6fb56af07c5fd0ULL, 0x182713cd0a7f25fdULL, 0x9e2ac576e6c84d57ULL, 0x9aaab82ee5a73907ULL, 0xa3d93c0f3e558654ULL, 0x7e7b92aaae48ff56ULL, 0x872d8ead256575beULL, 0x41c8dbfff96c0e7dULL, 0x99ca5014a3cc1e3bULL, 0x40e883e930be1369ULL, 0x1ca76e95091051adULL, 0x4e35b42dbab6b5b1ULL, 0x05a0254ecabd6944ULL, 0xe1710fca8152af15ULL, 0xf22b0e8dcb984574ULL, 0xb763a82a319b3f59ULL, 0x63fca4296e8ab3efULL, 0x9d4a2d4ca0a36a6bULL, 0xe331bfe60eeb953dULL, 0xd5bf541596c391a2ULL, 0xf5cb9bef8e9c1618ULL, 0x46284e9dbc685d11ULL, 0x2074cffa185f87baULL, 0xbd3ee2b6b8fcedd1ULL, 0xae64e3f1f23607b0ULL, 0xfeb68965ce29d984ULL, 0x55724fdaf6a2b770ULL, 0x29496d5cd753720eULL, 0xa75941573d3af204ULL, 0x8e102c0bea69800aULL, 0x111ab16bc573d049ULL, 0xd7ffe439197aab8aULL, 0xefac380e0b5a09cdULL, 0x48f579593660fbc9ULL, 0x22347fd697e6bd92ULL, 0x61bc1405e13389c7ULL, 0x4ab5c975b9d9c1e1ULL, 0x80cd1bcf606126d2ULL, 0x7186fd78ed92449aULL, 0x93971a882aabccb3ULL, 0x88d0e17f66bfce72ULL, 0x27945a985d5bd4d6ULL }, { 0xde553f8c05a811c8ULL, 0x1906b59631b4f565ULL, 0x436e70d6b1964ff7ULL, 0x36d343cb8b1e9d85ULL, 0x843dfacc858aab5aULL, 0xfdfc95c299bfc7f9ULL, 0x0f634bdea1d51fa2ULL, 0x6d458b3b76efb3cdULL, 0x85c3f77cf8593f80ULL, 0x3c91315fbe737cb2ULL, 0x2148b03366ace398ULL, 0x18f8b8264c6761bfULL, 0xc830c1c495c9fb0fULL, 0x981a76102086a0aaULL, 0xaa16012142f35760ULL, 0x35cc54060c763cf6ULL, 0x42907d66cc45db2dULL, 0x8203d44b965af4bcULL, 0x3d6f3cefc3a0e868ULL, 0xbc73ff69d292bda7ULL, 0x8722ed0102e20a29ULL, 0x8f8185e8cd34deb7ULL, 0x9b0561dda7ee01d9ULL, 0x5335a0193227fad6ULL, 0xc9cecc74e81a6fd5ULL, 0x54f5832e5c2431eaULL, 0x99e47ba05d553470ULL, 0xf7bee756acd226ceULL, 0x384e05a5571816fdULL, 0xd1367452a47d0e6aULL, 0xf29fde1c386ad85bULL, 0x320c77316275f7caULL, 0xd0c879e2d9ae9ab0ULL, 0xdb7406c69110ef5dULL, 0x45505e51a2461011ULL, 0xfc029872e46c5323ULL, 0xfa3cb6f5f7bc0cc5ULL, 0x031f17cd8768a173ULL, 0xbd8df2d9af41297dULL, 0x9d3b4f5ab43e5e3fULL, 0x4071671b36feee84ULL, 0x716207e7d3e3b83dULL, 0x48d20ff2f9283a1aULL, 0x27769eb4757cbc7eULL, 0x5c56ebc793f2e574ULL, 0xa48b474f9ef5dc18ULL, 0x52cbada94ff46e0cULL, 0x60c7da982d8199c6ULL, 0x0e9d466edc068b78ULL, 0x4eec2175eaf865fcULL, 0x550b8e9e21f7a530ULL, 0x6b7ba5bc653fec2bULL, 0x5eb7f1ba6949d0ddULL, 0x57ea94e3db4c9099ULL, 0xf640eae6d101b214ULL, 0xdd4a284182c0b0bbULL, 0xff1d8fbf6304f250ULL, 0xb8accb933bf9d7e8ULL, 0xe8867c478eb68c4dULL, 0x3f8e2692391bddc1ULL, 0xcb2fd60912a15a7cULL, 0xaec935dbab983d2fULL, 0xf55ffd2b56691367ULL, 0x80e2ce366ce1c115ULL, 0x179bf3f8edb27e1dULL, 0x01fe0db07dd394daULL, 0xda8a0b76ecc37b87ULL, 0x44ae53e1df9584cbULL, 0xb310b4b77347a205ULL, 0xdfab323c787b8512ULL, 0x3b511268d070b78eULL, 0x65e6e3d2b9396753ULL, 0x6864b271e2574d58ULL, 0x259784c98fc789d7ULL, 0x02e11a7dfabb35a9ULL, 0x8841a6dfa337158bULL, 0x7ade78c39b5dcdd0ULL, 0xb7cf804d9a2cc84aULL, 0x20b6bd831b7f7742ULL, 0x75bd331d3a88d272ULL, 0x418f6aab4b2d7a5eULL, 0xd9951cbb6babdaf4ULL, 0xb6318dfde7ff5c90ULL, 0x1f389b112264aa83ULL, 0x492c024284fbaec0ULL, 0xe33a0363c608f9a0ULL, 0x2688930408af28a4ULL, 0xc7538a1a341ce4adULL, 0x5da8e677ee2171aeULL, 0x8c9e92254a5c7fc4ULL, 0x63d8cd55aae938b5ULL, 0x29ebd8daa97a3706ULL, 0x959827b37be88aa1ULL, 0x1484e4356adadf6eULL, 0xa7945082199d7d6bULL, 0xbf6ce8a455fa1cd4ULL, 0x9cc542eac9edcae5ULL, 0x79c16f0e1c356ca3ULL, 0x89bfab6fdee48151ULL, 0xd4174d1830c5f0ffULL, 0x9258048415eb419dULL, 0x6139d72850520d1cULL, 0x6a85a80c18ec78f1ULL, 0xcd11f88e0171059aULL, 0xcceff53e7ca29140ULL, 0xd229639f2315af19ULL, 0x90b91ef9ef507434ULL, 0x5977d28d074a1be1ULL, 0x311360fce51d56b9ULL, 0xc093a92d5a1f2f91ULL, 0x1a19a25bb6dc5416ULL, 0xeb996b8a09de2d3eULL, 0xfee3820f1ed7668aULL, 0xd7085ad5b7ad518cULL, 0x7fff41890fe53345ULL, 0xec5948bd67dde602ULL, 0x2fd5f65dbaaa68e0ULL, 0xa5754affe32648c2ULL, 0xf8ddac880d07396cULL, 0x6fa491468c548664ULL, 0x0c7c5c1326bdbed1ULL, 0x4a33158f03930fb3ULL, 0x699abfc19f84d982ULL, 0xe4fa2054a80b329cULL, 0x6707f9af438252faULL, 0x08a368e9cfd6d49eULL, 0x47b1442c58fd25b8ULL, 0xbbb3dc5ebc91769bULL, 0x1665fe489061eac7ULL, 0x33f27a811fa66310ULL, 0x93a609346838d547ULL, 0x30ed6d4c98cec263ULL, 0x1dd9816cd8df9f2aULL, 0x94662a03063b1e7bULL, 0x83fdd9fbeb896066ULL, 0x7b207573e68e590aULL, 0x5f49fc0a149a4407ULL, 0x343259b671a5a82cULL, 0xfbc2bb458a6f981fULL, 0xc272b350a0a41a38ULL, 0x3aaf1fd8ada32354ULL, 0x6cbb868b0b3c2717ULL, 0xa2b569c88d2583feULL, 0xf180c9d1bf027928ULL, 0xaf37386bd64ba9f5ULL, 0x12bacab2790a8088ULL, 0x4c0d3b0810435055ULL, 0xb2eeb9070e9436dfULL, 0xc5b29067cea7d104ULL, 0xdcb425f1ff132461ULL, 0x4f122cc5972bf126ULL, 0xac282fa651230886ULL, 0xe7e537992f6393efULL, 0xe61b3a2952b00735ULL, 0x709c0a57ae302ce7ULL, 0xe02514ae416058d3ULL, 0xc44c9dd7b37445deULL, 0x5a68c5408022ba92ULL, 0x1c278cdca50c0bf0ULL, 0x6e5a9cf6f18712beULL, 0x86dce0b17f319ef3ULL, 0x2d34ec2040115d49ULL, 0x4bcd183f7e409b69ULL, 0x2815d56ad4a9a3dcULL, 0x24698979f2141d0dULL, 0x0000000000000000ULL, 0x1ec696a15fb73e59ULL, 0xd86b110b16784e2eULL, 0x8e7f8858b0e74a6dULL, 0x063e2e8713d05fe6ULL, 0xe2c40ed3bbdb6d7aULL, 0xb1f1aeca89fc97acULL, 0xe1db191e3cb3cc09ULL, 0x6418ee62c4eaf389ULL, 0xc6ad87aa49cf7077ULL, 0xd6f65765ca7ec556ULL, 0x9afb6c6dda3d9503ULL, 0x7ce05644888d9236ULL, 0x8d609f95378feb1eULL, 0x23a9aa4e9c17d631ULL, 0x6226c0e5d73aac6fULL, 0x56149953a69f0443ULL, 0xeeb852c09d66d3abULL, 0x2b0ac2a753c102afULL, 0x07c023376e03cb3cULL, 0x2ccae1903dc2c993ULL, 0xd3d76e2f5ec63bc3ULL, 0x9e2458973356ff4cULL, 0xa66a5d32644ee9b1ULL, 0x0a427294356de137ULL, 0x783f62be61e6f879ULL, 0x1344c70204d91452ULL, 0x5b96c8f0fdf12e48ULL, 0xa90916ecc59bf613ULL, 0xbe92e5142829880eULL, 0x727d102a548b194eULL, 0x1be7afebcb0fc0ccULL, 0x3e702b2244c8491bULL, 0xd5e940a84d166425ULL, 0x66f9f41f3e51c620ULL, 0xabe80c913f20c3baULL, 0xf07ec461c2d1edf2ULL, 0xf361d3ac45b94c81ULL, 0x0521394a94b8fe95ULL, 0xadd622162cf09c5cULL, 0xe97871f7f3651897ULL, 0xf4a1f09b2bba87bdULL, 0x095d6559b2054044ULL, 0x0bbc7f2448be75edULL, 0x2af4cf172e129675ULL, 0x157ae98517094bb4ULL, 0x9fda55274e856b96ULL, 0x914713499283e0eeULL, 0xb952c623462a4332ULL, 0x74433ead475b46a8ULL, 0x8b5eb112245fb4f8ULL, 0xa34b6478f0f61724ULL, 0x11a5dd7ffe6221fbULL, 0xc16da49d27ccbb4bULL, 0x76a224d0bde07301ULL, 0x8aa0bca2598c2022ULL, 0x4df336b86d90c48fULL, 0xea67663a740db9e4ULL, 0xef465f70e0b54771ULL, 0x39b008152acb8227ULL, 0x7d1e5bf4f55e06ecULL, 0x105bd0cf83b1b521ULL, 0x775c2960c033e7dbULL, 0x7e014c397236a79fULL, 0x811cc386113255cfULL, 0xeda7450d1a0e72d8ULL, 0x5889df3d7a998f3bULL, 0x2e2bfbedc779fc3aULL, 0xce0eef438619a4e9ULL, 0x372d4e7bf6cd095fULL, 0x04df34fae96b6a4fULL, 0xf923a13870d4adb6ULL, 0xa1aa7e050a4d228dULL, 0xa8f71b5cb84862c9ULL, 0xb52e9a306097fde3ULL, 0x0d8251a35b6e2a0bULL, 0x2257a7fee1c442ebULL, 0x73831d9a29588d94ULL, 0x51d4ba64c89ccf7fULL, 0x502ab7d4b54f5ba5ULL, 0x97793dce8153bf08ULL, 0xe5042de4d5d8a646ULL, 0x9687307efc802bd2ULL, 0xa05473b5779eb657ULL, 0xb4d097801d446939ULL, 0xcff0e2f3fbca3033ULL, 0xc38cbee0dd778ee2ULL, 0x464f499c252eb162ULL, 0xcad1dbb96f72cea6ULL, 0xba4dd1eec142e241ULL, 0xb00fa37af42f0376ULL }, { 0xcce4cd3aa968b245ULL, 0x089d5484e80b7fafULL, 0x638246c1b3548304ULL, 0xd2fe0ec8c2355492ULL, 0xa7fbdf7ff2374eeeULL, 0x4df1600c92337a16ULL, 0x84e503ea523b12fbULL, 0x0790bbfd53ab0c4aULL, 0x198a780f38f6ea9dULL, 0x2ab30c8f55ec48cbULL, 0xe0f7fed6b2c49db5ULL, 0xb6ecf3f422cadbdcULL, 0x409c9a541358df11ULL, 0xd3ce8a56dfde3fe3ULL, 0xc3e9224312c8c1a0ULL, 0x0d6dfa58816ba507ULL, 0xddf3e1b179952777ULL, 0x04c02a42748bb1d9ULL, 0x94c2abff9f2decb8ULL, 0x4f91752da8f8acf4ULL, 0x78682befb169bf7bULL, 0xe1c77a48af2ff6c4ULL, 0x0c5d7ec69c80ce76ULL, 0x4cc1e4928fd81167ULL, 0xfeed3d24d9997b62ULL, 0x518bb6dfc3a54a23ULL, 0x6dbf2d26151f9b90ULL, 0xb5bc624b05ea664fULL, 0xe86aaa525acfe21aULL, 0x4801ced0fb53a0beULL, 0xc91463e6c00868edULL, 0x1027a815cd16fe43ULL, 0xf67069a0319204cdULL, 0xb04ccc976c8abce7ULL, 0xc0b9b3fc35e87c33ULL, 0xf380c77c58f2de65ULL, 0x50bb3241de4e2152ULL, 0xdf93f490435ef195ULL, 0xf1e0d25d62390887ULL, 0xaf668bfb1a3c3141ULL, 0xbc11b251f00a7291ULL, 0x73a5eed47e427d47ULL, 0x25bee3f6ee4c3b2eULL, 0x43cc0beb34786282ULL, 0xc824e778dde3039cULL, 0xf97d86d98a327728ULL, 0xf2b043e24519b514ULL, 0xe297ebf7880f4b57ULL, 0x3a94a49a98fab688ULL, 0x868516cb68f0c419ULL, 0xeffa11af0964ee50ULL, 0xa4ab4ec0d517f37dULL, 0xa9c6b498547c567aULL, 0x8e18424f80fbbbb6ULL, 0x0bcdc53bcf2bc23cULL, 0x137739aaea3643d0ULL, 0x2c1333ec1bac2ff0ULL, 0x8d48d3f0a7db0625ULL, 0x1e1ac3f26b5de6d7ULL, 0xf520f81f16b2b95eULL, 0x9f0f6ec450062e84ULL, 0x0130849e1deb6b71ULL, 0xd45e31ab8c7533a9ULL, 0x652279a2fd14e43fULL, 0x3209f01e70f1c927ULL, 0xbe71a770cac1a473ULL, 0x0e3d6be7a64b1894ULL, 0x7ec8148cff29d840ULL, 0xcb7476c7fac3be0fULL, 0x72956a4a63a91636ULL, 0x37f95ec21991138fULL, 0x9e3fea5a4ded45f5ULL, 0x7b38ba50964902e8ULL, 0x222e580bbde73764ULL, 0x61e253e0899f55e6ULL, 0xfc8d2805e352ad80ULL, 0x35994be3235ac56dULL, 0x09add01af5e014deULL, 0x5e8659a6780539c6ULL, 0xb17c48097161d796ULL, 0x026015213acbd6e2ULL, 0xd1ae9f77e515e901ULL, 0xb7dc776a3f21b0adULL, 0xaba6a1b96eb78098ULL, 0x9bcf4486248d9f5dULL, 0x582666c536455efdULL, 0xfdbdac9bfeb9c6f1ULL, 0xc47999be4163cdeaULL, 0x765540081722a7efULL, 0x3e548ed8ec710751ULL, 0x3d041f67cb51bac2ULL, 0x7958af71ac82d40aULL, 0x36c9da5c047a78feULL, 0xed9a048e33af38b2ULL, 0x26ee7249c96c86bdULL, 0x900281bdeba65d61ULL, 0x11172c8bd0fd9532ULL, 0xea0abf73600434f8ULL, 0x42fc8f75299309f3ULL, 0x34a9cf7d3eb1ae1cULL, 0x2b838811480723baULL, 0x5ce64c8742ceef24ULL, 0x1adae9b01fd6570eULL, 0x3c349bf9d6bad1b3ULL, 0x82453c891c7b75c0ULL, 0x97923a40b80d512bULL, 0x4a61dbf1c198765cULL, 0xb48ce6d518010d3eULL, 0xcfb45c858e480fd6ULL, 0xd933cbf30d1e96aeULL, 0xd70ea014ab558e3aULL, 0xc189376228031742ULL, 0x9262949cd16d8b83ULL, 0xeb3a3bed7def5f89ULL, 0x49314a4ee6b8cbcfULL, 0xdcc3652f647e4c06ULL, 0xda635a4c2a3e2b3dULL, 0x470c21a940f3d35bULL, 0x315961a157d174b4ULL, 0x6672e81dda3459acULL, 0x5b76f77a1165e36eULL, 0x445cb01667d36ec8ULL, 0xc5491d205c88a69bULL, 0x456c34887a3805b9ULL, 0xffddb9bac4721013ULL, 0x99af51a71e4649bfULL, 0xa15be01cbc7729d5ULL, 0x52db2760e485f7b0ULL, 0x8c78576eba306d54ULL, 0xae560f6507d75a30ULL, 0x95f22f6182c687c9ULL, 0x71c5fbf54489aba5ULL, 0xca44f259e728d57eULL, 0x88b87d2ccebbdc8dULL, 0xbab18d32be4a15aaULL, 0x8be8ec93e99b611eULL, 0x17b713e89ebdf209ULL, 0xb31c5d284baa0174ULL, 0xeeca9531148f8521ULL, 0xb8d198138481c348ULL, 0x8988f9b2d350b7fcULL, 0xb9e11c8d996aa839ULL, 0x5a4673e40c8e881fULL, 0x1687977683569978ULL, 0xbf4123eed72acf02ULL, 0x4ea1f1b3b513c785ULL, 0xe767452be16f91ffULL, 0x7505d1b730021a7cULL, 0xa59bca5ec8fc980cULL, 0xad069eda20f7e7a3ULL, 0x38f4b1bba231606aULL, 0x60d2d77e94743e97ULL, 0x9affc0183966f42cULL, 0x248e6768f3a7505fULL, 0xcdd449a4b483d934ULL, 0x87b59255751baf68ULL, 0x1bea6d2e023d3c7fULL, 0x6b1f12455b5ffcabULL, 0x743555292de9710dULL, 0xd8034f6d10f5fddfULL, 0xc6198c9f7ba81b08ULL, 0xbb8109aca3a17edbULL, 0xfa2d1766ad12cabbULL, 0xc729080166437079ULL, 0x9c5fff7b77269317ULL, 0x0000000000000000ULL, 0x15d706c9a47624ebULL, 0x6fdf38072fd44d72ULL, 0x5fb6dd3865ee52b7ULL, 0xa33bf53d86bcff37ULL, 0xe657c1b5fc84fa8eULL, 0xaa962527735cebe9ULL, 0x39c43525bfda0b1bULL, 0x204e4d2a872ce186ULL, 0x7a083ece8ba26999ULL, 0x554b9c9db72efbfaULL, 0xb22cd9b656416a05ULL, 0x96a2bedea5e63a5aULL, 0x802529a826b0a322ULL, 0x8115ad363b5bc853ULL, 0x8375b81701901eb1ULL, 0x3069e53f4a3a1fc5ULL, 0xbd2136cfede119e0ULL, 0x18bafc91251d81ecULL, 0x1d4a524d4c7d5b44ULL, 0x05f0aedc6960daa8ULL, 0x29e39d3072ccf558ULL, 0x70f57f6b5962c0d4ULL, 0x989fd53903ad22ceULL, 0xf84d024797d91c59ULL, 0x547b1803aac5908bULL, 0xf0d056c37fd263f6ULL, 0xd56eb535919e58d8ULL, 0x1c7ad6d351963035ULL, 0x2e7326cd2167f912ULL, 0xac361a443d1c8cd2ULL, 0x697f076461942a49ULL, 0x4b515f6fdc731d2dULL, 0x8ad8680df4700a6fULL, 0x41ac1eca0eb3b460ULL, 0x7d988533d80965d3ULL, 0xa8f6300649973d0bULL, 0x7765c4960ac9cc9eULL, 0x7ca801adc5e20ea2ULL, 0xdea3700e5eb59ae4ULL, 0xa06b6482a19c42a4ULL, 0x6a2f96db46b497daULL, 0x27def6d7d487edccULL, 0x463ca5375d18b82aULL, 0xa6cb5be1efdc259fULL, 0x53eba3fef96e9cc1ULL, 0xce84d81b93a364a7ULL, 0xf4107c810b59d22fULL, 0x333974806d1aa256ULL, 0x0f0def79bba073e5ULL, 0x231edc95a00c5c15ULL, 0xe437d494c64f2c6cULL, 0x91320523f64d3610ULL, 0x67426c83c7df32ddULL, 0x6eefbc99323f2603ULL, 0x9d6f7be56acdf866ULL, 0x5916e25b2bae358cULL, 0x7ff89012e2c2b331ULL, 0x035091bf2720bd93ULL, 0x561b0d22900e4669ULL, 0x28d319ae6f279e29ULL, 0x2f43a2533c8c9263ULL, 0xd09e1be9f8fe8270ULL, 0xf740ed3e2c796fbcULL, 0xdb53ded237d5404cULL, 0x62b2c25faebfe875ULL, 0x0afd41a5d2c0a94dULL, 0x6412fd3ce0ff8f4eULL, 0xe3a76f6995e42026ULL, 0x6c8fa9b808f4f0e1ULL, 0xc2d9a6dd0f23aad1ULL, 0x8f28c6d19d10d0c7ULL, 0x85d587744fd0798aULL, 0xa20b71a39b579446ULL, 0x684f83fa7c7f4138ULL, 0xe507500adba4471dULL, 0x3f640a46f19a6c20ULL, 0x1247bd34f7dd28a1ULL, 0x2d23b77206474481ULL, 0x93521002cc86e0f2ULL, 0x572b89bc8de52d18ULL, 0xfb1d93f8b0f9a1caULL, 0xe95a2ecc4724896bULL, 0x3ba420048511ddf9ULL, 0xd63e248ab6bee54bULL, 0x5dd6c8195f258455ULL, 0x06a03f634e40673bULL, 0x1f2a476c76b68da6ULL, 0x217ec9b49ac78af7ULL, 0xecaa80102e4453c3ULL, 0x14e78257b99d4f9aULL }, { 0x20329b2cc87bba05ULL, 0x4f5eb6f86546a531ULL, 0xd4f44775f751b6b1ULL, 0x8266a47b850dfa8bULL, 0xbb986aa15a6ca985ULL, 0xc979eb08f9ae0f99ULL, 0x2da6f447a2375ea1ULL, 0x1e74275dcd7d8576ULL, 0xbc20180a800bc5f8ULL, 0xb4a2f701b2dc65beULL, 0xe726946f981b6d66ULL, 0x48e6c453bf21c94cULL, 0x42cad9930f0a4195ULL, 0xefa47b64aacccd20ULL, 0x71180a8960409a42ULL, 0x8bb3329bf6a44e0cULL, 0xd34c35de2d36daccULL, 0xa92f5b7cbc23dc96ULL, 0xb31a85aa68bb09c3ULL, 0x13e04836a73161d2ULL, 0xb24dfc4129c51d02ULL, 0x8ae44b70b7da5acdULL, 0xe671ed84d96579a7ULL, 0xa4bb3417d66f3832ULL, 0x4572ab38d56d2de8ULL, 0xb1b47761ea47215cULL, 0xe81c09cf70aba15dULL, 0xffbdb872ce7f90acULL, 0xa8782297fd5dc857ULL, 0x0d946f6b6a4ce4a4ULL, 0xe4df1f4f5b995138ULL, 0x9ebc71edca8c5762ULL, 0x0a2c1dc0b02b88d9ULL, 0x3b503c115d9d7b91ULL, 0xc64376a8111ec3a2ULL, 0xcec199a323c963e4ULL, 0xdc76a87ec58616f7ULL, 0x09d596e073a9b487ULL, 0x14583a9d7d560dafULL, 0xf4c6dc593f2a0cb4ULL, 0xdd21d19584f80236ULL, 0x4a4836983ddde1d3ULL, 0xe58866a41ae745f9ULL, 0xf591a5b27e541875ULL, 0x891dc05074586693ULL, 0x5b068c651810a89eULL, 0xa30346bc0c08544fULL, 0x3dbf3751c684032dULL, 0x2a1e86ec785032dcULL, 0xf73f5779fca830eaULL, 0xb60c05ca30204d21ULL, 0x0cc316802b32f065ULL, 0x8770241bdd96be69ULL, 0xb861e18199ee95dbULL, 0xf805cad91418fcd1ULL, 0x29e70dccbbd20e82ULL, 0xc7140f435060d763ULL, 0x0f3a9da0e8b0cc3bULL, 0xa2543f574d76408eULL, 0xbd7761e1c175d139ULL, 0x4b1f4f737ca3f512ULL, 0x6dc2df1f2fc137abULL, 0xf1d05c3967b14856ULL, 0xa742bf3715ed046cULL, 0x654030141d1697edULL, 0x07b872abda676c7dULL, 0x3ce84eba87fa17ecULL, 0xc1fb0403cb79afdfULL, 0x3e46bc7105063f73ULL, 0x278ae987121cd678ULL, 0xa1adb4778ef47cd0ULL, 0x26dd906c5362c2b9ULL, 0x05168060589b44e2ULL, 0xfbfc41f9d79ac08fULL, 0x0e6de44ba9ced8faULL, 0x9feb08068bf243a3ULL, 0x7b341749d06b129bULL, 0x229c69e74a87929aULL, 0xe09ee6c4427c011bULL, 0x5692e30e725c4c3aULL, 0xda99a33e5e9f6e4bULL, 0x353dd85af453a36bULL, 0x25241b4c90e0fee7ULL, 0x5de987258309d022ULL, 0xe230140fc0802984ULL, 0x93281e86a0c0b3c6ULL, 0xf229d719a4337408ULL, 0x6f6c2dd4ad3d1f34ULL, 0x8ea5b2fbae3f0aeeULL, 0x8331dd90c473ee4aULL, 0x346aa1b1b52db7aaULL, 0xdf8f235e06042aa9ULL, 0xcc6f6b68a1354b7bULL, 0x6c95a6f46ebf236aULL, 0x52d31a856bb91c19ULL, 0x1a35ded6d498d555ULL, 0xf37eaef2e54d60c9ULL, 0x72e181a9a3c2a61cULL, 0x98537aad51952fdeULL, 0x16f6c856ffaa2530ULL, 0xd960281e9d1d5215ULL, 0x3a0745fa1ce36f50ULL, 0x0b7b642bf1559c18ULL, 0x59a87eae9aec8001ULL, 0x5e100c05408bec7cULL, 0x0441f98b19e55023ULL, 0xd70dcc5534d38aefULL, 0x927f676de1bea707ULL, 0x9769e70db925e3e5ULL, 0x7a636ea29115065aULL, 0x468b201816ef11b6ULL, 0xab81a9b73edff409ULL, 0xc0ac7de88a07bb1eULL, 0x1f235eb68c0391b7ULL, 0x6056b074458dd30fULL, 0xbe8eeac102f7ed67ULL, 0xcd381283e04b5fbaULL, 0x5cbefecec277c4e3ULL, 0xd21b4c356c48ce0dULL, 0x1019c31664b35d8cULL, 0x247362a7d19eea26ULL, 0xebe582efb3299d03ULL, 0x02aef2cb82fc289fULL, 0x86275df09ce8aaa8ULL, 0x28b07427faac1a43ULL, 0x38a9b7319e1f47cfULL, 0xc82e92e3b8d01b58ULL, 0x06ef0b409b1978bcULL, 0x62f842bfc771fb90ULL, 0x9904034610eb3b1fULL, 0xded85ab5477a3e68ULL, 0x90d195a663428f98ULL, 0x5384636e2ac708d8ULL, 0xcbd719c37b522706ULL, 0xae9729d76644b0ebULL, 0x7c8c65e20a0c7ee6ULL, 0x80c856b007f1d214ULL, 0x8c0b40302cc32271ULL, 0xdbcedad51fe17a8aULL, 0x740e8ae938dbdea0ULL, 0xa615c6dc549310adULL, 0x19cc55f6171ae90bULL, 0x49b1bdb8fe5fdd8dULL, 0xed0a89af2830e5bfULL, 0x6a7aadb4f5a65bd6ULL, 0x7e22972988f05679ULL, 0xf952b3325566e810ULL, 0x39fecedadf61530eULL, 0x6101c99f04f3c7ceULL, 0x2e5f7f6761b562ffULL, 0xf08725d226cf5c97ULL, 0x63af3b54860fef51ULL, 0x8ff2cb10ef411e2fULL, 0x884ab9bb35267252ULL, 0x4df04433e7ba8daeULL, 0x9afd8866d3690741ULL, 0x66b9bb34de94abb3ULL, 0x9baaf18d92171380ULL, 0x543c11c5f0a064a5ULL, 0x17a1b1bdbed431f1ULL, 0xb5f58eeaf3a2717fULL, 0xc355f6c849858740ULL, 0xec5df044694ef17eULL, 0xd83751f5dc6346d4ULL, 0xfc4433520dfdacf2ULL, 0x0000000000000000ULL, 0x5a51f58e596ebc5fULL, 0x3285aaf12e34cf16ULL, 0x8d5c39db6dbd36b0ULL, 0x12b731dde64f7513ULL, 0x94906c2d7aa7dfbbULL, 0x302b583aacc8e789ULL, 0x9d45facd090e6b3cULL, 0x2165e2c78905aec4ULL, 0x68d45f7f775a7349ULL, 0x189b2c1d5664fdcaULL, 0xe1c99f2f030215daULL, 0x6983269436246788ULL, 0x8489af3b1e148237ULL, 0xe94b702431d5b59cULL, 0x33d2d31a6f4adbd7ULL, 0xbfd9932a4389f9a6ULL, 0xb0e30e8aab39359dULL, 0xd1e2c715afcaf253ULL, 0x150f43763c28196eULL, 0xc4ed846393e2eb3dULL, 0x03f98b20c3823c5eULL, 0xfd134ab94c83b833ULL, 0x556b682eb1de7064ULL, 0x36c4537a37d19f35ULL, 0x7559f30279a5ca61ULL, 0x799ae58252973a04ULL, 0x9c12832648707ffdULL, 0x78cd9c6913e92ec5ULL, 0x1d8dac7d0effb928ULL, 0x439da0784e745554ULL, 0x413352b3cc887dcbULL, 0xbacf134a1b12bd44ULL, 0x114ebafd25cd494dULL, 0x2f08068c20cb763eULL, 0x76a07822ba27f63fULL, 0xeab2fb04f25789c2ULL, 0xe3676de481fe3d45ULL, 0x1b62a73d95e6c194ULL, 0x641749ff5c68832cULL, 0xa5ec4dfc97112cf3ULL, 0xf6682e92bdd6242bULL, 0x3f11c59a44782bb2ULL, 0x317c21d1edb6f348ULL, 0xd65ab5be75ad9e2eULL, 0x6b2dd45fb4d84f17ULL, 0xfaab381296e4d44eULL, 0xd0b5befeeeb4e692ULL, 0x0882ef0b32d7a046ULL, 0x512a91a5a83b2047ULL, 0x963e9ee6f85bf724ULL, 0x4e09cf132438b1f0ULL, 0x77f701c9fb59e2feULL, 0x7ddb1c094b726a27ULL, 0x5f4775ee01f5f8bdULL, 0x9186ec4d223c9b59ULL, 0xfeeac1998f01846dULL, 0xac39db1ce4b89874ULL, 0xb75b7c21715e59e0ULL, 0xafc0503c273aa42aULL, 0x6e3b543fec430bf5ULL, 0x704f7362213e8e83ULL, 0x58ff0745db9294c0ULL, 0x67eec2df9feabf72ULL, 0xa0facd9ccf8a6811ULL, 0xb936986ad890811aULL, 0x95c715c63bd9cb7aULL, 0xca8060283a2c33c7ULL, 0x507de84ee9453486ULL, 0x85ded6d05f6a96f6ULL, 0x1cdad5964f81ade9ULL, 0xd5a33e9eb62fa270ULL, 0x40642b588df6690aULL, 0x7f75eec2c98e42b8ULL, 0x2cf18dace3494a60ULL, 0x23cb100c0bf9865bULL, 0xeef3028febb2d9e1ULL, 0x4425d2d394133929ULL, 0xaad6d05c7fa1e0c8ULL, 0xad6ea2f7a5c68cb5ULL, 0xc2028f2308fb9381ULL, 0x819f2f5b468fc6d5ULL, 0xc5bafd88d29cfffcULL, 0x47dc59f357910577ULL, 0x2b49ff07392e261dULL, 0x57c59ae5332258fbULL, 0x73b6f842e2bcb2ddULL, 0xcf96e04862b77725ULL, 0x4ca73dd8a6c4996fULL, 0x015779eb417e14c1ULL, 0x37932a9176af8bf4ULL }, { 0x190a2c9b249df23eULL, 0x2f62f8b62263e1e9ULL, 0x7a7f754740993655ULL, 0x330b7ba4d5564d9fULL, 0x4c17a16a46672582ULL, 0xb22f08eb7d05f5b8ULL, 0x535f47f40bc148ccULL, 0x3aec5d27d4883037ULL, 0x10ed0a1825438f96ULL, 0x516101f72c233d17ULL, 0x13cc6f949fd04eaeULL, 0x739853c441474bfdULL, 0x653793d90d3f5b1bULL, 0x5240647b96b0fc2fULL, 0x0c84890ad27623e0ULL, 0xd7189b32703aaea3ULL, 0x2685de3523bd9c41ULL, 0x99317c5b11bffefaULL, 0x0d9baa854f079703ULL, 0x70b93648fbd48ac5ULL, 0xa80441fce30bc6beULL, 0x7287704bdc36ff1eULL, 0xb65384ed33dc1f13ULL, 0xd36417343ee34408ULL, 0x39cd38ab6e1bf10fULL, 0x5ab861770a1f3564ULL, 0x0ebacf09f594563bULL, 0xd04572b884708530ULL, 0x3cae9722bdb3af47ULL, 0x4a556b6f2f5cbaf2ULL, 0xe1704f1f76c4bd74ULL, 0x5ec4ed7144c6dfcfULL, 0x16afc01d4c7810e6ULL, 0x283f113cd629ca7aULL, 0xaf59a8761741ed2dULL, 0xeed5a3991e215facULL, 0x3bf37ea849f984d4ULL, 0xe413e096a56ce33cULL, 0x2c439d3a98f020d1ULL, 0x637559dc6404c46bULL, 0x9e6c95d1e5f5d569ULL, 0x24bb9836045fe99aULL, 0x44efa466dac8ecc9ULL, 0xc6eab2a5c80895d6ULL, 0x803b50c035220cc4ULL, 0x0321658cba93c138ULL, 0x8f9ebc465dc7ee1cULL, 0xd15a5137190131d3ULL, 0x0fa5ec8668e5e2d8ULL, 0x91c979578d1037b1ULL, 0x0642ca05693b9f70ULL, 0xefca80168350eb4fULL, 0x38d21b24f36a45ecULL, 0xbeab81e1af73d658ULL, 0x8cbfd9cae7542f24ULL, 0xfd19cc0d81f11102ULL, 0x0ac6430fbb4dbc90ULL, 0x1d76a09d6a441895ULL, 0x2a01573ff1cbbfa1ULL, 0xb572e161894fde2bULL, 0x8124734fa853b827ULL, 0x614b1fdf43e6b1b0ULL, 0x68ac395c4238cc18ULL, 0x21d837bfd7f7b7d2ULL, 0x20c714304a860331ULL, 0x5cfaab726324aa14ULL, 0x74c5ba4eb50d606eULL, 0xf3a3030474654739ULL, 0x23e671bcf015c209ULL, 0x45f087e947b9582aULL, 0xd8bd77b418df4c7bULL, 0xe06f6c90ebb50997ULL, 0x0bd96080263c0873ULL, 0x7e03f9410e40dcfeULL, 0xb8e94be4c6484928ULL, 0xfb5b0608e8ca8e72ULL, 0x1a2b49179e0e3306ULL, 0x4e29e76961855059ULL, 0x4f36c4e6fcf4e4baULL, 0x49740ee395cf7bcaULL, 0xc2963ea386d17f7dULL, 0x90d65ad810618352ULL, 0x12d34c1b02a1fa4dULL, 0xfa44258775bb3a91ULL, 0x18150f14b9ec46ddULL, 0x1491861e6b9a653dULL, 0x9a1019d7ab2c3fc2ULL, 0x3668d42d06fe13d7ULL, 0xdcc1fbb25606a6d0ULL, 0x969490dd795a1c22ULL, 0x3549b1a1bc6dd2efULL, 0xc94f5e23a0ed770eULL, 0xb9f6686b5b39fdcbULL, 0xc4d4f4a6efeae00dULL, 0xe732851a1fff2204ULL, 0x94aad6de5eb869f9ULL, 0x3f8ff2ae07206e7fULL, 0xfe38a9813b62d03aULL, 0xa7a1ad7a8bee2466ULL, 0x7b6056c8dde882b6ULL, 0x302a1e286fc58ca7ULL, 0x8da0fa457a259bc7ULL, 0xb3302b64e074415bULL, 0x5402ae7eff8b635fULL, 0x08f8050c9cafc94bULL, 0xae468bf98a3059ceULL, 0x88c355cca98dc58fULL, 0xb10e6d67c7963480ULL, 0xbad70de7e1aa3cf3ULL, 0xbfb4a26e320262bbULL, 0xcb711820870f02d5ULL, 0xce12b7a954a75c9dULL, 0x563ce87dd8691684ULL, 0x9f73b65e7884618aULL, 0x2b1e74b06cba0b42ULL, 0x47cec1ea605b2df1ULL, 0x1c698312f735ac76ULL, 0x5fdbcefed9b76b2cULL, 0x831a354c8fb1cdfcULL, 0x820516c312c0791fULL, 0xb74ca762aeadabf0ULL, 0xfc06ef821c80a5e1ULL, 0x5723cbf24518a267ULL, 0x9d4df05d5f661451ULL, 0x588627742dfd40bfULL, 0xda8331b73f3d39a0ULL, 0x17b0e392d109a405ULL, 0xf965400bcf28fba9ULL, 0x7c3dbf4229a2a925ULL, 0x023e460327e275dbULL, 0x6cd0b55a0ce126b3ULL, 0xe62da695828e96e7ULL, 0x42ad6e63b3f373b9ULL, 0xe50cc319381d57dfULL, 0xc5cbd729729b54eeULL, 0x46d1e265fd2a9912ULL, 0x6428b056904eeff8ULL, 0x8be23040131e04b7ULL, 0x6709d5da2add2ec0ULL, 0x075de98af44a2b93ULL, 0x8447dcc67bfbe66fULL, 0x6616f655b7ac9a23ULL, 0xd607b8bded4b1a40ULL, 0x0563af89d3a85e48ULL, 0x3db1b4ad20c21ba4ULL, 0x11f22997b8323b75ULL, 0x292032b34b587e99ULL, 0x7f1cdace9331681dULL, 0x8e819fc9c0b65affULL, 0xa1e3677fe2d5bb16ULL, 0xcd33d225ee349da5ULL, 0xd9a2543b85aef898ULL, 0x795e10cbfa0af76dULL, 0x25a4bbb9992e5d79ULL, 0x78413344677b438eULL, 0xf0826688cef68601ULL, 0xd27b34bba392f0ebULL, 0x551d8df162fad7bcULL, 0x1e57c511d0d7d9adULL, 0xdeffbdb171e4d30bULL, 0xf4feea8e802f6caaULL, 0xa480c8f6317de55eULL, 0xa0fc44f07fa40ff5ULL, 0x95b5f551c3c9dd1aULL, 0x22f952336d6476eaULL, 0x0000000000000000ULL, 0xa6be8ef5169f9085ULL, 0xcc2cf1aa73452946ULL, 0x2e7ddb39bf12550aULL, 0xd526dd3157d8db78ULL, 0x486b2d6c08becf29ULL, 0x9b0f3a58365d8b21ULL, 0xac78cdfaadd22c15ULL, 0xbc95c7e28891a383ULL, 0x6a927f5f65dab9c3ULL, 0xc3891d2c1ba0cb9eULL, 0xeaa92f9f50f8b507ULL, 0xcf0d9426c9d6e87eULL, 0xca6e3baf1a7eb636ULL, 0xab25247059980786ULL, 0x69b31ad3df4978fbULL, 0xe2512a93cc577c4cULL, 0xff278a0ea61364d9ULL, 0x71a615c766a53e26ULL, 0x89dc764334fc716cULL, 0xf87a638452594f4aULL, 0xf2bc208be914f3daULL, 0x8766b94ac1682757ULL, 0xbbc82e687cdb8810ULL, 0x626a7a53f9757088ULL, 0xa2c202f358467a2eULL, 0x4d0882e5db169161ULL, 0x09e7268301de7da8ULL, 0xe897699c771ac0dcULL, 0xc8507dac3d9cc3edULL, 0xc0a878a0a1330aa6ULL, 0x978bb352e42ba8c1ULL, 0xe9884a13ea6b743fULL, 0x279afdbabecc28a2ULL, 0x047c8c064ed9eaabULL, 0x507e2278b15289f4ULL, 0x599904fbb08cf45cULL, 0xbd8ae46d15e01760ULL, 0x31353da7f2b43844ULL, 0x8558ff49e68a528cULL, 0x76fbfc4d92ef15b5ULL, 0x3456922e211c660cULL, 0x86799ac55c1993b4ULL, 0x3e90d1219a51da9cULL, 0x2d5cbeb505819432ULL, 0x982e5fd48cce4a19ULL, 0xdb9c1238a24c8d43ULL, 0xd439febecaa96f9bULL, 0x418c0bef0960b281ULL, 0x158ea591f6ebd1deULL, 0x1f48e69e4da66d4eULL, 0x8afd13cf8e6fb054ULL, 0xf5e1c9011d5ed849ULL, 0xe34e091c5126c8afULL, 0xad67ee7530a398f6ULL, 0x43b24dec2e82c75aULL, 0x75da99c1287cd48dULL, 0x92e81cdb3783f689ULL, 0xa3dd217cc537cecdULL, 0x60543c50de970553ULL, 0x93f73f54aaf2426aULL, 0xa91b62737e7a725dULL, 0xf19d4507538732e2ULL, 0x77e4dfc20f9ea156ULL, 0x7d229ccdb4d31dc6ULL, 0x1b346a98037f87e5ULL, 0xedf4c615a4b29e94ULL, 0x4093286094110662ULL, 0xb0114ee85ae78063ULL, 0x6ff1d0d6b672e78bULL, 0x6dcf96d591909250ULL, 0xdfe09e3eec9567e8ULL, 0x3214582b4827f97cULL, 0xb46dc2ee143e6ac8ULL, 0xf6c0ac8da7cd1971ULL, 0xebb60c10cd8901e4ULL, 0xf7df8f023abcad92ULL, 0x9c52d3d2c217a0b2ULL, 0x6b8d5cd0f8ab0d20ULL, 0x3777f7a29b8fa734ULL, 0x011f238f9d71b4e3ULL, 0xc1b75b2f3c42be45ULL, 0x5de588fdfe551ef7ULL, 0x6eeef3592b035368ULL, 0xaa3a07ffc4e9b365ULL, 0xecebe59a39c32a77ULL, 0x5ba742f8976e8187ULL, 0x4b4a48e0b22d0e11ULL, 0xddded83dcb771233ULL, 0xa59feb79ac0c51bdULL, 0xc7f5912a55792135ULL }, { 0x6d6ae04668a9b08aULL, 0x3ab3f04b0be8c743ULL, 0xe51e166b54b3c908ULL, 0xbe90a9eb35c2f139ULL, 0xb2c7066637f2bec1ULL, 0xaa6945613392202cULL, 0x9a28c36f3b5201ebULL, 0xddce5a93ab536994ULL, 0x0e34133ef6382827ULL, 0x52a02ba1ec55048bULL, 0xa2f88f97c4b2a177ULL, 0x8640e513ca2251a5ULL, 0xcdf1d36258137622ULL, 0xfe6cb708dedf8ddbULL, 0x8a174a9ec8121e5dULL, 0x679896036b81560eULL, 0x59ed033395795feeULL, 0x1dd778ab8b74edafULL, 0xee533ef92d9f926dULL, 0x2a8c79baf8a8d8f5ULL, 0x6bcf398e69b119f6ULL, 0xe20491742fafdd95ULL, 0x276488e0809c2aecULL, 0xea955b82d88f5cceULL, 0x7102c63a99d9e0c4ULL, 0xf9763017a5c39946ULL, 0x429fa2501f151b3dULL, 0x4659c72bea05d59eULL, 0x984b7fdccf5a6634ULL, 0xf742232953fbb161ULL, 0x3041860e08c021c7ULL, 0x747bfd9616cd9386ULL, 0x4bb1367192312787ULL, 0x1b72a1638a6c44d3ULL, 0x4a0e68a6e8359a66ULL, 0x169a5039f258b6caULL, 0xb98a2ef44edee5a4ULL, 0xd9083fe85e43a737ULL, 0x967f6ce239624e13ULL, 0x8874f62d3c1a7982ULL, 0x3c1629830af06e3fULL, 0x9165ebfd427e5a8eULL, 0xb5dd81794ceeaa5cULL, 0x0de8f15a7834f219ULL, 0x70bd98ede3dd5d25ULL, 0xaccc9ca9328a8950ULL, 0x56664eda1945ca28ULL, 0x221db34c0f8859aeULL, 0x26dbd637fa98970dULL, 0x1acdffb4f068f932ULL, 0x4585254f64090fa0ULL, 0x72de245e17d53afaULL, 0x1546b25d7c546cf4ULL, 0x207e0ffffb803e71ULL, 0xfaaad2732bcf4378ULL, 0xb462dfae36ea17bdULL, 0xcf926fd1ac1b11fdULL, 0xe0672dc7dba7ba4aULL, 0xd3fa49ad5d6b41b3ULL, 0x8ba81449b216a3bcULL, 0x14f9ec8a0650d115ULL, 0x40fc1ee3eb1d7ce2ULL, 0x23a2ed9b758ce44fULL, 0x782c521b14fddc7eULL, 0x1c68267cf170504eULL, 0xbcf31558c1ca96e6ULL, 0xa781b43b4ba6d235ULL, 0xf6fd7dfe29ff0c80ULL, 0xb0a4bad5c3fad91eULL, 0xd199f51ea963266cULL, 0x414340349119c103ULL, 0x5405f269ed4dadf7ULL, 0xabd61bb649969dcdULL, 0x6813dbeae7bdc3c8ULL, 0x65fb2ab09f8931d1ULL, 0xf1e7fae152e3181dULL, 0xc1a67cef5a2339daULL, 0x7a4feea8e0f5bba1ULL, 0x1e0b9acf05783791ULL, 0x5b8ebf8061713831ULL, 0x80e53cdbcb3af8d9ULL, 0x7e898bd315e57502ULL, 0xc6bcfbf0213f2d47ULL, 0x95a38e86b76e942dULL, 0x092e94218d243cbaULL, 0x8339debf453622e7ULL, 0xb11be402b9fe64ffULL, 0x57d9100d634177c9ULL, 0xcc4e8db52217cbc3ULL, 0x3b0cae9c71ec7aa2ULL, 0xfb158ca451cbfe99ULL, 0x2b33276d82ac6514ULL, 0x01bf5ed77a04bde1ULL, 0xc5601994af33f779ULL, 0x75c4a3416cc92e67ULL, 0xf3844652a6eb7fc2ULL, 0x3487e375fdd0ef64ULL, 0x18ae430704609eedULL, 0x4d14efb993298efbULL, 0x815a620cb13e4538ULL, 0x125c354207487869ULL, 0x9eeea614ce42cf48ULL, 0xce2d3106d61fac1cULL, 0xbbe99247bad6827bULL, 0x071a871f7b1c149dULL, 0x2e4a1cc10db81656ULL, 0x77a71ff298c149b8ULL, 0x06a5d9c80118a97cULL, 0xad73c27e488e34b1ULL, 0x443a7b981e0db241ULL, 0xe3bbcfa355ab6074ULL, 0x0af276450328e684ULL, 0x73617a896dd1871bULL, 0x58525de4ef7de20fULL, 0xb7be3dcab8e6cd83ULL, 0x19111dd07e64230cULL, 0x842359a03e2a367aULL, 0x103f89f1f3401fb6ULL, 0xdc710444d157d475ULL, 0xb835702334da5845ULL, 0x4320fc876511a6dcULL, 0xd026abc9d3679b8dULL, 0x17250eee885c0b2bULL, 0x90dab52a387ae76fULL, 0x31fed8d972c49c26ULL, 0x89cba8fa461ec463ULL, 0x2ff5421677bcabb7ULL, 0x396f122f85e41d7dULL, 0xa09b332430bac6a8ULL, 0xc888e8ced7070560ULL, 0xaeaf201ac682ee8fULL, 0x1180d7268944a257ULL, 0xf058a43628e7a5fcULL, 0xbd4c4b8fbbce2b07ULL, 0xa1246df34abe7b49ULL, 0x7d5569b79be9af3cULL, 0xa9b5a705bd9efa12ULL, 0xdb6b835baa4bc0e8ULL, 0x05793bac8f147342ULL, 0x21c1512881848390ULL, 0xfdb0556c50d357e5ULL, 0x613d4fcb6a99ff72ULL, 0x03dce2648e0cda3eULL, 0xe949b9e6568386f0ULL, 0xfc0f0bbb2ad7ea04ULL, 0x6a70675913b5a417ULL, 0x7f36d5046fe1c8e3ULL, 0x0c57af8d02304ff8ULL, 0x32223abdfcc84618ULL, 0x0891caf6f720815bULL, 0xa63eeaec31a26fd4ULL, 0x2507345374944d33ULL, 0x49d28ac266394058ULL, 0xf5219f9aa7f3d6beULL, 0x2d96fea583b4cc68ULL, 0x5a31e1571b7585d0ULL, 0x8ed12fe53d02d0feULL, 0xdfade6205f5b0e4bULL, 0x4cabb16ee92d331aULL, 0x04c6657bf510cea3ULL, 0xd73c2cd6a87b8f10ULL, 0xe1d87310a1a307abULL, 0x6cd5be9112ad0d6bULL, 0x97c032354366f3f2ULL, 0xd4e0ceb22677552eULL, 0x0000000000000000ULL, 0x29509bde76a402cbULL, 0xc27a9e8bd42fe3e4ULL, 0x5ef7842cee654b73ULL, 0xaf107ecdbc86536eULL, 0x3fcacbe784fcb401ULL, 0xd55f90655c73e8cfULL, 0xe6c2f40fdabf1336ULL, 0xe8f6e7312c873b11ULL, 0xeb2a0555a28be12fULL, 0xe4a148bc2eb774e9ULL, 0x9b979db84156bc0aULL, 0x6eb60222e6a56ab4ULL, 0x87ffbbc4b026ec44ULL, 0xc703a5275b3b90a6ULL, 0x47e699fc9001687fULL, 0x9c8d1aa73a4aa897ULL, 0x7cea3760e1ed12ddULL, 0x4ec80ddd1d2554c5ULL, 0x13e36b957d4cc588ULL, 0x5d2b66486069914dULL, 0x92b90999cc7280b0ULL, 0x517cc9c56259deb5ULL, 0xc937b619ad03b881ULL, 0xec30824ad997f5b2ULL, 0xa45d565fc5aa080bULL, 0xd6837201d27f32f1ULL, 0x635ef3789e9198adULL, 0x531f75769651b96aULL, 0x4f77530a6721e924ULL, 0x486dd4151c3dfdb9ULL, 0x5f48dafb9461f692ULL, 0x375b011173dc355aULL, 0x3da9775470f4d3deULL, 0x8d0dcd81b30e0ac0ULL, 0x36e45fc609d888bbULL, 0x55baacbe97491016ULL, 0x8cb29356c90ab721ULL, 0x76184125e2c5f459ULL, 0x99f4210bb55edbd5ULL, 0x6f095cf59ca1d755ULL, 0x9f51f8c3b44672a9ULL, 0x3538bda287d45285ULL, 0x50c39712185d6354ULL, 0xf23b1885dcefc223ULL, 0x79930ccc6ef9619fULL, 0xed8fdc9da3934853ULL, 0xcb540aaa590bdf5eULL, 0x5c94389f1a6d2cacULL, 0xe77daad8a0bbaed7ULL, 0x28efc5090ca0bf2aULL, 0xbf2ff73c4fc64cd8ULL, 0xb37858b14df60320ULL, 0xf8c96ec0dfc724a7ULL, 0x828680683f329f06ULL, 0x941cd051cd6a29ccULL, 0xc3c5c05cae2b5e05ULL, 0xb601631dc2e27062ULL, 0xc01922382027843bULL, 0x24b86a840e90f0d2ULL, 0xd245177a276ffc52ULL, 0x0f8b4de98c3c95c6ULL, 0x3e759530fef809e0ULL, 0x0b4d2892792c5b65ULL, 0xc4df4743d5374a98ULL, 0xa5e20888bfaeb5eaULL, 0xba56cc90c0d23f9aULL, 0x38d04cf8ffe0a09cULL, 0x62e1adafe495254cULL, 0x0263bcb3f40867dfULL, 0xcaeb547d230f62bfULL, 0x6082111c109d4293ULL, 0xdad4dd8cd04f7d09ULL, 0xefec602e579b2f8cULL, 0x1fb4c4187f7c8a70ULL, 0xffd3e9dfa4db303aULL, 0x7bf0b07f9af10640ULL, 0xf49ec14dddf76b5fULL, 0x8f6e713247066d1fULL, 0x339d646a86ccfbf9ULL, 0x64447467e58d8c30ULL, 0x2c29a072f9b07189ULL, 0xd8b7613f24471ad6ULL, 0x6627c8d41185ebefULL, 0xa347d140beb61c96ULL, 0xde12b8f7255fb3aaULL, 0x9d324470404e1576ULL, 0x9306574eb6763d51ULL, 0xa80af9d2c79a47f3ULL, 0x859c0777442e8b9bULL, 0x69ac853d9db97e29ULL }, { 0xc3407dfc2de6377eULL, 0x5b9e93eea4256f77ULL, 0xadb58fdd50c845e0ULL, 0x5219ff11a75bed86ULL, 0x356b61cfd90b1de9ULL, 0xfb8f406e25abe037ULL, 0x7a5a0231c0f60796ULL, 0x9d3cd216e1f5020bULL, 0x0c6550fb6b48d8f3ULL, 0xf57508c427ff1c62ULL, 0x4ad35ffa71cb407dULL, 0x6290a2da1666aa6dULL, 0xe284ec2349355f9fULL, 0xb3c307c53d7c84ecULL, 0x05e23c0468365a02ULL, 0x190bac4d6c9ebfa8ULL, 0x94bbbee9e28b80faULL, 0xa34fc777529cb9b5ULL, 0xcc7b39f095bcd978ULL, 0x2426addb0ce532e3ULL, 0x7e79329312ce4fc7ULL, 0xab09a72eebec2917ULL, 0xf8d15499f6b9d6c2ULL, 0x1a55b8babf8c895dULL, 0xdb8add17fb769a85ULL, 0xb57f2f368658e81bULL, 0x8acd36f18f3f41f6ULL, 0x5ce3b7bba50f11d3ULL, 0x114dcc14d5ee2f0aULL, 0xb91a7fcded1030e8ULL, 0x81d5425fe55de7a1ULL, 0xb6213bc1554adeeeULL, 0x80144ef95f53f5f2ULL, 0x1e7688186db4c10cULL, 0x3b912965db5fe1bcULL, 0xc281715a97e8252dULL, 0x54a5d7e21c7f8171ULL, 0x4b12535ccbc5522eULL, 0x1d289cefbea6f7f9ULL, 0x6ef5f2217d2e729eULL, 0xe6a7dc819b0d17ceULL, 0x1b94b41c05829b0eULL, 0x33d7493c622f711eULL, 0xdcf7f942fa5ce421ULL, 0x600fba8b7f7a8ecbULL, 0x46b60f011a83988eULL, 0x235b898e0dcf4c47ULL, 0x957ab24f588592a9ULL, 0x4354330572b5c28cULL, 0xa5f3ef84e9b8d542ULL, 0x8c711e02341b2d01ULL, 0x0b1874ae6a62a657ULL, 0x1213d8e306fc19ffULL, 0xfe6d7c6a4d9dba35ULL, 0x65ed868f174cd4c9ULL, 0x88522ea0e6236550ULL, 0x899322065c2d7703ULL, 0xc01e690bfef4018bULL, 0x915982ed8abddaf8ULL, 0xbe675b98ec3a4e4cULL, 0xa996bf7f82f00db1ULL, 0xe1daf8d49a27696aULL, 0x2effd5d3dc8986e7ULL, 0xd153a51f2b1a2e81ULL, 0x18caa0ebd690adfbULL, 0x390e3134b243c51aULL, 0x2778b92cdff70416ULL, 0x029f1851691c24a6ULL, 0x5e7cafeacc133575ULL, 0xfa4e4cc89fa5f264ULL, 0x5a5f9f481e2b7d24ULL, 0x484c47ab18d764dbULL, 0x400a27f2a1a7f479ULL, 0xaeeb9b2a83da7315ULL, 0x721c626879869734ULL, 0x042330a2d2384851ULL, 0x85f672fd3765aff0ULL, 0xba446b3a3e02061dULL, 0x73dd6ecec3888567ULL, 0xffac70ccf793a866ULL, 0xdfa9edb5294ed2d4ULL, 0x6c6aea7014325638ULL, 0x834a5a0e8c41c307ULL, 0xcdba35562fb2cb2bULL, 0x0ad97808d06cb404ULL, 0x0f3b440cb85aee06ULL, 0xe5f9c876481f213bULL, 0x98deee1289c35809ULL, 0x59018bbfcd394bd1ULL, 0xe01bf47220297b39ULL, 0xde68e1139340c087ULL, 0x9fa3ca4788e926adULL, 0xbb85679c840c144eULL, 0x53d8f3b71d55ffd5ULL, 0x0da45c5dd146caa0ULL, 0x6f34fe87c72060cdULL, 0x57fbc315cf6db784ULL, 0xcee421a1fca0fddeULL, 0x3d2d0196607b8d4bULL, 0x642c8a29ad42c69aULL, 0x14aff010bdd87508ULL, 0xac74837beac657b3ULL, 0x3216459ad821634dULL, 0x3fb219c70967a9edULL, 0x06bc28f3bb246cf7ULL, 0xf2082c9126d562c6ULL, 0x66b39278c45ee23cULL, 0xbd394f6f3f2878b9ULL, 0xfd33689d9e8f8cc0ULL, 0x37f4799eb017394fULL, 0x108cc0b26fe03d59ULL, 0xda4bd1b1417888d6ULL, 0xb09d1332ee6eb219ULL, 0x2f3ed975668794b4ULL, 0x58c0871977375982ULL, 0x7561463d78ace990ULL, 0x09876cff037e82f1ULL, 0x7fb83e35a8c05d94ULL, 0x26b9b58a65f91645ULL, 0xef20b07e9873953fULL, 0x3148516d0b3355b8ULL, 0x41cb2b541ba9e62aULL, 0x790416c613e43163ULL, 0xa011d380818e8f40ULL, 0x3a5025c36151f3efULL, 0xd57095bdf92266d0ULL, 0x498d4b0da2d97688ULL, 0x8b0c3a57353153a5ULL, 0x21c491df64d368e1ULL, 0x8f2f0af5e7091bf4ULL, 0x2da1c1240f9bb012ULL, 0xc43d59a92ccc49daULL, 0xbfa6573e56345c1fULL, 0x828b56a8364fd154ULL, 0x9a41f643e0df7cafULL, 0xbcf843c985266aeaULL, 0x2b1de9d7b4bfdce5ULL, 0x20059d79dedd7ab2ULL, 0x6dabe6d6ae3c446bULL, 0x45e81bf6c991ae7bULL, 0x6351ae7cac68b83eULL, 0xa432e32253b6c711ULL, 0xd092a9b991143cd2ULL, 0xcac711032e98b58fULL, 0xd8d4c9e02864ac70ULL, 0xc5fc550f96c25b89ULL, 0xd7ef8dec903e4276ULL, 0x67729ede7e50f06fULL, 0xeac28c7af045cf3dULL, 0xb15c1f945460a04aULL, 0x9cfddeb05bfb1058ULL, 0x93c69abce3a1fe5eULL, 0xeb0380dc4a4bdd6eULL, 0xd20db1e8f8081874ULL, 0x229a8528b7c15e14ULL, 0x44291750739fbc28ULL, 0xd3ccbd4e42060a27ULL, 0xf62b1c33f4ed2a97ULL, 0x86a8660ae4779905ULL, 0xd62e814a2a305025ULL, 0x477703a7a08d8addULL, 0x7b9b0e977af815c5ULL, 0x78c51a60a9ea2330ULL, 0xa6adfb733aaae3b7ULL, 0x97e5aa1e3199b60fULL, 0x0000000000000000ULL, 0xf4b404629df10e31ULL, 0x5564db44a6719322ULL, 0x9207961a59afec0dULL, 0x9624a6b88b97a45cULL, 0x363575380a192b1cULL, 0x2c60cd82b595a241ULL, 0x7d272664c1dc7932ULL, 0x7142769faa94a1c1ULL, 0xa1d0df263b809d13ULL, 0x1630e841d4c451aeULL, 0xc1df65ad44fa13d8ULL, 0x13d2d445bcf20bacULL, 0xd915c546926abe23ULL, 0x38cf3d92084dd749ULL, 0xe766d0272103059dULL, 0xc7634d5effde7f2fULL, 0x077d2455012a7ea4ULL, 0xedbfa82ff16fb199ULL, 0xaf2a978c39d46146ULL, 0x42953fa3c8bbd0dfULL, 0xcb061da59496a7dcULL, 0x25e7a17db6eb20b0ULL, 0x34aa6d6963050fbaULL, 0xa76cf7d580a4f1e4ULL, 0xf7ea10954ee338c4ULL, 0xfcf2643b24819e93ULL, 0xcf252d0746aeef8dULL, 0x4ef06f58a3f3082cULL, 0x563acfb37563a5d7ULL, 0x5086e740ce47c920ULL, 0x2982f186dda3f843ULL, 0x87696aac5e798b56ULL, 0x5d22bb1d1f010380ULL, 0x035e14f7d31236f5ULL, 0x3cec0d30da759f18ULL, 0xf3c920379cdb7095ULL, 0xb8db736b571e22bbULL, 0xdd36f5e44052f672ULL, 0xaac8ab8851e23b44ULL, 0xa857b3d938fe1fe2ULL, 0x17f1e4e76eca43fdULL, 0xec7ea4894b61a3caULL, 0x9e62c6e132e734feULL, 0xd4b1991b432c7483ULL, 0x6ad6c283af163acfULL, 0x1ce9904904a8e5aaULL, 0x5fbda34c761d2726ULL, 0xf910583f4cb7c491ULL, 0xc6a241f845d06d7cULL, 0x4f3163fe19fd1a7fULL, 0xe99c988d2357f9c8ULL, 0x8eee06535d0709a7ULL, 0x0efa48aa0254fc55ULL, 0xb4be23903c56fa48ULL, 0x763f52caabbedf65ULL, 0xeee1bcd8227d876cULL, 0xe345e085f33b4dccULL, 0x3e731561b369bbbeULL, 0x2843fd2067adea10ULL, 0x2adce5710eb1ceb6ULL, 0xb7e03767ef44ccbdULL, 0x8db012a48e153f52ULL, 0x61ceb62dc5749c98ULL, 0xe85d942b9959eb9bULL, 0x4c6f7709caef2c8aULL, 0x84377e5b8d6bbda3ULL, 0x30895dcbb13d47ebULL, 0x74a04a9bc2a2fbc3ULL, 0x6b17ce251518289cULL, 0xe438c4d0f2113368ULL, 0x1fb784bed7bad35fULL, 0x9b80fae55ad16efcULL, 0x77fe5e6c11b0cd36ULL, 0xc858095247849129ULL, 0x08466059b97090a2ULL, 0x01c10ca6ba0e1253ULL, 0x6988d6747c040c3aULL, 0x6849dad2c60a1e69ULL, 0x5147ebe67449db73ULL, 0xc99905f4fd8a837aULL, 0x991fe2b433cd4a5aULL, 0xf09734c04fc94660ULL, 0xa28ecbd1e892abe6ULL, 0xf1563866f5c75433ULL, 0x4dae7baf70e13ed9ULL, 0x7ce62ac27bd26b61ULL, 0x70837a39109ab392ULL, 0x90988e4b30b3c8abULL, 0xb2020b63877296bfULL, 0x156efcb607d6675bULL }, { 0xe63f55ce97c331d0ULL, 0x25b506b0015bba16ULL, 0xc8706e29e6ad9ba8ULL, 0x5b43d3775d521f6aULL, 0x0bfa3d577035106eULL, 0xab95fc172afb0e66ULL, 0xf64b63979e7a3276ULL, 0xf58b4562649dad4bULL, 0x48f7c3dbae0c83f1ULL, 0xff31916642f5c8c5ULL, 0xcbb048dc1c4a0495ULL, 0x66b8f83cdf622989ULL, 0x35c130e908e2b9b0ULL, 0x7c761a61f0b34fa1ULL, 0x3601161cf205268dULL, 0x9e54ccfe2219b7d6ULL, 0x8b7d90a538940837ULL, 0x9cd403588ea35d0bULL, 0xbc3c6fea9ccc5b5aULL, 0xe5ff733b6d24aeedULL, 0xceed22de0f7eb8d2ULL, 0xec8581cab1ab545eULL, 0xb96105e88ff8e71dULL, 0x8ca03501871a5eadULL, 0x76ccce65d6db2a2fULL, 0x5883f582a7b58057ULL, 0x3f7be4ed2e8adc3eULL, 0x0fe7be06355cd9c9ULL, 0xee054e6c1d11be83ULL, 0x1074365909b903a6ULL, 0x5dde9f80b4813c10ULL, 0x4a770c7d02b6692cULL, 0x5379c8d5d7809039ULL, 0xb4067448161ed409ULL, 0x5f5e5026183bd6cdULL, 0xe898029bf4c29df9ULL, 0x7fb63c940a54d09cULL, 0xc5171f897f4ba8bcULL, 0xa6f28db7b31d3d72ULL, 0x2e4f3be7716eaa78ULL, 0x0d6771a099e63314ULL, 0x82076254e41bf284ULL, 0x2f0fd2b42733df98ULL, 0x5c9e76d3e2dc49f0ULL, 0x7aeb569619606cdbULL, 0x83478b07b2468764ULL, 0xcfadcb8d5923cd32ULL, 0x85dac7f05b95a41eULL, 0xb5469d1b4043a1e9ULL, 0xb821ecbbd9a592fdULL, 0x1b8e0b0e798c13c8ULL, 0x62a57b6d9a0be02eULL, 0xfcf1b793b81257f8ULL, 0x9d94ea0bd8fe28ebULL, 0x4cea408aeb654a56ULL, 0x23284a47e888996cULL, 0x2d8f1d128b893545ULL, 0xf4cbac3132c0d8abULL, 0xbd7c86b9ca912ebaULL, 0x3a268eef3dbe6079ULL, 0xf0d62f6077a9110cULL, 0x2735c916ade150cbULL, 0x89fd5f03942ee2eaULL, 0x1acee25d2fd16628ULL, 0x90f39bab41181bffULL, 0x430dfe8cde39939fULL, 0xf70b8ac4c8274796ULL, 0x1c53aeaac6024552ULL, 0x13b410acf35e9c9bULL, 0xa532ab4249faa24fULL, 0x2b1251e5625a163fULL, 0xd7e3e676da4841c7ULL, 0xa7b264e4e5404892ULL, 0xda8497d643ae72d3ULL, 0x861ae105a1723b23ULL, 0x38a6414991048aa4ULL, 0x6578dec92585b6b4ULL, 0x0280cfa6acbaeaddULL, 0x88bdb650c273970aULL, 0x9333bd5ebbff84c2ULL, 0x4e6a8f2c47dfa08bULL, 0x321c954db76cef2aULL, 0x418d312a72837942ULL, 0xb29b38bfffcdf773ULL, 0x6c022c38f90a4c07ULL, 0x5a033a240b0f6a8aULL, 0x1f93885f3ce5da6fULL, 0xc38a537e96988bc6ULL, 0x39e6a81ac759ff44ULL, 0x29929e43cee0fce2ULL, 0x40cdd87924de0ca2ULL, 0xe9d8ebc8a29fe819ULL, 0x0c2798f3cfbb46f4ULL, 0x55e484223e53b343ULL, 0x4650948ecd0d2fd8ULL, 0x20e86cb2126f0651ULL, 0x6d42c56baf5739e7ULL, 0xa06fc1405ace1e08ULL, 0x7babbfc54f3d193bULL, 0x424d17df8864e67fULL, 0xd8045870ef14980eULL, 0xc6d7397c85ac3781ULL, 0x21a885e1443273b1ULL, 0x67f8116f893f5c69ULL, 0x24f5efe35706cff6ULL, 0xd56329d076f2ab1aULL, 0x5e1eb9754e66a32dULL, 0x28d2771098bd8902ULL, 0x8f6013f47dfdc190ULL, 0x17a993fdb637553cULL, 0xe0a219397e1012aaULL, 0x786b9930b5da8606ULL, 0x6e82e39e55b0a6daULL, 0x875a0856f72f4ec3ULL, 0x3741ff4fa458536dULL, 0xac4859b3957558fcULL, 0x7ef6d5c75c09a57cULL, 0xc04a758b6c7f14fbULL, 0xf9acdd91ab26ebbfULL, 0x7391a467c5ef9668ULL, 0x335c7c1ee1319acaULL, 0xa91533b18641e4bbULL, 0xe4bf9a683b79db0dULL, 0x8e20faa72ba0b470ULL, 0x51f907737b3a7ae4ULL, 0x2268a314bed5ec8cULL, 0xd944b123b949edeeULL, 0x31dcb3b84d8b7017ULL, 0xd3fe65279f218860ULL, 0x097af2f1dc8ffab3ULL, 0x9b09a6fc312d0b91ULL, 0xcc6ded78a3c4520fULL, 0x3481d9ba5ebfcc50ULL, 0x4f2a667f1182d56bULL, 0xdfd9fdd4509ace94ULL, 0x26752045fbbc252bULL, 0xbffc491f662bc467ULL, 0xdd593272fc202449ULL, 0x3cbbc218d46d4303ULL, 0x91b372f817456e1fULL, 0x681faf69bc6385a0ULL, 0xb686bbeebaa43ed4ULL, 0x1469b5084cd0ca01ULL, 0x98c98009cbca94acULL, 0x6438379a73d8c354ULL, 0xc2caba2dc0c5fe26ULL, 0x3e3b0dbe78d7a9deULL, 0x50b9ee202d670f04ULL, 0x4590b27b37eab0e5ULL, 0x6025b4cb36b10af3ULL, 0xfb2c1237079c0162ULL, 0xa12f28130c936be8ULL, 0x4b37e52e54eb1cccULL, 0x083a1ba28ad28f53ULL, 0xc10a9cd83a22611bULL, 0x9f1425ad7444c236ULL, 0x069d4cf7e9d3237aULL, 0xedc56899e7f621beULL, 0x778c273680865fcfULL, 0x309c5aeb1bd605f7ULL, 0x8de0dc52d1472b4dULL, 0xf8ec34c2fd7b9e5fULL, 0xea18cd3d58787724ULL, 0xaad515447ca67b86ULL, 0x9989695a9d97e14cULL, 0x0000000000000000ULL, 0xf196c63321f464ecULL, 0x71116bc169557cb5ULL, 0xaf887f466f92c7c1ULL, 0x972e3e0ffe964d65ULL, 0x190ec4a8d536f915ULL, 0x95aef1a9522ca7b8ULL, 0xdc19db21aa7d51a9ULL, 0x94ee18fa0471d258ULL, 0x8087adf248a11859ULL, 0xc457f6da2916dd5cULL, 0xfa6cfb6451c17482ULL, 0xf256e0c6db13fbd1ULL, 0x6a9f60cf10d96f7dULL, 0x4daaa9d9bd383fb6ULL, 0x03c026f5fae79f3dULL, 0xde99148706c7bb74ULL, 0x2a52b8b6340763dfULL, 0x6fc20acd03edd33aULL, 0xd423c08320afdefaULL, 0xbbe1ca4e23420dc0ULL, 0x966ed75ca8cb3885ULL, 0xeb58246e0e2502c4ULL, 0x055d6a021334bc47ULL, 0xa47242111fa7d7afULL, 0xe3623fcc84f78d97ULL, 0x81c744a11efc6db9ULL, 0xaec8961539cfb221ULL, 0xf31609958d4e8e31ULL, 0x63e5923ecc5695ceULL, 0x47107ddd9b505a38ULL, 0xa3afe7b5a0298135ULL, 0x792b7063e387f3e6ULL, 0x0140e953565d75e0ULL, 0x12f4f9ffa503e97bULL, 0x750ce8902c3cb512ULL, 0xdbc47e8515f30733ULL, 0x1ed3610c6ab8af8fULL, 0x5239218681dde5d9ULL, 0xe222d69fd2aaf877ULL, 0xfe71783514a8bd25ULL, 0xcaf0a18f4a177175ULL, 0x61655d9860ec7f13ULL, 0xe77fbc9dc19e4430ULL, 0x2ccff441ddd440a5ULL, 0x16e97aaee06a20dcULL, 0xa855dae2d01c915bULL, 0x1d1347f9905f30b2ULL, 0xb7c652bdecf94b34ULL, 0xd03e43d265c6175dULL, 0xfdb15ec0ee4f2218ULL, 0x57644b8492e9599eULL, 0x07dda5a4bf8e569aULL, 0x54a46d71680ec6a3ULL, 0x5624a2d7c4b42c7eULL, 0xbebca04c3076b187ULL, 0x7d36f332a6ee3a41ULL, 0x3b6667bc6be31599ULL, 0x695f463aea3ef040ULL, 0xad08b0e0c3282d1cULL, 0xb15b1e4a052a684eULL, 0x44d05b2861b7c505ULL, 0x15295c5b1a8dbfe1ULL, 0x744c01c37a61c0f2ULL, 0x59c31cd1f1e8f5b7ULL, 0xef45a73f4b4ccb63ULL, 0x6bdf899c46841a9dULL, 0x3dfb2b4b823036e3ULL, 0xa2ef0ee6f674f4d5ULL, 0x184e2dfb836b8cf5ULL, 0x1134df0a5fe47646ULL, 0xbaa1231d751f7820ULL, 0xd17eaa81339b62bdULL, 0xb01bf71953771daeULL, 0x849a2ea30dc8d1feULL, 0x705182923f080955ULL, 0x0ea757556301ac29ULL, 0x041d83514569c9a7ULL, 0x0abad4042668658eULL, 0x49b72a88f851f611ULL, 0x8a3d79f66ec97dd7ULL, 0xcd2d042bf59927efULL, 0xc930877ab0f0ee48ULL, 0x9273540deda2f122ULL, 0xc797d02fd3f14261ULL, 0xe1e2f06a284d674aULL, 0xd2be8c74c97cfd80ULL, 0x9a494faf67707e71ULL, 0xb3dbd1eca9908293ULL, 0x72d14d3493b2e388ULL, 0xd6a30f258c153427ULL } }; /* Ax */ static void streebog_xor(const struct streebog_uint512 *x, const struct streebog_uint512 *y, struct streebog_uint512 *z) { z->qword[0] = x->qword[0] ^ y->qword[0]; z->qword[1] = x->qword[1] ^ y->qword[1]; z->qword[2] = x->qword[2] ^ y->qword[2]; z->qword[3] = x->qword[3] ^ y->qword[3]; z->qword[4] = x->qword[4] ^ y->qword[4]; z->qword[5] = x->qword[5] ^ y->qword[5]; z->qword[6] = x->qword[6] ^ y->qword[6]; z->qword[7] = x->qword[7] ^ y->qword[7]; } static void streebog_xlps(const struct streebog_uint512 *x, const struct streebog_uint512 *y, struct streebog_uint512 *data) { u64 r0, r1, r2, r3, r4, r5, r6, r7; int i; r0 = le64_to_cpu(x->qword[0] ^ y->qword[0]); r1 = le64_to_cpu(x->qword[1] ^ y->qword[1]); r2 = le64_to_cpu(x->qword[2] ^ y->qword[2]); r3 = le64_to_cpu(x->qword[3] ^ y->qword[3]); r4 = le64_to_cpu(x->qword[4] ^ y->qword[4]); r5 = le64_to_cpu(x->qword[5] ^ y->qword[5]); r6 = le64_to_cpu(x->qword[6] ^ y->qword[6]); r7 = le64_to_cpu(x->qword[7] ^ y->qword[7]); for (i = 0; i <= 7; i++) { data->qword[i] = cpu_to_le64(Ax[0][r0 & 0xFF]); data->qword[i] ^= cpu_to_le64(Ax[1][r1 & 0xFF]); data->qword[i] ^= cpu_to_le64(Ax[2][r2 & 0xFF]); data->qword[i] ^= cpu_to_le64(Ax[3][r3 & 0xFF]); data->qword[i] ^= cpu_to_le64(Ax[4][r4 & 0xFF]); data->qword[i] ^= cpu_to_le64(Ax[5][r5 & 0xFF]); data->qword[i] ^= cpu_to_le64(Ax[6][r6 & 0xFF]); data->qword[i] ^= cpu_to_le64(Ax[7][r7 & 0xFF]); r0 >>= 8; r1 >>= 8; r2 >>= 8; r3 >>= 8; r4 >>= 8; r5 >>= 8; r6 >>= 8; r7 >>= 8; } } static void streebog_round(int i, struct streebog_uint512 *Ki, struct streebog_uint512 *data) { streebog_xlps(Ki, &C[i], Ki); streebog_xlps(Ki, data, data); } static int streebog_init(struct shash_desc *desc) { struct streebog_state *ctx = shash_desc_ctx(desc); unsigned int digest_size = crypto_shash_digestsize(desc->tfm); unsigned int i; memset(ctx, 0, sizeof(struct streebog_state)); for (i = 0; i < 8; i++) { if (digest_size == STREEBOG256_DIGEST_SIZE) ctx->h.qword[i] = cpu_to_le64(0x0101010101010101ULL); } return 0; } static void streebog_pad(struct streebog_state *ctx) { if (ctx->fillsize >= STREEBOG_BLOCK_SIZE) return; memset(ctx->buffer + ctx->fillsize, 0, sizeof(ctx->buffer) - ctx->fillsize); ctx->buffer[ctx->fillsize] = 1; } static void streebog_add512(const struct streebog_uint512 *x, const struct streebog_uint512 *y, struct streebog_uint512 *r) { u64 carry = 0; int i; for (i = 0; i < 8; i++) { const u64 left = le64_to_cpu(x->qword[i]); u64 sum; sum = left + le64_to_cpu(y->qword[i]) + carry; if (sum != left) carry = (sum < left); r->qword[i] = cpu_to_le64(sum); } } static void streebog_g(struct streebog_uint512 *h, const struct streebog_uint512 *N, const struct streebog_uint512 *m) { struct streebog_uint512 Ki, data; unsigned int i; streebog_xlps(h, N, &data); /* Starting E() */ Ki = data; streebog_xlps(&Ki, m, &data); for (i = 0; i < 11; i++) streebog_round(i, &Ki, &data); streebog_xlps(&Ki, &C[11], &Ki); streebog_xor(&Ki, &data, &data); /* E() done */ streebog_xor(&data, h, &data); streebog_xor(&data, m, h); } static void streebog_stage2(struct streebog_state *ctx, const u8 *data) { struct streebog_uint512 m; memcpy(&m, data, sizeof(m)); streebog_g(&ctx->h, &ctx->N, &m); streebog_add512(&ctx->N, &buffer512, &ctx->N); streebog_add512(&ctx->Sigma, &m, &ctx->Sigma); } static void streebog_stage3(struct streebog_state *ctx) { struct streebog_uint512 buf = { { 0 } }; buf.qword[0] = cpu_to_le64(ctx->fillsize << 3); streebog_pad(ctx); streebog_g(&ctx->h, &ctx->N, &ctx->m); streebog_add512(&ctx->N, &buf, &ctx->N); streebog_add512(&ctx->Sigma, &ctx->m, &ctx->Sigma); streebog_g(&ctx->h, &buffer0, &ctx->N); streebog_g(&ctx->h, &buffer0, &ctx->Sigma); memcpy(&ctx->hash, &ctx->h, sizeof(struct streebog_uint512)); } static int streebog_update(struct shash_desc *desc, const u8 *data, unsigned int len) { struct streebog_state *ctx = shash_desc_ctx(desc); size_t chunksize; if (ctx->fillsize) { chunksize = STREEBOG_BLOCK_SIZE - ctx->fillsize; if (chunksize > len) chunksize = len; memcpy(&ctx->buffer[ctx->fillsize], data, chunksize); ctx->fillsize += chunksize; len -= chunksize; data += chunksize; if (ctx->fillsize == STREEBOG_BLOCK_SIZE) { streebog_stage2(ctx, ctx->buffer); ctx->fillsize = 0; } } while (len >= STREEBOG_BLOCK_SIZE) { streebog_stage2(ctx, data); data += STREEBOG_BLOCK_SIZE; len -= STREEBOG_BLOCK_SIZE; } if (len) { memcpy(&ctx->buffer, data, len); ctx->fillsize = len; } return 0; } static int streebog_final(struct shash_desc *desc, u8 *digest) { struct streebog_state *ctx = shash_desc_ctx(desc); streebog_stage3(ctx); ctx->fillsize = 0; if (crypto_shash_digestsize(desc->tfm) == STREEBOG256_DIGEST_SIZE) memcpy(digest, &ctx->hash.qword[4], STREEBOG256_DIGEST_SIZE); else memcpy(digest, &ctx->hash.qword[0], STREEBOG512_DIGEST_SIZE); return 0; } static struct shash_alg algs[2] = { { .digestsize = STREEBOG256_DIGEST_SIZE, .init = streebog_init, .update = streebog_update, .final = streebog_final, .descsize = sizeof(struct streebog_state), .base = { .cra_name = "streebog256", .cra_driver_name = "streebog256-generic", .cra_blocksize = STREEBOG_BLOCK_SIZE, .cra_module = THIS_MODULE, }, }, { .digestsize = STREEBOG512_DIGEST_SIZE, .init = streebog_init, .update = streebog_update, .final = streebog_final, .descsize = sizeof(struct streebog_state), .base = { .cra_name = "streebog512", .cra_driver_name = "streebog512-generic", .cra_blocksize = STREEBOG_BLOCK_SIZE, .cra_module = THIS_MODULE, } } }; static int __init streebog_mod_init(void) { return crypto_register_shashes(algs, ARRAY_SIZE(algs)); } static void __exit streebog_mod_fini(void) { crypto_unregister_shashes(algs, ARRAY_SIZE(algs)); } subsys_initcall(streebog_mod_init); module_exit(streebog_mod_fini); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Vitaly Chikunov <vt@altlinux.org>"); MODULE_DESCRIPTION("Streebog Hash Function"); MODULE_ALIAS_CRYPTO("streebog256"); MODULE_ALIAS_CRYPTO("streebog256-generic"); MODULE_ALIAS_CRYPTO("streebog512"); MODULE_ALIAS_CRYPTO("streebog512-generic");
16 782 492 298 1 79 1289 79 536 535 534 423 430 430 430 419 430 149 85 51 492 732 336 561 336 88 88 88 88 88 26 31 31 30 17 77 1306 16 504 14 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 /* SPDX-License-Identifier: GPL-2.0-or-later */ /* * Linux INET6 implementation * * Authors: * Pedro Roque <roque@di.fc.ul.pt> */ #ifndef _IP6_FIB_H #define _IP6_FIB_H #include <linux/ipv6_route.h> #include <linux/rtnetlink.h> #include <linux/spinlock.h> #include <linux/notifier.h> #include <net/dst.h> #include <net/flow.h> #include <net/ip_fib.h> #include <net/netlink.h> #include <net/inetpeer.h> #include <net/fib_notifier.h> #include <linux/indirect_call_wrapper.h> #include <uapi/linux/bpf.h> #ifdef CONFIG_IPV6_MULTIPLE_TABLES #define FIB6_TABLE_HASHSZ 256 #else #define FIB6_TABLE_HASHSZ 1 #endif #define RT6_DEBUG 2 struct rt6_info; struct fib6_info; struct fib6_config { u32 fc_table; u32 fc_metric; int fc_dst_len; int fc_src_len; int fc_ifindex; u32 fc_flags; u32 fc_protocol; u16 fc_type; /* only 8 bits are used */ u16 fc_delete_all_nh : 1, fc_ignore_dev_down:1, __unused : 14; u32 fc_nh_id; struct in6_addr fc_dst; struct in6_addr fc_src; struct in6_addr fc_prefsrc; struct in6_addr fc_gateway; unsigned long fc_expires; struct nlattr *fc_mx; int fc_mx_len; int fc_mp_len; struct nlattr *fc_mp; struct nl_info fc_nlinfo; struct nlattr *fc_encap; u16 fc_encap_type; bool fc_is_fdb; }; struct fib6_node { struct fib6_node __rcu *parent; struct fib6_node __rcu *left; struct fib6_node __rcu *right; #ifdef CONFIG_IPV6_SUBTREES struct fib6_node __rcu *subtree; #endif struct fib6_info __rcu *leaf; __u16 fn_bit; /* bit key */ __u16 fn_flags; int fn_sernum; struct fib6_info __rcu *rr_ptr; struct rcu_head rcu; }; struct fib6_gc_args { int timeout; int more; }; #ifndef CONFIG_IPV6_SUBTREES #define FIB6_SUBTREE(fn) NULL static inline bool fib6_routes_require_src(const struct net *net) { return false; } static inline void fib6_routes_require_src_inc(struct net *net) {} static inline void fib6_routes_require_src_dec(struct net *net) {} #else static inline bool fib6_routes_require_src(const struct net *net) { return net->ipv6.fib6_routes_require_src > 0; } static inline void fib6_routes_require_src_inc(struct net *net) { net->ipv6.fib6_routes_require_src++; } static inline void fib6_routes_require_src_dec(struct net *net) { net->ipv6.fib6_routes_require_src--; } #define FIB6_SUBTREE(fn) (rcu_dereference_protected((fn)->subtree, 1)) #endif /* * routing information * */ struct rt6key { struct in6_addr addr; int plen; }; struct fib6_table; struct rt6_exception_bucket { struct hlist_head chain; int depth; }; struct rt6_exception { struct hlist_node hlist; struct rt6_info *rt6i; unsigned long stamp; struct rcu_head rcu; }; #define FIB6_EXCEPTION_BUCKET_SIZE_SHIFT 10 #define FIB6_EXCEPTION_BUCKET_SIZE (1 << FIB6_EXCEPTION_BUCKET_SIZE_SHIFT) #define FIB6_MAX_DEPTH 5 struct fib6_nh { struct fib_nh_common nh_common; #ifdef CONFIG_IPV6_ROUTER_PREF unsigned long last_probe; #endif struct rt6_info * __percpu *rt6i_pcpu; struct rt6_exception_bucket __rcu *rt6i_exception_bucket; }; struct fib6_info { struct fib6_table *fib6_table; struct fib6_info __rcu *fib6_next; struct fib6_node __rcu *fib6_node; /* Multipath routes: * siblings is a list of fib6_info that have the same metric/weight, * destination, but not the same gateway. nsiblings is just a cache * to speed up lookup. */ union { struct list_head fib6_siblings; struct list_head nh_list; }; unsigned int fib6_nsiblings; refcount_t fib6_ref; unsigned long expires; struct hlist_node gc_link; struct dst_metrics *fib6_metrics; #define fib6_pmtu fib6_metrics->metrics[RTAX_MTU-1] struct rt6key fib6_dst; u32 fib6_flags; struct rt6key fib6_src; struct rt6key fib6_prefsrc; u32 fib6_metric; u8 fib6_protocol; u8 fib6_type; u8 offload; u8 trap; u8 offload_failed; u8 should_flush:1, dst_nocount:1, dst_nopolicy:1, fib6_destroying:1, unused:4; struct rcu_head rcu; struct nexthop *nh; struct fib6_nh fib6_nh[]; }; struct rt6_info { struct dst_entry dst; struct fib6_info __rcu *from; int sernum; struct rt6key rt6i_dst; struct rt6key rt6i_src; struct in6_addr rt6i_gateway; struct inet6_dev *rt6i_idev; u32 rt6i_flags; /* more non-fragment space at head required */ unsigned short rt6i_nfheader_len; }; struct fib6_result { struct fib6_nh *nh; struct fib6_info *f6i; u32 fib6_flags; u8 fib6_type; struct rt6_info *rt6; }; #define for_each_fib6_node_rt_rcu(fn) \ for (rt = rcu_dereference((fn)->leaf); rt; \ rt = rcu_dereference(rt->fib6_next)) #define for_each_fib6_walker_rt(w) \ for (rt = (w)->leaf; rt; \ rt = rcu_dereference_protected(rt->fib6_next, 1)) #define dst_rt6_info(_ptr) container_of_const(_ptr, struct rt6_info, dst) static inline struct inet6_dev *ip6_dst_idev(const struct dst_entry *dst) { return dst_rt6_info(dst)->rt6i_idev; } static inline bool fib6_requires_src(const struct fib6_info *rt) { return rt->fib6_src.plen > 0; } /* The callers should hold f6i->fib6_table->tb6_lock if a route has ever * been added to a table before. */ static inline void fib6_clean_expires(struct fib6_info *f6i) { f6i->fib6_flags &= ~RTF_EXPIRES; f6i->expires = 0; } /* The callers should hold f6i->fib6_table->tb6_lock if a route has ever * been added to a table before. */ static inline void fib6_set_expires(struct fib6_info *f6i, unsigned long expires) { f6i->expires = expires; f6i->fib6_flags |= RTF_EXPIRES; } static inline bool fib6_check_expired(const struct fib6_info *f6i) { if (f6i->fib6_flags & RTF_EXPIRES) return time_after(jiffies, f6i->expires); return false; } /* Function to safely get fn->fn_sernum for passed in rt * and store result in passed in cookie. * Return true if we can get cookie safely * Return false if not */ static inline bool fib6_get_cookie_safe(const struct fib6_info *f6i, u32 *cookie) { struct fib6_node *fn; bool status = false; fn = rcu_dereference(f6i->fib6_node); if (fn) { *cookie = READ_ONCE(fn->fn_sernum); /* pairs with smp_wmb() in __fib6_update_sernum_upto_root() */ smp_rmb(); status = true; } return status; } static inline u32 rt6_get_cookie(const struct rt6_info *rt) { struct fib6_info *from; u32 cookie = 0; if (rt->sernum) return rt->sernum; rcu_read_lock(); from = rcu_dereference(rt->from); if (from) fib6_get_cookie_safe(from, &cookie); rcu_read_unlock(); return cookie; } static inline void ip6_rt_put(struct rt6_info *rt) { /* dst_release() accepts a NULL parameter. * We rely on dst being first structure in struct rt6_info */ BUILD_BUG_ON(offsetof(struct rt6_info, dst) != 0); dst_release(&rt->dst); } struct fib6_info *fib6_info_alloc(gfp_t gfp_flags, bool with_fib6_nh); void fib6_info_destroy_rcu(struct rcu_head *head); static inline void fib6_info_hold(struct fib6_info *f6i) { refcount_inc(&f6i->fib6_ref); } static inline bool fib6_info_hold_safe(struct fib6_info *f6i) { return refcount_inc_not_zero(&f6i->fib6_ref); } static inline void fib6_info_release(struct fib6_info *f6i) { if (f6i && refcount_dec_and_test(&f6i->fib6_ref)) { DEBUG_NET_WARN_ON_ONCE(!hlist_unhashed(&f6i->gc_link)); call_rcu_hurry(&f6i->rcu, fib6_info_destroy_rcu); } } enum fib6_walk_state { #ifdef CONFIG_IPV6_SUBTREES FWS_S, #endif FWS_L, FWS_R, FWS_C, FWS_U }; struct fib6_walker { struct list_head lh; struct fib6_node *root, *node; struct fib6_info *leaf; enum fib6_walk_state state; unsigned int skip; unsigned int count; unsigned int skip_in_node; int (*func)(struct fib6_walker *); void *args; }; struct rt6_statistics { __u32 fib_nodes; /* all fib6 nodes */ __u32 fib_route_nodes; /* intermediate nodes */ __u32 fib_rt_entries; /* rt entries in fib table */ __u32 fib_rt_cache; /* cached rt entries in exception table */ __u32 fib_discarded_routes; /* total number of routes delete */ /* The following stat is not protected by any lock */ atomic_t fib_rt_alloc; /* total number of routes alloced */ }; #define RTN_TL_ROOT 0x0001 #define RTN_ROOT 0x0002 /* tree root node */ #define RTN_RTINFO 0x0004 /* node with valid routing info */ /* * priority levels (or metrics) * */ struct fib6_table { struct hlist_node tb6_hlist; u32 tb6_id; spinlock_t tb6_lock; struct fib6_node tb6_root; struct inet_peer_base tb6_peers; unsigned int flags; unsigned int fib_seq; /* writes protected by rtnl_mutex */ struct hlist_head tb6_gc_hlist; /* GC candidates */ #define RT6_TABLE_HAS_DFLT_ROUTER BIT(0) }; #define RT6_TABLE_UNSPEC RT_TABLE_UNSPEC #define RT6_TABLE_MAIN RT_TABLE_MAIN #define RT6_TABLE_DFLT RT6_TABLE_MAIN #define RT6_TABLE_INFO RT6_TABLE_MAIN #define RT6_TABLE_PREFIX RT6_TABLE_MAIN #ifdef CONFIG_IPV6_MULTIPLE_TABLES #define FIB6_TABLE_MIN 1 #define FIB6_TABLE_MAX RT_TABLE_MAX #define RT6_TABLE_LOCAL RT_TABLE_LOCAL #else #define FIB6_TABLE_MIN RT_TABLE_MAIN #define FIB6_TABLE_MAX FIB6_TABLE_MIN #define RT6_TABLE_LOCAL RT6_TABLE_MAIN #endif typedef struct rt6_info *(*pol_lookup_t)(struct net *, struct fib6_table *, struct flowi6 *, const struct sk_buff *, int); struct fib6_entry_notifier_info { struct fib_notifier_info info; /* must be first */ struct fib6_info *rt; unsigned int nsiblings; }; /* * exported functions */ struct fib6_table *fib6_get_table(struct net *net, u32 id); struct fib6_table *fib6_new_table(struct net *net, u32 id); struct dst_entry *fib6_rule_lookup(struct net *net, struct flowi6 *fl6, const struct sk_buff *skb, int flags, pol_lookup_t lookup); /* called with rcu lock held; can return error pointer * caller needs to select path */ int fib6_lookup(struct net *net, int oif, struct flowi6 *fl6, struct fib6_result *res, int flags); /* called with rcu lock held; caller needs to select path */ int fib6_table_lookup(struct net *net, struct fib6_table *table, int oif, struct flowi6 *fl6, struct fib6_result *res, int strict); void fib6_select_path(const struct net *net, struct fib6_result *res, struct flowi6 *fl6, int oif, bool have_oif_match, const struct sk_buff *skb, int strict); struct fib6_node *fib6_node_lookup(struct fib6_node *root, const struct in6_addr *daddr, const struct in6_addr *saddr); struct fib6_node *fib6_locate(struct fib6_node *root, const struct in6_addr *daddr, int dst_len, const struct in6_addr *saddr, int src_len, bool exact_match); void fib6_clean_all(struct net *net, int (*func)(struct fib6_info *, void *arg), void *arg); void fib6_clean_all_skip_notify(struct net *net, int (*func)(struct fib6_info *, void *arg), void *arg); int fib6_add(struct fib6_node *root, struct fib6_info *rt, struct nl_info *info, struct netlink_ext_ack *extack); int fib6_del(struct fib6_info *rt, struct nl_info *info); static inline void rt6_get_prefsrc(const struct rt6_info *rt, struct in6_addr *addr) { const struct fib6_info *from; rcu_read_lock(); from = rcu_dereference(rt->from); if (from) *addr = from->fib6_prefsrc.addr; else *addr = in6addr_any; rcu_read_unlock(); } int fib6_nh_init(struct net *net, struct fib6_nh *fib6_nh, struct fib6_config *cfg, gfp_t gfp_flags, struct netlink_ext_ack *extack); void fib6_nh_release(struct fib6_nh *fib6_nh); void fib6_nh_release_dsts(struct fib6_nh *fib6_nh); int call_fib6_entry_notifiers(struct net *net, enum fib_event_type event_type, struct fib6_info *rt, struct netlink_ext_ack *extack); int call_fib6_multipath_entry_notifiers(struct net *net, enum fib_event_type event_type, struct fib6_info *rt, unsigned int nsiblings, struct netlink_ext_ack *extack); int call_fib6_entry_notifiers_replace(struct net *net, struct fib6_info *rt); void fib6_rt_update(struct net *net, struct fib6_info *rt, struct nl_info *info); void inet6_rt_notify(int event, struct fib6_info *rt, struct nl_info *info, unsigned int flags); void fib6_run_gc(unsigned long expires, struct net *net, bool force); void fib6_gc_cleanup(void); int fib6_init(void); /* Add the route to the gc list if it is not already there * * The callers should hold f6i->fib6_table->tb6_lock. */ static inline void fib6_add_gc_list(struct fib6_info *f6i) { /* If fib6_node is null, the f6i is not in (or removed from) the * table. * * There is a gap between finding the f6i from the table and * calling this function without the protection of the tb6_lock. * This check makes sure the f6i is not added to the gc list when * it is not on the table. */ if (!rcu_dereference_protected(f6i->fib6_node, lockdep_is_held(&f6i->fib6_table->tb6_lock))) return; if (hlist_unhashed(&f6i->gc_link)) hlist_add_head(&f6i->gc_link, &f6i->fib6_table->tb6_gc_hlist); } /* Remove the route from the gc list if it is on the list. * * The callers should hold f6i->fib6_table->tb6_lock. */ static inline void fib6_remove_gc_list(struct fib6_info *f6i) { if (!hlist_unhashed(&f6i->gc_link)) hlist_del_init(&f6i->gc_link); } struct ipv6_route_iter { struct seq_net_private p; struct fib6_walker w; loff_t skip; struct fib6_table *tbl; int sernum; }; extern const struct seq_operations ipv6_route_seq_ops; int call_fib6_notifier(struct notifier_block *nb, enum fib_event_type event_type, struct fib_notifier_info *info); int call_fib6_notifiers(struct net *net, enum fib_event_type event_type, struct fib_notifier_info *info); int __net_init fib6_notifier_init(struct net *net); void __net_exit fib6_notifier_exit(struct net *net); unsigned int fib6_tables_seq_read(const struct net *net); int fib6_tables_dump(struct net *net, struct notifier_block *nb, struct netlink_ext_ack *extack); void fib6_update_sernum(struct net *net, struct fib6_info *rt); void fib6_update_sernum_upto_root(struct net *net, struct fib6_info *rt); void fib6_update_sernum_stub(struct net *net, struct fib6_info *f6i); void fib6_metric_set(struct fib6_info *f6i, int metric, u32 val); static inline bool fib6_metric_locked(struct fib6_info *f6i, int metric) { return !!(f6i->fib6_metrics->metrics[RTAX_LOCK - 1] & (1 << metric)); } void fib6_info_hw_flags_set(struct net *net, struct fib6_info *f6i, bool offload, bool trap, bool offload_failed); #if IS_BUILTIN(CONFIG_IPV6) && defined(CONFIG_BPF_SYSCALL) struct bpf_iter__ipv6_route { __bpf_md_ptr(struct bpf_iter_meta *, meta); __bpf_md_ptr(struct fib6_info *, rt); }; #endif INDIRECT_CALLABLE_DECLARE(struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table, struct flowi6 *fl6, const struct sk_buff *skb, int flags)); INDIRECT_CALLABLE_DECLARE(struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table, struct flowi6 *fl6, const struct sk_buff *skb, int flags)); INDIRECT_CALLABLE_DECLARE(struct rt6_info *__ip6_route_redirect(struct net *net, struct fib6_table *table, struct flowi6 *fl6, const struct sk_buff *skb, int flags)); INDIRECT_CALLABLE_DECLARE(struct rt6_info *ip6_pol_route_lookup(struct net *net, struct fib6_table *table, struct flowi6 *fl6, const struct sk_buff *skb, int flags)); static inline struct rt6_info *pol_lookup_func(pol_lookup_t lookup, struct net *net, struct fib6_table *table, struct flowi6 *fl6, const struct sk_buff *skb, int flags) { return INDIRECT_CALL_4(lookup, ip6_pol_route_output, ip6_pol_route_input, ip6_pol_route_lookup, __ip6_route_redirect, net, table, fl6, skb, flags); } #ifdef CONFIG_IPV6_MULTIPLE_TABLES static inline bool fib6_has_custom_rules(const struct net *net) { return net->ipv6.fib6_has_custom_rules; } int fib6_rules_init(void); void fib6_rules_cleanup(void); bool fib6_rule_default(const struct fib_rule *rule); int fib6_rules_dump(struct net *net, struct notifier_block *nb, struct netlink_ext_ack *extack); unsigned int fib6_rules_seq_read(const struct net *net); static inline bool fib6_rules_early_flow_dissect(struct net *net, struct sk_buff *skb, struct flowi6 *fl6, struct flow_keys *flkeys) { unsigned int flag = FLOW_DISSECTOR_F_STOP_AT_ENCAP; if (!net->ipv6.fib6_rules_require_fldissect) return false; memset(flkeys, 0, sizeof(*flkeys)); __skb_flow_dissect(net, skb, &flow_keys_dissector, flkeys, NULL, 0, 0, 0, flag); fl6->fl6_sport = flkeys->ports.src; fl6->fl6_dport = flkeys->ports.dst; fl6->flowi6_proto = flkeys->basic.ip_proto; return true; } #else static inline bool fib6_has_custom_rules(const struct net *net) { return false; } static inline int fib6_rules_init(void) { return 0; } static inline void fib6_rules_cleanup(void) { return ; } static inline bool fib6_rule_default(const struct fib_rule *rule) { return true; } static inline int fib6_rules_dump(struct net *net, struct notifier_block *nb, struct netlink_ext_ack *extack) { return 0; } static inline unsigned int fib6_rules_seq_read(const struct net *net) { return 0; } static inline bool fib6_rules_early_flow_dissect(struct net *net, struct sk_buff *skb, struct flowi6 *fl6, struct flow_keys *flkeys) { return false; } #endif #endif
5 13 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 /* SPDX-License-Identifier: GPL-2.0 */ /* * thermal_core.h * * Copyright (C) 2012 Intel Corp * Author: Durgadoss R <durgadoss.r@intel.com> */ #ifndef __THERMAL_CORE_H__ #define __THERMAL_CORE_H__ #include <linux/cleanup.h> #include <linux/device.h> #include <linux/thermal.h> #include "thermal_netlink.h" #include "thermal_thresholds.h" #include "thermal_debugfs.h" struct thermal_attr { struct device_attribute attr; char name[THERMAL_NAME_LENGTH]; }; struct thermal_trip_attrs { struct thermal_attr type; struct thermal_attr temp; struct thermal_attr hyst; }; struct thermal_trip_desc { struct thermal_trip trip; struct thermal_trip_attrs trip_attrs; struct list_head list_node; struct list_head thermal_instances; int threshold; }; /** * struct thermal_governor - structure that holds thermal governor information * @name: name of the governor * @bind_to_tz: callback called when binding to a thermal zone. If it * returns 0, the governor is bound to the thermal zone, * otherwise it fails. * @unbind_from_tz: callback called when a governor is unbound from a * thermal zone. * @trip_crossed: called for trip points that have just been crossed * @manage: called on thermal zone temperature updates * @update_tz: callback called when thermal zone internals have changed, e.g. * thermal cooling instance was added/removed * @governor_list: node in thermal_governor_list (in thermal_core.c) */ struct thermal_governor { const char *name; int (*bind_to_tz)(struct thermal_zone_device *tz); void (*unbind_from_tz)(struct thermal_zone_device *tz); void (*trip_crossed)(struct thermal_zone_device *tz, const struct thermal_trip *trip, bool upward); void (*manage)(struct thermal_zone_device *tz); void (*update_tz)(struct thermal_zone_device *tz, enum thermal_notify_event reason); struct list_head governor_list; }; #define TZ_STATE_FLAG_SUSPENDED BIT(0) #define TZ_STATE_FLAG_RESUMING BIT(1) #define TZ_STATE_FLAG_INIT BIT(2) #define TZ_STATE_FLAG_EXIT BIT(3) #define TZ_STATE_READY 0 /** * struct thermal_zone_device - structure for a thermal zone * @id: unique id number for each thermal zone * @type: the thermal zone device type * @device: &struct device for this thermal zone * @removal: removal completion * @resume: resume completion * @trips_high: trips above the current zone temperature * @trips_reached: trips below or at the current zone temperature * @trips_invalid: trips with invalid temperature * @mode: current mode of this thermal zone * @devdata: private pointer for device private data * @num_trips: number of trip points the thermal zone supports * @passive_delay_jiffies: number of jiffies to wait between polls when * performing passive cooling. * @polling_delay_jiffies: number of jiffies to wait between polls when * checking whether trip points have been crossed (0 for * interrupt driven systems) * @recheck_delay_jiffies: delay after a failed attempt to determine the zone * temperature before trying again * @temperature: current temperature. This is only for core code, * drivers should use thermal_zone_get_temp() to get the * current temperature * @last_temperature: previous temperature read * @emul_temperature: emulated temperature when using CONFIG_THERMAL_EMULATION * @passive: 1 if you've crossed a passive trip point, 0 otherwise. * @prev_low_trip: the low current temperature if you've crossed a passive trip point. * @prev_high_trip: the above current temperature if you've crossed a passive trip point. * @ops: operations this &thermal_zone_device supports * @tzp: thermal zone parameters * @governor: pointer to the governor for this thermal zone * @governor_data: private pointer for governor data * @ida: &struct ida to generate unique id for this zone's cooling * devices * @lock: lock to protect thermal_instances list * @node: node in thermal_tz_list (in thermal_core.c) * @poll_queue: delayed work for polling * @notify_event: Last notification event * @state: current state of the thermal zone * @trips: array of struct thermal_trip objects */ struct thermal_zone_device { int id; char type[THERMAL_NAME_LENGTH]; struct device device; struct completion removal; struct completion resume; struct attribute_group trips_attribute_group; struct list_head trips_high; struct list_head trips_reached; struct list_head trips_invalid; enum thermal_device_mode mode; void *devdata; int num_trips; unsigned long passive_delay_jiffies; unsigned long polling_delay_jiffies; unsigned long recheck_delay_jiffies; int temperature; int last_temperature; int emul_temperature; int passive; int prev_low_trip; int prev_high_trip; struct thermal_zone_device_ops ops; struct thermal_zone_params *tzp; struct thermal_governor *governor; void *governor_data; struct ida ida; struct mutex lock; struct list_head node; struct delayed_work poll_queue; enum thermal_notify_event notify_event; u8 state; #ifdef CONFIG_THERMAL_DEBUGFS struct thermal_debugfs *debugfs; #endif struct list_head user_thresholds; struct thermal_trip_desc trips[] __counted_by(num_trips); }; DEFINE_GUARD(thermal_zone, struct thermal_zone_device *, mutex_lock(&_T->lock), mutex_unlock(&_T->lock)) DEFINE_GUARD(thermal_zone_reverse, struct thermal_zone_device *, mutex_unlock(&_T->lock), mutex_lock(&_T->lock)) /* Initial thermal zone temperature. */ #define THERMAL_TEMP_INIT INT_MIN /* * Default and maximum delay after a failed thermal zone temperature check * before attempting to check it again (in jiffies). */ #define THERMAL_RECHECK_DELAY msecs_to_jiffies(250) #define THERMAL_MAX_RECHECK_DELAY (120 * HZ) /* Default Thermal Governor */ #if defined(CONFIG_THERMAL_DEFAULT_GOV_STEP_WISE) #define DEFAULT_THERMAL_GOVERNOR "step_wise" #elif defined(CONFIG_THERMAL_DEFAULT_GOV_FAIR_SHARE) #define DEFAULT_THERMAL_GOVERNOR "fair_share" #elif defined(CONFIG_THERMAL_DEFAULT_GOV_USER_SPACE) #define DEFAULT_THERMAL_GOVERNOR "user_space" #elif defined(CONFIG_THERMAL_DEFAULT_GOV_POWER_ALLOCATOR) #define DEFAULT_THERMAL_GOVERNOR "power_allocator" #elif defined(CONFIG_THERMAL_DEFAULT_GOV_BANG_BANG) #define DEFAULT_THERMAL_GOVERNOR "bang_bang" #endif /* Initial state of a cooling device during binding */ #define THERMAL_NO_TARGET -1UL /* Init section thermal table */ extern struct thermal_governor *__governor_thermal_table[]; extern struct thermal_governor *__governor_thermal_table_end[]; #define THERMAL_TABLE_ENTRY(table, name) \ static typeof(name) *__thermal_table_entry_##name \ __used __section("__" #table "_thermal_table") = &name #define THERMAL_GOVERNOR_DECLARE(name) THERMAL_TABLE_ENTRY(governor, name) #define for_each_governor_table(__governor) \ for (__governor = __governor_thermal_table; \ __governor < __governor_thermal_table_end; \ __governor++) int for_each_thermal_zone(int (*cb)(struct thermal_zone_device *, void *), void *); int for_each_thermal_cooling_device(int (*cb)(struct thermal_cooling_device *, void *), void *); int for_each_thermal_governor(int (*cb)(struct thermal_governor *, void *), void *thermal_governor); struct thermal_zone_device *thermal_zone_get_by_id(int id); DEFINE_CLASS(thermal_zone_get_by_id, struct thermal_zone_device *, if (_T) put_device(&_T->device), thermal_zone_get_by_id(id), int id) static inline bool cdev_is_power_actor(struct thermal_cooling_device *cdev) { return cdev->ops->get_requested_power && cdev->ops->state2power && cdev->ops->power2state; } void thermal_cdev_update(struct thermal_cooling_device *); void thermal_cdev_update_nocheck(struct thermal_cooling_device *cdev); void __thermal_cdev_update(struct thermal_cooling_device *cdev); int get_tz_trend(struct thermal_zone_device *tz, const struct thermal_trip *trip); /* * This structure is used to describe the behavior of * a certain cooling device on a certain trip point * in a certain thermal zone */ struct thermal_instance { int id; char name[THERMAL_NAME_LENGTH]; struct thermal_cooling_device *cdev; const struct thermal_trip *trip; bool initialized; unsigned long upper; /* Highest cooling state for this trip point */ unsigned long lower; /* Lowest cooling state for this trip point */ unsigned long target; /* expected cooling state */ char attr_name[THERMAL_NAME_LENGTH]; struct device_attribute attr; char weight_attr_name[THERMAL_NAME_LENGTH]; struct device_attribute weight_attr; struct list_head trip_node; /* node in trip->thermal_instances */ struct list_head cdev_node; /* node in cdev->thermal_instances */ unsigned int weight; /* The weight of the cooling device */ bool upper_no_limit; }; #define to_thermal_zone(_dev) \ container_of(_dev, struct thermal_zone_device, device) #define to_cooling_device(_dev) \ container_of(_dev, struct thermal_cooling_device, device) int thermal_register_governor(struct thermal_governor *); void thermal_unregister_governor(struct thermal_governor *); int thermal_zone_device_set_policy(struct thermal_zone_device *, char *); int thermal_build_list_of_policies(char *buf); void __thermal_zone_device_update(struct thermal_zone_device *tz, enum thermal_notify_event event); void thermal_zone_device_critical_reboot(struct thermal_zone_device *tz); void thermal_governor_update_tz(struct thermal_zone_device *tz, enum thermal_notify_event reason); /* Helpers */ #define for_each_trip_desc(__tz, __td) \ for (__td = __tz->trips; __td - __tz->trips < __tz->num_trips; __td++) #define trip_to_trip_desc(__trip) \ container_of(__trip, struct thermal_trip_desc, trip) const char *thermal_trip_type_name(enum thermal_trip_type trip_type); void thermal_zone_set_trips(struct thermal_zone_device *tz, int low, int high); int thermal_zone_trip_id(const struct thermal_zone_device *tz, const struct thermal_trip *trip); int __thermal_zone_get_temp(struct thermal_zone_device *tz, int *temp); void thermal_zone_set_trip_hyst(struct thermal_zone_device *tz, struct thermal_trip *trip, int hyst); /* sysfs I/F */ int thermal_zone_create_device_groups(struct thermal_zone_device *tz); void thermal_zone_destroy_device_groups(struct thermal_zone_device *); void thermal_cooling_device_setup_sysfs(struct thermal_cooling_device *); void thermal_cooling_device_destroy_sysfs(struct thermal_cooling_device *cdev); void thermal_cooling_device_stats_reinit(struct thermal_cooling_device *cdev); /* used only at binding time */ ssize_t trip_point_show(struct device *, struct device_attribute *, char *); ssize_t weight_show(struct device *, struct device_attribute *, char *); ssize_t weight_store(struct device *, struct device_attribute *, const char *, size_t); #ifdef CONFIG_THERMAL_STATISTICS void thermal_cooling_device_stats_update(struct thermal_cooling_device *cdev, unsigned long new_state); #else static inline void thermal_cooling_device_stats_update(struct thermal_cooling_device *cdev, unsigned long new_state) {} #endif /* CONFIG_THERMAL_STATISTICS */ #endif /* __THERMAL_CORE_H__ */
7943 213 213 213 205 32 213 32 213 2969 2967 3045 2872 203 205 205 205 205 205 205 205 205 2796 273 269 5 273 271 271 271 140 9 9 9 9 289 286 5785 5791 5775 5781 2855 2860 4380 4381 7941 7963 9 288 287 286 7710 7712 7740 7738 2536 7404 7401 7393 7400 7398 1131 7403 1577 6118 9 5 1575 1452 5 1449 1431 1446 20 2 20 6 134 6248 4511 268 4322 3 4328 4316 49 5269 5588 141 141 140 140 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 // SPDX-License-Identifier: GPL-2.0 /* * Copyright (C) 1995 Linus Torvalds * Copyright (C) 2001, 2002 Andi Kleen, SuSE Labs. * Copyright (C) 2008-2009, Red Hat Inc., Ingo Molnar */ #include <linux/sched.h> /* test_thread_flag(), ... */ #include <linux/sched/task_stack.h> /* task_stack_*(), ... */ #include <linux/kdebug.h> /* oops_begin/end, ... */ #include <linux/memblock.h> /* max_low_pfn */ #include <linux/kfence.h> /* kfence_handle_page_fault */ #include <linux/kprobes.h> /* NOKPROBE_SYMBOL, ... */ #include <linux/mmiotrace.h> /* kmmio_handler, ... */ #include <linux/perf_event.h> /* perf_sw_event */ #include <linux/hugetlb.h> /* hstate_index_to_shift */ #include <linux/prefetch.h> /* prefetchw */ #include <linux/context_tracking.h> /* exception_enter(), ... */ #include <linux/uaccess.h> /* faulthandler_disabled() */ #include <linux/efi.h> /* efi_crash_gracefully_on_page_fault()*/ #include <linux/mm_types.h> #include <linux/mm.h> /* find_and_lock_vma() */ #include <linux/vmalloc.h> #include <asm/cpufeature.h> /* boot_cpu_has, ... */ #include <asm/traps.h> /* dotraplinkage, ... */ #include <asm/fixmap.h> /* VSYSCALL_ADDR */ #include <asm/vsyscall.h> /* emulate_vsyscall */ #include <asm/vm86.h> /* struct vm86 */ #include <asm/mmu_context.h> /* vma_pkey() */ #include <asm/efi.h> /* efi_crash_gracefully_on_page_fault()*/ #include <asm/desc.h> /* store_idt(), ... */ #include <asm/cpu_entry_area.h> /* exception stack */ #include <asm/pgtable_areas.h> /* VMALLOC_START, ... */ #include <asm/kvm_para.h> /* kvm_handle_async_pf */ #include <asm/vdso.h> /* fixup_vdso_exception() */ #include <asm/irq_stack.h> #include <asm/fred.h> #include <asm/sev.h> /* snp_dump_hva_rmpentry() */ #define CREATE_TRACE_POINTS #include <asm/trace/exceptions.h> /* * Returns 0 if mmiotrace is disabled, or if the fault is not * handled by mmiotrace: */ static nokprobe_inline int kmmio_fault(struct pt_regs *regs, unsigned long addr) { if (unlikely(is_kmmio_active())) if (kmmio_handler(regs, addr) == 1) return -1; return 0; } /* * Prefetch quirks: * * 32-bit mode: * * Sometimes AMD Athlon/Opteron CPUs report invalid exceptions on prefetch. * Check that here and ignore it. This is AMD erratum #91. * * 64-bit mode: * * Sometimes the CPU reports invalid exceptions on prefetch. * Check that here and ignore it. * * Opcode checker based on code by Richard Brunner. */ static inline int check_prefetch_opcode(struct pt_regs *regs, unsigned char *instr, unsigned char opcode, int *prefetch) { unsigned char instr_hi = opcode & 0xf0; unsigned char instr_lo = opcode & 0x0f; switch (instr_hi) { case 0x20: case 0x30: /* * Values 0x26,0x2E,0x36,0x3E are valid x86 prefixes. * In X86_64 long mode, the CPU will signal invalid * opcode if some of these prefixes are present so * X86_64 will never get here anyway */ return ((instr_lo & 7) == 0x6); #ifdef CONFIG_X86_64 case 0x40: /* * In 64-bit mode 0x40..0x4F are valid REX prefixes */ return (!user_mode(regs) || user_64bit_mode(regs)); #endif case 0x60: /* 0x64 thru 0x67 are valid prefixes in all modes. */ return (instr_lo & 0xC) == 0x4; case 0xF0: /* 0xF0, 0xF2, 0xF3 are valid prefixes in all modes. */ return !instr_lo || (instr_lo>>1) == 1; case 0x00: /* Prefetch instruction is 0x0F0D or 0x0F18 */ if (get_kernel_nofault(opcode, instr)) return 0; *prefetch = (instr_lo == 0xF) && (opcode == 0x0D || opcode == 0x18); return 0; default: return 0; } } static bool is_amd_k8_pre_npt(void) { struct cpuinfo_x86 *c = &boot_cpu_data; return unlikely(IS_ENABLED(CONFIG_CPU_SUP_AMD) && c->x86_vendor == X86_VENDOR_AMD && c->x86 == 0xf && c->x86_model < 0x40); } static int is_prefetch(struct pt_regs *regs, unsigned long error_code, unsigned long addr) { unsigned char *max_instr; unsigned char *instr; int prefetch = 0; /* Erratum #91 affects AMD K8, pre-NPT CPUs */ if (!is_amd_k8_pre_npt()) return 0; /* * If it was a exec (instruction fetch) fault on NX page, then * do not ignore the fault: */ if (error_code & X86_PF_INSTR) return 0; instr = (void *)convert_ip_to_linear(current, regs); max_instr = instr + 15; /* * This code has historically always bailed out if IP points to a * not-present page (e.g. due to a race). No one has ever * complained about this. */ pagefault_disable(); while (instr < max_instr) { unsigned char opcode; if (user_mode(regs)) { if (get_user(opcode, (unsigned char __user *) instr)) break; } else { if (get_kernel_nofault(opcode, instr)) break; } instr++; if (!check_prefetch_opcode(regs, instr, opcode, &prefetch)) break; } pagefault_enable(); return prefetch; } DEFINE_SPINLOCK(pgd_lock); LIST_HEAD(pgd_list); #ifdef CONFIG_X86_32 static inline pmd_t *vmalloc_sync_one(pgd_t *pgd, unsigned long address) { unsigned index = pgd_index(address); pgd_t *pgd_k; p4d_t *p4d, *p4d_k; pud_t *pud, *pud_k; pmd_t *pmd, *pmd_k; pgd += index; pgd_k = init_mm.pgd + index; if (!pgd_present(*pgd_k)) return NULL; /* * set_pgd(pgd, *pgd_k); here would be useless on PAE * and redundant with the set_pmd() on non-PAE. As would * set_p4d/set_pud. */ p4d = p4d_offset(pgd, address); p4d_k = p4d_offset(pgd_k, address); if (!p4d_present(*p4d_k)) return NULL; pud = pud_offset(p4d, address); pud_k = pud_offset(p4d_k, address); if (!pud_present(*pud_k)) return NULL; pmd = pmd_offset(pud, address); pmd_k = pmd_offset(pud_k, address); if (pmd_present(*pmd) != pmd_present(*pmd_k)) set_pmd(pmd, *pmd_k); if (!pmd_present(*pmd_k)) return NULL; else BUG_ON(pmd_pfn(*pmd) != pmd_pfn(*pmd_k)); return pmd_k; } /* * Handle a fault on the vmalloc or module mapping area * * This is needed because there is a race condition between the time * when the vmalloc mapping code updates the PMD to the point in time * where it synchronizes this update with the other page-tables in the * system. * * In this race window another thread/CPU can map an area on the same * PMD, finds it already present and does not synchronize it with the * rest of the system yet. As a result v[mz]alloc might return areas * which are not mapped in every page-table in the system, causing an * unhandled page-fault when they are accessed. */ static noinline int vmalloc_fault(unsigned long address) { unsigned long pgd_paddr; pmd_t *pmd_k; pte_t *pte_k; /* Make sure we are in vmalloc area: */ if (!(address >= VMALLOC_START && address < VMALLOC_END)) return -1; /* * Synchronize this task's top level page-table * with the 'reference' page table. * * Do _not_ use "current" here. We might be inside * an interrupt in the middle of a task switch.. */ pgd_paddr = read_cr3_pa(); pmd_k = vmalloc_sync_one(__va(pgd_paddr), address); if (!pmd_k) return -1; if (pmd_leaf(*pmd_k)) return 0; pte_k = pte_offset_kernel(pmd_k, address); if (!pte_present(*pte_k)) return -1; return 0; } NOKPROBE_SYMBOL(vmalloc_fault); void arch_sync_kernel_mappings(unsigned long start, unsigned long end) { unsigned long addr; for (addr = start & PMD_MASK; addr >= TASK_SIZE_MAX && addr < VMALLOC_END; addr += PMD_SIZE) { struct page *page; spin_lock(&pgd_lock); list_for_each_entry(page, &pgd_list, lru) { spinlock_t *pgt_lock; /* the pgt_lock only for Xen */ pgt_lock = &pgd_page_get_mm(page)->page_table_lock; spin_lock(pgt_lock); vmalloc_sync_one(page_address(page), addr); spin_unlock(pgt_lock); } spin_unlock(&pgd_lock); } } static bool low_pfn(unsigned long pfn) { return pfn < max_low_pfn; } static void dump_pagetable(unsigned long address) { pgd_t *base = __va(read_cr3_pa()); pgd_t *pgd = &base[pgd_index(address)]; p4d_t *p4d; pud_t *pud; pmd_t *pmd; pte_t *pte; #ifdef CONFIG_X86_PAE pr_info("*pdpt = %016Lx ", pgd_val(*pgd)); if (!low_pfn(pgd_val(*pgd) >> PAGE_SHIFT) || !pgd_present(*pgd)) goto out; #define pr_pde pr_cont #else #define pr_pde pr_info #endif p4d = p4d_offset(pgd, address); pud = pud_offset(p4d, address); pmd = pmd_offset(pud, address); pr_pde("*pde = %0*Lx ", sizeof(*pmd) * 2, (u64)pmd_val(*pmd)); #undef pr_pde /* * We must not directly access the pte in the highpte * case if the page table is located in highmem. * And let's rather not kmap-atomic the pte, just in case * it's allocated already: */ if (!low_pfn(pmd_pfn(*pmd)) || !pmd_present(*pmd) || pmd_leaf(*pmd)) goto out; pte = pte_offset_kernel(pmd, address); pr_cont("*pte = %0*Lx ", sizeof(*pte) * 2, (u64)pte_val(*pte)); out: pr_cont("\n"); } #else /* CONFIG_X86_64: */ #ifdef CONFIG_CPU_SUP_AMD static const char errata93_warning[] = KERN_ERR "******* Your BIOS seems to not contain a fix for K8 errata #93\n" "******* Working around it, but it may cause SEGVs or burn power.\n" "******* Please consider a BIOS update.\n" "******* Disabling USB legacy in the BIOS may also help.\n"; #endif static int bad_address(void *p) { unsigned long dummy; return get_kernel_nofault(dummy, (unsigned long *)p); } static void dump_pagetable(unsigned long address) { pgd_t *base = __va(read_cr3_pa()); pgd_t *pgd = base + pgd_index(address); p4d_t *p4d; pud_t *pud; pmd_t *pmd; pte_t *pte; if (bad_address(pgd)) goto bad; pr_info("PGD %lx ", pgd_val(*pgd)); if (!pgd_present(*pgd)) goto out; p4d = p4d_offset(pgd, address); if (bad_address(p4d)) goto bad; pr_cont("P4D %lx ", p4d_val(*p4d)); if (!p4d_present(*p4d) || p4d_leaf(*p4d)) goto out; pud = pud_offset(p4d, address); if (bad_address(pud)) goto bad; pr_cont("PUD %lx ", pud_val(*pud)); if (!pud_present(*pud) || pud_leaf(*pud)) goto out; pmd = pmd_offset(pud, address); if (bad_address(pmd)) goto bad; pr_cont("PMD %lx ", pmd_val(*pmd)); if (!pmd_present(*pmd) || pmd_leaf(*pmd)) goto out; pte = pte_offset_kernel(pmd, address); if (bad_address(pte)) goto bad; pr_cont("PTE %lx", pte_val(*pte)); out: pr_cont("\n"); return; bad: pr_info("BAD\n"); } #endif /* CONFIG_X86_64 */ /* * Workaround for K8 erratum #93 & buggy BIOS. * * BIOS SMM functions are required to use a specific workaround * to avoid corruption of the 64bit RIP register on C stepping K8. * * A lot of BIOS that didn't get tested properly miss this. * * The OS sees this as a page fault with the upper 32bits of RIP cleared. * Try to work around it here. * * Note we only handle faults in kernel here. * Does nothing on 32-bit. */ static int is_errata93(struct pt_regs *regs, unsigned long address) { #if defined(CONFIG_X86_64) && defined(CONFIG_CPU_SUP_AMD) if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD || boot_cpu_data.x86 != 0xf) return 0; if (user_mode(regs)) return 0; if (address != regs->ip) return 0; if ((address >> 32) != 0) return 0; address |= 0xffffffffUL << 32; if ((address >= (u64)_stext && address <= (u64)_etext) || (address >= MODULES_VADDR && address <= MODULES_END)) { printk_once(errata93_warning); regs->ip = address; return 1; } #endif return 0; } /* * Work around K8 erratum #100 K8 in compat mode occasionally jumps * to illegal addresses >4GB. * * We catch this in the page fault handler because these addresses * are not reachable. Just detect this case and return. Any code * segment in LDT is compatibility mode. */ static int is_errata100(struct pt_regs *regs, unsigned long address) { #ifdef CONFIG_X86_64 if ((regs->cs == __USER32_CS || (regs->cs & (1<<2))) && (address >> 32)) return 1; #endif return 0; } /* Pentium F0 0F C7 C8 bug workaround: */ static int is_f00f_bug(struct pt_regs *regs, unsigned long error_code, unsigned long address) { #ifdef CONFIG_X86_F00F_BUG if (boot_cpu_has_bug(X86_BUG_F00F) && !(error_code & X86_PF_USER) && idt_is_f00f_address(address)) { handle_invalid_op(regs); return 1; } #endif return 0; } static void show_ldttss(const struct desc_ptr *gdt, const char *name, u16 index) { u32 offset = (index >> 3) * sizeof(struct desc_struct); unsigned long addr; struct ldttss_desc desc; if (index == 0) { pr_alert("%s: NULL\n", name); return; } if (offset + sizeof(struct ldttss_desc) >= gdt->size) { pr_alert("%s: 0x%hx -- out of bounds\n", name, index); return; } if (copy_from_kernel_nofault(&desc, (void *)(gdt->address + offset), sizeof(struct ldttss_desc))) { pr_alert("%s: 0x%hx -- GDT entry is not readable\n", name, index); return; } addr = desc.base0 | (desc.base1 << 16) | ((unsigned long)desc.base2 << 24); #ifdef CONFIG_X86_64 addr |= ((u64)desc.base3 << 32); #endif pr_alert("%s: 0x%hx -- base=0x%lx limit=0x%x\n", name, index, addr, (desc.limit0 | (desc.limit1 << 16))); } static void show_fault_oops(struct pt_regs *regs, unsigned long error_code, unsigned long address) { if (!oops_may_print()) return; if (error_code & X86_PF_INSTR) { unsigned int level; bool nx, rw; pgd_t *pgd; pte_t *pte; pgd = __va(read_cr3_pa()); pgd += pgd_index(address); pte = lookup_address_in_pgd_attr(pgd, address, &level, &nx, &rw); if (pte && pte_present(*pte) && (!pte_exec(*pte) || nx)) pr_crit("kernel tried to execute NX-protected page - exploit attempt? (uid: %d)\n", from_kuid(&init_user_ns, current_uid())); if (pte && pte_present(*pte) && pte_exec(*pte) && !nx && (pgd_flags(*pgd) & _PAGE_USER) && (__read_cr4() & X86_CR4_SMEP)) pr_crit("unable to execute userspace code (SMEP?) (uid: %d)\n", from_kuid(&init_user_ns, current_uid())); } if (address < PAGE_SIZE && !user_mode(regs)) pr_alert("BUG: kernel NULL pointer dereference, address: %px\n", (void *)address); else pr_alert("BUG: unable to handle page fault for address: %px\n", (void *)address); pr_alert("#PF: %s %s in %s mode\n", (error_code & X86_PF_USER) ? "user" : "supervisor", (error_code & X86_PF_INSTR) ? "instruction fetch" : (error_code & X86_PF_WRITE) ? "write access" : "read access", user_mode(regs) ? "user" : "kernel"); pr_alert("#PF: error_code(0x%04lx) - %s\n", error_code, !(error_code & X86_PF_PROT) ? "not-present page" : (error_code & X86_PF_RSVD) ? "reserved bit violation" : (error_code & X86_PF_PK) ? "protection keys violation" : (error_code & X86_PF_RMP) ? "RMP violation" : "permissions violation"); if (!(error_code & X86_PF_USER) && user_mode(regs)) { struct desc_ptr idt, gdt; u16 ldtr, tr; /* * This can happen for quite a few reasons. The more obvious * ones are faults accessing the GDT, or LDT. Perhaps * surprisingly, if the CPU tries to deliver a benign or * contributory exception from user code and gets a page fault * during delivery, the page fault can be delivered as though * it originated directly from user code. This could happen * due to wrong permissions on the IDT, GDT, LDT, TSS, or * kernel or IST stack. */ store_idt(&idt); /* Usable even on Xen PV -- it's just slow. */ native_store_gdt(&gdt); pr_alert("IDT: 0x%lx (limit=0x%hx) GDT: 0x%lx (limit=0x%hx)\n", idt.address, idt.size, gdt.address, gdt.size); store_ldt(ldtr); show_ldttss(&gdt, "LDTR", ldtr); store_tr(tr); show_ldttss(&gdt, "TR", tr); } dump_pagetable(address); if (error_code & X86_PF_RMP) snp_dump_hva_rmpentry(address); } static noinline void pgtable_bad(struct pt_regs *regs, unsigned long error_code, unsigned long address) { struct task_struct *tsk; unsigned long flags; int sig; flags = oops_begin(); tsk = current; sig = SIGKILL; printk(KERN_ALERT "%s: Corrupted page table at address %lx\n", tsk->comm, address); dump_pagetable(address); if (__die("Bad pagetable", regs, error_code)) sig = 0; oops_end(flags, regs, sig); } static void sanitize_error_code(unsigned long address, unsigned long *error_code) { /* * To avoid leaking information about the kernel page * table layout, pretend that user-mode accesses to * kernel addresses are always protection faults. * * NB: This means that failed vsyscalls with vsyscall=none * will have the PROT bit. This doesn't leak any * information and does not appear to cause any problems. */ if (address >= TASK_SIZE_MAX) *error_code |= X86_PF_PROT; } static void set_signal_archinfo(unsigned long address, unsigned long error_code) { struct task_struct *tsk = current; tsk->thread.trap_nr = X86_TRAP_PF; tsk->thread.error_code = error_code | X86_PF_USER; tsk->thread.cr2 = address; } static noinline void page_fault_oops(struct pt_regs *regs, unsigned long error_code, unsigned long address) { #ifdef CONFIG_VMAP_STACK struct stack_info info; #endif unsigned long flags; int sig; if (user_mode(regs)) { /* * Implicit kernel access from user mode? Skip the stack * overflow and EFI special cases. */ goto oops; } #ifdef CONFIG_VMAP_STACK /* * Stack overflow? During boot, we can fault near the initial * stack in the direct map, but that's not an overflow -- check * that we're in vmalloc space to avoid this. */ if (is_vmalloc_addr((void *)address) && get_stack_guard_info((void *)address, &info)) { /* * We're likely to be running with very little stack space * left. It's plausible that we'd hit this condition but * double-fault even before we get this far, in which case * we're fine: the double-fault handler will deal with it. * * We don't want to make it all the way into the oops code * and then double-fault, though, because we're likely to * break the console driver and lose most of the stack dump. */ call_on_stack(__this_cpu_ist_top_va(DF) - sizeof(void*), handle_stack_overflow, ASM_CALL_ARG3, , [arg1] "r" (regs), [arg2] "r" (address), [arg3] "r" (&info)); BUG(); } #endif /* * Buggy firmware could access regions which might page fault. If * this happens, EFI has a special OOPS path that will try to * avoid hanging the system. */ if (IS_ENABLED(CONFIG_EFI)) efi_crash_gracefully_on_page_fault(address); /* Only not-present faults should be handled by KFENCE. */ if (!(error_code & X86_PF_PROT) && kfence_handle_page_fault(address, error_code & X86_PF_WRITE, regs)) return; oops: /* * Oops. The kernel tried to access some bad page. We'll have to * terminate things with extreme prejudice: */ flags = oops_begin(); show_fault_oops(regs, error_code, address); if (task_stack_end_corrupted(current)) printk(KERN_EMERG "Thread overran stack, or stack corrupted\n"); sig = SIGKILL; if (__die("Oops", regs, error_code)) sig = 0; /* Executive summary in case the body of the oops scrolled away */ printk(KERN_DEFAULT "CR2: %016lx\n", address); oops_end(flags, regs, sig); } static noinline void kernelmode_fixup_or_oops(struct pt_regs *regs, unsigned long error_code, unsigned long address, int signal, int si_code, u32 pkey) { WARN_ON_ONCE(user_mode(regs)); /* Are we prepared to handle this kernel fault? */ if (fixup_exception(regs, X86_TRAP_PF, error_code, address)) return; /* * AMD erratum #91 manifests as a spurious page fault on a PREFETCH * instruction. */ if (is_prefetch(regs, error_code, address)) return; page_fault_oops(regs, error_code, address); } /* * Print out info about fatal segfaults, if the show_unhandled_signals * sysctl is set: */ static inline void show_signal_msg(struct pt_regs *regs, unsigned long error_code, unsigned long address, struct task_struct *tsk) { const char *loglvl = task_pid_nr(tsk) > 1 ? KERN_INFO : KERN_EMERG; /* This is a racy snapshot, but it's better than nothing. */ int cpu = raw_smp_processor_id(); if (!unhandled_signal(tsk, SIGSEGV)) return; if (!printk_ratelimit()) return; printk("%s%s[%d]: segfault at %lx ip %px sp %px error %lx", loglvl, tsk->comm, task_pid_nr(tsk), address, (void *)regs->ip, (void *)regs->sp, error_code); print_vma_addr(KERN_CONT " in ", regs->ip); /* * Dump the likely CPU where the fatal segfault happened. * This can help identify faulty hardware. */ printk(KERN_CONT " likely on CPU %d (core %d, socket %d)", cpu, topology_core_id(cpu), topology_physical_package_id(cpu)); printk(KERN_CONT "\n"); show_opcodes(regs, loglvl); } static void __bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code, unsigned long address, u32 pkey, int si_code) { struct task_struct *tsk = current; if (!user_mode(regs)) { kernelmode_fixup_or_oops(regs, error_code, address, SIGSEGV, si_code, pkey); return; } if (!(error_code & X86_PF_USER)) { /* Implicit user access to kernel memory -- just oops */ page_fault_oops(regs, error_code, address); return; } /* * User mode accesses just cause a SIGSEGV. * It's possible to have interrupts off here: */ local_irq_enable(); /* * Valid to do another page fault here because this one came * from user space: */ if (is_prefetch(regs, error_code, address)) return; if (is_errata100(regs, address)) return; sanitize_error_code(address, &error_code); if (fixup_vdso_exception(regs, X86_TRAP_PF, error_code, address)) return; if (likely(show_unhandled_signals)) show_signal_msg(regs, error_code, address, tsk); set_signal_archinfo(address, error_code); if (si_code == SEGV_PKUERR) force_sig_pkuerr((void __user *)address, pkey); else force_sig_fault(SIGSEGV, si_code, (void __user *)address); local_irq_disable(); } static noinline void bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code, unsigned long address) { __bad_area_nosemaphore(regs, error_code, address, 0, SEGV_MAPERR); } static void __bad_area(struct pt_regs *regs, unsigned long error_code, unsigned long address, struct mm_struct *mm, struct vm_area_struct *vma, u32 pkey, int si_code) { /* * Something tried to access memory that isn't in our memory map.. * Fix it, but check if it's kernel or user first.. */ if (mm) mmap_read_unlock(mm); else vma_end_read(vma); __bad_area_nosemaphore(regs, error_code, address, pkey, si_code); } static inline bool bad_area_access_from_pkeys(unsigned long error_code, struct vm_area_struct *vma) { /* This code is always called on the current mm */ bool foreign = false; if (!cpu_feature_enabled(X86_FEATURE_OSPKE)) return false; if (error_code & X86_PF_PK) return true; /* this checks permission keys on the VMA: */ if (!arch_vma_access_permitted(vma, (error_code & X86_PF_WRITE), (error_code & X86_PF_INSTR), foreign)) return true; return false; } static noinline void bad_area_access_error(struct pt_regs *regs, unsigned long error_code, unsigned long address, struct mm_struct *mm, struct vm_area_struct *vma) { /* * This OSPKE check is not strictly necessary at runtime. * But, doing it this way allows compiler optimizations * if pkeys are compiled out. */ if (bad_area_access_from_pkeys(error_code, vma)) { /* * A protection key fault means that the PKRU value did not allow * access to some PTE. Userspace can figure out what PKRU was * from the XSAVE state. This function captures the pkey from * the vma and passes it to userspace so userspace can discover * which protection key was set on the PTE. * * If we get here, we know that the hardware signaled a X86_PF_PK * fault and that there was a VMA once we got in the fault * handler. It does *not* guarantee that the VMA we find here * was the one that we faulted on. * * 1. T1 : mprotect_key(foo, PAGE_SIZE, pkey=4); * 2. T1 : set PKRU to deny access to pkey=4, touches page * 3. T1 : faults... * 4. T2: mprotect_key(foo, PAGE_SIZE, pkey=5); * 5. T1 : enters fault handler, takes mmap_lock, etc... * 6. T1 : reaches here, sees vma_pkey(vma)=5, when we really * faulted on a pte with its pkey=4. */ u32 pkey = vma_pkey(vma); __bad_area(regs, error_code, address, mm, vma, pkey, SEGV_PKUERR); } else { __bad_area(regs, error_code, address, mm, vma, 0, SEGV_ACCERR); } } static void do_sigbus(struct pt_regs *regs, unsigned long error_code, unsigned long address, vm_fault_t fault) { /* Kernel mode? Handle exceptions or die: */ if (!user_mode(regs)) { kernelmode_fixup_or_oops(regs, error_code, address, SIGBUS, BUS_ADRERR, ARCH_DEFAULT_PKEY); return; } /* User-space => ok to do another page fault: */ if (is_prefetch(regs, error_code, address)) return; sanitize_error_code(address, &error_code); if (fixup_vdso_exception(regs, X86_TRAP_PF, error_code, address)) return; set_signal_archinfo(address, error_code); #ifdef CONFIG_MEMORY_FAILURE if (fault & (VM_FAULT_HWPOISON|VM_FAULT_HWPOISON_LARGE)) { struct task_struct *tsk = current; unsigned lsb = 0; pr_err( "MCE: Killing %s:%d due to hardware memory corruption fault at %lx\n", tsk->comm, tsk->pid, address); if (fault & VM_FAULT_HWPOISON_LARGE) lsb = hstate_index_to_shift(VM_FAULT_GET_HINDEX(fault)); if (fault & VM_FAULT_HWPOISON) lsb = PAGE_SHIFT; force_sig_mceerr(BUS_MCEERR_AR, (void __user *)address, lsb); return; } #endif force_sig_fault(SIGBUS, BUS_ADRERR, (void __user *)address); } static int spurious_kernel_fault_check(unsigned long error_code, pte_t *pte) { if ((error_code & X86_PF_WRITE) && !pte_write(*pte)) return 0; if ((error_code & X86_PF_INSTR) && !pte_exec(*pte)) return 0; return 1; } /* * Handle a spurious fault caused by a stale TLB entry. * * This allows us to lazily refresh the TLB when increasing the * permissions of a kernel page (RO -> RW or NX -> X). Doing it * eagerly is very expensive since that implies doing a full * cross-processor TLB flush, even if no stale TLB entries exist * on other processors. * * Spurious faults may only occur if the TLB contains an entry with * fewer permission than the page table entry. Non-present (P = 0) * and reserved bit (R = 1) faults are never spurious. * * There are no security implications to leaving a stale TLB when * increasing the permissions on a page. * * Returns non-zero if a spurious fault was handled, zero otherwise. * * See Intel Developer's Manual Vol 3 Section 4.10.4.3, bullet 3 * (Optional Invalidation). */ static noinline int spurious_kernel_fault(unsigned long error_code, unsigned long address) { pgd_t *pgd; p4d_t *p4d; pud_t *pud; pmd_t *pmd; pte_t *pte; int ret; /* * Only writes to RO or instruction fetches from NX may cause * spurious faults. * * These could be from user or supervisor accesses but the TLB * is only lazily flushed after a kernel mapping protection * change, so user accesses are not expected to cause spurious * faults. */ if (error_code != (X86_PF_WRITE | X86_PF_PROT) && error_code != (X86_PF_INSTR | X86_PF_PROT)) return 0; pgd = init_mm.pgd + pgd_index(address); if (!pgd_present(*pgd)) return 0; p4d = p4d_offset(pgd, address); if (!p4d_present(*p4d)) return 0; if (p4d_leaf(*p4d)) return spurious_kernel_fault_check(error_code, (pte_t *) p4d); pud = pud_offset(p4d, address); if (!pud_present(*pud)) return 0; if (pud_leaf(*pud)) return spurious_kernel_fault_check(error_code, (pte_t *) pud); pmd = pmd_offset(pud, address); if (!pmd_present(*pmd)) return 0; if (pmd_leaf(*pmd)) return spurious_kernel_fault_check(error_code, (pte_t *) pmd); pte = pte_offset_kernel(pmd, address); if (!pte_present(*pte)) return 0; ret = spurious_kernel_fault_check(error_code, pte); if (!ret) return 0; /* * Make sure we have permissions in PMD. * If not, then there's a bug in the page tables: */ ret = spurious_kernel_fault_check(error_code, (pte_t *) pmd); WARN_ONCE(!ret, "PMD has incorrect permission bits\n"); return ret; } NOKPROBE_SYMBOL(spurious_kernel_fault); int show_unhandled_signals = 1; static inline int access_error(unsigned long error_code, struct vm_area_struct *vma) { /* This is only called for the current mm, so: */ bool foreign = false; /* * Read or write was blocked by protection keys. This is * always an unconditional error and can never result in * a follow-up action to resolve the fault, like a COW. */ if (error_code & X86_PF_PK) return 1; /* * SGX hardware blocked the access. This usually happens * when the enclave memory contents have been destroyed, like * after a suspend/resume cycle. In any case, the kernel can't * fix the cause of the fault. Handle the fault as an access * error even in cases where no actual access violation * occurred. This allows userspace to rebuild the enclave in * response to the signal. */ if (unlikely(error_code & X86_PF_SGX)) return 1; /* * Make sure to check the VMA so that we do not perform * faults just to hit a X86_PF_PK as soon as we fill in a * page. */ if (!arch_vma_access_permitted(vma, (error_code & X86_PF_WRITE), (error_code & X86_PF_INSTR), foreign)) return 1; /* * Shadow stack accesses (PF_SHSTK=1) are only permitted to * shadow stack VMAs. All other accesses result in an error. */ if (error_code & X86_PF_SHSTK) { if (unlikely(!(vma->vm_flags & VM_SHADOW_STACK))) return 1; if (unlikely(!(vma->vm_flags & VM_WRITE))) return 1; return 0; } if (error_code & X86_PF_WRITE) { /* write, present and write, not present: */ if (unlikely(vma->vm_flags & VM_SHADOW_STACK)) return 1; if (unlikely(!(vma->vm_flags & VM_WRITE))) return 1; return 0; } /* read, present: */ if (unlikely(error_code & X86_PF_PROT)) return 1; /* read, not present: */ if (unlikely(!vma_is_accessible(vma))) return 1; return 0; } bool fault_in_kernel_space(unsigned long address) { /* * On 64-bit systems, the vsyscall page is at an address above * TASK_SIZE_MAX, but is not considered part of the kernel * address space. */ if (IS_ENABLED(CONFIG_X86_64) && is_vsyscall_vaddr(address)) return false; return address >= TASK_SIZE_MAX; } /* * Called for all faults where 'address' is part of the kernel address * space. Might get called for faults that originate from *code* that * ran in userspace or the kernel. */ static void do_kern_addr_fault(struct pt_regs *regs, unsigned long hw_error_code, unsigned long address) { /* * Protection keys exceptions only happen on user pages. We * have no user pages in the kernel portion of the address * space, so do not expect them here. */ WARN_ON_ONCE(hw_error_code & X86_PF_PK); #ifdef CONFIG_X86_32 /* * We can fault-in kernel-space virtual memory on-demand. The * 'reference' page table is init_mm.pgd. * * NOTE! We MUST NOT take any locks for this case. We may * be in an interrupt or a critical region, and should * only copy the information from the master page table, * nothing more. * * Before doing this on-demand faulting, ensure that the * fault is not any of the following: * 1. A fault on a PTE with a reserved bit set. * 2. A fault caused by a user-mode access. (Do not demand- * fault kernel memory due to user-mode accesses). * 3. A fault caused by a page-level protection violation. * (A demand fault would be on a non-present page which * would have X86_PF_PROT==0). * * This is only needed to close a race condition on x86-32 in * the vmalloc mapping/unmapping code. See the comment above * vmalloc_fault() for details. On x86-64 the race does not * exist as the vmalloc mappings don't need to be synchronized * there. */ if (!(hw_error_code & (X86_PF_RSVD | X86_PF_USER | X86_PF_PROT))) { if (vmalloc_fault(address) >= 0) return; } #endif if (is_f00f_bug(regs, hw_error_code, address)) return; /* Was the fault spurious, caused by lazy TLB invalidation? */ if (spurious_kernel_fault(hw_error_code, address)) return; /* kprobes don't want to hook the spurious faults: */ if (WARN_ON_ONCE(kprobe_page_fault(regs, X86_TRAP_PF))) return; /* * Note, despite being a "bad area", there are quite a few * acceptable reasons to get here, such as erratum fixups * and handling kernel code that can fault, like get_user(). * * Don't take the mm semaphore here. If we fixup a prefetch * fault we could otherwise deadlock: */ bad_area_nosemaphore(regs, hw_error_code, address); } NOKPROBE_SYMBOL(do_kern_addr_fault); /* * Handle faults in the user portion of the address space. Nothing in here * should check X86_PF_USER without a specific justification: for almost * all purposes, we should treat a normal kernel access to user memory * (e.g. get_user(), put_user(), etc.) the same as the WRUSS instruction. * The one exception is AC flag handling, which is, per the x86 * architecture, special for WRUSS. */ static inline void do_user_addr_fault(struct pt_regs *regs, unsigned long error_code, unsigned long address) { struct vm_area_struct *vma; struct task_struct *tsk; struct mm_struct *mm; vm_fault_t fault; unsigned int flags = FAULT_FLAG_DEFAULT; tsk = current; mm = tsk->mm; if (unlikely((error_code & (X86_PF_USER | X86_PF_INSTR)) == X86_PF_INSTR)) { /* * Whoops, this is kernel mode code trying to execute from * user memory. Unless this is AMD erratum #93, which * corrupts RIP such that it looks like a user address, * this is unrecoverable. Don't even try to look up the * VMA or look for extable entries. */ if (is_errata93(regs, address)) return; page_fault_oops(regs, error_code, address); return; } /* kprobes don't want to hook the spurious faults: */ if (WARN_ON_ONCE(kprobe_page_fault(regs, X86_TRAP_PF))) return; /* * Reserved bits are never expected to be set on * entries in the user portion of the page tables. */ if (unlikely(error_code & X86_PF_RSVD)) pgtable_bad(regs, error_code, address); /* * If SMAP is on, check for invalid kernel (supervisor) access to user * pages in the user address space. The odd case here is WRUSS, * which, according to the preliminary documentation, does not respect * SMAP and will have the USER bit set so, in all cases, SMAP * enforcement appears to be consistent with the USER bit. */ if (unlikely(cpu_feature_enabled(X86_FEATURE_SMAP) && !(error_code & X86_PF_USER) && !(regs->flags & X86_EFLAGS_AC))) { /* * No extable entry here. This was a kernel access to an * invalid pointer. get_kernel_nofault() will not get here. */ page_fault_oops(regs, error_code, address); return; } /* * If we're in an interrupt, have no user context or are running * in a region with pagefaults disabled then we must not take the fault */ if (unlikely(faulthandler_disabled() || !mm)) { bad_area_nosemaphore(regs, error_code, address); return; } /* Legacy check - remove this after verifying that it doesn't trigger */ if (WARN_ON_ONCE(!(regs->flags & X86_EFLAGS_IF))) { bad_area_nosemaphore(regs, error_code, address); return; } local_irq_enable(); perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address); /* * Read-only permissions can not be expressed in shadow stack PTEs. * Treat all shadow stack accesses as WRITE faults. This ensures * that the MM will prepare everything (e.g., break COW) such that * maybe_mkwrite() can create a proper shadow stack PTE. */ if (error_code & X86_PF_SHSTK) flags |= FAULT_FLAG_WRITE; if (error_code & X86_PF_WRITE) flags |= FAULT_FLAG_WRITE; if (error_code & X86_PF_INSTR) flags |= FAULT_FLAG_INSTRUCTION; /* * We set FAULT_FLAG_USER based on the register state, not * based on X86_PF_USER. User space accesses that cause * system page faults are still user accesses. */ if (user_mode(regs)) flags |= FAULT_FLAG_USER; #ifdef CONFIG_X86_64 /* * Faults in the vsyscall page might need emulation. The * vsyscall page is at a high address (>PAGE_OFFSET), but is * considered to be part of the user address space. * * The vsyscall page does not have a "real" VMA, so do this * emulation before we go searching for VMAs. * * PKRU never rejects instruction fetches, so we don't need * to consider the PF_PK bit. */ if (is_vsyscall_vaddr(address)) { if (emulate_vsyscall(error_code, regs, address)) return; } #endif if (!(flags & FAULT_FLAG_USER)) goto lock_mmap; vma = lock_vma_under_rcu(mm, address); if (!vma) goto lock_mmap; if (unlikely(access_error(error_code, vma))) { bad_area_access_error(regs, error_code, address, NULL, vma); count_vm_vma_lock_event(VMA_LOCK_SUCCESS); return; } fault = handle_mm_fault(vma, address, flags | FAULT_FLAG_VMA_LOCK, regs); if (!(fault & (VM_FAULT_RETRY | VM_FAULT_COMPLETED))) vma_end_read(vma); if (!(fault & VM_FAULT_RETRY)) { count_vm_vma_lock_event(VMA_LOCK_SUCCESS); goto done; } count_vm_vma_lock_event(VMA_LOCK_RETRY); if (fault & VM_FAULT_MAJOR) flags |= FAULT_FLAG_TRIED; /* Quick path to respond to signals */ if (fault_signal_pending(fault, regs)) { if (!user_mode(regs)) kernelmode_fixup_or_oops(regs, error_code, address, SIGBUS, BUS_ADRERR, ARCH_DEFAULT_PKEY); return; } lock_mmap: retry: vma = lock_mm_and_find_vma(mm, address, regs); if (unlikely(!vma)) { bad_area_nosemaphore(regs, error_code, address); return; } /* * Ok, we have a good vm_area for this memory access, so * we can handle it.. */ if (unlikely(access_error(error_code, vma))) { bad_area_access_error(regs, error_code, address, mm, vma); return; } /* * If for any reason at all we couldn't handle the fault, * make sure we exit gracefully rather than endlessly redo * the fault. Since we never set FAULT_FLAG_RETRY_NOWAIT, if * we get VM_FAULT_RETRY back, the mmap_lock has been unlocked. * * Note that handle_userfault() may also release and reacquire mmap_lock * (and not return with VM_FAULT_RETRY), when returning to userland to * repeat the page fault later with a VM_FAULT_NOPAGE retval * (potentially after handling any pending signal during the return to * userland). The return to userland is identified whenever * FAULT_FLAG_USER|FAULT_FLAG_KILLABLE are both set in flags. */ fault = handle_mm_fault(vma, address, flags, regs); if (fault_signal_pending(fault, regs)) { /* * Quick path to respond to signals. The core mm code * has unlocked the mm for us if we get here. */ if (!user_mode(regs)) kernelmode_fixup_or_oops(regs, error_code, address, SIGBUS, BUS_ADRERR, ARCH_DEFAULT_PKEY); return; } /* The fault is fully completed (including releasing mmap lock) */ if (fault & VM_FAULT_COMPLETED) return; /* * If we need to retry the mmap_lock has already been released, * and if there is a fatal signal pending there is no guarantee * that we made any progress. Handle this case first. */ if (unlikely(fault & VM_FAULT_RETRY)) { flags |= FAULT_FLAG_TRIED; goto retry; } mmap_read_unlock(mm); done: if (likely(!(fault & VM_FAULT_ERROR))) return; if (fatal_signal_pending(current) && !user_mode(regs)) { kernelmode_fixup_or_oops(regs, error_code, address, 0, 0, ARCH_DEFAULT_PKEY); return; } if (fault & VM_FAULT_OOM) { /* Kernel mode? Handle exceptions or die: */ if (!user_mode(regs)) { kernelmode_fixup_or_oops(regs, error_code, address, SIGSEGV, SEGV_MAPERR, ARCH_DEFAULT_PKEY); return; } /* * We ran out of memory, call the OOM killer, and return the * userspace (which will retry the fault, or kill us if we got * oom-killed): */ pagefault_out_of_memory(); } else { if (fault & (VM_FAULT_SIGBUS|VM_FAULT_HWPOISON| VM_FAULT_HWPOISON_LARGE)) do_sigbus(regs, error_code, address, fault); else if (fault & VM_FAULT_SIGSEGV) bad_area_nosemaphore(regs, error_code, address); else BUG(); } } NOKPROBE_SYMBOL(do_user_addr_fault); static __always_inline void trace_page_fault_entries(struct pt_regs *regs, unsigned long error_code, unsigned long address) { if (!trace_pagefault_enabled()) return; if (user_mode(regs)) trace_page_fault_user(address, regs, error_code); else trace_page_fault_kernel(address, regs, error_code); } static __always_inline void handle_page_fault(struct pt_regs *regs, unsigned long error_code, unsigned long address) { trace_page_fault_entries(regs, error_code, address); if (unlikely(kmmio_fault(regs, address))) return; /* Was the fault on kernel-controlled part of the address space? */ if (unlikely(fault_in_kernel_space(address))) { do_kern_addr_fault(regs, error_code, address); } else { do_user_addr_fault(regs, error_code, address); /* * User address page fault handling might have reenabled * interrupts. Fixing up all potential exit points of * do_user_addr_fault() and its leaf functions is just not * doable w/o creating an unholy mess or turning the code * upside down. */ local_irq_disable(); } } DEFINE_IDTENTRY_RAW_ERRORCODE(exc_page_fault) { irqentry_state_t state; unsigned long address; address = cpu_feature_enabled(X86_FEATURE_FRED) ? fred_event_data(regs) : read_cr2(); prefetchw(&current->mm->mmap_lock); /* * KVM uses #PF vector to deliver 'page not present' events to guests * (asynchronous page fault mechanism). The event happens when a * userspace task is trying to access some valid (from guest's point of * view) memory which is not currently mapped by the host (e.g. the * memory is swapped out). Note, the corresponding "page ready" event * which is injected when the memory becomes available, is delivered via * an interrupt mechanism and not a #PF exception * (see arch/x86/kernel/kvm.c: sysvec_kvm_asyncpf_interrupt()). * * We are relying on the interrupted context being sane (valid RSP, * relevant locks not held, etc.), which is fine as long as the * interrupted context had IF=1. We are also relying on the KVM * async pf type field and CR2 being read consistently instead of * getting values from real and async page faults mixed up. * * Fingers crossed. * * The async #PF handling code takes care of idtentry handling * itself. */ if (kvm_handle_async_pf(regs, (u32)address)) return; /* * Entry handling for valid #PF from kernel mode is slightly * different: RCU is already watching and ct_irq_enter() must not * be invoked because a kernel fault on a user space address might * sleep. * * In case the fault hit a RCU idle region the conditional entry * code reenabled RCU to avoid subsequent wreckage which helps * debuggability. */ state = irqentry_enter(regs); instrumentation_begin(); handle_page_fault(regs, error_code, address); instrumentation_end(); irqentry_exit(regs, state); }
39 5 5 34 39 13 13 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 /* SPDX-License-Identifier: GPL-2.0 */ /* Copyright (C) B.A.T.M.A.N. contributors: * * Simon Wunderlich, Marek Lindner */ #ifndef _NET_BATMAN_ADV_HASH_H_ #define _NET_BATMAN_ADV_HASH_H_ #include "main.h" #include <linux/atomic.h> #include <linux/compiler.h> #include <linux/list.h> #include <linux/lockdep.h> #include <linux/rculist.h> #include <linux/spinlock.h> #include <linux/stddef.h> #include <linux/types.h> /* callback to a compare function. should compare 2 element data for their * keys * * Return: true if same and false if not same */ typedef bool (*batadv_hashdata_compare_cb)(const struct hlist_node *, const void *); /* the hashfunction * * Return: an index based on the key in the data of the first argument and the * size the second */ typedef u32 (*batadv_hashdata_choose_cb)(const void *, u32); typedef void (*batadv_hashdata_free_cb)(struct hlist_node *, void *); /** * struct batadv_hashtable - Wrapper of simple hlist based hashtable */ struct batadv_hashtable { /** @table: the hashtable itself with the buckets */ struct hlist_head *table; /** @list_locks: spinlock for each hash list entry */ spinlock_t *list_locks; /** @size: size of hashtable */ u32 size; /** @generation: current (generation) sequence number */ atomic_t generation; }; /* allocates and clears the hash */ struct batadv_hashtable *batadv_hash_new(u32 size); /* set class key for all locks */ void batadv_hash_set_lock_class(struct batadv_hashtable *hash, struct lock_class_key *key); /* free only the hashtable and the hash itself. */ void batadv_hash_destroy(struct batadv_hashtable *hash); /** * batadv_hash_add() - adds data to the hashtable * @hash: storage hash table * @compare: callback to determine if 2 hash elements are identical * @choose: callback calculating the hash index * @data: data passed to the aforementioned callbacks as argument * @data_node: to be added element * * Return: 0 on success, 1 if the element already is in the hash * and -1 on error. */ static inline int batadv_hash_add(struct batadv_hashtable *hash, batadv_hashdata_compare_cb compare, batadv_hashdata_choose_cb choose, const void *data, struct hlist_node *data_node) { u32 index; int ret = -1; struct hlist_head *head; struct hlist_node *node; spinlock_t *list_lock; /* spinlock to protect write access */ if (!hash) goto out; index = choose(data, hash->size); head = &hash->table[index]; list_lock = &hash->list_locks[index]; spin_lock_bh(list_lock); hlist_for_each(node, head) { if (!compare(node, data)) continue; ret = 1; goto unlock; } /* no duplicate found in list, add new element */ hlist_add_head_rcu(data_node, head); atomic_inc(&hash->generation); ret = 0; unlock: spin_unlock_bh(list_lock); out: return ret; } /** * batadv_hash_remove() - Removes data from hash, if found * @hash: hash table * @compare: callback to determine if 2 hash elements are identical * @choose: callback calculating the hash index * @data: data passed to the aforementioned callbacks as argument * * ata could be the structure you use with just the key filled, we just need * the key for comparing. * * Return: returns pointer do data on success, so you can remove the used * structure yourself, or NULL on error */ static inline void *batadv_hash_remove(struct batadv_hashtable *hash, batadv_hashdata_compare_cb compare, batadv_hashdata_choose_cb choose, void *data) { u32 index; struct hlist_node *node; struct hlist_head *head; void *data_save = NULL; index = choose(data, hash->size); head = &hash->table[index]; spin_lock_bh(&hash->list_locks[index]); hlist_for_each(node, head) { if (!compare(node, data)) continue; data_save = node; hlist_del_rcu(node); atomic_inc(&hash->generation); break; } spin_unlock_bh(&hash->list_locks[index]); return data_save; } #endif /* _NET_BATMAN_ADV_HASH_H_ */
1 7 1 2 1 2 1 2 2 1 2 1 1 1 1 1 12 1 11 11 10 12 3 2 1 3 7 1 7 7 7 7 1 1 1 4 2 3 4 1 1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 // SPDX-License-Identifier: GPL-2.0-or-later /* * vimc-capture.c Virtual Media Controller Driver * * Copyright (C) 2015-2017 Helen Koike <helen.fornazier@gmail.com> */ #include <media/v4l2-ioctl.h> #include <media/videobuf2-core.h> #include <media/videobuf2-dma-contig.h> #include <media/videobuf2-vmalloc.h> #include "vimc-common.h" #include "vimc-streamer.h" struct vimc_capture_device { struct vimc_ent_device ved; struct video_device vdev; struct v4l2_pix_format format; struct vb2_queue queue; struct list_head buf_list; /* * NOTE: in a real driver, a spin lock must be used to access the * queue because the frames are generated from a hardware interruption * and the isr is not allowed to sleep. * Even if it is not necessary a spinlock in the vimc driver, we * use it here as a code reference */ spinlock_t qlock; struct mutex lock; u32 sequence; struct vimc_stream stream; struct media_pad pad; }; static const struct v4l2_pix_format fmt_default = { .width = 640, .height = 480, .pixelformat = V4L2_PIX_FMT_RGB24, .field = V4L2_FIELD_NONE, .colorspace = V4L2_COLORSPACE_SRGB, }; struct vimc_capture_buffer { /* * struct vb2_v4l2_buffer must be the first element * the videobuf2 framework will allocate this struct based on * buf_struct_size and use the first sizeof(struct vb2_buffer) bytes of * memory as a vb2_buffer */ struct vb2_v4l2_buffer vb2; struct list_head list; }; static int vimc_capture_querycap(struct file *file, void *priv, struct v4l2_capability *cap) { strscpy(cap->driver, VIMC_PDEV_NAME, sizeof(cap->driver)); strscpy(cap->card, KBUILD_MODNAME, sizeof(cap->card)); snprintf(cap->bus_info, sizeof(cap->bus_info), "platform:%s", VIMC_PDEV_NAME); return 0; } static void vimc_capture_get_format(struct vimc_ent_device *ved, struct v4l2_pix_format *fmt) { struct vimc_capture_device *vcapture = container_of(ved, struct vimc_capture_device, ved); *fmt = vcapture->format; } static int vimc_capture_g_fmt_vid_cap(struct file *file, void *priv, struct v4l2_format *f) { struct vimc_capture_device *vcapture = video_drvdata(file); f->fmt.pix = vcapture->format; return 0; } static int vimc_capture_try_fmt_vid_cap(struct file *file, void *priv, struct v4l2_format *f) { struct v4l2_pix_format *format = &f->fmt.pix; const struct vimc_pix_map *vpix; format->width = clamp_t(u32, format->width, VIMC_FRAME_MIN_WIDTH, VIMC_FRAME_MAX_WIDTH) & ~1; format->height = clamp_t(u32, format->height, VIMC_FRAME_MIN_HEIGHT, VIMC_FRAME_MAX_HEIGHT) & ~1; /* Don't accept a pixelformat that is not on the table */ vpix = vimc_pix_map_by_pixelformat(format->pixelformat); if (!vpix) { format->pixelformat = fmt_default.pixelformat; vpix = vimc_pix_map_by_pixelformat(format->pixelformat); } /* TODO: Add support for custom bytesperline values */ format->bytesperline = format->width * vpix->bpp; format->sizeimage = format->bytesperline * format->height; if (format->field == V4L2_FIELD_ANY) format->field = fmt_default.field; vimc_colorimetry_clamp(format); if (format->colorspace == V4L2_COLORSPACE_DEFAULT) format->colorspace = fmt_default.colorspace; return 0; } static int vimc_capture_s_fmt_vid_cap(struct file *file, void *priv, struct v4l2_format *f) { struct vimc_capture_device *vcapture = video_drvdata(file); int ret; /* Do not change the format while stream is on */ if (vb2_is_busy(&vcapture->queue)) return -EBUSY; ret = vimc_capture_try_fmt_vid_cap(file, priv, f); if (ret) return ret; dev_dbg(vcapture->ved.dev, "%s: format update: " "old:%dx%d (0x%x, %d, %d, %d, %d) " "new:%dx%d (0x%x, %d, %d, %d, %d)\n", vcapture->vdev.name, /* old */ vcapture->format.width, vcapture->format.height, vcapture->format.pixelformat, vcapture->format.colorspace, vcapture->format.quantization, vcapture->format.xfer_func, vcapture->format.ycbcr_enc, /* new */ f->fmt.pix.width, f->fmt.pix.height, f->fmt.pix.pixelformat, f->fmt.pix.colorspace, f->fmt.pix.quantization, f->fmt.pix.xfer_func, f->fmt.pix.ycbcr_enc); vcapture->format = f->fmt.pix; return 0; } static int vimc_capture_enum_fmt_vid_cap(struct file *file, void *priv, struct v4l2_fmtdesc *f) { const struct vimc_pix_map *vpix; if (f->mbus_code) { if (f->index > 0) return -EINVAL; vpix = vimc_pix_map_by_code(f->mbus_code); } else { vpix = vimc_pix_map_by_index(f->index); } if (!vpix) return -EINVAL; f->pixelformat = vpix->pixelformat; return 0; } static int vimc_capture_enum_framesizes(struct file *file, void *fh, struct v4l2_frmsizeenum *fsize) { const struct vimc_pix_map *vpix; if (fsize->index) return -EINVAL; /* Only accept code in the pix map table */ vpix = vimc_pix_map_by_code(fsize->pixel_format); if (!vpix) return -EINVAL; fsize->type = V4L2_FRMSIZE_TYPE_CONTINUOUS; fsize->stepwise.min_width = VIMC_FRAME_MIN_WIDTH; fsize->stepwise.max_width = VIMC_FRAME_MAX_WIDTH; fsize->stepwise.min_height = VIMC_FRAME_MIN_HEIGHT; fsize->stepwise.max_height = VIMC_FRAME_MAX_HEIGHT; fsize->stepwise.step_width = 1; fsize->stepwise.step_height = 1; return 0; } static const struct v4l2_file_operations vimc_capture_fops = { .owner = THIS_MODULE, .open = v4l2_fh_open, .release = vb2_fop_release, .read = vb2_fop_read, .poll = vb2_fop_poll, .unlocked_ioctl = video_ioctl2, .mmap = vb2_fop_mmap, }; static const struct v4l2_ioctl_ops vimc_capture_ioctl_ops = { .vidioc_querycap = vimc_capture_querycap, .vidioc_g_fmt_vid_cap = vimc_capture_g_fmt_vid_cap, .vidioc_s_fmt_vid_cap = vimc_capture_s_fmt_vid_cap, .vidioc_try_fmt_vid_cap = vimc_capture_try_fmt_vid_cap, .vidioc_enum_fmt_vid_cap = vimc_capture_enum_fmt_vid_cap, .vidioc_enum_framesizes = vimc_capture_enum_framesizes, .vidioc_reqbufs = vb2_ioctl_reqbufs, .vidioc_create_bufs = vb2_ioctl_create_bufs, .vidioc_prepare_buf = vb2_ioctl_prepare_buf, .vidioc_querybuf = vb2_ioctl_querybuf, .vidioc_qbuf = vb2_ioctl_qbuf, .vidioc_dqbuf = vb2_ioctl_dqbuf, .vidioc_expbuf = vb2_ioctl_expbuf, .vidioc_streamon = vb2_ioctl_streamon, .vidioc_streamoff = vb2_ioctl_streamoff, .vidioc_remove_bufs = vb2_ioctl_remove_bufs, }; static void vimc_capture_return_all_buffers(struct vimc_capture_device *vcapture, enum vb2_buffer_state state) { struct vimc_capture_buffer *vbuf, *node; spin_lock(&vcapture->qlock); list_for_each_entry_safe(vbuf, node, &vcapture->buf_list, list) { list_del(&vbuf->list); vb2_buffer_done(&vbuf->vb2.vb2_buf, state); } spin_unlock(&vcapture->qlock); } static int vimc_capture_start_streaming(struct vb2_queue *vq, unsigned int count) { struct vimc_capture_device *vcapture = vb2_get_drv_priv(vq); int ret; vcapture->sequence = 0; /* Start the media pipeline */ ret = video_device_pipeline_start(&vcapture->vdev, &vcapture->stream.pipe); if (ret) { vimc_capture_return_all_buffers(vcapture, VB2_BUF_STATE_QUEUED); return ret; } ret = vimc_streamer_s_stream(&vcapture->stream, &vcapture->ved, 1); if (ret) { video_device_pipeline_stop(&vcapture->vdev); vimc_capture_return_all_buffers(vcapture, VB2_BUF_STATE_QUEUED); return ret; } return 0; } /* * Stop the stream engine. Any remaining buffers in the stream queue are * dequeued and passed on to the vb2 framework marked as STATE_ERROR. */ static void vimc_capture_stop_streaming(struct vb2_queue *vq) { struct vimc_capture_device *vcapture = vb2_get_drv_priv(vq); vimc_streamer_s_stream(&vcapture->stream, &vcapture->ved, 0); /* Stop the media pipeline */ video_device_pipeline_stop(&vcapture->vdev); /* Release all active buffers */ vimc_capture_return_all_buffers(vcapture, VB2_BUF_STATE_ERROR); } static void vimc_capture_buf_queue(struct vb2_buffer *vb2_buf) { struct vimc_capture_device *vcapture = vb2_get_drv_priv(vb2_buf->vb2_queue); struct vimc_capture_buffer *buf = container_of(vb2_buf, struct vimc_capture_buffer, vb2.vb2_buf); spin_lock(&vcapture->qlock); list_add_tail(&buf->list, &vcapture->buf_list); spin_unlock(&vcapture->qlock); } static int vimc_capture_queue_setup(struct vb2_queue *vq, unsigned int *nbuffers, unsigned int *nplanes, unsigned int sizes[], struct device *alloc_devs[]) { struct vimc_capture_device *vcapture = vb2_get_drv_priv(vq); if (*nplanes) return sizes[0] < vcapture->format.sizeimage ? -EINVAL : 0; /* We don't support multiplanes for now */ *nplanes = 1; sizes[0] = vcapture->format.sizeimage; return 0; } static int vimc_capture_buffer_prepare(struct vb2_buffer *vb) { struct vimc_capture_device *vcapture = vb2_get_drv_priv(vb->vb2_queue); unsigned long size = vcapture->format.sizeimage; if (vb2_plane_size(vb, 0) < size) { dev_err(vcapture->ved.dev, "%s: buffer too small (%lu < %lu)\n", vcapture->vdev.name, vb2_plane_size(vb, 0), size); return -EINVAL; } return 0; } static const struct vb2_ops vimc_capture_qops = { .start_streaming = vimc_capture_start_streaming, .stop_streaming = vimc_capture_stop_streaming, .buf_queue = vimc_capture_buf_queue, .queue_setup = vimc_capture_queue_setup, .buf_prepare = vimc_capture_buffer_prepare, }; static const struct media_entity_operations vimc_capture_mops = { .link_validate = vimc_vdev_link_validate, }; static void vimc_capture_release(struct vimc_ent_device *ved) { struct vimc_capture_device *vcapture = container_of(ved, struct vimc_capture_device, ved); media_entity_cleanup(vcapture->ved.ent); kfree(vcapture); } static void vimc_capture_unregister(struct vimc_ent_device *ved) { struct vimc_capture_device *vcapture = container_of(ved, struct vimc_capture_device, ved); vb2_video_unregister_device(&vcapture->vdev); } static void *vimc_capture_process_frame(struct vimc_ent_device *ved, const void *frame) { struct vimc_capture_device *vcapture = container_of(ved, struct vimc_capture_device, ved); struct vimc_capture_buffer *vimc_buf; void *vbuf; spin_lock(&vcapture->qlock); /* Get the first entry of the list */ vimc_buf = list_first_entry_or_null(&vcapture->buf_list, typeof(*vimc_buf), list); if (!vimc_buf) { spin_unlock(&vcapture->qlock); return ERR_PTR(-EAGAIN); } /* Remove this entry from the list */ list_del(&vimc_buf->list); spin_unlock(&vcapture->qlock); /* Fill the buffer */ vimc_buf->vb2.vb2_buf.timestamp = ktime_get_ns(); vimc_buf->vb2.sequence = vcapture->sequence++; vimc_buf->vb2.field = vcapture->format.field; vbuf = vb2_plane_vaddr(&vimc_buf->vb2.vb2_buf, 0); memcpy(vbuf, frame, vcapture->format.sizeimage); /* Set it as ready */ vb2_set_plane_payload(&vimc_buf->vb2.vb2_buf, 0, vcapture->format.sizeimage); vb2_buffer_done(&vimc_buf->vb2.vb2_buf, VB2_BUF_STATE_DONE); return NULL; } static struct vimc_ent_device *vimc_capture_add(struct vimc_device *vimc, const char *vcfg_name) { struct v4l2_device *v4l2_dev = &vimc->v4l2_dev; const struct vimc_pix_map *vpix; struct vimc_capture_device *vcapture; struct video_device *vdev; struct vb2_queue *q; int ret; /* Allocate the vimc_capture_device struct */ vcapture = kzalloc(sizeof(*vcapture), GFP_KERNEL); if (!vcapture) return ERR_PTR(-ENOMEM); /* Initialize the media entity */ vcapture->vdev.entity.name = vcfg_name; vcapture->vdev.entity.function = MEDIA_ENT_F_IO_V4L; vcapture->pad.flags = MEDIA_PAD_FL_SINK; ret = media_entity_pads_init(&vcapture->vdev.entity, 1, &vcapture->pad); if (ret) goto err_free_vcapture; /* Initialize the lock */ mutex_init(&vcapture->lock); /* Initialize the vb2 queue */ q = &vcapture->queue; q->type = V4L2_BUF_TYPE_VIDEO_CAPTURE; q->io_modes = VB2_MMAP | VB2_DMABUF; if (vimc_allocator == VIMC_ALLOCATOR_VMALLOC) q->io_modes |= VB2_USERPTR; q->drv_priv = vcapture; q->buf_struct_size = sizeof(struct vimc_capture_buffer); q->ops = &vimc_capture_qops; q->mem_ops = vimc_allocator == VIMC_ALLOCATOR_DMA_CONTIG ? &vb2_dma_contig_memops : &vb2_vmalloc_memops; q->timestamp_flags = V4L2_BUF_FLAG_TIMESTAMP_MONOTONIC; q->min_reqbufs_allocation = 2; q->lock = &vcapture->lock; q->dev = v4l2_dev->dev; ret = vb2_queue_init(q); if (ret) { dev_err(vimc->mdev.dev, "%s: vb2 queue init failed (err=%d)\n", vcfg_name, ret); goto err_clean_m_ent; } /* Initialize buffer list and its lock */ INIT_LIST_HEAD(&vcapture->buf_list); spin_lock_init(&vcapture->qlock); /* Set default frame format */ vcapture->format = fmt_default; vpix = vimc_pix_map_by_pixelformat(vcapture->format.pixelformat); vcapture->format.bytesperline = vcapture->format.width * vpix->bpp; vcapture->format.sizeimage = vcapture->format.bytesperline * vcapture->format.height; /* Fill the vimc_ent_device struct */ vcapture->ved.ent = &vcapture->vdev.entity; vcapture->ved.process_frame = vimc_capture_process_frame; vcapture->ved.vdev_get_format = vimc_capture_get_format; vcapture->ved.dev = vimc->mdev.dev; /* Initialize the video_device struct */ vdev = &vcapture->vdev; vdev->device_caps = V4L2_CAP_VIDEO_CAPTURE | V4L2_CAP_STREAMING | V4L2_CAP_IO_MC; vdev->entity.ops = &vimc_capture_mops; vdev->release = video_device_release_empty; vdev->fops = &vimc_capture_fops; vdev->ioctl_ops = &vimc_capture_ioctl_ops; vdev->lock = &vcapture->lock; vdev->queue = q; vdev->v4l2_dev = v4l2_dev; vdev->vfl_dir = VFL_DIR_RX; strscpy(vdev->name, vcfg_name, sizeof(vdev->name)); video_set_drvdata(vdev, &vcapture->ved); /* Register the video_device with the v4l2 and the media framework */ ret = video_register_device(vdev, VFL_TYPE_VIDEO, -1); if (ret) { dev_err(vimc->mdev.dev, "%s: video register failed (err=%d)\n", vcapture->vdev.name, ret); goto err_clean_m_ent; } return &vcapture->ved; err_clean_m_ent: media_entity_cleanup(&vcapture->vdev.entity); err_free_vcapture: kfree(vcapture); return ERR_PTR(ret); } const struct vimc_ent_type vimc_capture_type = { .add = vimc_capture_add, .unregister = vimc_capture_unregister, .release = vimc_capture_release };
797 788 746 738 737 738 737 17 738 8 8 1 8 763 763 736 45 45 737 13 13 13 13 13 6 7 7 1 11 3 8 737 2 727 27 27 27 27 7 7 7 7 26 25 26 25 22 5 4 23 6 5 20 20 2 19 134 19 134 19 19 19 19 19 19 6 6 6 6 6 2 2 5 4 3 2 3 5 5 5 6 1 3 3 3 3 3 3 2 1 1 3 3 4 4 4 4 4 4 4 4 4 4 160 159 161 4 157 4 4 4 700 700 700 700 3 697 3 3 3 3 1 2 3 2 2 134 134 119 134 1 134 134 31 32 31 30 27 32 29 27 27 27 4 7 2 32 4 4 4 4 4 4 1 1 5 4 1 1 1 1 1 158 155 155 6 149 147 26 159 63 61 58 2 57 2 2 141 134 141 138 140 141 700 700 699 2 700 2 24 26 5 7 9 41 13 41 26 41 41 41 1 41 41 29 29 12 40 14 14 14 14 13 12 12 124 118 124 124 124 1 124 124 121 121 5 124 45 32 5 40 44 13 8 5 13 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (C) 2002,2003 by Andreas Gruenbacher <a.gruenbacher@computer.org> * * Fixes from William Schumacher incorporated on 15 March 2001. * (Reported by Charles Bertsch, <CBertsch@microtest.com>). */ /* * This file contains generic functions for manipulating * POSIX 1003.1e draft standard 17 ACLs. */ #include <linux/kernel.h> #include <linux/slab.h> #include <linux/atomic.h> #include <linux/fs.h> #include <linux/sched.h> #include <linux/cred.h> #include <linux/posix_acl.h> #include <linux/posix_acl_xattr.h> #include <linux/xattr.h> #include <linux/export.h> #include <linux/user_namespace.h> #include <linux/namei.h> #include <linux/mnt_idmapping.h> #include <linux/iversion.h> #include <linux/security.h> #include <linux/fsnotify.h> #include <linux/filelock.h> #include "internal.h" static struct posix_acl **acl_by_type(struct inode *inode, int type) { switch (type) { case ACL_TYPE_ACCESS: return &inode->i_acl; case ACL_TYPE_DEFAULT: return &inode->i_default_acl; default: BUG(); } } struct posix_acl *get_cached_acl(struct inode *inode, int type) { struct posix_acl **p = acl_by_type(inode, type); struct posix_acl *acl; for (;;) { rcu_read_lock(); acl = rcu_dereference(*p); if (!acl || is_uncached_acl(acl) || refcount_inc_not_zero(&acl->a_refcount)) break; rcu_read_unlock(); cpu_relax(); } rcu_read_unlock(); return acl; } EXPORT_SYMBOL(get_cached_acl); struct posix_acl *get_cached_acl_rcu(struct inode *inode, int type) { struct posix_acl *acl = rcu_dereference(*acl_by_type(inode, type)); if (acl == ACL_DONT_CACHE) { struct posix_acl *ret; ret = inode->i_op->get_inode_acl(inode, type, LOOKUP_RCU); if (!IS_ERR(ret)) acl = ret; } return acl; } EXPORT_SYMBOL(get_cached_acl_rcu); void set_cached_acl(struct inode *inode, int type, struct posix_acl *acl) { struct posix_acl **p = acl_by_type(inode, type); struct posix_acl *old; old = xchg(p, posix_acl_dup(acl)); if (!is_uncached_acl(old)) posix_acl_release(old); } EXPORT_SYMBOL(set_cached_acl); static void __forget_cached_acl(struct posix_acl **p) { struct posix_acl *old; old = xchg(p, ACL_NOT_CACHED); if (!is_uncached_acl(old)) posix_acl_release(old); } void forget_cached_acl(struct inode *inode, int type) { __forget_cached_acl(acl_by_type(inode, type)); } EXPORT_SYMBOL(forget_cached_acl); void forget_all_cached_acls(struct inode *inode) { __forget_cached_acl(&inode->i_acl); __forget_cached_acl(&inode->i_default_acl); } EXPORT_SYMBOL(forget_all_cached_acls); static struct posix_acl *__get_acl(struct mnt_idmap *idmap, struct dentry *dentry, struct inode *inode, int type) { struct posix_acl *sentinel; struct posix_acl **p; struct posix_acl *acl; /* * The sentinel is used to detect when another operation like * set_cached_acl() or forget_cached_acl() races with get_inode_acl(). * It is guaranteed that is_uncached_acl(sentinel) is true. */ acl = get_cached_acl(inode, type); if (!is_uncached_acl(acl)) return acl; if (!IS_POSIXACL(inode)) return NULL; sentinel = uncached_acl_sentinel(current); p = acl_by_type(inode, type); /* * If the ACL isn't being read yet, set our sentinel. Otherwise, the * current value of the ACL will not be ACL_NOT_CACHED and so our own * sentinel will not be set; another task will update the cache. We * could wait for that other task to complete its job, but it's easier * to just call ->get_inode_acl to fetch the ACL ourself. (This is * going to be an unlikely race.) */ cmpxchg(p, ACL_NOT_CACHED, sentinel); /* * Normally, the ACL returned by ->get{_inode}_acl will be cached. * A filesystem can prevent that by calling * forget_cached_acl(inode, type) in ->get{_inode}_acl. * * If the filesystem doesn't have a get{_inode}_ acl() function at all, * we'll just create the negative cache entry. */ if (dentry && inode->i_op->get_acl) { acl = inode->i_op->get_acl(idmap, dentry, type); } else if (inode->i_op->get_inode_acl) { acl = inode->i_op->get_inode_acl(inode, type, false); } else { set_cached_acl(inode, type, NULL); return NULL; } if (IS_ERR(acl)) { /* * Remove our sentinel so that we don't block future attempts * to cache the ACL. */ cmpxchg(p, sentinel, ACL_NOT_CACHED); return acl; } /* * Cache the result, but only if our sentinel is still in place. */ posix_acl_dup(acl); if (unlikely(!try_cmpxchg(p, &sentinel, acl))) posix_acl_release(acl); return acl; } struct posix_acl *get_inode_acl(struct inode *inode, int type) { return __get_acl(&nop_mnt_idmap, NULL, inode, type); } EXPORT_SYMBOL(get_inode_acl); /* * Init a fresh posix_acl */ void posix_acl_init(struct posix_acl *acl, int count) { refcount_set(&acl->a_refcount, 1); acl->a_count = count; } EXPORT_SYMBOL(posix_acl_init); /* * Allocate a new ACL with the specified number of entries. */ struct posix_acl * posix_acl_alloc(unsigned int count, gfp_t flags) { struct posix_acl *acl; acl = kmalloc(struct_size(acl, a_entries, count), flags); if (acl) posix_acl_init(acl, count); return acl; } EXPORT_SYMBOL(posix_acl_alloc); /* * Clone an ACL. */ struct posix_acl * posix_acl_clone(const struct posix_acl *acl, gfp_t flags) { struct posix_acl *clone = NULL; if (acl) { clone = kmemdup(acl, struct_size(acl, a_entries, acl->a_count), flags); if (clone) refcount_set(&clone->a_refcount, 1); } return clone; } EXPORT_SYMBOL_GPL(posix_acl_clone); /* * Check if an acl is valid. Returns 0 if it is, or -E... otherwise. */ int posix_acl_valid(struct user_namespace *user_ns, const struct posix_acl *acl) { const struct posix_acl_entry *pa, *pe; int state = ACL_USER_OBJ; int needs_mask = 0; FOREACH_ACL_ENTRY(pa, acl, pe) { if (pa->e_perm & ~(ACL_READ|ACL_WRITE|ACL_EXECUTE)) return -EINVAL; switch (pa->e_tag) { case ACL_USER_OBJ: if (state == ACL_USER_OBJ) { state = ACL_USER; break; } return -EINVAL; case ACL_USER: if (state != ACL_USER) return -EINVAL; if (!kuid_has_mapping(user_ns, pa->e_uid)) return -EINVAL; needs_mask = 1; break; case ACL_GROUP_OBJ: if (state == ACL_USER) { state = ACL_GROUP; break; } return -EINVAL; case ACL_GROUP: if (state != ACL_GROUP) return -EINVAL; if (!kgid_has_mapping(user_ns, pa->e_gid)) return -EINVAL; needs_mask = 1; break; case ACL_MASK: if (state != ACL_GROUP) return -EINVAL; state = ACL_OTHER; break; case ACL_OTHER: if (state == ACL_OTHER || (state == ACL_GROUP && !needs_mask)) { state = 0; break; } return -EINVAL; default: return -EINVAL; } } if (state == 0) return 0; return -EINVAL; } EXPORT_SYMBOL(posix_acl_valid); /* * Returns 0 if the acl can be exactly represented in the traditional * file mode permission bits, or else 1. Returns -E... on error. */ int posix_acl_equiv_mode(const struct posix_acl *acl, umode_t *mode_p) { const struct posix_acl_entry *pa, *pe; umode_t mode = 0; int not_equiv = 0; /* * A null ACL can always be presented as mode bits. */ if (!acl) return 0; FOREACH_ACL_ENTRY(pa, acl, pe) { switch (pa->e_tag) { case ACL_USER_OBJ: mode |= (pa->e_perm & S_IRWXO) << 6; break; case ACL_GROUP_OBJ: mode |= (pa->e_perm & S_IRWXO) << 3; break; case ACL_OTHER: mode |= pa->e_perm & S_IRWXO; break; case ACL_MASK: mode = (mode & ~S_IRWXG) | ((pa->e_perm & S_IRWXO) << 3); not_equiv = 1; break; case ACL_USER: case ACL_GROUP: not_equiv = 1; break; default: return -EINVAL; } } if (mode_p) *mode_p = (*mode_p & ~S_IRWXUGO) | mode; return not_equiv; } EXPORT_SYMBOL(posix_acl_equiv_mode); /* * Create an ACL representing the file mode permission bits of an inode. */ struct posix_acl * posix_acl_from_mode(umode_t mode, gfp_t flags) { struct posix_acl *acl = posix_acl_alloc(3, flags); if (!acl) return ERR_PTR(-ENOMEM); acl->a_entries[0].e_tag = ACL_USER_OBJ; acl->a_entries[0].e_perm = (mode & S_IRWXU) >> 6; acl->a_entries[1].e_tag = ACL_GROUP_OBJ; acl->a_entries[1].e_perm = (mode & S_IRWXG) >> 3; acl->a_entries[2].e_tag = ACL_OTHER; acl->a_entries[2].e_perm = (mode & S_IRWXO); return acl; } EXPORT_SYMBOL(posix_acl_from_mode); /* * Return 0 if current is granted want access to the inode * by the acl. Returns -E... otherwise. */ int posix_acl_permission(struct mnt_idmap *idmap, struct inode *inode, const struct posix_acl *acl, int want) { const struct posix_acl_entry *pa, *pe, *mask_obj; struct user_namespace *fs_userns = i_user_ns(inode); int found = 0; vfsuid_t vfsuid; vfsgid_t vfsgid; want &= MAY_READ | MAY_WRITE | MAY_EXEC; FOREACH_ACL_ENTRY(pa, acl, pe) { switch(pa->e_tag) { case ACL_USER_OBJ: /* (May have been checked already) */ vfsuid = i_uid_into_vfsuid(idmap, inode); if (vfsuid_eq_kuid(vfsuid, current_fsuid())) goto check_perm; break; case ACL_USER: vfsuid = make_vfsuid(idmap, fs_userns, pa->e_uid); if (vfsuid_eq_kuid(vfsuid, current_fsuid())) goto mask; break; case ACL_GROUP_OBJ: vfsgid = i_gid_into_vfsgid(idmap, inode); if (vfsgid_in_group_p(vfsgid)) { found = 1; if ((pa->e_perm & want) == want) goto mask; } break; case ACL_GROUP: vfsgid = make_vfsgid(idmap, fs_userns, pa->e_gid); if (vfsgid_in_group_p(vfsgid)) { found = 1; if ((pa->e_perm & want) == want) goto mask; } break; case ACL_MASK: break; case ACL_OTHER: if (found) return -EACCES; else goto check_perm; default: return -EIO; } } return -EIO; mask: for (mask_obj = pa+1; mask_obj != pe; mask_obj++) { if (mask_obj->e_tag == ACL_MASK) { if ((pa->e_perm & mask_obj->e_perm & want) == want) return 0; return -EACCES; } } check_perm: if ((pa->e_perm & want) == want) return 0; return -EACCES; } /* * Modify acl when creating a new inode. The caller must ensure the acl is * only referenced once. * * mode_p initially must contain the mode parameter to the open() / creat() * system calls. All permissions that are not granted by the acl are removed. * The permissions in the acl are changed to reflect the mode_p parameter. */ static int posix_acl_create_masq(struct posix_acl *acl, umode_t *mode_p) { struct posix_acl_entry *pa, *pe; struct posix_acl_entry *group_obj = NULL, *mask_obj = NULL; umode_t mode = *mode_p; int not_equiv = 0; /* assert(atomic_read(acl->a_refcount) == 1); */ FOREACH_ACL_ENTRY(pa, acl, pe) { switch(pa->e_tag) { case ACL_USER_OBJ: pa->e_perm &= (mode >> 6) | ~S_IRWXO; mode &= (pa->e_perm << 6) | ~S_IRWXU; break; case ACL_USER: case ACL_GROUP: not_equiv = 1; break; case ACL_GROUP_OBJ: group_obj = pa; break; case ACL_OTHER: pa->e_perm &= mode | ~S_IRWXO; mode &= pa->e_perm | ~S_IRWXO; break; case ACL_MASK: mask_obj = pa; not_equiv = 1; break; default: return -EIO; } } if (mask_obj) { mask_obj->e_perm &= (mode >> 3) | ~S_IRWXO; mode &= (mask_obj->e_perm << 3) | ~S_IRWXG; } else { if (!group_obj) return -EIO; group_obj->e_perm &= (mode >> 3) | ~S_IRWXO; mode &= (group_obj->e_perm << 3) | ~S_IRWXG; } *mode_p = (*mode_p & ~S_IRWXUGO) | mode; return not_equiv; } /* * Modify the ACL for the chmod syscall. */ static int __posix_acl_chmod_masq(struct posix_acl *acl, umode_t mode) { struct posix_acl_entry *group_obj = NULL, *mask_obj = NULL; struct posix_acl_entry *pa, *pe; /* assert(atomic_read(acl->a_refcount) == 1); */ FOREACH_ACL_ENTRY(pa, acl, pe) { switch(pa->e_tag) { case ACL_USER_OBJ: pa->e_perm = (mode & S_IRWXU) >> 6; break; case ACL_USER: case ACL_GROUP: break; case ACL_GROUP_OBJ: group_obj = pa; break; case ACL_MASK: mask_obj = pa; break; case ACL_OTHER: pa->e_perm = (mode & S_IRWXO); break; default: return -EIO; } } if (mask_obj) { mask_obj->e_perm = (mode & S_IRWXG) >> 3; } else { if (!group_obj) return -EIO; group_obj->e_perm = (mode & S_IRWXG) >> 3; } return 0; } int __posix_acl_create(struct posix_acl **acl, gfp_t gfp, umode_t *mode_p) { struct posix_acl *clone = posix_acl_clone(*acl, gfp); int err = -ENOMEM; if (clone) { err = posix_acl_create_masq(clone, mode_p); if (err < 0) { posix_acl_release(clone); clone = NULL; } } posix_acl_release(*acl); *acl = clone; return err; } EXPORT_SYMBOL(__posix_acl_create); int __posix_acl_chmod(struct posix_acl **acl, gfp_t gfp, umode_t mode) { struct posix_acl *clone = posix_acl_clone(*acl, gfp); int err = -ENOMEM; if (clone) { err = __posix_acl_chmod_masq(clone, mode); if (err) { posix_acl_release(clone); clone = NULL; } } posix_acl_release(*acl); *acl = clone; return err; } EXPORT_SYMBOL(__posix_acl_chmod); /** * posix_acl_chmod - chmod a posix acl * * @idmap: idmap of the mount @inode was found from * @dentry: dentry to check permissions on * @mode: the new mode of @inode * * If the dentry has been found through an idmapped mount the idmap of * the vfsmount must be passed through @idmap. This function will then * take care to map the inode according to @idmap before checking * permissions. On non-idmapped mounts or if permission checking is to be * performed on the raw inode simply pass @nop_mnt_idmap. */ int posix_acl_chmod(struct mnt_idmap *idmap, struct dentry *dentry, umode_t mode) { struct inode *inode = d_inode(dentry); struct posix_acl *acl; int ret = 0; if (!IS_POSIXACL(inode)) return 0; if (!inode->i_op->set_acl) return -EOPNOTSUPP; acl = get_inode_acl(inode, ACL_TYPE_ACCESS); if (IS_ERR_OR_NULL(acl)) { if (acl == ERR_PTR(-EOPNOTSUPP)) return 0; return PTR_ERR(acl); } ret = __posix_acl_chmod(&acl, GFP_KERNEL, mode); if (ret) return ret; ret = inode->i_op->set_acl(idmap, dentry, acl, ACL_TYPE_ACCESS); posix_acl_release(acl); return ret; } EXPORT_SYMBOL(posix_acl_chmod); int posix_acl_create(struct inode *dir, umode_t *mode, struct posix_acl **default_acl, struct posix_acl **acl) { struct posix_acl *p; struct posix_acl *clone; int ret; *acl = NULL; *default_acl = NULL; if (S_ISLNK(*mode) || !IS_POSIXACL(dir)) return 0; p = get_inode_acl(dir, ACL_TYPE_DEFAULT); if (!p || p == ERR_PTR(-EOPNOTSUPP)) { *mode &= ~current_umask(); return 0; } if (IS_ERR(p)) return PTR_ERR(p); ret = -ENOMEM; clone = posix_acl_clone(p, GFP_NOFS); if (!clone) goto err_release; ret = posix_acl_create_masq(clone, mode); if (ret < 0) goto err_release_clone; if (ret == 0) posix_acl_release(clone); else *acl = clone; if (!S_ISDIR(*mode)) posix_acl_release(p); else *default_acl = p; return 0; err_release_clone: posix_acl_release(clone); err_release: posix_acl_release(p); return ret; } EXPORT_SYMBOL_GPL(posix_acl_create); /** * posix_acl_update_mode - update mode in set_acl * @idmap: idmap of the mount @inode was found from * @inode: target inode * @mode_p: mode (pointer) for update * @acl: acl pointer * * Update the file mode when setting an ACL: compute the new file permission * bits based on the ACL. In addition, if the ACL is equivalent to the new * file mode, set *@acl to NULL to indicate that no ACL should be set. * * As with chmod, clear the setgid bit if the caller is not in the owning group * or capable of CAP_FSETID (see inode_change_ok). * * If the inode has been found through an idmapped mount the idmap of * the vfsmount must be passed through @idmap. This function will then * take care to map the inode according to @idmap before checking * permissions. On non-idmapped mounts or if permission checking is to be * performed on the raw inode simply pass @nop_mnt_idmap. * * Called from set_acl inode operations. */ int posix_acl_update_mode(struct mnt_idmap *idmap, struct inode *inode, umode_t *mode_p, struct posix_acl **acl) { umode_t mode = inode->i_mode; int error; error = posix_acl_equiv_mode(*acl, &mode); if (error < 0) return error; if (error == 0) *acl = NULL; if (!in_group_or_capable(idmap, inode, i_gid_into_vfsgid(idmap, inode))) mode &= ~S_ISGID; *mode_p = mode; return 0; } EXPORT_SYMBOL(posix_acl_update_mode); /* * Fix up the uids and gids in posix acl extended attributes in place. */ static int posix_acl_fix_xattr_common(const void *value, size_t size) { const struct posix_acl_xattr_header *header = value; int count; if (!header) return -EINVAL; if (size < sizeof(struct posix_acl_xattr_header)) return -EINVAL; if (header->a_version != cpu_to_le32(POSIX_ACL_XATTR_VERSION)) return -EOPNOTSUPP; count = posix_acl_xattr_count(size); if (count < 0) return -EINVAL; if (count == 0) return 0; return count; } /** * posix_acl_from_xattr - convert POSIX ACLs from backing store to VFS format * @userns: the filesystem's idmapping * @value: the uapi representation of POSIX ACLs * @size: the size of @void * * Filesystems that store POSIX ACLs in the unaltered uapi format should use * posix_acl_from_xattr() when reading them from the backing store and * converting them into the struct posix_acl VFS format. The helper is * specifically intended to be called from the acl inode operation. * * The posix_acl_from_xattr() function will map the raw {g,u}id values stored * in ACL_{GROUP,USER} entries into idmapping in @userns. * * Note that posix_acl_from_xattr() does not take idmapped mounts into account. * If it did it calling it from the get acl inode operation would return POSIX * ACLs mapped according to an idmapped mount which would mean that the value * couldn't be cached for the filesystem. Idmapped mounts are taken into * account on the fly during permission checking or right at the VFS - * userspace boundary before reporting them to the user. * * Return: Allocated struct posix_acl on success, NULL for a valid header but * without actual POSIX ACL entries, or ERR_PTR() encoded error code. */ struct posix_acl *posix_acl_from_xattr(struct user_namespace *userns, const void *value, size_t size) { const struct posix_acl_xattr_header *header = value; const struct posix_acl_xattr_entry *entry = (const void *)(header + 1), *end; int count; struct posix_acl *acl; struct posix_acl_entry *acl_e; count = posix_acl_fix_xattr_common(value, size); if (count < 0) return ERR_PTR(count); if (count == 0) return NULL; acl = posix_acl_alloc(count, GFP_NOFS); if (!acl) return ERR_PTR(-ENOMEM); acl_e = acl->a_entries; for (end = entry + count; entry != end; acl_e++, entry++) { acl_e->e_tag = le16_to_cpu(entry->e_tag); acl_e->e_perm = le16_to_cpu(entry->e_perm); switch(acl_e->e_tag) { case ACL_USER_OBJ: case ACL_GROUP_OBJ: case ACL_MASK: case ACL_OTHER: break; case ACL_USER: acl_e->e_uid = make_kuid(userns, le32_to_cpu(entry->e_id)); if (!uid_valid(acl_e->e_uid)) goto fail; break; case ACL_GROUP: acl_e->e_gid = make_kgid(userns, le32_to_cpu(entry->e_id)); if (!gid_valid(acl_e->e_gid)) goto fail; break; default: goto fail; } } return acl; fail: posix_acl_release(acl); return ERR_PTR(-EINVAL); } EXPORT_SYMBOL (posix_acl_from_xattr); /* * Convert from in-memory to extended attribute representation. */ int posix_acl_to_xattr(struct user_namespace *user_ns, const struct posix_acl *acl, void *buffer, size_t size) { struct posix_acl_xattr_header *ext_acl = buffer; struct posix_acl_xattr_entry *ext_entry; int real_size, n; real_size = posix_acl_xattr_size(acl->a_count); if (!buffer) return real_size; if (real_size > size) return -ERANGE; ext_entry = (void *)(ext_acl + 1); ext_acl->a_version = cpu_to_le32(POSIX_ACL_XATTR_VERSION); for (n=0; n < acl->a_count; n++, ext_entry++) { const struct posix_acl_entry *acl_e = &acl->a_entries[n]; ext_entry->e_tag = cpu_to_le16(acl_e->e_tag); ext_entry->e_perm = cpu_to_le16(acl_e->e_perm); switch(acl_e->e_tag) { case ACL_USER: ext_entry->e_id = cpu_to_le32(from_kuid(user_ns, acl_e->e_uid)); break; case ACL_GROUP: ext_entry->e_id = cpu_to_le32(from_kgid(user_ns, acl_e->e_gid)); break; default: ext_entry->e_id = cpu_to_le32(ACL_UNDEFINED_ID); break; } } return real_size; } EXPORT_SYMBOL (posix_acl_to_xattr); /** * vfs_posix_acl_to_xattr - convert from kernel to userspace representation * @idmap: idmap of the mount * @inode: inode the posix acls are set on * @acl: the posix acls as represented by the vfs * @buffer: the buffer into which to convert @acl * @size: size of @buffer * * This converts @acl from the VFS representation in the filesystem idmapping * to the uapi form reportable to userspace. And mount and caller idmappings * are handled appropriately. * * Return: On success, the size of the stored uapi posix acls, on error a * negative errno. */ static ssize_t vfs_posix_acl_to_xattr(struct mnt_idmap *idmap, struct inode *inode, const struct posix_acl *acl, void *buffer, size_t size) { struct posix_acl_xattr_header *ext_acl = buffer; struct posix_acl_xattr_entry *ext_entry; struct user_namespace *fs_userns, *caller_userns; ssize_t real_size, n; vfsuid_t vfsuid; vfsgid_t vfsgid; real_size = posix_acl_xattr_size(acl->a_count); if (!buffer) return real_size; if (real_size > size) return -ERANGE; ext_entry = (void *)(ext_acl + 1); ext_acl->a_version = cpu_to_le32(POSIX_ACL_XATTR_VERSION); fs_userns = i_user_ns(inode); caller_userns = current_user_ns(); for (n=0; n < acl->a_count; n++, ext_entry++) { const struct posix_acl_entry *acl_e = &acl->a_entries[n]; ext_entry->e_tag = cpu_to_le16(acl_e->e_tag); ext_entry->e_perm = cpu_to_le16(acl_e->e_perm); switch(acl_e->e_tag) { case ACL_USER: vfsuid = make_vfsuid(idmap, fs_userns, acl_e->e_uid); ext_entry->e_id = cpu_to_le32(from_kuid( caller_userns, vfsuid_into_kuid(vfsuid))); break; case ACL_GROUP: vfsgid = make_vfsgid(idmap, fs_userns, acl_e->e_gid); ext_entry->e_id = cpu_to_le32(from_kgid( caller_userns, vfsgid_into_kgid(vfsgid))); break; default: ext_entry->e_id = cpu_to_le32(ACL_UNDEFINED_ID); break; } } return real_size; } int set_posix_acl(struct mnt_idmap *idmap, struct dentry *dentry, int type, struct posix_acl *acl) { struct inode *inode = d_inode(dentry); if (!IS_POSIXACL(inode)) return -EOPNOTSUPP; if (!inode->i_op->set_acl) return -EOPNOTSUPP; if (type == ACL_TYPE_DEFAULT && !S_ISDIR(inode->i_mode)) return acl ? -EACCES : 0; if (!inode_owner_or_capable(idmap, inode)) return -EPERM; if (acl) { int ret = posix_acl_valid(inode->i_sb->s_user_ns, acl); if (ret) return ret; } return inode->i_op->set_acl(idmap, dentry, acl, type); } EXPORT_SYMBOL(set_posix_acl); int posix_acl_listxattr(struct inode *inode, char **buffer, ssize_t *remaining_size) { int err; if (!IS_POSIXACL(inode)) return 0; if (inode->i_acl) { err = xattr_list_one(buffer, remaining_size, XATTR_NAME_POSIX_ACL_ACCESS); if (err) return err; } if (inode->i_default_acl) { err = xattr_list_one(buffer, remaining_size, XATTR_NAME_POSIX_ACL_DEFAULT); if (err) return err; } return 0; } static bool posix_acl_xattr_list(struct dentry *dentry) { return IS_POSIXACL(d_backing_inode(dentry)); } /* * nop_posix_acl_access - legacy xattr handler for access POSIX ACLs * * This is the legacy POSIX ACL access xattr handler. It is used by some * filesystems to implement their ->listxattr() inode operation. New code * should never use them. */ const struct xattr_handler nop_posix_acl_access = { .name = XATTR_NAME_POSIX_ACL_ACCESS, .list = posix_acl_xattr_list, }; EXPORT_SYMBOL_GPL(nop_posix_acl_access); /* * nop_posix_acl_default - legacy xattr handler for default POSIX ACLs * * This is the legacy POSIX ACL default xattr handler. It is used by some * filesystems to implement their ->listxattr() inode operation. New code * should never use them. */ const struct xattr_handler nop_posix_acl_default = { .name = XATTR_NAME_POSIX_ACL_DEFAULT, .list = posix_acl_xattr_list, }; EXPORT_SYMBOL_GPL(nop_posix_acl_default); int simple_set_acl(struct mnt_idmap *idmap, struct dentry *dentry, struct posix_acl *acl, int type) { int error; struct inode *inode = d_inode(dentry); if (type == ACL_TYPE_ACCESS) { error = posix_acl_update_mode(idmap, inode, &inode->i_mode, &acl); if (error) return error; } inode_set_ctime_current(inode); if (IS_I_VERSION(inode)) inode_inc_iversion(inode); set_cached_acl(inode, type, acl); return 0; } int simple_acl_create(struct inode *dir, struct inode *inode) { struct posix_acl *default_acl, *acl; int error; error = posix_acl_create(dir, &inode->i_mode, &default_acl, &acl); if (error) return error; set_cached_acl(inode, ACL_TYPE_DEFAULT, default_acl); set_cached_acl(inode, ACL_TYPE_ACCESS, acl); if (default_acl) posix_acl_release(default_acl); if (acl) posix_acl_release(acl); return 0; } static int vfs_set_acl_idmapped_mnt(struct mnt_idmap *idmap, struct user_namespace *fs_userns, struct posix_acl *acl) { for (int n = 0; n < acl->a_count; n++) { struct posix_acl_entry *acl_e = &acl->a_entries[n]; switch (acl_e->e_tag) { case ACL_USER: acl_e->e_uid = from_vfsuid(idmap, fs_userns, VFSUIDT_INIT(acl_e->e_uid)); break; case ACL_GROUP: acl_e->e_gid = from_vfsgid(idmap, fs_userns, VFSGIDT_INIT(acl_e->e_gid)); break; } } return 0; } /** * vfs_set_acl - set posix acls * @idmap: idmap of the mount * @dentry: the dentry based on which to set the posix acls * @acl_name: the name of the posix acl * @kacl: the posix acls in the appropriate VFS format * * This function sets @kacl. The caller must all posix_acl_release() on @kacl * afterwards. * * Return: On success 0, on error negative errno. */ int vfs_set_acl(struct mnt_idmap *idmap, struct dentry *dentry, const char *acl_name, struct posix_acl *kacl) { int acl_type; int error; struct inode *inode = d_inode(dentry); struct inode *delegated_inode = NULL; acl_type = posix_acl_type(acl_name); if (acl_type < 0) return -EINVAL; if (kacl) { /* * If we're on an idmapped mount translate from mount specific * vfs{g,u}id_t into global filesystem k{g,u}id_t. * Afterwards we can cache the POSIX ACLs filesystem wide and - * if this is a filesystem with a backing store - ultimately * translate them to backing store values. */ error = vfs_set_acl_idmapped_mnt(idmap, i_user_ns(inode), kacl); if (error) return error; } retry_deleg: inode_lock(inode); /* * We only care about restrictions the inode struct itself places upon * us otherwise POSIX ACLs aren't subject to any VFS restrictions. */ error = may_write_xattr(idmap, inode); if (error) goto out_inode_unlock; error = security_inode_set_acl(idmap, dentry, acl_name, kacl); if (error) goto out_inode_unlock; error = try_break_deleg(inode, &delegated_inode); if (error) goto out_inode_unlock; if (likely(!is_bad_inode(inode))) error = set_posix_acl(idmap, dentry, acl_type, kacl); else error = -EIO; if (!error) { fsnotify_xattr(dentry); security_inode_post_set_acl(dentry, acl_name, kacl); } out_inode_unlock: inode_unlock(inode); if (delegated_inode) { error = break_deleg_wait(&delegated_inode); if (!error) goto retry_deleg; } return error; } EXPORT_SYMBOL_GPL(vfs_set_acl); /** * vfs_get_acl - get posix acls * @idmap: idmap of the mount * @dentry: the dentry based on which to retrieve the posix acls * @acl_name: the name of the posix acl * * This function retrieves @kacl from the filesystem. The caller must all * posix_acl_release() on @kacl. * * Return: On success POSIX ACLs in VFS format, on error negative errno. */ struct posix_acl *vfs_get_acl(struct mnt_idmap *idmap, struct dentry *dentry, const char *acl_name) { struct inode *inode = d_inode(dentry); struct posix_acl *acl; int acl_type, error; acl_type = posix_acl_type(acl_name); if (acl_type < 0) return ERR_PTR(-EINVAL); /* * The VFS has no restrictions on reading POSIX ACLs so calling * something like xattr_permission() isn't needed. Only LSMs get a say. */ error = security_inode_get_acl(idmap, dentry, acl_name); if (error) return ERR_PTR(error); if (!IS_POSIXACL(inode)) return ERR_PTR(-EOPNOTSUPP); if (S_ISLNK(inode->i_mode)) return ERR_PTR(-EOPNOTSUPP); acl = __get_acl(idmap, dentry, inode, acl_type); if (IS_ERR(acl)) return acl; if (!acl) return ERR_PTR(-ENODATA); return acl; } EXPORT_SYMBOL_GPL(vfs_get_acl); /** * vfs_remove_acl - remove posix acls * @idmap: idmap of the mount * @dentry: the dentry based on which to retrieve the posix acls * @acl_name: the name of the posix acl * * This function removes posix acls. * * Return: On success 0, on error negative errno. */ int vfs_remove_acl(struct mnt_idmap *idmap, struct dentry *dentry, const char *acl_name) { int acl_type; int error; struct inode *inode = d_inode(dentry); struct inode *delegated_inode = NULL; acl_type = posix_acl_type(acl_name); if (acl_type < 0) return -EINVAL; retry_deleg: inode_lock(inode); /* * We only care about restrictions the inode struct itself places upon * us otherwise POSIX ACLs aren't subject to any VFS restrictions. */ error = may_write_xattr(idmap, inode); if (error) goto out_inode_unlock; error = security_inode_remove_acl(idmap, dentry, acl_name); if (error) goto out_inode_unlock; error = try_break_deleg(inode, &delegated_inode); if (error) goto out_inode_unlock; if (likely(!is_bad_inode(inode))) error = set_posix_acl(idmap, dentry, acl_type, NULL); else error = -EIO; if (!error) { fsnotify_xattr(dentry); security_inode_post_remove_acl(idmap, dentry, acl_name); } out_inode_unlock: inode_unlock(inode); if (delegated_inode) { error = break_deleg_wait(&delegated_inode); if (!error) goto retry_deleg; } return error; } EXPORT_SYMBOL_GPL(vfs_remove_acl); int do_set_acl(struct mnt_idmap *idmap, struct dentry *dentry, const char *acl_name, const void *kvalue, size_t size) { int error; struct posix_acl *acl = NULL; if (size) { /* * Note that posix_acl_from_xattr() uses GFP_NOFS when it * probably doesn't need to here. */ acl = posix_acl_from_xattr(current_user_ns(), kvalue, size); if (IS_ERR(acl)) return PTR_ERR(acl); } error = vfs_set_acl(idmap, dentry, acl_name, acl); posix_acl_release(acl); return error; } ssize_t do_get_acl(struct mnt_idmap *idmap, struct dentry *dentry, const char *acl_name, void *kvalue, size_t size) { ssize_t error; struct posix_acl *acl; acl = vfs_get_acl(idmap, dentry, acl_name); if (IS_ERR(acl)) return PTR_ERR(acl); error = vfs_posix_acl_to_xattr(idmap, d_inode(dentry), acl, kvalue, size); posix_acl_release(acl); return error; }
210 213 391 391 135 23 135 126 54 54 2 53 45 92 86 119 372 391 20 20 19 18 19 14 14 19 20 149 83 148 1 1 1 1 149 36 134 63 37 34 12 12 1 11 25 4 4 4 4 3 3 2 1 1 1 1 1 13 23 3 3 28 103 25 149 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 // SPDX-License-Identifier: GPL-2.0-only /* * IPv6 library code, needed by static components when full IPv6 support is * not configured or static. */ #include <linux/export.h> #include <net/ipv6.h> /* * find out if nexthdr is a well-known extension header or a protocol */ bool ipv6_ext_hdr(u8 nexthdr) { /* * find out if nexthdr is an extension header or a protocol */ return (nexthdr == NEXTHDR_HOP) || (nexthdr == NEXTHDR_ROUTING) || (nexthdr == NEXTHDR_FRAGMENT) || (nexthdr == NEXTHDR_AUTH) || (nexthdr == NEXTHDR_NONE) || (nexthdr == NEXTHDR_DEST); } EXPORT_SYMBOL(ipv6_ext_hdr); /* * Skip any extension headers. This is used by the ICMP module. * * Note that strictly speaking this conflicts with RFC 2460 4.0: * ...The contents and semantics of each extension header determine whether * or not to proceed to the next header. Therefore, extension headers must * be processed strictly in the order they appear in the packet; a * receiver must not, for example, scan through a packet looking for a * particular kind of extension header and process that header prior to * processing all preceding ones. * * We do exactly this. This is a protocol bug. We can't decide after a * seeing an unknown discard-with-error flavour TLV option if it's a * ICMP error message or not (errors should never be send in reply to * ICMP error messages). * * But I see no other way to do this. This might need to be reexamined * when Linux implements ESP (and maybe AUTH) headers. * --AK * * This function parses (probably truncated) exthdr set "hdr". * "nexthdrp" initially points to some place, * where type of the first header can be found. * * It skips all well-known exthdrs, and returns pointer to the start * of unparsable area i.e. the first header with unknown type. * If it is not NULL *nexthdr is updated by type/protocol of this header. * * NOTES: - if packet terminated with NEXTHDR_NONE it returns NULL. * - it may return pointer pointing beyond end of packet, * if the last recognized header is truncated in the middle. * - if packet is truncated, so that all parsed headers are skipped, * it returns NULL. * - First fragment header is skipped, not-first ones * are considered as unparsable. * - Reports the offset field of the final fragment header so it is * possible to tell whether this is a first fragment, later fragment, * or not fragmented. * - ESP is unparsable for now and considered like * normal payload protocol. * - Note also special handling of AUTH header. Thanks to IPsec wizards. * * --ANK (980726) */ int ipv6_skip_exthdr(const struct sk_buff *skb, int start, u8 *nexthdrp, __be16 *frag_offp) { u8 nexthdr = *nexthdrp; *frag_offp = 0; while (ipv6_ext_hdr(nexthdr)) { struct ipv6_opt_hdr _hdr, *hp; int hdrlen; if (nexthdr == NEXTHDR_NONE) return -1; hp = skb_header_pointer(skb, start, sizeof(_hdr), &_hdr); if (!hp) return -1; if (nexthdr == NEXTHDR_FRAGMENT) { __be16 _frag_off, *fp; fp = skb_header_pointer(skb, start+offsetof(struct frag_hdr, frag_off), sizeof(_frag_off), &_frag_off); if (!fp) return -1; *frag_offp = *fp; if (ntohs(*frag_offp) & ~0x7) break; hdrlen = 8; } else if (nexthdr == NEXTHDR_AUTH) hdrlen = ipv6_authlen(hp); else hdrlen = ipv6_optlen(hp); nexthdr = hp->nexthdr; start += hdrlen; } *nexthdrp = nexthdr; return start; } EXPORT_SYMBOL(ipv6_skip_exthdr); int ipv6_find_tlv(const struct sk_buff *skb, int offset, int type) { const unsigned char *nh = skb_network_header(skb); int packet_len = skb_tail_pointer(skb) - skb_network_header(skb); struct ipv6_opt_hdr *hdr; int len; if (offset + 2 > packet_len) goto bad; hdr = (struct ipv6_opt_hdr *)(nh + offset); len = ((hdr->hdrlen + 1) << 3); if (offset + len > packet_len) goto bad; offset += 2; len -= 2; while (len > 0) { int opttype = nh[offset]; int optlen; if (opttype == type) return offset; switch (opttype) { case IPV6_TLV_PAD1: optlen = 1; break; default: if (len < 2) goto bad; optlen = nh[offset + 1] + 2; if (optlen > len) goto bad; break; } offset += optlen; len -= optlen; } /* not_found */ bad: return -1; } EXPORT_SYMBOL_GPL(ipv6_find_tlv); /* * find the offset to specified header or the protocol number of last header * if target < 0. "last header" is transport protocol header, ESP, or * "No next header". * * Note that *offset is used as input/output parameter, and if it is not zero, * then it must be a valid offset to an inner IPv6 header. This can be used * to explore inner IPv6 header, eg. ICMPv6 error messages. * * If target header is found, its offset is set in *offset and return protocol * number. Otherwise, return -1. * * If the first fragment doesn't contain the final protocol header or * NEXTHDR_NONE it is considered invalid. * * Note that non-1st fragment is special case that "the protocol number * of last header" is "next header" field in Fragment header. In this case, * *offset is meaningless and fragment offset is stored in *fragoff if fragoff * isn't NULL. * * if flags is not NULL and it's a fragment, then the frag flag * IP6_FH_F_FRAG will be set. If it's an AH header, the * IP6_FH_F_AUTH flag is set and target < 0, then this function will * stop at the AH header. If IP6_FH_F_SKIP_RH flag was passed, then this * function will skip all those routing headers, where segements_left was 0. */ int ipv6_find_hdr(const struct sk_buff *skb, unsigned int *offset, int target, unsigned short *fragoff, int *flags) { unsigned int start = skb_network_offset(skb) + sizeof(struct ipv6hdr); u8 nexthdr = ipv6_hdr(skb)->nexthdr; bool found; if (fragoff) *fragoff = 0; if (*offset) { struct ipv6hdr _ip6, *ip6; ip6 = skb_header_pointer(skb, *offset, sizeof(_ip6), &_ip6); if (!ip6 || (ip6->version != 6)) return -EBADMSG; start = *offset + sizeof(struct ipv6hdr); nexthdr = ip6->nexthdr; } do { struct ipv6_opt_hdr _hdr, *hp; unsigned int hdrlen; found = (nexthdr == target); if ((!ipv6_ext_hdr(nexthdr)) || nexthdr == NEXTHDR_NONE) { if (target < 0 || found) break; return -ENOENT; } hp = skb_header_pointer(skb, start, sizeof(_hdr), &_hdr); if (!hp) return -EBADMSG; if (nexthdr == NEXTHDR_ROUTING) { struct ipv6_rt_hdr _rh, *rh; rh = skb_header_pointer(skb, start, sizeof(_rh), &_rh); if (!rh) return -EBADMSG; if (flags && (*flags & IP6_FH_F_SKIP_RH) && rh->segments_left == 0) found = false; } if (nexthdr == NEXTHDR_FRAGMENT) { unsigned short _frag_off; __be16 *fp; if (flags) /* Indicate that this is a fragment */ *flags |= IP6_FH_F_FRAG; fp = skb_header_pointer(skb, start+offsetof(struct frag_hdr, frag_off), sizeof(_frag_off), &_frag_off); if (!fp) return -EBADMSG; _frag_off = ntohs(*fp) & ~0x7; if (_frag_off) { if (target < 0 && ((!ipv6_ext_hdr(hp->nexthdr)) || hp->nexthdr == NEXTHDR_NONE)) { if (fragoff) *fragoff = _frag_off; return hp->nexthdr; } if (!found) return -ENOENT; if (fragoff) *fragoff = _frag_off; break; } hdrlen = 8; } else if (nexthdr == NEXTHDR_AUTH) { if (flags && (*flags & IP6_FH_F_AUTH) && (target < 0)) break; hdrlen = ipv6_authlen(hp); } else hdrlen = ipv6_optlen(hp); if (!found) { nexthdr = hp->nexthdr; start += hdrlen; } } while (!found); *offset = start; return nexthdr; } EXPORT_SYMBOL(ipv6_find_hdr);
16 801 2074 2081 572 217 20282 15758 579 4336 1780 926 2448 16 204 23 72 24339 1268 3156 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 /* SPDX-License-Identifier: GPL-2.0 */ /* * Written by Mark Hemment, 1996 (markhe@nextd.demon.co.uk). * * (C) SGI 2006, Christoph Lameter * Cleaned up and restructured to ease the addition of alternative * implementations of SLAB allocators. * (C) Linux Foundation 2008-2013 * Unified interface for all slab allocators */ #ifndef _LINUX_SLAB_H #define _LINUX_SLAB_H #include <linux/cache.h> #include <linux/gfp.h> #include <linux/overflow.h> #include <linux/types.h> #include <linux/workqueue.h> #include <linux/percpu-refcount.h> #include <linux/cleanup.h> #include <linux/hash.h> enum _slab_flag_bits { _SLAB_CONSISTENCY_CHECKS, _SLAB_RED_ZONE, _SLAB_POISON, _SLAB_KMALLOC, _SLAB_HWCACHE_ALIGN, _SLAB_CACHE_DMA, _SLAB_CACHE_DMA32, _SLAB_STORE_USER, _SLAB_PANIC, _SLAB_TYPESAFE_BY_RCU, _SLAB_TRACE, #ifdef CONFIG_DEBUG_OBJECTS _SLAB_DEBUG_OBJECTS, #endif _SLAB_NOLEAKTRACE, _SLAB_NO_MERGE, #ifdef CONFIG_FAILSLAB _SLAB_FAILSLAB, #endif #ifdef CONFIG_MEMCG _SLAB_ACCOUNT, #endif #ifdef CONFIG_KASAN_GENERIC _SLAB_KASAN, #endif _SLAB_NO_USER_FLAGS, #ifdef CONFIG_KFENCE _SLAB_SKIP_KFENCE, #endif #ifndef CONFIG_SLUB_TINY _SLAB_RECLAIM_ACCOUNT, #endif _SLAB_OBJECT_POISON, _SLAB_CMPXCHG_DOUBLE, #ifdef CONFIG_SLAB_OBJ_EXT _SLAB_NO_OBJ_EXT, #endif _SLAB_FLAGS_LAST_BIT }; #define __SLAB_FLAG_BIT(nr) ((slab_flags_t __force)(1U << (nr))) #define __SLAB_FLAG_UNUSED ((slab_flags_t __force)(0U)) /* * Flags to pass to kmem_cache_create(). * The ones marked DEBUG need CONFIG_SLUB_DEBUG enabled, otherwise are no-op */ /* DEBUG: Perform (expensive) checks on alloc/free */ #define SLAB_CONSISTENCY_CHECKS __SLAB_FLAG_BIT(_SLAB_CONSISTENCY_CHECKS) /* DEBUG: Red zone objs in a cache */ #define SLAB_RED_ZONE __SLAB_FLAG_BIT(_SLAB_RED_ZONE) /* DEBUG: Poison objects */ #define SLAB_POISON __SLAB_FLAG_BIT(_SLAB_POISON) /* Indicate a kmalloc slab */ #define SLAB_KMALLOC __SLAB_FLAG_BIT(_SLAB_KMALLOC) /** * define SLAB_HWCACHE_ALIGN - Align objects on cache line boundaries. * * Sufficiently large objects are aligned on cache line boundary. For object * size smaller than a half of cache line size, the alignment is on the half of * cache line size. In general, if object size is smaller than 1/2^n of cache * line size, the alignment is adjusted to 1/2^n. * * If explicit alignment is also requested by the respective * &struct kmem_cache_args field, the greater of both is alignments is applied. */ #define SLAB_HWCACHE_ALIGN __SLAB_FLAG_BIT(_SLAB_HWCACHE_ALIGN) /* Use GFP_DMA memory */ #define SLAB_CACHE_DMA __SLAB_FLAG_BIT(_SLAB_CACHE_DMA) /* Use GFP_DMA32 memory */ #define SLAB_CACHE_DMA32 __SLAB_FLAG_BIT(_SLAB_CACHE_DMA32) /* DEBUG: Store the last owner for bug hunting */ #define SLAB_STORE_USER __SLAB_FLAG_BIT(_SLAB_STORE_USER) /* Panic if kmem_cache_create() fails */ #define SLAB_PANIC __SLAB_FLAG_BIT(_SLAB_PANIC) /** * define SLAB_TYPESAFE_BY_RCU - **WARNING** READ THIS! * * This delays freeing the SLAB page by a grace period, it does _NOT_ * delay object freeing. This means that if you do kmem_cache_free() * that memory location is free to be reused at any time. Thus it may * be possible to see another object there in the same RCU grace period. * * This feature only ensures the memory location backing the object * stays valid, the trick to using this is relying on an independent * object validation pass. Something like: * * :: * * begin: * rcu_read_lock(); * obj = lockless_lookup(key); * if (obj) { * if (!try_get_ref(obj)) // might fail for free objects * rcu_read_unlock(); * goto begin; * * if (obj->key != key) { // not the object we expected * put_ref(obj); * rcu_read_unlock(); * goto begin; * } * } * rcu_read_unlock(); * * This is useful if we need to approach a kernel structure obliquely, * from its address obtained without the usual locking. We can lock * the structure to stabilize it and check it's still at the given address, * only if we can be sure that the memory has not been meanwhile reused * for some other kind of object (which our subsystem's lock might corrupt). * * rcu_read_lock before reading the address, then rcu_read_unlock after * taking the spinlock within the structure expected at that address. * * Note that it is not possible to acquire a lock within a structure * allocated with SLAB_TYPESAFE_BY_RCU without first acquiring a reference * as described above. The reason is that SLAB_TYPESAFE_BY_RCU pages * are not zeroed before being given to the slab, which means that any * locks must be initialized after each and every kmem_struct_alloc(). * Alternatively, make the ctor passed to kmem_cache_create() initialize * the locks at page-allocation time, as is done in __i915_request_ctor(), * sighand_ctor(), and anon_vma_ctor(). Such a ctor permits readers * to safely acquire those ctor-initialized locks under rcu_read_lock() * protection. * * Note that SLAB_TYPESAFE_BY_RCU was originally named SLAB_DESTROY_BY_RCU. */ #define SLAB_TYPESAFE_BY_RCU __SLAB_FLAG_BIT(_SLAB_TYPESAFE_BY_RCU) /* Trace allocations and frees */ #define SLAB_TRACE __SLAB_FLAG_BIT(_SLAB_TRACE) /* Flag to prevent checks on free */ #ifdef CONFIG_DEBUG_OBJECTS # define SLAB_DEBUG_OBJECTS __SLAB_FLAG_BIT(_SLAB_DEBUG_OBJECTS) #else # define SLAB_DEBUG_OBJECTS __SLAB_FLAG_UNUSED #endif /* Avoid kmemleak tracing */ #define SLAB_NOLEAKTRACE __SLAB_FLAG_BIT(_SLAB_NOLEAKTRACE) /* * Prevent merging with compatible kmem caches. This flag should be used * cautiously. Valid use cases: * * - caches created for self-tests (e.g. kunit) * - general caches created and used by a subsystem, only when a * (subsystem-specific) debug option is enabled * - performance critical caches, should be very rare and consulted with slab * maintainers, and not used together with CONFIG_SLUB_TINY */ #define SLAB_NO_MERGE __SLAB_FLAG_BIT(_SLAB_NO_MERGE) /* Fault injection mark */ #ifdef CONFIG_FAILSLAB # define SLAB_FAILSLAB __SLAB_FLAG_BIT(_SLAB_FAILSLAB) #else # define SLAB_FAILSLAB __SLAB_FLAG_UNUSED #endif /** * define SLAB_ACCOUNT - Account allocations to memcg. * * All object allocations from this cache will be memcg accounted, regardless of * __GFP_ACCOUNT being or not being passed to individual allocations. */ #ifdef CONFIG_MEMCG # define SLAB_ACCOUNT __SLAB_FLAG_BIT(_SLAB_ACCOUNT) #else # define SLAB_ACCOUNT __SLAB_FLAG_UNUSED #endif #ifdef CONFIG_KASAN_GENERIC #define SLAB_KASAN __SLAB_FLAG_BIT(_SLAB_KASAN) #else #define SLAB_KASAN __SLAB_FLAG_UNUSED #endif /* * Ignore user specified debugging flags. * Intended for caches created for self-tests so they have only flags * specified in the code and other flags are ignored. */ #define SLAB_NO_USER_FLAGS __SLAB_FLAG_BIT(_SLAB_NO_USER_FLAGS) #ifdef CONFIG_KFENCE #define SLAB_SKIP_KFENCE __SLAB_FLAG_BIT(_SLAB_SKIP_KFENCE) #else #define SLAB_SKIP_KFENCE __SLAB_FLAG_UNUSED #endif /* The following flags affect the page allocator grouping pages by mobility */ /** * define SLAB_RECLAIM_ACCOUNT - Objects are reclaimable. * * Use this flag for caches that have an associated shrinker. As a result, slab * pages are allocated with __GFP_RECLAIMABLE, which affects grouping pages by * mobility, and are accounted in SReclaimable counter in /proc/meminfo */ #ifndef CONFIG_SLUB_TINY #define SLAB_RECLAIM_ACCOUNT __SLAB_FLAG_BIT(_SLAB_RECLAIM_ACCOUNT) #else #define SLAB_RECLAIM_ACCOUNT __SLAB_FLAG_UNUSED #endif #define SLAB_TEMPORARY SLAB_RECLAIM_ACCOUNT /* Objects are short-lived */ /* Slab created using create_boot_cache */ #ifdef CONFIG_SLAB_OBJ_EXT #define SLAB_NO_OBJ_EXT __SLAB_FLAG_BIT(_SLAB_NO_OBJ_EXT) #else #define SLAB_NO_OBJ_EXT __SLAB_FLAG_UNUSED #endif /* * freeptr_t represents a SLUB freelist pointer, which might be encoded * and not dereferenceable if CONFIG_SLAB_FREELIST_HARDENED is enabled. */ typedef struct { unsigned long v; } freeptr_t; /* * ZERO_SIZE_PTR will be returned for zero sized kmalloc requests. * * Dereferencing ZERO_SIZE_PTR will lead to a distinct access fault. * * ZERO_SIZE_PTR can be passed to kfree though in the same way that NULL can. * Both make kfree a no-op. */ #define ZERO_SIZE_PTR ((void *)16) #define ZERO_OR_NULL_PTR(x) ((unsigned long)(x) <= \ (unsigned long)ZERO_SIZE_PTR) #include <linux/kasan.h> struct list_lru; struct mem_cgroup; /* * struct kmem_cache related prototypes */ bool slab_is_available(void); /** * struct kmem_cache_args - Less common arguments for kmem_cache_create() * * Any uninitialized fields of the structure are interpreted as unused. The * exception is @freeptr_offset where %0 is a valid value, so * @use_freeptr_offset must be also set to %true in order to interpret the field * as used. For @useroffset %0 is also valid, but only with non-%0 * @usersize. * * When %NULL args is passed to kmem_cache_create(), it is equivalent to all * fields unused. */ struct kmem_cache_args { /** * @align: The required alignment for the objects. * * %0 means no specific alignment is requested. */ unsigned int align; /** * @useroffset: Usercopy region offset. * * %0 is a valid offset, when @usersize is non-%0 */ unsigned int useroffset; /** * @usersize: Usercopy region size. * * %0 means no usercopy region is specified. */ unsigned int usersize; /** * @freeptr_offset: Custom offset for the free pointer * in &SLAB_TYPESAFE_BY_RCU caches * * By default &SLAB_TYPESAFE_BY_RCU caches place the free pointer * outside of the object. This might cause the object to grow in size. * Cache creators that have a reason to avoid this can specify a custom * free pointer offset in their struct where the free pointer will be * placed. * * Note that placing the free pointer inside the object requires the * caller to ensure that no fields are invalidated that are required to * guard against object recycling (See &SLAB_TYPESAFE_BY_RCU for * details). * * Using %0 as a value for @freeptr_offset is valid. If @freeptr_offset * is specified, %use_freeptr_offset must be set %true. * * Note that @ctor currently isn't supported with custom free pointers * as a @ctor requires an external free pointer. */ unsigned int freeptr_offset; /** * @use_freeptr_offset: Whether a @freeptr_offset is used. */ bool use_freeptr_offset; /** * @ctor: A constructor for the objects. * * The constructor is invoked for each object in a newly allocated slab * page. It is the cache user's responsibility to free object in the * same state as after calling the constructor, or deal appropriately * with any differences between a freshly constructed and a reallocated * object. * * %NULL means no constructor. */ void (*ctor)(void *); }; struct kmem_cache *__kmem_cache_create_args(const char *name, unsigned int object_size, struct kmem_cache_args *args, slab_flags_t flags); static inline struct kmem_cache * __kmem_cache_create(const char *name, unsigned int size, unsigned int align, slab_flags_t flags, void (*ctor)(void *)) { struct kmem_cache_args kmem_args = { .align = align, .ctor = ctor, }; return __kmem_cache_create_args(name, size, &kmem_args, flags); } /** * kmem_cache_create_usercopy - Create a kmem cache with a region suitable * for copying to userspace. * @name: A string which is used in /proc/slabinfo to identify this cache. * @size: The size of objects to be created in this cache. * @align: The required alignment for the objects. * @flags: SLAB flags * @useroffset: Usercopy region offset * @usersize: Usercopy region size * @ctor: A constructor for the objects, or %NULL. * * This is a legacy wrapper, new code should use either KMEM_CACHE_USERCOPY() * if whitelisting a single field is sufficient, or kmem_cache_create() with * the necessary parameters passed via the args parameter (see * &struct kmem_cache_args) * * Return: a pointer to the cache on success, NULL on failure. */ static inline struct kmem_cache * kmem_cache_create_usercopy(const char *name, unsigned int size, unsigned int align, slab_flags_t flags, unsigned int useroffset, unsigned int usersize, void (*ctor)(void *)) { struct kmem_cache_args kmem_args = { .align = align, .ctor = ctor, .useroffset = useroffset, .usersize = usersize, }; return __kmem_cache_create_args(name, size, &kmem_args, flags); } /* If NULL is passed for @args, use this variant with default arguments. */ static inline struct kmem_cache * __kmem_cache_default_args(const char *name, unsigned int size, struct kmem_cache_args *args, slab_flags_t flags) { struct kmem_cache_args kmem_default_args = {}; /* Make sure we don't get passed garbage. */ if (WARN_ON_ONCE(args)) return ERR_PTR(-EINVAL); return __kmem_cache_create_args(name, size, &kmem_default_args, flags); } /** * kmem_cache_create - Create a kmem cache. * @__name: A string which is used in /proc/slabinfo to identify this cache. * @__object_size: The size of objects to be created in this cache. * @__args: Optional arguments, see &struct kmem_cache_args. Passing %NULL * means defaults will be used for all the arguments. * * This is currently implemented as a macro using ``_Generic()`` to call * either the new variant of the function, or a legacy one. * * The new variant has 4 parameters: * ``kmem_cache_create(name, object_size, args, flags)`` * * See __kmem_cache_create_args() which implements this. * * The legacy variant has 5 parameters: * ``kmem_cache_create(name, object_size, align, flags, ctor)`` * * The align and ctor parameters map to the respective fields of * &struct kmem_cache_args * * Context: Cannot be called within a interrupt, but can be interrupted. * * Return: a pointer to the cache on success, NULL on failure. */ #define kmem_cache_create(__name, __object_size, __args, ...) \ _Generic((__args), \ struct kmem_cache_args *: __kmem_cache_create_args, \ void *: __kmem_cache_default_args, \ default: __kmem_cache_create)(__name, __object_size, __args, __VA_ARGS__) void kmem_cache_destroy(struct kmem_cache *s); int kmem_cache_shrink(struct kmem_cache *s); /* * Please use this macro to create slab caches. Simply specify the * name of the structure and maybe some flags that are listed above. * * The alignment of the struct determines object alignment. If you * f.e. add ____cacheline_aligned_in_smp to the struct declaration * then the objects will be properly aligned in SMP configurations. */ #define KMEM_CACHE(__struct, __flags) \ __kmem_cache_create_args(#__struct, sizeof(struct __struct), \ &(struct kmem_cache_args) { \ .align = __alignof__(struct __struct), \ }, (__flags)) /* * To whitelist a single field for copying to/from usercopy, use this * macro instead for KMEM_CACHE() above. */ #define KMEM_CACHE_USERCOPY(__struct, __flags, __field) \ __kmem_cache_create_args(#__struct, sizeof(struct __struct), \ &(struct kmem_cache_args) { \ .align = __alignof__(struct __struct), \ .useroffset = offsetof(struct __struct, __field), \ .usersize = sizeof_field(struct __struct, __field), \ }, (__flags)) /* * Common kmalloc functions provided by all allocators */ void * __must_check krealloc_noprof(const void *objp, size_t new_size, gfp_t flags) __realloc_size(2); #define krealloc(...) alloc_hooks(krealloc_noprof(__VA_ARGS__)) void kfree(const void *objp); void kfree_sensitive(const void *objp); size_t __ksize(const void *objp); DEFINE_FREE(kfree, void *, if (!IS_ERR_OR_NULL(_T)) kfree(_T)) DEFINE_FREE(kfree_sensitive, void *, if (_T) kfree_sensitive(_T)) /** * ksize - Report actual allocation size of associated object * * @objp: Pointer returned from a prior kmalloc()-family allocation. * * This should not be used for writing beyond the originally requested * allocation size. Either use krealloc() or round up the allocation size * with kmalloc_size_roundup() prior to allocation. If this is used to * access beyond the originally requested allocation size, UBSAN_BOUNDS * and/or FORTIFY_SOURCE may trip, since they only know about the * originally allocated size via the __alloc_size attribute. */ size_t ksize(const void *objp); #ifdef CONFIG_PRINTK bool kmem_dump_obj(void *object); #else static inline bool kmem_dump_obj(void *object) { return false; } #endif /* * Some archs want to perform DMA into kmalloc caches and need a guaranteed * alignment larger than the alignment of a 64-bit integer. * Setting ARCH_DMA_MINALIGN in arch headers allows that. */ #ifdef ARCH_HAS_DMA_MINALIGN #if ARCH_DMA_MINALIGN > 8 && !defined(ARCH_KMALLOC_MINALIGN) #define ARCH_KMALLOC_MINALIGN ARCH_DMA_MINALIGN #endif #endif #ifndef ARCH_KMALLOC_MINALIGN #define ARCH_KMALLOC_MINALIGN __alignof__(unsigned long long) #elif ARCH_KMALLOC_MINALIGN > 8 #define KMALLOC_MIN_SIZE ARCH_KMALLOC_MINALIGN #define KMALLOC_SHIFT_LOW ilog2(KMALLOC_MIN_SIZE) #endif /* * Setting ARCH_SLAB_MINALIGN in arch headers allows a different alignment. * Intended for arches that get misalignment faults even for 64 bit integer * aligned buffers. */ #ifndef ARCH_SLAB_MINALIGN #define ARCH_SLAB_MINALIGN __alignof__(unsigned long long) #endif /* * Arches can define this function if they want to decide the minimum slab * alignment at runtime. The value returned by the function must be a power * of two and >= ARCH_SLAB_MINALIGN. */ #ifndef arch_slab_minalign static inline unsigned int arch_slab_minalign(void) { return ARCH_SLAB_MINALIGN; } #endif /* * kmem_cache_alloc and friends return pointers aligned to ARCH_SLAB_MINALIGN. * kmalloc and friends return pointers aligned to both ARCH_KMALLOC_MINALIGN * and ARCH_SLAB_MINALIGN, but here we only assume the former alignment. */ #define __assume_kmalloc_alignment __assume_aligned(ARCH_KMALLOC_MINALIGN) #define __assume_slab_alignment __assume_aligned(ARCH_SLAB_MINALIGN) #define __assume_page_alignment __assume_aligned(PAGE_SIZE) /* * Kmalloc array related definitions */ /* * SLUB directly allocates requests fitting in to an order-1 page * (PAGE_SIZE*2). Larger requests are passed to the page allocator. */ #define KMALLOC_SHIFT_HIGH (PAGE_SHIFT + 1) #define KMALLOC_SHIFT_MAX (MAX_PAGE_ORDER + PAGE_SHIFT) #ifndef KMALLOC_SHIFT_LOW #define KMALLOC_SHIFT_LOW 3 #endif /* Maximum allocatable size */ #define KMALLOC_MAX_SIZE (1UL << KMALLOC_SHIFT_MAX) /* Maximum size for which we actually use a slab cache */ #define KMALLOC_MAX_CACHE_SIZE (1UL << KMALLOC_SHIFT_HIGH) /* Maximum order allocatable via the slab allocator */ #define KMALLOC_MAX_ORDER (KMALLOC_SHIFT_MAX - PAGE_SHIFT) /* * Kmalloc subsystem. */ #ifndef KMALLOC_MIN_SIZE #define KMALLOC_MIN_SIZE (1 << KMALLOC_SHIFT_LOW) #endif /* * This restriction comes from byte sized index implementation. * Page size is normally 2^12 bytes and, in this case, if we want to use * byte sized index which can represent 2^8 entries, the size of the object * should be equal or greater to 2^12 / 2^8 = 2^4 = 16. * If minimum size of kmalloc is less than 16, we use it as minimum object * size and give up to use byte sized index. */ #define SLAB_OBJ_MIN_SIZE (KMALLOC_MIN_SIZE < 16 ? \ (KMALLOC_MIN_SIZE) : 16) #ifdef CONFIG_RANDOM_KMALLOC_CACHES #define RANDOM_KMALLOC_CACHES_NR 15 // # of cache copies #else #define RANDOM_KMALLOC_CACHES_NR 0 #endif /* * Whenever changing this, take care of that kmalloc_type() and * create_kmalloc_caches() still work as intended. * * KMALLOC_NORMAL can contain only unaccounted objects whereas KMALLOC_CGROUP * is for accounted but unreclaimable and non-dma objects. All the other * kmem caches can have both accounted and unaccounted objects. */ enum kmalloc_cache_type { KMALLOC_NORMAL = 0, #ifndef CONFIG_ZONE_DMA KMALLOC_DMA = KMALLOC_NORMAL, #endif #ifndef CONFIG_MEMCG KMALLOC_CGROUP = KMALLOC_NORMAL, #endif KMALLOC_RANDOM_START = KMALLOC_NORMAL, KMALLOC_RANDOM_END = KMALLOC_RANDOM_START + RANDOM_KMALLOC_CACHES_NR, #ifdef CONFIG_SLUB_TINY KMALLOC_RECLAIM = KMALLOC_NORMAL, #else KMALLOC_RECLAIM, #endif #ifdef CONFIG_ZONE_DMA KMALLOC_DMA, #endif #ifdef CONFIG_MEMCG KMALLOC_CGROUP, #endif NR_KMALLOC_TYPES }; typedef struct kmem_cache * kmem_buckets[KMALLOC_SHIFT_HIGH + 1]; extern kmem_buckets kmalloc_caches[NR_KMALLOC_TYPES]; /* * Define gfp bits that should not be set for KMALLOC_NORMAL. */ #define KMALLOC_NOT_NORMAL_BITS \ (__GFP_RECLAIMABLE | \ (IS_ENABLED(CONFIG_ZONE_DMA) ? __GFP_DMA : 0) | \ (IS_ENABLED(CONFIG_MEMCG) ? __GFP_ACCOUNT : 0)) extern unsigned long random_kmalloc_seed; static __always_inline enum kmalloc_cache_type kmalloc_type(gfp_t flags, unsigned long caller) { /* * The most common case is KMALLOC_NORMAL, so test for it * with a single branch for all the relevant flags. */ if (likely((flags & KMALLOC_NOT_NORMAL_BITS) == 0)) #ifdef CONFIG_RANDOM_KMALLOC_CACHES /* RANDOM_KMALLOC_CACHES_NR (=15) copies + the KMALLOC_NORMAL */ return KMALLOC_RANDOM_START + hash_64(caller ^ random_kmalloc_seed, ilog2(RANDOM_KMALLOC_CACHES_NR + 1)); #else return KMALLOC_NORMAL; #endif /* * At least one of the flags has to be set. Their priorities in * decreasing order are: * 1) __GFP_DMA * 2) __GFP_RECLAIMABLE * 3) __GFP_ACCOUNT */ if (IS_ENABLED(CONFIG_ZONE_DMA) && (flags & __GFP_DMA)) return KMALLOC_DMA; if (!IS_ENABLED(CONFIG_MEMCG) || (flags & __GFP_RECLAIMABLE)) return KMALLOC_RECLAIM; else return KMALLOC_CGROUP; } /* * Figure out which kmalloc slab an allocation of a certain size * belongs to. * 0 = zero alloc * 1 = 65 .. 96 bytes * 2 = 129 .. 192 bytes * n = 2^(n-1)+1 .. 2^n * * Note: __kmalloc_index() is compile-time optimized, and not runtime optimized; * typical usage is via kmalloc_index() and therefore evaluated at compile-time. * Callers where !size_is_constant should only be test modules, where runtime * overheads of __kmalloc_index() can be tolerated. Also see kmalloc_slab(). */ static __always_inline unsigned int __kmalloc_index(size_t size, bool size_is_constant) { if (!size) return 0; if (size <= KMALLOC_MIN_SIZE) return KMALLOC_SHIFT_LOW; if (KMALLOC_MIN_SIZE <= 32 && size > 64 && size <= 96) return 1; if (KMALLOC_MIN_SIZE <= 64 && size > 128 && size <= 192) return 2; if (size <= 8) return 3; if (size <= 16) return 4; if (size <= 32) return 5; if (size <= 64) return 6; if (size <= 128) return 7; if (size <= 256) return 8; if (size <= 512) return 9; if (size <= 1024) return 10; if (size <= 2 * 1024) return 11; if (size <= 4 * 1024) return 12; if (size <= 8 * 1024) return 13; if (size <= 16 * 1024) return 14; if (size <= 32 * 1024) return 15; if (size <= 64 * 1024) return 16; if (size <= 128 * 1024) return 17; if (size <= 256 * 1024) return 18; if (size <= 512 * 1024) return 19; if (size <= 1024 * 1024) return 20; if (size <= 2 * 1024 * 1024) return 21; if (!IS_ENABLED(CONFIG_PROFILE_ALL_BRANCHES) && size_is_constant) BUILD_BUG_ON_MSG(1, "unexpected size in kmalloc_index()"); else BUG(); /* Will never be reached. Needed because the compiler may complain */ return -1; } static_assert(PAGE_SHIFT <= 20); #define kmalloc_index(s) __kmalloc_index(s, true) #include <linux/alloc_tag.h> /** * kmem_cache_alloc - Allocate an object * @cachep: The cache to allocate from. * @flags: See kmalloc(). * * Allocate an object from this cache. * See kmem_cache_zalloc() for a shortcut of adding __GFP_ZERO to flags. * * Return: pointer to the new object or %NULL in case of error */ void *kmem_cache_alloc_noprof(struct kmem_cache *cachep, gfp_t flags) __assume_slab_alignment __malloc; #define kmem_cache_alloc(...) alloc_hooks(kmem_cache_alloc_noprof(__VA_ARGS__)) void *kmem_cache_alloc_lru_noprof(struct kmem_cache *s, struct list_lru *lru, gfp_t gfpflags) __assume_slab_alignment __malloc; #define kmem_cache_alloc_lru(...) alloc_hooks(kmem_cache_alloc_lru_noprof(__VA_ARGS__)) /** * kmem_cache_charge - memcg charge an already allocated slab memory * @objp: address of the slab object to memcg charge * @gfpflags: describe the allocation context * * kmem_cache_charge allows charging a slab object to the current memcg, * primarily in cases where charging at allocation time might not be possible * because the target memcg is not known (i.e. softirq context) * * The objp should be pointer returned by the slab allocator functions like * kmalloc (with __GFP_ACCOUNT in flags) or kmem_cache_alloc. The memcg charge * behavior can be controlled through gfpflags parameter, which affects how the * necessary internal metadata can be allocated. Including __GFP_NOFAIL denotes * that overcharging is requested instead of failure, but is not applied for the * internal metadata allocation. * * There are several cases where it will return true even if the charging was * not done: * More specifically: * * 1. For !CONFIG_MEMCG or cgroup_disable=memory systems. * 2. Already charged slab objects. * 3. For slab objects from KMALLOC_NORMAL caches - allocated by kmalloc() * without __GFP_ACCOUNT * 4. Allocating internal metadata has failed * * Return: true if charge was successful otherwise false. */ bool kmem_cache_charge(void *objp, gfp_t gfpflags); void kmem_cache_free(struct kmem_cache *s, void *objp); kmem_buckets *kmem_buckets_create(const char *name, slab_flags_t flags, unsigned int useroffset, unsigned int usersize, void (*ctor)(void *)); /* * Bulk allocation and freeing operations. These are accelerated in an * allocator specific way to avoid taking locks repeatedly or building * metadata structures unnecessarily. * * Note that interrupts must be enabled when calling these functions. */ void kmem_cache_free_bulk(struct kmem_cache *s, size_t size, void **p); int kmem_cache_alloc_bulk_noprof(struct kmem_cache *s, gfp_t flags, size_t size, void **p); #define kmem_cache_alloc_bulk(...) alloc_hooks(kmem_cache_alloc_bulk_noprof(__VA_ARGS__)) static __always_inline void kfree_bulk(size_t size, void **p) { kmem_cache_free_bulk(NULL, size, p); } void *kmem_cache_alloc_node_noprof(struct kmem_cache *s, gfp_t flags, int node) __assume_slab_alignment __malloc; #define kmem_cache_alloc_node(...) alloc_hooks(kmem_cache_alloc_node_noprof(__VA_ARGS__)) /* * These macros allow declaring a kmem_buckets * parameter alongside size, which * can be compiled out with CONFIG_SLAB_BUCKETS=n so that a large number of call * sites don't have to pass NULL. */ #ifdef CONFIG_SLAB_BUCKETS #define DECL_BUCKET_PARAMS(_size, _b) size_t (_size), kmem_buckets *(_b) #define PASS_BUCKET_PARAMS(_size, _b) (_size), (_b) #define PASS_BUCKET_PARAM(_b) (_b) #else #define DECL_BUCKET_PARAMS(_size, _b) size_t (_size) #define PASS_BUCKET_PARAMS(_size, _b) (_size) #define PASS_BUCKET_PARAM(_b) NULL #endif /* * The following functions are not to be used directly and are intended only * for internal use from kmalloc() and kmalloc_node() * with the exception of kunit tests */ void *__kmalloc_noprof(size_t size, gfp_t flags) __assume_kmalloc_alignment __alloc_size(1); void *__kmalloc_node_noprof(DECL_BUCKET_PARAMS(size, b), gfp_t flags, int node) __assume_kmalloc_alignment __alloc_size(1); void *__kmalloc_cache_noprof(struct kmem_cache *s, gfp_t flags, size_t size) __assume_kmalloc_alignment __alloc_size(3); void *__kmalloc_cache_node_noprof(struct kmem_cache *s, gfp_t gfpflags, int node, size_t size) __assume_kmalloc_alignment __alloc_size(4); void *__kmalloc_large_noprof(size_t size, gfp_t flags) __assume_page_alignment __alloc_size(1); void *__kmalloc_large_node_noprof(size_t size, gfp_t flags, int node) __assume_page_alignment __alloc_size(1); /** * kmalloc - allocate kernel memory * @size: how many bytes of memory are required. * @flags: describe the allocation context * * kmalloc is the normal method of allocating memory * for objects smaller than page size in the kernel. * * The allocated object address is aligned to at least ARCH_KMALLOC_MINALIGN * bytes. For @size of power of two bytes, the alignment is also guaranteed * to be at least to the size. For other sizes, the alignment is guaranteed to * be at least the largest power-of-two divisor of @size. * * The @flags argument may be one of the GFP flags defined at * include/linux/gfp_types.h and described at * :ref:`Documentation/core-api/mm-api.rst <mm-api-gfp-flags>` * * The recommended usage of the @flags is described at * :ref:`Documentation/core-api/memory-allocation.rst <memory_allocation>` * * Below is a brief outline of the most useful GFP flags * * %GFP_KERNEL * Allocate normal kernel ram. May sleep. * * %GFP_NOWAIT * Allocation will not sleep. * * %GFP_ATOMIC * Allocation will not sleep. May use emergency pools. * * Also it is possible to set different flags by OR'ing * in one or more of the following additional @flags: * * %__GFP_ZERO * Zero the allocated memory before returning. Also see kzalloc(). * * %__GFP_HIGH * This allocation has high priority and may use emergency pools. * * %__GFP_NOFAIL * Indicate that this allocation is in no way allowed to fail * (think twice before using). * * %__GFP_NORETRY * If memory is not immediately available, * then give up at once. * * %__GFP_NOWARN * If allocation fails, don't issue any warnings. * * %__GFP_RETRY_MAYFAIL * Try really hard to succeed the allocation but fail * eventually. */ static __always_inline __alloc_size(1) void *kmalloc_noprof(size_t size, gfp_t flags) { if (__builtin_constant_p(size) && size) { unsigned int index; if (size > KMALLOC_MAX_CACHE_SIZE) return __kmalloc_large_noprof(size, flags); index = kmalloc_index(size); return __kmalloc_cache_noprof( kmalloc_caches[kmalloc_type(flags, _RET_IP_)][index], flags, size); } return __kmalloc_noprof(size, flags); } #define kmalloc(...) alloc_hooks(kmalloc_noprof(__VA_ARGS__)) #define kmem_buckets_alloc(_b, _size, _flags) \ alloc_hooks(__kmalloc_node_noprof(PASS_BUCKET_PARAMS(_size, _b), _flags, NUMA_NO_NODE)) #define kmem_buckets_alloc_track_caller(_b, _size, _flags) \ alloc_hooks(__kmalloc_node_track_caller_noprof(PASS_BUCKET_PARAMS(_size, _b), _flags, NUMA_NO_NODE, _RET_IP_)) static __always_inline __alloc_size(1) void *kmalloc_node_noprof(size_t size, gfp_t flags, int node) { if (__builtin_constant_p(size) && size) { unsigned int index; if (size > KMALLOC_MAX_CACHE_SIZE) return __kmalloc_large_node_noprof(size, flags, node); index = kmalloc_index(size); return __kmalloc_cache_node_noprof( kmalloc_caches[kmalloc_type(flags, _RET_IP_)][index], flags, node, size); } return __kmalloc_node_noprof(PASS_BUCKET_PARAMS(size, NULL), flags, node); } #define kmalloc_node(...) alloc_hooks(kmalloc_node_noprof(__VA_ARGS__)) /** * kmalloc_array - allocate memory for an array. * @n: number of elements. * @size: element size. * @flags: the type of memory to allocate (see kmalloc). */ static inline __alloc_size(1, 2) void *kmalloc_array_noprof(size_t n, size_t size, gfp_t flags) { size_t bytes; if (unlikely(check_mul_overflow(n, size, &bytes))) return NULL; if (__builtin_constant_p(n) && __builtin_constant_p(size)) return kmalloc_noprof(bytes, flags); return kmalloc_noprof(bytes, flags); } #define kmalloc_array(...) alloc_hooks(kmalloc_array_noprof(__VA_ARGS__)) /** * krealloc_array - reallocate memory for an array. * @p: pointer to the memory chunk to reallocate * @new_n: new number of elements to alloc * @new_size: new size of a single member of the array * @flags: the type of memory to allocate (see kmalloc) * * If __GFP_ZERO logic is requested, callers must ensure that, starting with the * initial memory allocation, every subsequent call to this API for the same * memory allocation is flagged with __GFP_ZERO. Otherwise, it is possible that * __GFP_ZERO is not fully honored by this API. * * See krealloc_noprof() for further details. * * In any case, the contents of the object pointed to are preserved up to the * lesser of the new and old sizes. */ static inline __realloc_size(2, 3) void * __must_check krealloc_array_noprof(void *p, size_t new_n, size_t new_size, gfp_t flags) { size_t bytes; if (unlikely(check_mul_overflow(new_n, new_size, &bytes))) return NULL; return krealloc_noprof(p, bytes, flags); } #define krealloc_array(...) alloc_hooks(krealloc_array_noprof(__VA_ARGS__)) /** * kcalloc - allocate memory for an array. The memory is set to zero. * @n: number of elements. * @size: element size. * @flags: the type of memory to allocate (see kmalloc). */ #define kcalloc(n, size, flags) kmalloc_array(n, size, (flags) | __GFP_ZERO) void *__kmalloc_node_track_caller_noprof(DECL_BUCKET_PARAMS(size, b), gfp_t flags, int node, unsigned long caller) __alloc_size(1); #define kmalloc_node_track_caller_noprof(size, flags, node, caller) \ __kmalloc_node_track_caller_noprof(PASS_BUCKET_PARAMS(size, NULL), flags, node, caller) #define kmalloc_node_track_caller(...) \ alloc_hooks(kmalloc_node_track_caller_noprof(__VA_ARGS__, _RET_IP_)) /* * kmalloc_track_caller is a special version of kmalloc that records the * calling function of the routine calling it for slab leak tracking instead * of just the calling function (confusing, eh?). * It's useful when the call to kmalloc comes from a widely-used standard * allocator where we care about the real place the memory allocation * request comes from. */ #define kmalloc_track_caller(...) kmalloc_node_track_caller(__VA_ARGS__, NUMA_NO_NODE) #define kmalloc_track_caller_noprof(...) \ kmalloc_node_track_caller_noprof(__VA_ARGS__, NUMA_NO_NODE, _RET_IP_) static inline __alloc_size(1, 2) void *kmalloc_array_node_noprof(size_t n, size_t size, gfp_t flags, int node) { size_t bytes; if (unlikely(check_mul_overflow(n, size, &bytes))) return NULL; if (__builtin_constant_p(n) && __builtin_constant_p(size)) return kmalloc_node_noprof(bytes, flags, node); return __kmalloc_node_noprof(PASS_BUCKET_PARAMS(bytes, NULL), flags, node); } #define kmalloc_array_node(...) alloc_hooks(kmalloc_array_node_noprof(__VA_ARGS__)) #define kcalloc_node(_n, _size, _flags, _node) \ kmalloc_array_node(_n, _size, (_flags) | __GFP_ZERO, _node) /* * Shortcuts */ #define kmem_cache_zalloc(_k, _flags) kmem_cache_alloc(_k, (_flags)|__GFP_ZERO) /** * kzalloc - allocate memory. The memory is set to zero. * @size: how many bytes of memory are required. * @flags: the type of memory to allocate (see kmalloc). */ static inline __alloc_size(1) void *kzalloc_noprof(size_t size, gfp_t flags) { return kmalloc_noprof(size, flags | __GFP_ZERO); } #define kzalloc(...) alloc_hooks(kzalloc_noprof(__VA_ARGS__)) #define kzalloc_node(_size, _flags, _node) kmalloc_node(_size, (_flags)|__GFP_ZERO, _node) void *__kvmalloc_node_noprof(DECL_BUCKET_PARAMS(size, b), gfp_t flags, int node) __alloc_size(1); #define kvmalloc_node_noprof(size, flags, node) \ __kvmalloc_node_noprof(PASS_BUCKET_PARAMS(size, NULL), flags, node) #define kvmalloc_node(...) alloc_hooks(kvmalloc_node_noprof(__VA_ARGS__)) #define kvmalloc(_size, _flags) kvmalloc_node(_size, _flags, NUMA_NO_NODE) #define kvmalloc_noprof(_size, _flags) kvmalloc_node_noprof(_size, _flags, NUMA_NO_NODE) #define kvzalloc(_size, _flags) kvmalloc(_size, (_flags)|__GFP_ZERO) #define kvzalloc_node(_size, _flags, _node) kvmalloc_node(_size, (_flags)|__GFP_ZERO, _node) #define kmem_buckets_valloc(_b, _size, _flags) \ alloc_hooks(__kvmalloc_node_noprof(PASS_BUCKET_PARAMS(_size, _b), _flags, NUMA_NO_NODE)) static inline __alloc_size(1, 2) void * kvmalloc_array_node_noprof(size_t n, size_t size, gfp_t flags, int node) { size_t bytes; if (unlikely(check_mul_overflow(n, size, &bytes))) return NULL; return kvmalloc_node_noprof(bytes, flags, node); } #define kvmalloc_array_noprof(...) kvmalloc_array_node_noprof(__VA_ARGS__, NUMA_NO_NODE) #define kvcalloc_node_noprof(_n,_s,_f,_node) kvmalloc_array_node_noprof(_n,_s,(_f)|__GFP_ZERO,_node) #define kvcalloc_noprof(...) kvcalloc_node_noprof(__VA_ARGS__, NUMA_NO_NODE) #define kvmalloc_array(...) alloc_hooks(kvmalloc_array_noprof(__VA_ARGS__)) #define kvcalloc_node(...) alloc_hooks(kvcalloc_node_noprof(__VA_ARGS__)) #define kvcalloc(...) alloc_hooks(kvcalloc_noprof(__VA_ARGS__)) void *kvrealloc_noprof(const void *p, size_t size, gfp_t flags) __realloc_size(2); #define kvrealloc(...) alloc_hooks(kvrealloc_noprof(__VA_ARGS__)) extern void kvfree(const void *addr); DEFINE_FREE(kvfree, void *, if (!IS_ERR_OR_NULL(_T)) kvfree(_T)) extern void kvfree_sensitive(const void *addr, size_t len); unsigned int kmem_cache_size(struct kmem_cache *s); /** * kmalloc_size_roundup - Report allocation bucket size for the given size * * @size: Number of bytes to round up from. * * This returns the number of bytes that would be available in a kmalloc() * allocation of @size bytes. For example, a 126 byte request would be * rounded up to the next sized kmalloc bucket, 128 bytes. (This is strictly * for the general-purpose kmalloc()-based allocations, and is not for the * pre-sized kmem_cache_alloc()-based allocations.) * * Use this to kmalloc() the full bucket size ahead of time instead of using * ksize() to query the size after an allocation. */ size_t kmalloc_size_roundup(size_t size); void __init kmem_cache_init_late(void); void __init kvfree_rcu_init(void); #endif /* _LINUX_SLAB_H */
15 1 1 3 5 5 3 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NET_LWTUNNEL_H #define __NET_LWTUNNEL_H 1 #include <linux/lwtunnel.h> #include <linux/netdevice.h> #include <linux/skbuff.h> #include <linux/types.h> #include <net/route.h> #define LWTUNNEL_HASH_BITS 7 #define LWTUNNEL_HASH_SIZE (1 << LWTUNNEL_HASH_BITS) /* lw tunnel state flags */ #define LWTUNNEL_STATE_OUTPUT_REDIRECT BIT(0) #define LWTUNNEL_STATE_INPUT_REDIRECT BIT(1) #define LWTUNNEL_STATE_XMIT_REDIRECT BIT(2) /* LWTUNNEL_XMIT_CONTINUE should be distinguishable from dst_output return * values (NET_XMIT_xxx and NETDEV_TX_xxx in linux/netdevice.h) for safety. */ enum { LWTUNNEL_XMIT_DONE, LWTUNNEL_XMIT_CONTINUE = 0x100, }; struct lwtunnel_state { __u16 type; __u16 flags; __u16 headroom; atomic_t refcnt; int (*orig_output)(struct net *net, struct sock *sk, struct sk_buff *skb); int (*orig_input)(struct sk_buff *); struct rcu_head rcu; __u8 data[]; }; struct lwtunnel_encap_ops { int (*build_state)(struct net *net, struct nlattr *encap, unsigned int family, const void *cfg, struct lwtunnel_state **ts, struct netlink_ext_ack *extack); void (*destroy_state)(struct lwtunnel_state *lws); int (*output)(struct net *net, struct sock *sk, struct sk_buff *skb); int (*input)(struct sk_buff *skb); int (*fill_encap)(struct sk_buff *skb, struct lwtunnel_state *lwtstate); int (*get_encap_size)(struct lwtunnel_state *lwtstate); int (*cmp_encap)(struct lwtunnel_state *a, struct lwtunnel_state *b); int (*xmit)(struct sk_buff *skb); struct module *owner; }; #ifdef CONFIG_LWTUNNEL DECLARE_STATIC_KEY_FALSE(nf_hooks_lwtunnel_enabled); void lwtstate_free(struct lwtunnel_state *lws); static inline struct lwtunnel_state * lwtstate_get(struct lwtunnel_state *lws) { if (lws) atomic_inc(&lws->refcnt); return lws; } static inline void lwtstate_put(struct lwtunnel_state *lws) { if (!lws) return; if (atomic_dec_and_test(&lws->refcnt)) lwtstate_free(lws); } static inline bool lwtunnel_output_redirect(struct lwtunnel_state *lwtstate) { if (lwtstate && (lwtstate->flags & LWTUNNEL_STATE_OUTPUT_REDIRECT)) return true; return false; } static inline bool lwtunnel_input_redirect(struct lwtunnel_state *lwtstate) { if (lwtstate && (lwtstate->flags & LWTUNNEL_STATE_INPUT_REDIRECT)) return true; return false; } static inline bool lwtunnel_xmit_redirect(struct lwtunnel_state *lwtstate) { if (lwtstate && (lwtstate->flags & LWTUNNEL_STATE_XMIT_REDIRECT)) return true; return false; } static inline unsigned int lwtunnel_headroom(struct lwtunnel_state *lwtstate, unsigned int mtu) { if ((lwtunnel_xmit_redirect(lwtstate) || lwtunnel_output_redirect(lwtstate)) && lwtstate->headroom < mtu) return lwtstate->headroom; return 0; } int lwtunnel_encap_add_ops(const struct lwtunnel_encap_ops *op, unsigned int num); int lwtunnel_encap_del_ops(const struct lwtunnel_encap_ops *op, unsigned int num); int lwtunnel_valid_encap_type(u16 encap_type, struct netlink_ext_ack *extack); int lwtunnel_valid_encap_type_attr(struct nlattr *attr, int len, struct netlink_ext_ack *extack); int lwtunnel_build_state(struct net *net, u16 encap_type, struct nlattr *encap, unsigned int family, const void *cfg, struct lwtunnel_state **lws, struct netlink_ext_ack *extack); int lwtunnel_fill_encap(struct sk_buff *skb, struct lwtunnel_state *lwtstate, int encap_attr, int encap_type_attr); int lwtunnel_get_encap_size(struct lwtunnel_state *lwtstate); struct lwtunnel_state *lwtunnel_state_alloc(int hdr_len); int lwtunnel_cmp_encap(struct lwtunnel_state *a, struct lwtunnel_state *b); int lwtunnel_output(struct net *net, struct sock *sk, struct sk_buff *skb); int lwtunnel_input(struct sk_buff *skb); int lwtunnel_xmit(struct sk_buff *skb); int bpf_lwt_push_ip_encap(struct sk_buff *skb, void *hdr, u32 len, bool ingress); static inline void lwtunnel_set_redirect(struct dst_entry *dst) { if (lwtunnel_output_redirect(dst->lwtstate)) { dst->lwtstate->orig_output = dst->output; dst->output = lwtunnel_output; } if (lwtunnel_input_redirect(dst->lwtstate)) { dst->lwtstate->orig_input = dst->input; dst->input = lwtunnel_input; } } #else static inline void lwtstate_free(struct lwtunnel_state *lws) { } static inline struct lwtunnel_state * lwtstate_get(struct lwtunnel_state *lws) { return lws; } static inline void lwtstate_put(struct lwtunnel_state *lws) { } static inline bool lwtunnel_output_redirect(struct lwtunnel_state *lwtstate) { return false; } static inline bool lwtunnel_input_redirect(struct lwtunnel_state *lwtstate) { return false; } static inline bool lwtunnel_xmit_redirect(struct lwtunnel_state *lwtstate) { return false; } static inline void lwtunnel_set_redirect(struct dst_entry *dst) { } static inline unsigned int lwtunnel_headroom(struct lwtunnel_state *lwtstate, unsigned int mtu) { return 0; } static inline int lwtunnel_encap_add_ops(const struct lwtunnel_encap_ops *op, unsigned int num) { return -EOPNOTSUPP; } static inline int lwtunnel_encap_del_ops(const struct lwtunnel_encap_ops *op, unsigned int num) { return -EOPNOTSUPP; } static inline int lwtunnel_valid_encap_type(u16 encap_type, struct netlink_ext_ack *extack) { NL_SET_ERR_MSG(extack, "CONFIG_LWTUNNEL is not enabled in this kernel"); return -EOPNOTSUPP; } static inline int lwtunnel_valid_encap_type_attr(struct nlattr *attr, int len, struct netlink_ext_ack *extack) { /* return 0 since we are not walking attr looking for * RTA_ENCAP_TYPE attribute on nexthops. */ return 0; } static inline int lwtunnel_build_state(struct net *net, u16 encap_type, struct nlattr *encap, unsigned int family, const void *cfg, struct lwtunnel_state **lws, struct netlink_ext_ack *extack) { return -EOPNOTSUPP; } static inline int lwtunnel_fill_encap(struct sk_buff *skb, struct lwtunnel_state *lwtstate, int encap_attr, int encap_type_attr) { return 0; } static inline int lwtunnel_get_encap_size(struct lwtunnel_state *lwtstate) { return 0; } static inline struct lwtunnel_state *lwtunnel_state_alloc(int hdr_len) { return NULL; } static inline int lwtunnel_cmp_encap(struct lwtunnel_state *a, struct lwtunnel_state *b) { return 0; } static inline int lwtunnel_output(struct net *net, struct sock *sk, struct sk_buff *skb) { return -EOPNOTSUPP; } static inline int lwtunnel_input(struct sk_buff *skb) { return -EOPNOTSUPP; } static inline int lwtunnel_xmit(struct sk_buff *skb) { return -EOPNOTSUPP; } #endif /* CONFIG_LWTUNNEL */ #define MODULE_ALIAS_RTNL_LWT(encap_type) MODULE_ALIAS("rtnl-lwt-" __stringify(encap_type)) #endif /* __NET_LWTUNNEL_H */
5 883 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _ASM_X86_PGALLOC_H #define _ASM_X86_PGALLOC_H #include <linux/threads.h> #include <linux/mm.h> /* for struct page */ #include <linux/pagemap.h> #define __HAVE_ARCH_PTE_ALLOC_ONE #define __HAVE_ARCH_PGD_FREE #include <asm-generic/pgalloc.h> static inline int __paravirt_pgd_alloc(struct mm_struct *mm) { return 0; } #ifdef CONFIG_PARAVIRT_XXL #include <asm/paravirt.h> #else #define paravirt_pgd_alloc(mm) __paravirt_pgd_alloc(mm) static inline void paravirt_pgd_free(struct mm_struct *mm, pgd_t *pgd) {} static inline void paravirt_alloc_pte(struct mm_struct *mm, unsigned long pfn) {} static inline void paravirt_alloc_pmd(struct mm_struct *mm, unsigned long pfn) {} static inline void paravirt_alloc_pmd_clone(unsigned long pfn, unsigned long clonepfn, unsigned long start, unsigned long count) {} static inline void paravirt_alloc_pud(struct mm_struct *mm, unsigned long pfn) {} static inline void paravirt_alloc_p4d(struct mm_struct *mm, unsigned long pfn) {} static inline void paravirt_release_pte(unsigned long pfn) {} static inline void paravirt_release_pmd(unsigned long pfn) {} static inline void paravirt_release_pud(unsigned long pfn) {} static inline void paravirt_release_p4d(unsigned long pfn) {} #endif /* * Flags to use when allocating a user page table page. */ extern gfp_t __userpte_alloc_gfp; #ifdef CONFIG_MITIGATION_PAGE_TABLE_ISOLATION /* * Instead of one PGD, we acquire two PGDs. Being order-1, it is * both 8k in size and 8k-aligned. That lets us just flip bit 12 * in a pointer to swap between the two 4k halves. */ #define PGD_ALLOCATION_ORDER 1 #else #define PGD_ALLOCATION_ORDER 0 #endif /* * Allocate and free page tables. */ extern pgd_t *pgd_alloc(struct mm_struct *); extern void pgd_free(struct mm_struct *mm, pgd_t *pgd); extern pgtable_t pte_alloc_one(struct mm_struct *); extern void ___pte_free_tlb(struct mmu_gather *tlb, struct page *pte); static inline void __pte_free_tlb(struct mmu_gather *tlb, struct page *pte, unsigned long address) { ___pte_free_tlb(tlb, pte); } static inline void pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmd, pte_t *pte) { paravirt_alloc_pte(mm, __pa(pte) >> PAGE_SHIFT); set_pmd(pmd, __pmd(__pa(pte) | _PAGE_TABLE)); } static inline void pmd_populate_kernel_safe(struct mm_struct *mm, pmd_t *pmd, pte_t *pte) { paravirt_alloc_pte(mm, __pa(pte) >> PAGE_SHIFT); set_pmd_safe(pmd, __pmd(__pa(pte) | _PAGE_TABLE)); } static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd, struct page *pte) { unsigned long pfn = page_to_pfn(pte); paravirt_alloc_pte(mm, pfn); set_pmd(pmd, __pmd(((pteval_t)pfn << PAGE_SHIFT) | _PAGE_TABLE)); } #if CONFIG_PGTABLE_LEVELS > 2 extern void ___pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd); static inline void __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd, unsigned long address) { ___pmd_free_tlb(tlb, pmd); } #ifdef CONFIG_X86_PAE extern void pud_populate(struct mm_struct *mm, pud_t *pudp, pmd_t *pmd); #else /* !CONFIG_X86_PAE */ static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd) { paravirt_alloc_pmd(mm, __pa(pmd) >> PAGE_SHIFT); set_pud(pud, __pud(_PAGE_TABLE | __pa(pmd))); } static inline void pud_populate_safe(struct mm_struct *mm, pud_t *pud, pmd_t *pmd) { paravirt_alloc_pmd(mm, __pa(pmd) >> PAGE_SHIFT); set_pud_safe(pud, __pud(_PAGE_TABLE | __pa(pmd))); } #endif /* CONFIG_X86_PAE */ #if CONFIG_PGTABLE_LEVELS > 3 static inline void p4d_populate(struct mm_struct *mm, p4d_t *p4d, pud_t *pud) { paravirt_alloc_pud(mm, __pa(pud) >> PAGE_SHIFT); set_p4d(p4d, __p4d(_PAGE_TABLE | __pa(pud))); } static inline void p4d_populate_safe(struct mm_struct *mm, p4d_t *p4d, pud_t *pud) { paravirt_alloc_pud(mm, __pa(pud) >> PAGE_SHIFT); set_p4d_safe(p4d, __p4d(_PAGE_TABLE | __pa(pud))); } extern void ___pud_free_tlb(struct mmu_gather *tlb, pud_t *pud); static inline void __pud_free_tlb(struct mmu_gather *tlb, pud_t *pud, unsigned long address) { ___pud_free_tlb(tlb, pud); } #if CONFIG_PGTABLE_LEVELS > 4 static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, p4d_t *p4d) { if (!pgtable_l5_enabled()) return; paravirt_alloc_p4d(mm, __pa(p4d) >> PAGE_SHIFT); set_pgd(pgd, __pgd(_PAGE_TABLE | __pa(p4d))); } static inline void pgd_populate_safe(struct mm_struct *mm, pgd_t *pgd, p4d_t *p4d) { if (!pgtable_l5_enabled()) return; paravirt_alloc_p4d(mm, __pa(p4d) >> PAGE_SHIFT); set_pgd_safe(pgd, __pgd(_PAGE_TABLE | __pa(p4d))); } extern void ___p4d_free_tlb(struct mmu_gather *tlb, p4d_t *p4d); static inline void __p4d_free_tlb(struct mmu_gather *tlb, p4d_t *p4d, unsigned long address) { if (pgtable_l5_enabled()) ___p4d_free_tlb(tlb, p4d); } #endif /* CONFIG_PGTABLE_LEVELS > 4 */ #endif /* CONFIG_PGTABLE_LEVELS > 3 */ #endif /* CONFIG_PGTABLE_LEVELS > 2 */ #endif /* _ASM_X86_PGALLOC_H */
139 137 3 137 137 137 139 48 64 53 16 4 4 4 4 4 4 65 66 44 65 54 37 54 45 66 66 44 44 44 65 66 66 157 81 22 60 78 157 51 20 118 58 58 58 58 58 58 57 58 39 41 41 57 41 58 2 58 15 47 43 10 43 1 42 1 47 42 42 47 18 18 15 18 47 42 37 47 18 18 18 14 6 18 18 1 47 3 3 58 58 8 9 9 9 8 9 2 2 1 2 24 9 23 2 24 13 13 7 7 6 5 1 5 5 1 12 13 11 7 4 4 4 4 11 52 51 51 49 26 23 23 48 52 34 34 34 1 4 2 1 2 1 1 9 8 1 8 8 2 2 8 12 20 20 54 31 7 54 34 34 34 34 34 34 8 3 6 6 34 34 34 3 4 4 3 13 12 3 11 18 49 49 49 13 18 6 12 12 41 38 4 39 37 35 47 27 47 46 35 35 34 34 33 34 34 16 34 28 2 34 34 44 3 44 33 32 33 33 33 33 33 33 33 8 8 5 7 6 6 33 33 6 6 6 3 6 6 3 33 40 2 2 42 39 42 39 39 39 39 27 27 27 27 24 3 23 23 20 20 27 39 20 20 3 20 20 39 43 44 88 88 88 85 85 85 48 104 105 104 49 15 90 8 4 4 86 105 60 46 45 41 40 10 9 57 57 56 46 54 103 102 79 59 103 42 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 // SPDX-License-Identifier: GPL-2.0-or-later /* * inet_diag.c Module for monitoring INET transport protocols sockets. * * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru> */ #include <linux/kernel.h> #include <linux/module.h> #include <linux/types.h> #include <linux/fcntl.h> #include <linux/random.h> #include <linux/slab.h> #include <linux/cache.h> #include <linux/init.h> #include <linux/time.h> #include <net/icmp.h> #include <net/tcp.h> #include <net/ipv6.h> #include <net/inet_common.h> #include <net/inet_connection_sock.h> #include <net/inet_hashtables.h> #include <net/inet_timewait_sock.h> #include <net/inet6_hashtables.h> #include <net/bpf_sk_storage.h> #include <net/netlink.h> #include <linux/inet.h> #include <linux/stddef.h> #include <linux/inet_diag.h> #include <linux/sock_diag.h> static const struct inet_diag_handler __rcu **inet_diag_table; struct inet_diag_entry { const __be32 *saddr; const __be32 *daddr; u16 sport; u16 dport; u16 family; u16 userlocks; u32 ifindex; u32 mark; #ifdef CONFIG_SOCK_CGROUP_DATA u64 cgroup_id; #endif }; static const struct inet_diag_handler *inet_diag_lock_handler(int proto) { const struct inet_diag_handler *handler; if (proto < 0 || proto >= IPPROTO_MAX) return NULL; if (!READ_ONCE(inet_diag_table[proto])) sock_load_diag_module(AF_INET, proto); rcu_read_lock(); handler = rcu_dereference(inet_diag_table[proto]); if (handler && !try_module_get(handler->owner)) handler = NULL; rcu_read_unlock(); return handler; } static void inet_diag_unlock_handler(const struct inet_diag_handler *handler) { module_put(handler->owner); } void inet_diag_msg_common_fill(struct inet_diag_msg *r, struct sock *sk) { r->idiag_family = sk->sk_family; r->id.idiag_sport = htons(sk->sk_num); r->id.idiag_dport = sk->sk_dport; r->id.idiag_if = sk->sk_bound_dev_if; sock_diag_save_cookie(sk, r->id.idiag_cookie); #if IS_ENABLED(CONFIG_IPV6) if (sk->sk_family == AF_INET6) { *(struct in6_addr *)r->id.idiag_src = sk->sk_v6_rcv_saddr; *(struct in6_addr *)r->id.idiag_dst = sk->sk_v6_daddr; } else #endif { memset(&r->id.idiag_src, 0, sizeof(r->id.idiag_src)); memset(&r->id.idiag_dst, 0, sizeof(r->id.idiag_dst)); r->id.idiag_src[0] = sk->sk_rcv_saddr; r->id.idiag_dst[0] = sk->sk_daddr; } } EXPORT_SYMBOL_GPL(inet_diag_msg_common_fill); static size_t inet_sk_attr_size(struct sock *sk, const struct inet_diag_req_v2 *req, bool net_admin) { const struct inet_diag_handler *handler; size_t aux = 0; rcu_read_lock(); handler = rcu_dereference(inet_diag_table[req->sdiag_protocol]); DEBUG_NET_WARN_ON_ONCE(!handler); if (handler && handler->idiag_get_aux_size) aux = handler->idiag_get_aux_size(sk, net_admin); rcu_read_unlock(); return nla_total_size(sizeof(struct tcp_info)) + nla_total_size(sizeof(struct inet_diag_msg)) + inet_diag_msg_attrs_size() + nla_total_size(sizeof(struct inet_diag_meminfo)) + nla_total_size(SK_MEMINFO_VARS * sizeof(u32)) + nla_total_size(TCP_CA_NAME_MAX) + nla_total_size(sizeof(struct tcpvegas_info)) + aux + 64; } int inet_diag_msg_attrs_fill(struct sock *sk, struct sk_buff *skb, struct inet_diag_msg *r, int ext, struct user_namespace *user_ns, bool net_admin) { const struct inet_sock *inet = inet_sk(sk); struct inet_diag_sockopt inet_sockopt; if (nla_put_u8(skb, INET_DIAG_SHUTDOWN, sk->sk_shutdown)) goto errout; /* IPv6 dual-stack sockets use inet->tos for IPv4 connections, * hence this needs to be included regardless of socket family. */ if (ext & (1 << (INET_DIAG_TOS - 1))) if (nla_put_u8(skb, INET_DIAG_TOS, READ_ONCE(inet->tos)) < 0) goto errout; #if IS_ENABLED(CONFIG_IPV6) if (r->idiag_family == AF_INET6) { if (ext & (1 << (INET_DIAG_TCLASS - 1))) if (nla_put_u8(skb, INET_DIAG_TCLASS, inet6_sk(sk)->tclass) < 0) goto errout; if (((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE)) && nla_put_u8(skb, INET_DIAG_SKV6ONLY, ipv6_only_sock(sk))) goto errout; } #endif if (net_admin && nla_put_u32(skb, INET_DIAG_MARK, READ_ONCE(sk->sk_mark))) goto errout; if (ext & (1 << (INET_DIAG_CLASS_ID - 1)) || ext & (1 << (INET_DIAG_TCLASS - 1))) { u32 classid = 0; #ifdef CONFIG_SOCK_CGROUP_DATA classid = sock_cgroup_classid(&sk->sk_cgrp_data); #endif /* Fallback to socket priority if class id isn't set. * Classful qdiscs use it as direct reference to class. * For cgroup2 classid is always zero. */ if (!classid) classid = READ_ONCE(sk->sk_priority); if (nla_put_u32(skb, INET_DIAG_CLASS_ID, classid)) goto errout; } #ifdef CONFIG_SOCK_CGROUP_DATA if (nla_put_u64_64bit(skb, INET_DIAG_CGROUP_ID, cgroup_id(sock_cgroup_ptr(&sk->sk_cgrp_data)), INET_DIAG_PAD)) goto errout; #endif r->idiag_uid = from_kuid_munged(user_ns, sock_i_uid(sk)); r->idiag_inode = sock_i_ino(sk); memset(&inet_sockopt, 0, sizeof(inet_sockopt)); inet_sockopt.recverr = inet_test_bit(RECVERR, sk); inet_sockopt.is_icsk = inet_test_bit(IS_ICSK, sk); inet_sockopt.freebind = inet_test_bit(FREEBIND, sk); inet_sockopt.hdrincl = inet_test_bit(HDRINCL, sk); inet_sockopt.mc_loop = inet_test_bit(MC_LOOP, sk); inet_sockopt.transparent = inet_test_bit(TRANSPARENT, sk); inet_sockopt.mc_all = inet_test_bit(MC_ALL, sk); inet_sockopt.nodefrag = inet_test_bit(NODEFRAG, sk); inet_sockopt.bind_address_no_port = inet_test_bit(BIND_ADDRESS_NO_PORT, sk); inet_sockopt.recverr_rfc4884 = inet_test_bit(RECVERR_RFC4884, sk); inet_sockopt.defer_connect = inet_test_bit(DEFER_CONNECT, sk); if (nla_put(skb, INET_DIAG_SOCKOPT, sizeof(inet_sockopt), &inet_sockopt)) goto errout; return 0; errout: return 1; } EXPORT_SYMBOL_GPL(inet_diag_msg_attrs_fill); static int inet_diag_parse_attrs(const struct nlmsghdr *nlh, int hdrlen, struct nlattr **req_nlas) { struct nlattr *nla; int remaining; nlmsg_for_each_attr(nla, nlh, hdrlen, remaining) { int type = nla_type(nla); if (type == INET_DIAG_REQ_PROTOCOL && nla_len(nla) != sizeof(u32)) return -EINVAL; if (type < __INET_DIAG_REQ_MAX) req_nlas[type] = nla; } return 0; } static int inet_diag_get_protocol(const struct inet_diag_req_v2 *req, const struct inet_diag_dump_data *data) { if (data->req_nlas[INET_DIAG_REQ_PROTOCOL]) return nla_get_u32(data->req_nlas[INET_DIAG_REQ_PROTOCOL]); return req->sdiag_protocol; } #define MAX_DUMP_ALLOC_SIZE (KMALLOC_MAX_SIZE - SKB_DATA_ALIGN(sizeof(struct skb_shared_info))) int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk, struct sk_buff *skb, struct netlink_callback *cb, const struct inet_diag_req_v2 *req, u16 nlmsg_flags, bool net_admin) { const struct tcp_congestion_ops *ca_ops; const struct inet_diag_handler *handler; struct inet_diag_dump_data *cb_data; int ext = req->idiag_ext; struct inet_diag_msg *r; struct nlmsghdr *nlh; struct nlattr *attr; void *info = NULL; u8 icsk_pending; int protocol; cb_data = cb->data; protocol = inet_diag_get_protocol(req, cb_data); /* inet_diag_lock_handler() made sure inet_diag_table[] is stable. */ handler = rcu_dereference_protected(inet_diag_table[protocol], 1); DEBUG_NET_WARN_ON_ONCE(!handler); if (!handler) return -ENXIO; nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, cb->nlh->nlmsg_type, sizeof(*r), nlmsg_flags); if (!nlh) return -EMSGSIZE; r = nlmsg_data(nlh); BUG_ON(!sk_fullsock(sk)); inet_diag_msg_common_fill(r, sk); r->idiag_state = sk->sk_state; r->idiag_timer = 0; r->idiag_retrans = 0; r->idiag_expires = 0; if (inet_diag_msg_attrs_fill(sk, skb, r, ext, sk_user_ns(NETLINK_CB(cb->skb).sk), net_admin)) goto errout; if (ext & (1 << (INET_DIAG_MEMINFO - 1))) { struct inet_diag_meminfo minfo = { .idiag_rmem = sk_rmem_alloc_get(sk), .idiag_wmem = READ_ONCE(sk->sk_wmem_queued), .idiag_fmem = sk_forward_alloc_get(sk), .idiag_tmem = sk_wmem_alloc_get(sk), }; if (nla_put(skb, INET_DIAG_MEMINFO, sizeof(minfo), &minfo) < 0) goto errout; } if (ext & (1 << (INET_DIAG_SKMEMINFO - 1))) if (sock_diag_put_meminfo(sk, skb, INET_DIAG_SKMEMINFO)) goto errout; /* * RAW sockets might have user-defined protocols assigned, * so report the one supplied on socket creation. */ if (sk->sk_type == SOCK_RAW) { if (nla_put_u8(skb, INET_DIAG_PROTOCOL, sk->sk_protocol)) goto errout; } if (!icsk) { handler->idiag_get_info(sk, r, NULL); goto out; } icsk_pending = smp_load_acquire(&icsk->icsk_pending); if (icsk_pending == ICSK_TIME_RETRANS || icsk_pending == ICSK_TIME_REO_TIMEOUT || icsk_pending == ICSK_TIME_LOSS_PROBE) { r->idiag_timer = 1; r->idiag_retrans = icsk->icsk_retransmits; r->idiag_expires = jiffies_delta_to_msecs(icsk->icsk_timeout - jiffies); } else if (icsk_pending == ICSK_TIME_PROBE0) { r->idiag_timer = 4; r->idiag_retrans = icsk->icsk_probes_out; r->idiag_expires = jiffies_delta_to_msecs(icsk->icsk_timeout - jiffies); } else if (timer_pending(&sk->sk_timer)) { r->idiag_timer = 2; r->idiag_retrans = icsk->icsk_probes_out; r->idiag_expires = jiffies_delta_to_msecs(sk->sk_timer.expires - jiffies); } if ((ext & (1 << (INET_DIAG_INFO - 1))) && handler->idiag_info_size) { attr = nla_reserve_64bit(skb, INET_DIAG_INFO, handler->idiag_info_size, INET_DIAG_PAD); if (!attr) goto errout; info = nla_data(attr); } if (ext & (1 << (INET_DIAG_CONG - 1))) { int err = 0; rcu_read_lock(); ca_ops = READ_ONCE(icsk->icsk_ca_ops); if (ca_ops) err = nla_put_string(skb, INET_DIAG_CONG, ca_ops->name); rcu_read_unlock(); if (err < 0) goto errout; } handler->idiag_get_info(sk, r, info); if (ext & (1 << (INET_DIAG_INFO - 1)) && handler->idiag_get_aux) if (handler->idiag_get_aux(sk, net_admin, skb) < 0) goto errout; if (sk->sk_state < TCP_TIME_WAIT) { union tcp_cc_info info; size_t sz = 0; int attr; rcu_read_lock(); ca_ops = READ_ONCE(icsk->icsk_ca_ops); if (ca_ops && ca_ops->get_info) sz = ca_ops->get_info(sk, ext, &attr, &info); rcu_read_unlock(); if (sz && nla_put(skb, attr, sz, &info) < 0) goto errout; } /* Keep it at the end for potential retry with a larger skb, * or else do best-effort fitting, which is only done for the * first_nlmsg. */ if (cb_data->bpf_stg_diag) { bool first_nlmsg = ((unsigned char *)nlh == skb->data); unsigned int prev_min_dump_alloc; unsigned int total_nla_size = 0; unsigned int msg_len; int err; msg_len = skb_tail_pointer(skb) - (unsigned char *)nlh; err = bpf_sk_storage_diag_put(cb_data->bpf_stg_diag, sk, skb, INET_DIAG_SK_BPF_STORAGES, &total_nla_size); if (!err) goto out; total_nla_size += msg_len; prev_min_dump_alloc = cb->min_dump_alloc; if (total_nla_size > prev_min_dump_alloc) cb->min_dump_alloc = min_t(u32, total_nla_size, MAX_DUMP_ALLOC_SIZE); if (!first_nlmsg) goto errout; if (cb->min_dump_alloc > prev_min_dump_alloc) /* Retry with pskb_expand_head() with * __GFP_DIRECT_RECLAIM */ goto errout; WARN_ON_ONCE(total_nla_size <= prev_min_dump_alloc); /* Send what we have for this sk * and move on to the next sk in the following * dump() */ } out: nlmsg_end(skb, nlh); return 0; errout: nlmsg_cancel(skb, nlh); return -EMSGSIZE; } EXPORT_SYMBOL_GPL(inet_sk_diag_fill); static int inet_twsk_diag_fill(struct sock *sk, struct sk_buff *skb, struct netlink_callback *cb, u16 nlmsg_flags, bool net_admin) { struct inet_timewait_sock *tw = inet_twsk(sk); struct inet_diag_msg *r; struct nlmsghdr *nlh; long tmo; nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, cb->nlh->nlmsg_type, sizeof(*r), nlmsg_flags); if (!nlh) return -EMSGSIZE; r = nlmsg_data(nlh); BUG_ON(tw->tw_state != TCP_TIME_WAIT); inet_diag_msg_common_fill(r, sk); r->idiag_retrans = 0; r->idiag_state = READ_ONCE(tw->tw_substate); r->idiag_timer = 3; tmo = tw->tw_timer.expires - jiffies; r->idiag_expires = jiffies_delta_to_msecs(tmo); r->idiag_rqueue = 0; r->idiag_wqueue = 0; r->idiag_uid = 0; r->idiag_inode = 0; if (net_admin && nla_put_u32(skb, INET_DIAG_MARK, tw->tw_mark)) { nlmsg_cancel(skb, nlh); return -EMSGSIZE; } nlmsg_end(skb, nlh); return 0; } static int inet_req_diag_fill(struct sock *sk, struct sk_buff *skb, struct netlink_callback *cb, u16 nlmsg_flags, bool net_admin) { struct request_sock *reqsk = inet_reqsk(sk); struct inet_diag_msg *r; struct nlmsghdr *nlh; long tmo; nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, cb->nlh->nlmsg_type, sizeof(*r), nlmsg_flags); if (!nlh) return -EMSGSIZE; r = nlmsg_data(nlh); inet_diag_msg_common_fill(r, sk); r->idiag_state = TCP_SYN_RECV; r->idiag_timer = 1; r->idiag_retrans = reqsk->num_retrans; BUILD_BUG_ON(offsetof(struct inet_request_sock, ir_cookie) != offsetof(struct sock, sk_cookie)); tmo = inet_reqsk(sk)->rsk_timer.expires - jiffies; r->idiag_expires = jiffies_delta_to_msecs(tmo); r->idiag_rqueue = 0; r->idiag_wqueue = 0; r->idiag_uid = 0; r->idiag_inode = 0; if (net_admin && nla_put_u32(skb, INET_DIAG_MARK, inet_rsk(reqsk)->ir_mark)) { nlmsg_cancel(skb, nlh); return -EMSGSIZE; } nlmsg_end(skb, nlh); return 0; } static int sk_diag_fill(struct sock *sk, struct sk_buff *skb, struct netlink_callback *cb, const struct inet_diag_req_v2 *r, u16 nlmsg_flags, bool net_admin) { if (sk->sk_state == TCP_TIME_WAIT) return inet_twsk_diag_fill(sk, skb, cb, nlmsg_flags, net_admin); if (sk->sk_state == TCP_NEW_SYN_RECV) return inet_req_diag_fill(sk, skb, cb, nlmsg_flags, net_admin); return inet_sk_diag_fill(sk, inet_csk(sk), skb, cb, r, nlmsg_flags, net_admin); } struct sock *inet_diag_find_one_icsk(struct net *net, struct inet_hashinfo *hashinfo, const struct inet_diag_req_v2 *req) { struct sock *sk; rcu_read_lock(); if (req->sdiag_family == AF_INET) sk = inet_lookup(net, hashinfo, NULL, 0, req->id.idiag_dst[0], req->id.idiag_dport, req->id.idiag_src[0], req->id.idiag_sport, req->id.idiag_if); #if IS_ENABLED(CONFIG_IPV6) else if (req->sdiag_family == AF_INET6) { if (ipv6_addr_v4mapped((struct in6_addr *)req->id.idiag_dst) && ipv6_addr_v4mapped((struct in6_addr *)req->id.idiag_src)) sk = inet_lookup(net, hashinfo, NULL, 0, req->id.idiag_dst[3], req->id.idiag_dport, req->id.idiag_src[3], req->id.idiag_sport, req->id.idiag_if); else sk = inet6_lookup(net, hashinfo, NULL, 0, (struct in6_addr *)req->id.idiag_dst, req->id.idiag_dport, (struct in6_addr *)req->id.idiag_src, req->id.idiag_sport, req->id.idiag_if); } #endif else { rcu_read_unlock(); return ERR_PTR(-EINVAL); } rcu_read_unlock(); if (!sk) return ERR_PTR(-ENOENT); if (sock_diag_check_cookie(sk, req->id.idiag_cookie)) { sock_gen_put(sk); return ERR_PTR(-ENOENT); } return sk; } EXPORT_SYMBOL_GPL(inet_diag_find_one_icsk); int inet_diag_dump_one_icsk(struct inet_hashinfo *hashinfo, struct netlink_callback *cb, const struct inet_diag_req_v2 *req) { struct sk_buff *in_skb = cb->skb; bool net_admin = netlink_net_capable(in_skb, CAP_NET_ADMIN); struct net *net = sock_net(in_skb->sk); struct sk_buff *rep; struct sock *sk; int err; sk = inet_diag_find_one_icsk(net, hashinfo, req); if (IS_ERR(sk)) return PTR_ERR(sk); rep = nlmsg_new(inet_sk_attr_size(sk, req, net_admin), GFP_KERNEL); if (!rep) { err = -ENOMEM; goto out; } err = sk_diag_fill(sk, rep, cb, req, 0, net_admin); if (err < 0) { WARN_ON(err == -EMSGSIZE); nlmsg_free(rep); goto out; } err = nlmsg_unicast(net->diag_nlsk, rep, NETLINK_CB(in_skb).portid); out: if (sk) sock_gen_put(sk); return err; } EXPORT_SYMBOL_GPL(inet_diag_dump_one_icsk); static int inet_diag_cmd_exact(int cmd, struct sk_buff *in_skb, const struct nlmsghdr *nlh, int hdrlen, const struct inet_diag_req_v2 *req) { const struct inet_diag_handler *handler; struct inet_diag_dump_data dump_data; int err, protocol; memset(&dump_data, 0, sizeof(dump_data)); err = inet_diag_parse_attrs(nlh, hdrlen, dump_data.req_nlas); if (err) return err; protocol = inet_diag_get_protocol(req, &dump_data); handler = inet_diag_lock_handler(protocol); if (!handler) return -ENOENT; if (cmd == SOCK_DIAG_BY_FAMILY) { struct netlink_callback cb = { .nlh = nlh, .skb = in_skb, .data = &dump_data, }; err = handler->dump_one(&cb, req); } else if (cmd == SOCK_DESTROY && handler->destroy) { err = handler->destroy(in_skb, req); } else { err = -EOPNOTSUPP; } inet_diag_unlock_handler(handler); return err; } static int bitstring_match(const __be32 *a1, const __be32 *a2, int bits) { int words = bits >> 5; bits &= 0x1f; if (words) { if (memcmp(a1, a2, words << 2)) return 0; } if (bits) { __be32 w1, w2; __be32 mask; w1 = a1[words]; w2 = a2[words]; mask = htonl((0xffffffff) << (32 - bits)); if ((w1 ^ w2) & mask) return 0; } return 1; } static int inet_diag_bc_run(const struct nlattr *_bc, const struct inet_diag_entry *entry) { const void *bc = nla_data(_bc); int len = nla_len(_bc); while (len > 0) { int yes = 1; const struct inet_diag_bc_op *op = bc; switch (op->code) { case INET_DIAG_BC_NOP: break; case INET_DIAG_BC_JMP: yes = 0; break; case INET_DIAG_BC_S_EQ: yes = entry->sport == op[1].no; break; case INET_DIAG_BC_S_GE: yes = entry->sport >= op[1].no; break; case INET_DIAG_BC_S_LE: yes = entry->sport <= op[1].no; break; case INET_DIAG_BC_D_EQ: yes = entry->dport == op[1].no; break; case INET_DIAG_BC_D_GE: yes = entry->dport >= op[1].no; break; case INET_DIAG_BC_D_LE: yes = entry->dport <= op[1].no; break; case INET_DIAG_BC_AUTO: yes = !(entry->userlocks & SOCK_BINDPORT_LOCK); break; case INET_DIAG_BC_S_COND: case INET_DIAG_BC_D_COND: { const struct inet_diag_hostcond *cond; const __be32 *addr; cond = (const struct inet_diag_hostcond *)(op + 1); if (cond->port != -1 && cond->port != (op->code == INET_DIAG_BC_S_COND ? entry->sport : entry->dport)) { yes = 0; break; } if (op->code == INET_DIAG_BC_S_COND) addr = entry->saddr; else addr = entry->daddr; if (cond->family != AF_UNSPEC && cond->family != entry->family) { if (entry->family == AF_INET6 && cond->family == AF_INET) { if (addr[0] == 0 && addr[1] == 0 && addr[2] == htonl(0xffff) && bitstring_match(addr + 3, cond->addr, cond->prefix_len)) break; } yes = 0; break; } if (cond->prefix_len == 0) break; if (bitstring_match(addr, cond->addr, cond->prefix_len)) break; yes = 0; break; } case INET_DIAG_BC_DEV_COND: { u32 ifindex; ifindex = *((const u32 *)(op + 1)); if (ifindex != entry->ifindex) yes = 0; break; } case INET_DIAG_BC_MARK_COND: { struct inet_diag_markcond *cond; cond = (struct inet_diag_markcond *)(op + 1); if ((entry->mark & cond->mask) != cond->mark) yes = 0; break; } #ifdef CONFIG_SOCK_CGROUP_DATA case INET_DIAG_BC_CGROUP_COND: { u64 cgroup_id; cgroup_id = get_unaligned((const u64 *)(op + 1)); if (cgroup_id != entry->cgroup_id) yes = 0; break; } #endif } if (yes) { len -= op->yes; bc += op->yes; } else { len -= op->no; bc += op->no; } } return len == 0; } /* This helper is available for all sockets (ESTABLISH, TIMEWAIT, SYN_RECV) */ static void entry_fill_addrs(struct inet_diag_entry *entry, const struct sock *sk) { #if IS_ENABLED(CONFIG_IPV6) if (sk->sk_family == AF_INET6) { entry->saddr = sk->sk_v6_rcv_saddr.s6_addr32; entry->daddr = sk->sk_v6_daddr.s6_addr32; } else #endif { entry->saddr = &sk->sk_rcv_saddr; entry->daddr = &sk->sk_daddr; } } int inet_diag_bc_sk(const struct nlattr *bc, struct sock *sk) { struct inet_sock *inet = inet_sk(sk); struct inet_diag_entry entry; if (!bc) return 1; entry.family = sk->sk_family; entry_fill_addrs(&entry, sk); entry.sport = inet->inet_num; entry.dport = ntohs(inet->inet_dport); entry.ifindex = sk->sk_bound_dev_if; entry.userlocks = sk_fullsock(sk) ? sk->sk_userlocks : 0; if (sk_fullsock(sk)) entry.mark = READ_ONCE(sk->sk_mark); else if (sk->sk_state == TCP_NEW_SYN_RECV) entry.mark = inet_rsk(inet_reqsk(sk))->ir_mark; else if (sk->sk_state == TCP_TIME_WAIT) entry.mark = inet_twsk(sk)->tw_mark; else entry.mark = 0; #ifdef CONFIG_SOCK_CGROUP_DATA entry.cgroup_id = sk_fullsock(sk) ? cgroup_id(sock_cgroup_ptr(&sk->sk_cgrp_data)) : 0; #endif return inet_diag_bc_run(bc, &entry); } EXPORT_SYMBOL_GPL(inet_diag_bc_sk); static int valid_cc(const void *bc, int len, int cc) { while (len >= 0) { const struct inet_diag_bc_op *op = bc; if (cc > len) return 0; if (cc == len) return 1; if (op->yes < 4 || op->yes & 3) return 0; len -= op->yes; bc += op->yes; } return 0; } /* data is u32 ifindex */ static bool valid_devcond(const struct inet_diag_bc_op *op, int len, int *min_len) { /* Check ifindex space. */ *min_len += sizeof(u32); if (len < *min_len) return false; return true; } /* Validate an inet_diag_hostcond. */ static bool valid_hostcond(const struct inet_diag_bc_op *op, int len, int *min_len) { struct inet_diag_hostcond *cond; int addr_len; /* Check hostcond space. */ *min_len += sizeof(struct inet_diag_hostcond); if (len < *min_len) return false; cond = (struct inet_diag_hostcond *)(op + 1); /* Check address family and address length. */ switch (cond->family) { case AF_UNSPEC: addr_len = 0; break; case AF_INET: addr_len = sizeof(struct in_addr); break; case AF_INET6: addr_len = sizeof(struct in6_addr); break; default: return false; } *min_len += addr_len; if (len < *min_len) return false; /* Check prefix length (in bits) vs address length (in bytes). */ if (cond->prefix_len > 8 * addr_len) return false; return true; } /* Validate a port comparison operator. */ static bool valid_port_comparison(const struct inet_diag_bc_op *op, int len, int *min_len) { /* Port comparisons put the port in a follow-on inet_diag_bc_op. */ *min_len += sizeof(struct inet_diag_bc_op); if (len < *min_len) return false; return true; } static bool valid_markcond(const struct inet_diag_bc_op *op, int len, int *min_len) { *min_len += sizeof(struct inet_diag_markcond); return len >= *min_len; } #ifdef CONFIG_SOCK_CGROUP_DATA static bool valid_cgroupcond(const struct inet_diag_bc_op *op, int len, int *min_len) { *min_len += sizeof(u64); return len >= *min_len; } #endif static int inet_diag_bc_audit(const struct nlattr *attr, const struct sk_buff *skb) { bool net_admin = netlink_net_capable(skb, CAP_NET_ADMIN); const void *bytecode, *bc; int bytecode_len, len; if (!attr || nla_len(attr) < sizeof(struct inet_diag_bc_op)) return -EINVAL; bytecode = bc = nla_data(attr); len = bytecode_len = nla_len(attr); while (len > 0) { int min_len = sizeof(struct inet_diag_bc_op); const struct inet_diag_bc_op *op = bc; switch (op->code) { case INET_DIAG_BC_S_COND: case INET_DIAG_BC_D_COND: if (!valid_hostcond(bc, len, &min_len)) return -EINVAL; break; case INET_DIAG_BC_DEV_COND: if (!valid_devcond(bc, len, &min_len)) return -EINVAL; break; case INET_DIAG_BC_S_EQ: case INET_DIAG_BC_S_GE: case INET_DIAG_BC_S_LE: case INET_DIAG_BC_D_EQ: case INET_DIAG_BC_D_GE: case INET_DIAG_BC_D_LE: if (!valid_port_comparison(bc, len, &min_len)) return -EINVAL; break; case INET_DIAG_BC_MARK_COND: if (!net_admin) return -EPERM; if (!valid_markcond(bc, len, &min_len)) return -EINVAL; break; #ifdef CONFIG_SOCK_CGROUP_DATA case INET_DIAG_BC_CGROUP_COND: if (!valid_cgroupcond(bc, len, &min_len)) return -EINVAL; break; #endif case INET_DIAG_BC_AUTO: case INET_DIAG_BC_JMP: case INET_DIAG_BC_NOP: break; default: return -EINVAL; } if (op->code != INET_DIAG_BC_NOP) { if (op->no < min_len || op->no > len + 4 || op->no & 3) return -EINVAL; if (op->no < len && !valid_cc(bytecode, bytecode_len, len - op->no)) return -EINVAL; } if (op->yes < min_len || op->yes > len + 4 || op->yes & 3) return -EINVAL; bc += op->yes; len -= op->yes; } return len == 0 ? 0 : -EINVAL; } static void twsk_build_assert(void) { BUILD_BUG_ON(offsetof(struct inet_timewait_sock, tw_family) != offsetof(struct sock, sk_family)); BUILD_BUG_ON(offsetof(struct inet_timewait_sock, tw_num) != offsetof(struct inet_sock, inet_num)); BUILD_BUG_ON(offsetof(struct inet_timewait_sock, tw_dport) != offsetof(struct inet_sock, inet_dport)); BUILD_BUG_ON(offsetof(struct inet_timewait_sock, tw_rcv_saddr) != offsetof(struct inet_sock, inet_rcv_saddr)); BUILD_BUG_ON(offsetof(struct inet_timewait_sock, tw_daddr) != offsetof(struct inet_sock, inet_daddr)); #if IS_ENABLED(CONFIG_IPV6) BUILD_BUG_ON(offsetof(struct inet_timewait_sock, tw_v6_rcv_saddr) != offsetof(struct sock, sk_v6_rcv_saddr)); BUILD_BUG_ON(offsetof(struct inet_timewait_sock, tw_v6_daddr) != offsetof(struct sock, sk_v6_daddr)); #endif } void inet_diag_dump_icsk(struct inet_hashinfo *hashinfo, struct sk_buff *skb, struct netlink_callback *cb, const struct inet_diag_req_v2 *r) { bool net_admin = netlink_net_capable(cb->skb, CAP_NET_ADMIN); struct inet_diag_dump_data *cb_data = cb->data; struct net *net = sock_net(skb->sk); u32 idiag_states = r->idiag_states; int i, num, s_i, s_num; struct nlattr *bc; struct sock *sk; bc = cb_data->inet_diag_nla_bc; if (idiag_states & TCPF_SYN_RECV) idiag_states |= TCPF_NEW_SYN_RECV; s_i = cb->args[1]; s_num = num = cb->args[2]; if (cb->args[0] == 0) { if (!(idiag_states & TCPF_LISTEN) || r->id.idiag_dport) goto skip_listen_ht; for (i = s_i; i <= hashinfo->lhash2_mask; i++) { struct inet_listen_hashbucket *ilb; struct hlist_nulls_node *node; num = 0; ilb = &hashinfo->lhash2[i]; if (hlist_nulls_empty(&ilb->nulls_head)) { s_num = 0; continue; } spin_lock(&ilb->lock); sk_nulls_for_each(sk, node, &ilb->nulls_head) { struct inet_sock *inet = inet_sk(sk); if (!net_eq(sock_net(sk), net)) continue; if (num < s_num) { num++; continue; } if (r->sdiag_family != AF_UNSPEC && sk->sk_family != r->sdiag_family) goto next_listen; if (r->id.idiag_sport != inet->inet_sport && r->id.idiag_sport) goto next_listen; if (!inet_diag_bc_sk(bc, sk)) goto next_listen; if (inet_sk_diag_fill(sk, inet_csk(sk), skb, cb, r, NLM_F_MULTI, net_admin) < 0) { spin_unlock(&ilb->lock); goto done; } next_listen: ++num; } spin_unlock(&ilb->lock); s_num = 0; } skip_listen_ht: cb->args[0] = 1; s_i = num = s_num = 0; } /* Process a maximum of SKARR_SZ sockets at a time when walking hash buckets * with bh disabled. */ #define SKARR_SZ 16 /* Dump bound but inactive (not listening, connecting, etc.) sockets */ if (cb->args[0] == 1) { if (!(idiag_states & TCPF_BOUND_INACTIVE)) goto skip_bind_ht; for (i = s_i; i < hashinfo->bhash_size; i++) { struct inet_bind_hashbucket *ibb; struct inet_bind2_bucket *tb2; struct sock *sk_arr[SKARR_SZ]; int num_arr[SKARR_SZ]; int idx, accum, res; resume_bind_walk: num = 0; accum = 0; ibb = &hashinfo->bhash2[i]; if (hlist_empty(&ibb->chain)) { s_num = 0; continue; } spin_lock_bh(&ibb->lock); inet_bind_bucket_for_each(tb2, &ibb->chain) { if (!net_eq(ib2_net(tb2), net)) continue; sk_for_each_bound(sk, &tb2->owners) { struct inet_sock *inet = inet_sk(sk); if (num < s_num) goto next_bind; if (sk->sk_state != TCP_CLOSE || !inet->inet_num) goto next_bind; if (r->sdiag_family != AF_UNSPEC && r->sdiag_family != sk->sk_family) goto next_bind; if (!inet_diag_bc_sk(bc, sk)) goto next_bind; sock_hold(sk); num_arr[accum] = num; sk_arr[accum] = sk; if (++accum == SKARR_SZ) goto pause_bind_walk; next_bind: num++; } } pause_bind_walk: spin_unlock_bh(&ibb->lock); res = 0; for (idx = 0; idx < accum; idx++) { if (res >= 0) { res = inet_sk_diag_fill(sk_arr[idx], NULL, skb, cb, r, NLM_F_MULTI, net_admin); if (res < 0) num = num_arr[idx]; } sock_put(sk_arr[idx]); } if (res < 0) goto done; cond_resched(); if (accum == SKARR_SZ) { s_num = num + 1; goto resume_bind_walk; } s_num = 0; } skip_bind_ht: cb->args[0] = 2; s_i = num = s_num = 0; } if (!(idiag_states & ~TCPF_LISTEN)) goto out; for (i = s_i; i <= hashinfo->ehash_mask; i++) { struct inet_ehash_bucket *head = &hashinfo->ehash[i]; spinlock_t *lock = inet_ehash_lockp(hashinfo, i); struct hlist_nulls_node *node; struct sock *sk_arr[SKARR_SZ]; int num_arr[SKARR_SZ]; int idx, accum, res; if (hlist_nulls_empty(&head->chain)) continue; if (i > s_i) s_num = 0; next_chunk: num = 0; accum = 0; spin_lock_bh(lock); sk_nulls_for_each(sk, node, &head->chain) { int state; if (!net_eq(sock_net(sk), net)) continue; if (num < s_num) goto next_normal; state = (sk->sk_state == TCP_TIME_WAIT) ? READ_ONCE(inet_twsk(sk)->tw_substate) : sk->sk_state; if (!(idiag_states & (1 << state))) goto next_normal; if (r->sdiag_family != AF_UNSPEC && sk->sk_family != r->sdiag_family) goto next_normal; if (r->id.idiag_sport != htons(sk->sk_num) && r->id.idiag_sport) goto next_normal; if (r->id.idiag_dport != sk->sk_dport && r->id.idiag_dport) goto next_normal; twsk_build_assert(); if (!inet_diag_bc_sk(bc, sk)) goto next_normal; if (!refcount_inc_not_zero(&sk->sk_refcnt)) goto next_normal; num_arr[accum] = num; sk_arr[accum] = sk; if (++accum == SKARR_SZ) break; next_normal: ++num; } spin_unlock_bh(lock); res = 0; for (idx = 0; idx < accum; idx++) { if (res >= 0) { res = sk_diag_fill(sk_arr[idx], skb, cb, r, NLM_F_MULTI, net_admin); if (res < 0) num = num_arr[idx]; } sock_gen_put(sk_arr[idx]); } if (res < 0) break; cond_resched(); if (accum == SKARR_SZ) { s_num = num + 1; goto next_chunk; } } done: cb->args[1] = i; cb->args[2] = num; out: ; } EXPORT_SYMBOL_GPL(inet_diag_dump_icsk); static int __inet_diag_dump(struct sk_buff *skb, struct netlink_callback *cb, const struct inet_diag_req_v2 *r) { struct inet_diag_dump_data *cb_data = cb->data; const struct inet_diag_handler *handler; u32 prev_min_dump_alloc; int protocol, err = 0; protocol = inet_diag_get_protocol(r, cb_data); again: prev_min_dump_alloc = cb->min_dump_alloc; handler = inet_diag_lock_handler(protocol); if (handler) { handler->dump(skb, cb, r); inet_diag_unlock_handler(handler); } else { err = -ENOENT; } /* The skb is not large enough to fit one sk info and * inet_sk_diag_fill() has requested for a larger skb. */ if (!skb->len && cb->min_dump_alloc > prev_min_dump_alloc) { err = pskb_expand_head(skb, 0, cb->min_dump_alloc, GFP_KERNEL); if (!err) goto again; } return err ? : skb->len; } static int inet_diag_dump(struct sk_buff *skb, struct netlink_callback *cb) { return __inet_diag_dump(skb, cb, nlmsg_data(cb->nlh)); } static int __inet_diag_dump_start(struct netlink_callback *cb, int hdrlen) { const struct nlmsghdr *nlh = cb->nlh; struct inet_diag_dump_data *cb_data; struct sk_buff *skb = cb->skb; struct nlattr *nla; int err; cb_data = kzalloc(sizeof(*cb_data), GFP_KERNEL); if (!cb_data) return -ENOMEM; err = inet_diag_parse_attrs(nlh, hdrlen, cb_data->req_nlas); if (err) { kfree(cb_data); return err; } nla = cb_data->inet_diag_nla_bc; if (nla) { err = inet_diag_bc_audit(nla, skb); if (err) { kfree(cb_data); return err; } } nla = cb_data->inet_diag_nla_bpf_stgs; if (nla) { struct bpf_sk_storage_diag *bpf_stg_diag; bpf_stg_diag = bpf_sk_storage_diag_alloc(nla); if (IS_ERR(bpf_stg_diag)) { kfree(cb_data); return PTR_ERR(bpf_stg_diag); } cb_data->bpf_stg_diag = bpf_stg_diag; } cb->data = cb_data; return 0; } static int inet_diag_dump_start(struct netlink_callback *cb) { return __inet_diag_dump_start(cb, sizeof(struct inet_diag_req_v2)); } static int inet_diag_dump_start_compat(struct netlink_callback *cb) { return __inet_diag_dump_start(cb, sizeof(struct inet_diag_req)); } static int inet_diag_dump_done(struct netlink_callback *cb) { struct inet_diag_dump_data *cb_data = cb->data; bpf_sk_storage_diag_free(cb_data->bpf_stg_diag); kfree(cb->data); return 0; } static int inet_diag_type2proto(int type) { switch (type) { case TCPDIAG_GETSOCK: return IPPROTO_TCP; case DCCPDIAG_GETSOCK: return IPPROTO_DCCP; default: return 0; } } static int inet_diag_dump_compat(struct sk_buff *skb, struct netlink_callback *cb) { struct inet_diag_req *rc = nlmsg_data(cb->nlh); struct inet_diag_req_v2 req; req.sdiag_family = AF_UNSPEC; /* compatibility */ req.sdiag_protocol = inet_diag_type2proto(cb->nlh->nlmsg_type); req.idiag_ext = rc->idiag_ext; req.pad = 0; req.idiag_states = rc->idiag_states; req.id = rc->id; return __inet_diag_dump(skb, cb, &req); } static int inet_diag_get_exact_compat(struct sk_buff *in_skb, const struct nlmsghdr *nlh) { struct inet_diag_req *rc = nlmsg_data(nlh); struct inet_diag_req_v2 req; req.sdiag_family = rc->idiag_family; req.sdiag_protocol = inet_diag_type2proto(nlh->nlmsg_type); req.idiag_ext = rc->idiag_ext; req.pad = 0; req.idiag_states = rc->idiag_states; req.id = rc->id; return inet_diag_cmd_exact(SOCK_DIAG_BY_FAMILY, in_skb, nlh, sizeof(struct inet_diag_req), &req); } static int inet_diag_rcv_msg_compat(struct sk_buff *skb, struct nlmsghdr *nlh) { int hdrlen = sizeof(struct inet_diag_req); struct net *net = sock_net(skb->sk); if (nlh->nlmsg_type >= INET_DIAG_GETSOCK_MAX || nlmsg_len(nlh) < hdrlen) return -EINVAL; if (nlh->nlmsg_flags & NLM_F_DUMP) { struct netlink_dump_control c = { .start = inet_diag_dump_start_compat, .done = inet_diag_dump_done, .dump = inet_diag_dump_compat, }; return netlink_dump_start(net->diag_nlsk, skb, nlh, &c); } return inet_diag_get_exact_compat(skb, nlh); } static int inet_diag_handler_cmd(struct sk_buff *skb, struct nlmsghdr *h) { int hdrlen = sizeof(struct inet_diag_req_v2); struct net *net = sock_net(skb->sk); if (nlmsg_len(h) < hdrlen) return -EINVAL; if (h->nlmsg_type == SOCK_DIAG_BY_FAMILY && h->nlmsg_flags & NLM_F_DUMP) { struct netlink_dump_control c = { .start = inet_diag_dump_start, .done = inet_diag_dump_done, .dump = inet_diag_dump, }; return netlink_dump_start(net->diag_nlsk, skb, h, &c); } return inet_diag_cmd_exact(h->nlmsg_type, skb, h, hdrlen, nlmsg_data(h)); } static int inet_diag_handler_get_info(struct sk_buff *skb, struct sock *sk) { const struct inet_diag_handler *handler; struct nlmsghdr *nlh; struct nlattr *attr; struct inet_diag_msg *r; void *info = NULL; int err = 0; nlh = nlmsg_put(skb, 0, 0, SOCK_DIAG_BY_FAMILY, sizeof(*r), 0); if (!nlh) return -ENOMEM; r = nlmsg_data(nlh); memset(r, 0, sizeof(*r)); inet_diag_msg_common_fill(r, sk); if (sk->sk_type == SOCK_DGRAM || sk->sk_type == SOCK_STREAM) r->id.idiag_sport = inet_sk(sk)->inet_sport; r->idiag_state = sk->sk_state; if ((err = nla_put_u8(skb, INET_DIAG_PROTOCOL, sk->sk_protocol))) { nlmsg_cancel(skb, nlh); return err; } handler = inet_diag_lock_handler(sk->sk_protocol); if (!handler) { nlmsg_cancel(skb, nlh); return -ENOENT; } attr = handler->idiag_info_size ? nla_reserve_64bit(skb, INET_DIAG_INFO, handler->idiag_info_size, INET_DIAG_PAD) : NULL; if (attr) info = nla_data(attr); handler->idiag_get_info(sk, r, info); inet_diag_unlock_handler(handler); nlmsg_end(skb, nlh); return 0; } static const struct sock_diag_handler inet_diag_handler = { .owner = THIS_MODULE, .family = AF_INET, .dump = inet_diag_handler_cmd, .get_info = inet_diag_handler_get_info, .destroy = inet_diag_handler_cmd, }; static const struct sock_diag_handler inet6_diag_handler = { .owner = THIS_MODULE, .family = AF_INET6, .dump = inet_diag_handler_cmd, .get_info = inet_diag_handler_get_info, .destroy = inet_diag_handler_cmd, }; int inet_diag_register(const struct inet_diag_handler *h) { const __u16 type = h->idiag_type; if (type >= IPPROTO_MAX) return -EINVAL; return !cmpxchg((const struct inet_diag_handler **)&inet_diag_table[type], NULL, h) ? 0 : -EEXIST; } EXPORT_SYMBOL_GPL(inet_diag_register); void inet_diag_unregister(const struct inet_diag_handler *h) { const __u16 type = h->idiag_type; if (type >= IPPROTO_MAX) return; xchg((const struct inet_diag_handler **)&inet_diag_table[type], NULL); } EXPORT_SYMBOL_GPL(inet_diag_unregister); static const struct sock_diag_inet_compat inet_diag_compat = { .owner = THIS_MODULE, .fn = inet_diag_rcv_msg_compat, }; static int __init inet_diag_init(void) { const int inet_diag_table_size = (IPPROTO_MAX * sizeof(struct inet_diag_handler *)); int err = -ENOMEM; inet_diag_table = kzalloc(inet_diag_table_size, GFP_KERNEL); if (!inet_diag_table) goto out; err = sock_diag_register(&inet_diag_handler); if (err) goto out_free_nl; err = sock_diag_register(&inet6_diag_handler); if (err) goto out_free_inet; sock_diag_register_inet_compat(&inet_diag_compat); out: return err; out_free_inet: sock_diag_unregister(&inet_diag_handler); out_free_nl: kfree(inet_diag_table); goto out; } static void __exit inet_diag_exit(void) { sock_diag_unregister(&inet6_diag_handler); sock_diag_unregister(&inet_diag_handler); sock_diag_unregister_inet_compat(&inet_diag_compat); kfree(inet_diag_table); } module_init(inet_diag_init); module_exit(inet_diag_exit); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("INET/INET6: socket monitoring via SOCK_DIAG"); MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK, NETLINK_SOCK_DIAG, 2 /* AF_INET */); MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK, NETLINK_SOCK_DIAG, 10 /* AF_INET6 */);
1 2 3 4 5 6 7 8 9 10 11 12 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _LINUX_MSDOS_FS_H #define _LINUX_MSDOS_FS_H #include <uapi/linux/msdos_fs.h> /* media of boot sector */ static inline int fat_valid_media(u8 media) { return 0xf8 <= media || media == 0xf0; } #endif /* !_LINUX_MSDOS_FS_H */
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef __KVM_X86_VMX_POSTED_INTR_H #define __KVM_X86_VMX_POSTED_INTR_H #include <linux/bitmap.h> #include <asm/posted_intr.h> void vmx_vcpu_pi_load(struct kvm_vcpu *vcpu, int cpu); void vmx_vcpu_pi_put(struct kvm_vcpu *vcpu); void pi_wakeup_handler(void); void __init pi_init_cpu(int cpu); bool pi_has_pending_interrupt(struct kvm_vcpu *vcpu); int vmx_pi_update_irte(struct kvm *kvm, unsigned int host_irq, uint32_t guest_irq, bool set); void vmx_pi_start_assignment(struct kvm *kvm); static inline int pi_find_highest_vector(struct pi_desc *pi_desc) { int vec; vec = find_last_bit((unsigned long *)pi_desc->pir, 256); return vec < 256 ? vec : -1; } #endif /* __KVM_X86_VMX_POSTED_INTR_H */
62 110 247 11 619 619 390 390 588 588 486 486 373 373 393 398 348 398 153 235 470 209 414 68 3 3 1 3 386 386 110 512 270 369 352 359 95 596 437 356 90 356 360 5 28 21 1 21 619 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef ASM_KVM_CACHE_REGS_H #define ASM_KVM_CACHE_REGS_H #include <linux/kvm_host.h> #define KVM_POSSIBLE_CR0_GUEST_BITS (X86_CR0_TS | X86_CR0_WP) #define KVM_POSSIBLE_CR4_GUEST_BITS \ (X86_CR4_PVI | X86_CR4_DE | X86_CR4_PCE | X86_CR4_OSFXSR \ | X86_CR4_OSXMMEXCPT | X86_CR4_PGE | X86_CR4_TSD | X86_CR4_FSGSBASE) #define X86_CR0_PDPTR_BITS (X86_CR0_CD | X86_CR0_NW | X86_CR0_PG) #define X86_CR4_TLBFLUSH_BITS (X86_CR4_PGE | X86_CR4_PCIDE | X86_CR4_PAE | X86_CR4_SMEP) #define X86_CR4_PDPTR_BITS (X86_CR4_PGE | X86_CR4_PSE | X86_CR4_PAE | X86_CR4_SMEP) static_assert(!(KVM_POSSIBLE_CR0_GUEST_BITS & X86_CR0_PDPTR_BITS)); #define BUILD_KVM_GPR_ACCESSORS(lname, uname) \ static __always_inline unsigned long kvm_##lname##_read(struct kvm_vcpu *vcpu)\ { \ return vcpu->arch.regs[VCPU_REGS_##uname]; \ } \ static __always_inline void kvm_##lname##_write(struct kvm_vcpu *vcpu, \ unsigned long val) \ { \ vcpu->arch.regs[VCPU_REGS_##uname] = val; \ } BUILD_KVM_GPR_ACCESSORS(rax, RAX) BUILD_KVM_GPR_ACCESSORS(rbx, RBX) BUILD_KVM_GPR_ACCESSORS(rcx, RCX) BUILD_KVM_GPR_ACCESSORS(rdx, RDX) BUILD_KVM_GPR_ACCESSORS(rbp, RBP) BUILD_KVM_GPR_ACCESSORS(rsi, RSI) BUILD_KVM_GPR_ACCESSORS(rdi, RDI) #ifdef CONFIG_X86_64 BUILD_KVM_GPR_ACCESSORS(r8, R8) BUILD_KVM_GPR_ACCESSORS(r9, R9) BUILD_KVM_GPR_ACCESSORS(r10, R10) BUILD_KVM_GPR_ACCESSORS(r11, R11) BUILD_KVM_GPR_ACCESSORS(r12, R12) BUILD_KVM_GPR_ACCESSORS(r13, R13) BUILD_KVM_GPR_ACCESSORS(r14, R14) BUILD_KVM_GPR_ACCESSORS(r15, R15) #endif /* * Using the register cache from interrupt context is generally not allowed, as * caching a register and marking it available/dirty can't be done atomically, * i.e. accesses from interrupt context may clobber state or read stale data if * the vCPU task is in the process of updating the cache. The exception is if * KVM is handling a PMI IRQ/NMI VM-Exit, as that bound code sequence doesn't * touch the cache, it runs after the cache is reset (post VM-Exit), and PMIs * need to access several registers that are cacheable. */ #define kvm_assert_register_caching_allowed(vcpu) \ lockdep_assert_once(in_task() || kvm_arch_pmi_in_guest(vcpu)) /* * avail dirty * 0 0 register in VMCS/VMCB * 0 1 *INVALID* * 1 0 register in vcpu->arch * 1 1 register in vcpu->arch, needs to be stored back */ static inline bool kvm_register_is_available(struct kvm_vcpu *vcpu, enum kvm_reg reg) { kvm_assert_register_caching_allowed(vcpu); return test_bit(reg, (unsigned long *)&vcpu->arch.regs_avail); } static inline bool kvm_register_is_dirty(struct kvm_vcpu *vcpu, enum kvm_reg reg) { kvm_assert_register_caching_allowed(vcpu); return test_bit(reg, (unsigned long *)&vcpu->arch.regs_dirty); } static inline void kvm_register_mark_available(struct kvm_vcpu *vcpu, enum kvm_reg reg) { kvm_assert_register_caching_allowed(vcpu); __set_bit(reg, (unsigned long *)&vcpu->arch.regs_avail); } static inline void kvm_register_mark_dirty(struct kvm_vcpu *vcpu, enum kvm_reg reg) { kvm_assert_register_caching_allowed(vcpu); __set_bit(reg, (unsigned long *)&vcpu->arch.regs_avail); __set_bit(reg, (unsigned long *)&vcpu->arch.regs_dirty); } /* * kvm_register_test_and_mark_available() is a special snowflake that uses an * arch bitop directly to avoid the explicit instrumentation that comes with * the generic bitops. This allows code that cannot be instrumented (noinstr * functions), e.g. the low level VM-Enter/VM-Exit paths, to cache registers. */ static __always_inline bool kvm_register_test_and_mark_available(struct kvm_vcpu *vcpu, enum kvm_reg reg) { kvm_assert_register_caching_allowed(vcpu); return arch___test_and_set_bit(reg, (unsigned long *)&vcpu->arch.regs_avail); } /* * The "raw" register helpers are only for cases where the full 64 bits of a * register are read/written irrespective of current vCPU mode. In other words, * odds are good you shouldn't be using the raw variants. */ static inline unsigned long kvm_register_read_raw(struct kvm_vcpu *vcpu, int reg) { if (WARN_ON_ONCE((unsigned int)reg >= NR_VCPU_REGS)) return 0; if (!kvm_register_is_available(vcpu, reg)) kvm_x86_call(cache_reg)(vcpu, reg); return vcpu->arch.regs[reg]; } static inline void kvm_register_write_raw(struct kvm_vcpu *vcpu, int reg, unsigned long val) { if (WARN_ON_ONCE((unsigned int)reg >= NR_VCPU_REGS)) return; vcpu->arch.regs[reg] = val; kvm_register_mark_dirty(vcpu, reg); } static inline unsigned long kvm_rip_read(struct kvm_vcpu *vcpu) { return kvm_register_read_raw(vcpu, VCPU_REGS_RIP); } static inline void kvm_rip_write(struct kvm_vcpu *vcpu, unsigned long val) { kvm_register_write_raw(vcpu, VCPU_REGS_RIP, val); } static inline unsigned long kvm_rsp_read(struct kvm_vcpu *vcpu) { return kvm_register_read_raw(vcpu, VCPU_REGS_RSP); } static inline void kvm_rsp_write(struct kvm_vcpu *vcpu, unsigned long val) { kvm_register_write_raw(vcpu, VCPU_REGS_RSP, val); } static inline u64 kvm_pdptr_read(struct kvm_vcpu *vcpu, int index) { might_sleep(); /* on svm */ if (!kvm_register_is_available(vcpu, VCPU_EXREG_PDPTR)) kvm_x86_call(cache_reg)(vcpu, VCPU_EXREG_PDPTR); return vcpu->arch.walk_mmu->pdptrs[index]; } static inline void kvm_pdptr_write(struct kvm_vcpu *vcpu, int index, u64 value) { vcpu->arch.walk_mmu->pdptrs[index] = value; } static inline ulong kvm_read_cr0_bits(struct kvm_vcpu *vcpu, ulong mask) { ulong tmask = mask & KVM_POSSIBLE_CR0_GUEST_BITS; if ((tmask & vcpu->arch.cr0_guest_owned_bits) && !kvm_register_is_available(vcpu, VCPU_EXREG_CR0)) kvm_x86_call(cache_reg)(vcpu, VCPU_EXREG_CR0); return vcpu->arch.cr0 & mask; } static __always_inline bool kvm_is_cr0_bit_set(struct kvm_vcpu *vcpu, unsigned long cr0_bit) { BUILD_BUG_ON(!is_power_of_2(cr0_bit)); return !!kvm_read_cr0_bits(vcpu, cr0_bit); } static inline ulong kvm_read_cr0(struct kvm_vcpu *vcpu) { return kvm_read_cr0_bits(vcpu, ~0UL); } static inline ulong kvm_read_cr4_bits(struct kvm_vcpu *vcpu, ulong mask) { ulong tmask = mask & KVM_POSSIBLE_CR4_GUEST_BITS; if ((tmask & vcpu->arch.cr4_guest_owned_bits) && !kvm_register_is_available(vcpu, VCPU_EXREG_CR4)) kvm_x86_call(cache_reg)(vcpu, VCPU_EXREG_CR4); return vcpu->arch.cr4 & mask; } static __always_inline bool kvm_is_cr4_bit_set(struct kvm_vcpu *vcpu, unsigned long cr4_bit) { BUILD_BUG_ON(!is_power_of_2(cr4_bit)); return !!kvm_read_cr4_bits(vcpu, cr4_bit); } static inline ulong kvm_read_cr3(struct kvm_vcpu *vcpu) { if (!kvm_register_is_available(vcpu, VCPU_EXREG_CR3)) kvm_x86_call(cache_reg)(vcpu, VCPU_EXREG_CR3); return vcpu->arch.cr3; } static inline ulong kvm_read_cr4(struct kvm_vcpu *vcpu) { return kvm_read_cr4_bits(vcpu, ~0UL); } static inline u64 kvm_read_edx_eax(struct kvm_vcpu *vcpu) { return (kvm_rax_read(vcpu) & -1u) | ((u64)(kvm_rdx_read(vcpu) & -1u) << 32); } static inline void enter_guest_mode(struct kvm_vcpu *vcpu) { vcpu->arch.hflags |= HF_GUEST_MASK; vcpu->stat.guest_mode = 1; } static inline void leave_guest_mode(struct kvm_vcpu *vcpu) { vcpu->arch.hflags &= ~HF_GUEST_MASK; if (vcpu->arch.load_eoi_exitmap_pending) { vcpu->arch.load_eoi_exitmap_pending = false; kvm_make_request(KVM_REQ_LOAD_EOI_EXITMAP, vcpu); } vcpu->stat.guest_mode = 0; } static inline bool is_guest_mode(struct kvm_vcpu *vcpu) { return vcpu->arch.hflags & HF_GUEST_MASK; } #endif
2 7 4 9 1 1 2 1 77 77 1 13 1 1 4 9 5 5 5 5 5 5 5 5 5 5 5 9 9 5 5 5 5 5 5 5 5 5 5 5 9 9 9 9 9 9 9 9 9 9 13 1 13 13 13 13 9 4 4 4 13 9 9 4 5 5 5 5 5 11 11 11 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 1 1 1 1 1 1 1 2 1 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 70 72 1 72 1 1 1 1 1 1 949 9 944 9 9 88 90 825 1419 1414 62 62 61 1 1 9 9 9 9 9 9 9 9 9 9 9 8 77 76 77 77 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 // SPDX-License-Identifier: GPL-2.0-or-later /* * Handle firewalling * Linux ethernet bridge * * Authors: * Lennert Buytenhek <buytenh@gnu.org> * Bart De Schuymer <bdschuym@pandora.be> * * Lennert dedicates this file to Kerstin Wurdinger. */ #include <linux/module.h> #include <linux/kernel.h> #include <linux/slab.h> #include <linux/ip.h> #include <linux/netdevice.h> #include <linux/skbuff.h> #include <linux/if_arp.h> #include <linux/if_ether.h> #include <linux/if_vlan.h> #include <linux/if_pppox.h> #include <linux/ppp_defs.h> #include <linux/netfilter_bridge.h> #include <uapi/linux/netfilter_bridge.h> #include <linux/netfilter_ipv4.h> #include <linux/netfilter_ipv6.h> #include <linux/netfilter_arp.h> #include <linux/in_route.h> #include <linux/rculist.h> #include <linux/inetdevice.h> #include <net/ip.h> #include <net/ipv6.h> #include <net/addrconf.h> #include <net/dst_metadata.h> #include <net/route.h> #include <net/netfilter/br_netfilter.h> #include <net/netns/generic.h> #include <net/inet_dscp.h> #include <linux/uaccess.h> #include "br_private.h" #ifdef CONFIG_SYSCTL #include <linux/sysctl.h> #endif #if IS_ENABLED(CONFIG_NF_CONNTRACK) #include <net/netfilter/nf_conntrack_core.h> #endif static unsigned int brnf_net_id __read_mostly; struct brnf_net { bool enabled; #ifdef CONFIG_SYSCTL struct ctl_table_header *ctl_hdr; #endif /* default value is 1 */ int call_iptables; int call_ip6tables; int call_arptables; /* default value is 0 */ int filter_vlan_tagged; int filter_pppoe_tagged; int pass_vlan_indev; }; #define IS_IP(skb) \ (!skb_vlan_tag_present(skb) && skb->protocol == htons(ETH_P_IP)) #define IS_IPV6(skb) \ (!skb_vlan_tag_present(skb) && skb->protocol == htons(ETH_P_IPV6)) #define IS_ARP(skb) \ (!skb_vlan_tag_present(skb) && skb->protocol == htons(ETH_P_ARP)) static inline __be16 vlan_proto(const struct sk_buff *skb) { if (skb_vlan_tag_present(skb)) return skb->protocol; else if (skb->protocol == htons(ETH_P_8021Q)) return vlan_eth_hdr(skb)->h_vlan_encapsulated_proto; else return 0; } static inline bool is_vlan_ip(const struct sk_buff *skb, const struct net *net) { struct brnf_net *brnet = net_generic(net, brnf_net_id); return vlan_proto(skb) == htons(ETH_P_IP) && brnet->filter_vlan_tagged; } static inline bool is_vlan_ipv6(const struct sk_buff *skb, const struct net *net) { struct brnf_net *brnet = net_generic(net, brnf_net_id); return vlan_proto(skb) == htons(ETH_P_IPV6) && brnet->filter_vlan_tagged; } static inline bool is_vlan_arp(const struct sk_buff *skb, const struct net *net) { struct brnf_net *brnet = net_generic(net, brnf_net_id); return vlan_proto(skb) == htons(ETH_P_ARP) && brnet->filter_vlan_tagged; } static inline __be16 pppoe_proto(const struct sk_buff *skb) { return *((__be16 *)(skb_mac_header(skb) + ETH_HLEN + sizeof(struct pppoe_hdr))); } static inline bool is_pppoe_ip(const struct sk_buff *skb, const struct net *net) { struct brnf_net *brnet = net_generic(net, brnf_net_id); return skb->protocol == htons(ETH_P_PPP_SES) && pppoe_proto(skb) == htons(PPP_IP) && brnet->filter_pppoe_tagged; } static inline bool is_pppoe_ipv6(const struct sk_buff *skb, const struct net *net) { struct brnf_net *brnet = net_generic(net, brnf_net_id); return skb->protocol == htons(ETH_P_PPP_SES) && pppoe_proto(skb) == htons(PPP_IPV6) && brnet->filter_pppoe_tagged; } /* largest possible L2 header, see br_nf_dev_queue_xmit() */ #define NF_BRIDGE_MAX_MAC_HEADER_LENGTH (PPPOE_SES_HLEN + ETH_HLEN) struct brnf_frag_data { local_lock_t bh_lock; char mac[NF_BRIDGE_MAX_MAC_HEADER_LENGTH]; u8 encap_size; u8 size; u16 vlan_tci; __be16 vlan_proto; }; static DEFINE_PER_CPU(struct brnf_frag_data, brnf_frag_data_storage) = { .bh_lock = INIT_LOCAL_LOCK(bh_lock), }; static void nf_bridge_info_free(struct sk_buff *skb) { skb_ext_del(skb, SKB_EXT_BRIDGE_NF); } static inline struct net_device *bridge_parent(const struct net_device *dev) { struct net_bridge_port *port; port = br_port_get_rcu(dev); return port ? port->br->dev : NULL; } static inline struct nf_bridge_info *nf_bridge_unshare(struct sk_buff *skb) { return skb_ext_add(skb, SKB_EXT_BRIDGE_NF); } unsigned int nf_bridge_encap_header_len(const struct sk_buff *skb) { switch (skb->protocol) { case __cpu_to_be16(ETH_P_8021Q): return VLAN_HLEN; case __cpu_to_be16(ETH_P_PPP_SES): return PPPOE_SES_HLEN; default: return 0; } } static inline void nf_bridge_pull_encap_header(struct sk_buff *skb) { unsigned int len = nf_bridge_encap_header_len(skb); skb_pull(skb, len); skb->network_header += len; } static inline void nf_bridge_pull_encap_header_rcsum(struct sk_buff *skb) { unsigned int len = nf_bridge_encap_header_len(skb); skb_pull_rcsum(skb, len); skb->network_header += len; } /* When handing a packet over to the IP layer * check whether we have a skb that is in the * expected format */ static int br_validate_ipv4(struct net *net, struct sk_buff *skb) { const struct iphdr *iph; u32 len; if (!pskb_may_pull(skb, sizeof(struct iphdr))) goto inhdr_error; iph = ip_hdr(skb); /* Basic sanity checks */ if (iph->ihl < 5 || iph->version != 4) goto inhdr_error; if (!pskb_may_pull(skb, iph->ihl*4)) goto inhdr_error; iph = ip_hdr(skb); if (unlikely(ip_fast_csum((u8 *)iph, iph->ihl))) goto csum_error; len = skb_ip_totlen(skb); if (skb->len < len) { __IP_INC_STATS(net, IPSTATS_MIB_INTRUNCATEDPKTS); goto drop; } else if (len < (iph->ihl*4)) goto inhdr_error; if (pskb_trim_rcsum(skb, len)) { __IP_INC_STATS(net, IPSTATS_MIB_INDISCARDS); goto drop; } memset(IPCB(skb), 0, sizeof(struct inet_skb_parm)); /* We should really parse IP options here but until * somebody who actually uses IP options complains to * us we'll just silently ignore the options because * we're lazy! */ return 0; csum_error: __IP_INC_STATS(net, IPSTATS_MIB_CSUMERRORS); inhdr_error: __IP_INC_STATS(net, IPSTATS_MIB_INHDRERRORS); drop: return -1; } void nf_bridge_update_protocol(struct sk_buff *skb) { const struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb); switch (nf_bridge->orig_proto) { case BRNF_PROTO_8021Q: skb->protocol = htons(ETH_P_8021Q); break; case BRNF_PROTO_PPPOE: skb->protocol = htons(ETH_P_PPP_SES); break; case BRNF_PROTO_UNCHANGED: break; } } /* Obtain the correct destination MAC address, while preserving the original * source MAC address. If we already know this address, we just copy it. If we * don't, we use the neighbour framework to find out. In both cases, we make * sure that br_handle_frame_finish() is called afterwards. */ int br_nf_pre_routing_finish_bridge(struct net *net, struct sock *sk, struct sk_buff *skb) { struct neighbour *neigh; struct dst_entry *dst; skb->dev = bridge_parent(skb->dev); if (!skb->dev) goto free_skb; dst = skb_dst(skb); neigh = dst_neigh_lookup_skb(dst, skb); if (neigh) { struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb); int ret; if ((READ_ONCE(neigh->nud_state) & NUD_CONNECTED) && READ_ONCE(neigh->hh.hh_len)) { struct net_device *br_indev; br_indev = nf_bridge_get_physindev(skb, net); if (!br_indev) { neigh_release(neigh); goto free_skb; } neigh_hh_bridge(&neigh->hh, skb); skb->dev = br_indev; ret = br_handle_frame_finish(net, sk, skb); } else { /* the neighbour function below overwrites the complete * MAC header, so we save the Ethernet source address and * protocol number. */ skb_copy_from_linear_data_offset(skb, -(ETH_HLEN-ETH_ALEN), nf_bridge->neigh_header, ETH_HLEN-ETH_ALEN); /* tell br_dev_xmit to continue with forwarding */ nf_bridge->bridged_dnat = 1; /* FIXME Need to refragment */ ret = READ_ONCE(neigh->output)(neigh, skb); } neigh_release(neigh); return ret; } free_skb: kfree_skb(skb); return 0; } static inline bool br_nf_ipv4_daddr_was_changed(const struct sk_buff *skb, const struct nf_bridge_info *nf_bridge) { return ip_hdr(skb)->daddr != nf_bridge->ipv4_daddr; } /* This requires some explaining. If DNAT has taken place, * we will need to fix up the destination Ethernet address. * This is also true when SNAT takes place (for the reply direction). * * There are two cases to consider: * 1. The packet was DNAT'ed to a device in the same bridge * port group as it was received on. We can still bridge * the packet. * 2. The packet was DNAT'ed to a different device, either * a non-bridged device or another bridge port group. * The packet will need to be routed. * * The correct way of distinguishing between these two cases is to * call ip_route_input() and to look at skb->dst->dev, which is * changed to the destination device if ip_route_input() succeeds. * * Let's first consider the case that ip_route_input() succeeds: * * If the output device equals the logical bridge device the packet * came in on, we can consider this bridging. The corresponding MAC * address will be obtained in br_nf_pre_routing_finish_bridge. * Otherwise, the packet is considered to be routed and we just * change the destination MAC address so that the packet will * later be passed up to the IP stack to be routed. For a redirected * packet, ip_route_input() will give back the localhost as output device, * which differs from the bridge device. * * Let's now consider the case that ip_route_input() fails: * * This can be because the destination address is martian, in which case * the packet will be dropped. * If IP forwarding is disabled, ip_route_input() will fail, while * ip_route_output_key() can return success. The source * address for ip_route_output_key() is set to zero, so ip_route_output_key() * thinks we're handling a locally generated packet and won't care * if IP forwarding is enabled. If the output device equals the logical bridge * device, we proceed as if ip_route_input() succeeded. If it differs from the * logical bridge port or if ip_route_output_key() fails we drop the packet. */ static int br_nf_pre_routing_finish(struct net *net, struct sock *sk, struct sk_buff *skb) { struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb); struct net_device *dev = skb->dev, *br_indev; const struct iphdr *iph = ip_hdr(skb); enum skb_drop_reason reason; struct rtable *rt; br_indev = nf_bridge_get_physindev(skb, net); if (!br_indev) { kfree_skb(skb); return 0; } nf_bridge->frag_max_size = IPCB(skb)->frag_max_size; if (nf_bridge->pkt_otherhost) { skb->pkt_type = PACKET_OTHERHOST; nf_bridge->pkt_otherhost = false; } nf_bridge->in_prerouting = 0; if (br_nf_ipv4_daddr_was_changed(skb, nf_bridge)) { reason = ip_route_input(skb, iph->daddr, iph->saddr, ip4h_dscp(iph), dev); if (reason) { kfree_skb_reason(skb, reason); return 0; } else { if (skb_dst(skb)->dev == dev) { skb->dev = br_indev; nf_bridge_update_protocol(skb); nf_bridge_push_encap_header(skb); br_nf_hook_thresh(NF_BR_PRE_ROUTING, net, sk, skb, skb->dev, NULL, br_nf_pre_routing_finish_bridge); return 0; } ether_addr_copy(eth_hdr(skb)->h_dest, dev->dev_addr); skb->pkt_type = PACKET_HOST; } } else { rt = bridge_parent_rtable(br_indev); if (!rt) { kfree_skb(skb); return 0; } skb_dst_drop(skb); skb_dst_set_noref(skb, &rt->dst); } skb->dev = br_indev; nf_bridge_update_protocol(skb); nf_bridge_push_encap_header(skb); br_nf_hook_thresh(NF_BR_PRE_ROUTING, net, sk, skb, skb->dev, NULL, br_handle_frame_finish); return 0; } static struct net_device *brnf_get_logical_dev(struct sk_buff *skb, const struct net_device *dev, const struct net *net) { struct net_device *vlan, *br; struct brnf_net *brnet = net_generic(net, brnf_net_id); br = bridge_parent(dev); if (brnet->pass_vlan_indev == 0 || !skb_vlan_tag_present(skb)) return br; vlan = __vlan_find_dev_deep_rcu(br, skb->vlan_proto, skb_vlan_tag_get(skb) & VLAN_VID_MASK); return vlan ? vlan : br; } /* Some common code for IPv4/IPv6 */ struct net_device *setup_pre_routing(struct sk_buff *skb, const struct net *net) { struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb); if (skb->pkt_type == PACKET_OTHERHOST) { skb->pkt_type = PACKET_HOST; nf_bridge->pkt_otherhost = true; } nf_bridge->in_prerouting = 1; nf_bridge->physinif = skb->dev->ifindex; skb->dev = brnf_get_logical_dev(skb, skb->dev, net); if (skb->protocol == htons(ETH_P_8021Q)) nf_bridge->orig_proto = BRNF_PROTO_8021Q; else if (skb->protocol == htons(ETH_P_PPP_SES)) nf_bridge->orig_proto = BRNF_PROTO_PPPOE; /* Must drop socket now because of tproxy. */ skb_orphan(skb); return skb->dev; } /* Direct IPv6 traffic to br_nf_pre_routing_ipv6. * Replicate the checks that IPv4 does on packet reception. * Set skb->dev to the bridge device (i.e. parent of the * receiving device) to make netfilter happy, the REDIRECT * target in particular. Save the original destination IP * address to be able to detect DNAT afterwards. */ static unsigned int br_nf_pre_routing(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { struct nf_bridge_info *nf_bridge; struct net_bridge_port *p; struct net_bridge *br; __u32 len = nf_bridge_encap_header_len(skb); struct brnf_net *brnet; if (unlikely(!pskb_may_pull(skb, len))) return NF_DROP_REASON(skb, SKB_DROP_REASON_PKT_TOO_SMALL, 0); p = br_port_get_rcu(state->in); if (p == NULL) return NF_DROP_REASON(skb, SKB_DROP_REASON_DEV_READY, 0); br = p->br; brnet = net_generic(state->net, brnf_net_id); if (IS_IPV6(skb) || is_vlan_ipv6(skb, state->net) || is_pppoe_ipv6(skb, state->net)) { if (!brnet->call_ip6tables && !br_opt_get(br, BROPT_NF_CALL_IP6TABLES)) return NF_ACCEPT; if (!ipv6_mod_enabled()) { pr_warn_once("Module ipv6 is disabled, so call_ip6tables is not supported."); return NF_DROP_REASON(skb, SKB_DROP_REASON_IPV6DISABLED, 0); } nf_bridge_pull_encap_header_rcsum(skb); return br_nf_pre_routing_ipv6(priv, skb, state); } if (!brnet->call_iptables && !br_opt_get(br, BROPT_NF_CALL_IPTABLES)) return NF_ACCEPT; if (!IS_IP(skb) && !is_vlan_ip(skb, state->net) && !is_pppoe_ip(skb, state->net)) return NF_ACCEPT; nf_bridge_pull_encap_header_rcsum(skb); if (br_validate_ipv4(state->net, skb)) return NF_DROP_REASON(skb, SKB_DROP_REASON_IP_INHDR, 0); if (!nf_bridge_alloc(skb)) return NF_DROP_REASON(skb, SKB_DROP_REASON_NOMEM, 0); if (!setup_pre_routing(skb, state->net)) return NF_DROP_REASON(skb, SKB_DROP_REASON_DEV_READY, 0); nf_bridge = nf_bridge_info_get(skb); nf_bridge->ipv4_daddr = ip_hdr(skb)->daddr; skb->protocol = htons(ETH_P_IP); skb->transport_header = skb->network_header + ip_hdr(skb)->ihl * 4; NF_HOOK(NFPROTO_IPV4, NF_INET_PRE_ROUTING, state->net, state->sk, skb, skb->dev, NULL, br_nf_pre_routing_finish); return NF_STOLEN; } #if IS_ENABLED(CONFIG_NF_CONNTRACK) /* conntracks' nf_confirm logic cannot handle cloned skbs referencing * the same nf_conn entry, which will happen for multicast (broadcast) * Frames on bridges. * * Example: * macvlan0 * br0 * ethX ethY * * ethX (or Y) receives multicast or broadcast packet containing * an IP packet, not yet in conntrack table. * * 1. skb passes through bridge and fake-ip (br_netfilter)Prerouting. * -> skb->_nfct now references a unconfirmed entry * 2. skb is broad/mcast packet. bridge now passes clones out on each bridge * interface. * 3. skb gets passed up the stack. * 4. In macvlan case, macvlan driver retains clone(s) of the mcast skb * and schedules a work queue to send them out on the lower devices. * * The clone skb->_nfct is not a copy, it is the same entry as the * original skb. The macvlan rx handler then returns RX_HANDLER_PASS. * 5. Normal conntrack hooks (in NF_INET_LOCAL_IN) confirm the orig skb. * * The Macvlan broadcast worker and normal confirm path will race. * * This race will not happen if step 2 already confirmed a clone. In that * case later steps perform skb_clone() with skb->_nfct already confirmed (in * hash table). This works fine. * * But such confirmation won't happen when eb/ip/nftables rules dropped the * packets before they reached the nf_confirm step in postrouting. * * Work around this problem by explicit confirmation of the entry at * LOCAL_IN time, before upper layer has a chance to clone the unconfirmed * entry. * */ static unsigned int br_nf_local_in(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { bool promisc = BR_INPUT_SKB_CB(skb)->promisc; struct nf_conntrack *nfct = skb_nfct(skb); const struct nf_ct_hook *ct_hook; struct nf_conn *ct; int ret; if (promisc) { nf_reset_ct(skb); return NF_ACCEPT; } if (!nfct || skb->pkt_type == PACKET_HOST) return NF_ACCEPT; ct = container_of(nfct, struct nf_conn, ct_general); if (likely(nf_ct_is_confirmed(ct))) return NF_ACCEPT; if (WARN_ON_ONCE(refcount_read(&nfct->use) != 1)) { nf_reset_ct(skb); return NF_ACCEPT; } WARN_ON_ONCE(skb_shared(skb)); /* We can't call nf_confirm here, it would create a dependency * on nf_conntrack module. */ ct_hook = rcu_dereference(nf_ct_hook); if (!ct_hook) { skb->_nfct = 0ul; nf_conntrack_put(nfct); return NF_ACCEPT; } nf_bridge_pull_encap_header(skb); ret = ct_hook->confirm(skb); switch (ret & NF_VERDICT_MASK) { case NF_STOLEN: return NF_STOLEN; default: nf_bridge_push_encap_header(skb); break; } ct = container_of(nfct, struct nf_conn, ct_general); WARN_ON_ONCE(!nf_ct_is_confirmed(ct)); return ret; } #endif /* PF_BRIDGE/FORWARD *************************************************/ static int br_nf_forward_finish(struct net *net, struct sock *sk, struct sk_buff *skb) { struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb); struct net_device *in; if (!IS_ARP(skb) && !is_vlan_arp(skb, net)) { if (skb->protocol == htons(ETH_P_IP)) nf_bridge->frag_max_size = IPCB(skb)->frag_max_size; if (skb->protocol == htons(ETH_P_IPV6)) nf_bridge->frag_max_size = IP6CB(skb)->frag_max_size; in = nf_bridge_get_physindev(skb, net); if (!in) { kfree_skb(skb); return 0; } if (nf_bridge->pkt_otherhost) { skb->pkt_type = PACKET_OTHERHOST; nf_bridge->pkt_otherhost = false; } nf_bridge_update_protocol(skb); } else { in = *((struct net_device **)(skb->cb)); } nf_bridge_push_encap_header(skb); br_nf_hook_thresh(NF_BR_FORWARD, net, sk, skb, in, skb->dev, br_forward_finish); return 0; } static unsigned int br_nf_forward_ip(struct sk_buff *skb, const struct nf_hook_state *state, u8 pf) { struct nf_bridge_info *nf_bridge; struct net_device *parent; nf_bridge = nf_bridge_info_get(skb); if (!nf_bridge) return NF_ACCEPT; /* Need exclusive nf_bridge_info since we might have multiple * different physoutdevs. */ if (!nf_bridge_unshare(skb)) return NF_DROP_REASON(skb, SKB_DROP_REASON_NOMEM, 0); nf_bridge = nf_bridge_info_get(skb); if (!nf_bridge) return NF_DROP_REASON(skb, SKB_DROP_REASON_NOMEM, 0); parent = bridge_parent(state->out); if (!parent) return NF_DROP_REASON(skb, SKB_DROP_REASON_DEV_READY, 0); nf_bridge_pull_encap_header(skb); if (skb->pkt_type == PACKET_OTHERHOST) { skb->pkt_type = PACKET_HOST; nf_bridge->pkt_otherhost = true; } if (pf == NFPROTO_IPV4) { if (br_validate_ipv4(state->net, skb)) return NF_DROP_REASON(skb, SKB_DROP_REASON_IP_INHDR, 0); IPCB(skb)->frag_max_size = nf_bridge->frag_max_size; skb->protocol = htons(ETH_P_IP); } else if (pf == NFPROTO_IPV6) { if (br_validate_ipv6(state->net, skb)) return NF_DROP_REASON(skb, SKB_DROP_REASON_IP_INHDR, 0); IP6CB(skb)->frag_max_size = nf_bridge->frag_max_size; skb->protocol = htons(ETH_P_IPV6); } else { WARN_ON_ONCE(1); return NF_DROP; } nf_bridge->physoutdev = skb->dev; NF_HOOK(pf, NF_INET_FORWARD, state->net, NULL, skb, brnf_get_logical_dev(skb, state->in, state->net), parent, br_nf_forward_finish); return NF_STOLEN; } static unsigned int br_nf_forward_arp(struct sk_buff *skb, const struct nf_hook_state *state) { struct net_bridge_port *p; struct net_bridge *br; struct net_device **d = (struct net_device **)(skb->cb); struct brnf_net *brnet; p = br_port_get_rcu(state->out); if (p == NULL) return NF_ACCEPT; br = p->br; brnet = net_generic(state->net, brnf_net_id); if (!brnet->call_arptables && !br_opt_get(br, BROPT_NF_CALL_ARPTABLES)) return NF_ACCEPT; if (is_vlan_arp(skb, state->net)) nf_bridge_pull_encap_header(skb); if (unlikely(!pskb_may_pull(skb, sizeof(struct arphdr)))) return NF_DROP_REASON(skb, SKB_DROP_REASON_PKT_TOO_SMALL, 0); if (arp_hdr(skb)->ar_pln != 4) { if (is_vlan_arp(skb, state->net)) nf_bridge_push_encap_header(skb); return NF_ACCEPT; } *d = state->in; NF_HOOK(NFPROTO_ARP, NF_ARP_FORWARD, state->net, state->sk, skb, state->in, state->out, br_nf_forward_finish); return NF_STOLEN; } /* This is the 'purely bridged' case. For IP, we pass the packet to * netfilter with indev and outdev set to the bridge device, * but we are still able to filter on the 'real' indev/outdev * because of the physdev module. For ARP, indev and outdev are the * bridge ports. */ static unsigned int br_nf_forward(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { if (IS_IP(skb) || is_vlan_ip(skb, state->net) || is_pppoe_ip(skb, state->net)) return br_nf_forward_ip(skb, state, NFPROTO_IPV4); if (IS_IPV6(skb) || is_vlan_ipv6(skb, state->net) || is_pppoe_ipv6(skb, state->net)) return br_nf_forward_ip(skb, state, NFPROTO_IPV6); if (IS_ARP(skb) || is_vlan_arp(skb, state->net)) return br_nf_forward_arp(skb, state); return NF_ACCEPT; } static int br_nf_push_frag_xmit(struct net *net, struct sock *sk, struct sk_buff *skb) { struct brnf_frag_data *data; int err; data = this_cpu_ptr(&brnf_frag_data_storage); err = skb_cow_head(skb, data->size); if (err) { kfree_skb(skb); return 0; } if (data->vlan_proto) __vlan_hwaccel_put_tag(skb, data->vlan_proto, data->vlan_tci); skb_copy_to_linear_data_offset(skb, -data->size, data->mac, data->size); __skb_push(skb, data->encap_size); nf_bridge_info_free(skb); return br_dev_queue_push_xmit(net, sk, skb); } static int br_nf_ip_fragment(struct net *net, struct sock *sk, struct sk_buff *skb, int (*output)(struct net *, struct sock *, struct sk_buff *)) { unsigned int mtu = ip_skb_dst_mtu(sk, skb); struct iphdr *iph = ip_hdr(skb); if (unlikely(((iph->frag_off & htons(IP_DF)) && !skb->ignore_df) || (IPCB(skb)->frag_max_size && IPCB(skb)->frag_max_size > mtu))) { IP_INC_STATS(net, IPSTATS_MIB_FRAGFAILS); kfree_skb(skb); return -EMSGSIZE; } return ip_do_fragment(net, sk, skb, output); } static unsigned int nf_bridge_mtu_reduction(const struct sk_buff *skb) { const struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb); if (nf_bridge->orig_proto == BRNF_PROTO_PPPOE) return PPPOE_SES_HLEN; return 0; } static int br_nf_dev_queue_xmit(struct net *net, struct sock *sk, struct sk_buff *skb) { struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb); unsigned int mtu, mtu_reserved; int ret; mtu_reserved = nf_bridge_mtu_reduction(skb); mtu = skb->dev->mtu; if (nf_bridge->pkt_otherhost) { skb->pkt_type = PACKET_OTHERHOST; nf_bridge->pkt_otherhost = false; } if (nf_bridge->frag_max_size && nf_bridge->frag_max_size < mtu) mtu = nf_bridge->frag_max_size; nf_bridge_update_protocol(skb); nf_bridge_push_encap_header(skb); if (skb_is_gso(skb) || skb->len + mtu_reserved <= mtu) { nf_bridge_info_free(skb); return br_dev_queue_push_xmit(net, sk, skb); } /* Fragmentation on metadata/template dst is not supported */ if (unlikely(!skb_valid_dst(skb))) goto drop; /* This is wrong! We should preserve the original fragment * boundaries by preserving frag_list rather than refragmenting. */ if (IS_ENABLED(CONFIG_NF_DEFRAG_IPV4) && skb->protocol == htons(ETH_P_IP)) { struct brnf_frag_data *data; if (br_validate_ipv4(net, skb)) goto drop; IPCB(skb)->frag_max_size = nf_bridge->frag_max_size; local_lock_nested_bh(&brnf_frag_data_storage.bh_lock); data = this_cpu_ptr(&brnf_frag_data_storage); if (skb_vlan_tag_present(skb)) { data->vlan_tci = skb->vlan_tci; data->vlan_proto = skb->vlan_proto; } else { data->vlan_proto = 0; } data->encap_size = nf_bridge_encap_header_len(skb); data->size = ETH_HLEN + data->encap_size; skb_copy_from_linear_data_offset(skb, -data->size, data->mac, data->size); ret = br_nf_ip_fragment(net, sk, skb, br_nf_push_frag_xmit); local_unlock_nested_bh(&brnf_frag_data_storage.bh_lock); return ret; } if (IS_ENABLED(CONFIG_NF_DEFRAG_IPV6) && skb->protocol == htons(ETH_P_IPV6)) { const struct nf_ipv6_ops *v6ops = nf_get_ipv6_ops(); struct brnf_frag_data *data; if (br_validate_ipv6(net, skb)) goto drop; IP6CB(skb)->frag_max_size = nf_bridge->frag_max_size; local_lock_nested_bh(&brnf_frag_data_storage.bh_lock); data = this_cpu_ptr(&brnf_frag_data_storage); data->encap_size = nf_bridge_encap_header_len(skb); data->size = ETH_HLEN + data->encap_size; skb_copy_from_linear_data_offset(skb, -data->size, data->mac, data->size); if (v6ops) { ret = v6ops->fragment(net, sk, skb, br_nf_push_frag_xmit); local_unlock_nested_bh(&brnf_frag_data_storage.bh_lock); return ret; } local_unlock_nested_bh(&brnf_frag_data_storage.bh_lock); kfree_skb(skb); return -EMSGSIZE; } nf_bridge_info_free(skb); return br_dev_queue_push_xmit(net, sk, skb); drop: kfree_skb(skb); return 0; } /* PF_BRIDGE/POST_ROUTING ********************************************/ static unsigned int br_nf_post_routing(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb); struct net_device *realoutdev = bridge_parent(skb->dev); u_int8_t pf; /* if nf_bridge is set, but ->physoutdev is NULL, this packet came in * on a bridge, but was delivered locally and is now being routed: * * POST_ROUTING was already invoked from the ip stack. */ if (!nf_bridge || !nf_bridge->physoutdev) return NF_ACCEPT; if (!realoutdev) return NF_DROP_REASON(skb, SKB_DROP_REASON_DEV_READY, 0); if (IS_IP(skb) || is_vlan_ip(skb, state->net) || is_pppoe_ip(skb, state->net)) pf = NFPROTO_IPV4; else if (IS_IPV6(skb) || is_vlan_ipv6(skb, state->net) || is_pppoe_ipv6(skb, state->net)) pf = NFPROTO_IPV6; else return NF_ACCEPT; if (skb->pkt_type == PACKET_OTHERHOST) { skb->pkt_type = PACKET_HOST; nf_bridge->pkt_otherhost = true; } nf_bridge_pull_encap_header(skb); if (pf == NFPROTO_IPV4) skb->protocol = htons(ETH_P_IP); else skb->protocol = htons(ETH_P_IPV6); NF_HOOK(pf, NF_INET_POST_ROUTING, state->net, state->sk, skb, NULL, realoutdev, br_nf_dev_queue_xmit); return NF_STOLEN; } /* IP/SABOTAGE *****************************************************/ /* Don't hand locally destined packets to PF_INET(6)/PRE_ROUTING * for the second time. */ static unsigned int ip_sabotage_in(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb); if (nf_bridge) { if (nf_bridge->sabotage_in_done) return NF_ACCEPT; if (!nf_bridge->in_prerouting && !netif_is_l3_master(skb->dev) && !netif_is_l3_slave(skb->dev)) { nf_bridge->sabotage_in_done = 1; state->okfn(state->net, state->sk, skb); return NF_STOLEN; } } return NF_ACCEPT; } /* This is called when br_netfilter has called into iptables/netfilter, * and DNAT has taken place on a bridge-forwarded packet. * * neigh->output has created a new MAC header, with local br0 MAC * as saddr. * * This restores the original MAC saddr of the bridged packet * before invoking bridge forward logic to transmit the packet. */ static void br_nf_pre_routing_finish_bridge_slow(struct sk_buff *skb) { struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb); struct net_device *br_indev; br_indev = nf_bridge_get_physindev(skb, dev_net(skb->dev)); if (!br_indev) { kfree_skb(skb); return; } skb_pull(skb, ETH_HLEN); nf_bridge->bridged_dnat = 0; BUILD_BUG_ON(sizeof(nf_bridge->neigh_header) != (ETH_HLEN - ETH_ALEN)); skb_copy_to_linear_data_offset(skb, -(ETH_HLEN - ETH_ALEN), nf_bridge->neigh_header, ETH_HLEN - ETH_ALEN); skb->dev = br_indev; nf_bridge->physoutdev = NULL; br_handle_frame_finish(dev_net(skb->dev), NULL, skb); } static int br_nf_dev_xmit(struct sk_buff *skb) { const struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb); if (nf_bridge && nf_bridge->bridged_dnat) { br_nf_pre_routing_finish_bridge_slow(skb); return 1; } return 0; } static const struct nf_br_ops br_ops = { .br_dev_xmit_hook = br_nf_dev_xmit, }; /* For br_nf_post_routing, we need (prio = NF_BR_PRI_LAST), because * br_dev_queue_push_xmit is called afterwards */ static const struct nf_hook_ops br_nf_ops[] = { { .hook = br_nf_pre_routing, .pf = NFPROTO_BRIDGE, .hooknum = NF_BR_PRE_ROUTING, .priority = NF_BR_PRI_BRNF, }, #if IS_ENABLED(CONFIG_NF_CONNTRACK) { .hook = br_nf_local_in, .pf = NFPROTO_BRIDGE, .hooknum = NF_BR_LOCAL_IN, .priority = NF_BR_PRI_LAST, }, #endif { .hook = br_nf_forward, .pf = NFPROTO_BRIDGE, .hooknum = NF_BR_FORWARD, .priority = NF_BR_PRI_BRNF, }, { .hook = br_nf_post_routing, .pf = NFPROTO_BRIDGE, .hooknum = NF_BR_POST_ROUTING, .priority = NF_BR_PRI_LAST, }, { .hook = ip_sabotage_in, .pf = NFPROTO_IPV4, .hooknum = NF_INET_PRE_ROUTING, .priority = NF_IP_PRI_FIRST, }, { .hook = ip_sabotage_in, .pf = NFPROTO_IPV6, .hooknum = NF_INET_PRE_ROUTING, .priority = NF_IP6_PRI_FIRST, }, }; static int brnf_device_event(struct notifier_block *unused, unsigned long event, void *ptr) { struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct brnf_net *brnet; struct net *net; int ret; if (event != NETDEV_REGISTER || !netif_is_bridge_master(dev)) return NOTIFY_DONE; ASSERT_RTNL(); net = dev_net(dev); brnet = net_generic(net, brnf_net_id); if (brnet->enabled) return NOTIFY_OK; ret = nf_register_net_hooks(net, br_nf_ops, ARRAY_SIZE(br_nf_ops)); if (ret) return NOTIFY_BAD; brnet->enabled = true; return NOTIFY_OK; } static struct notifier_block brnf_notifier __read_mostly = { .notifier_call = brnf_device_event, }; /* recursively invokes nf_hook_slow (again), skipping already-called * hooks (< NF_BR_PRI_BRNF). * * Called with rcu read lock held. */ int br_nf_hook_thresh(unsigned int hook, struct net *net, struct sock *sk, struct sk_buff *skb, struct net_device *indev, struct net_device *outdev, int (*okfn)(struct net *, struct sock *, struct sk_buff *)) { const struct nf_hook_entries *e; struct nf_hook_state state; struct nf_hook_ops **ops; unsigned int i; int ret; e = rcu_dereference(net->nf.hooks_bridge[hook]); if (!e) return okfn(net, sk, skb); ops = nf_hook_entries_get_hook_ops(e); for (i = 0; i < e->num_hook_entries; i++) { /* These hooks have already been called */ if (ops[i]->priority < NF_BR_PRI_BRNF) continue; /* These hooks have not been called yet, run them. */ if (ops[i]->priority > NF_BR_PRI_BRNF) break; /* take a closer look at NF_BR_PRI_BRNF. */ if (ops[i]->hook == br_nf_pre_routing) { /* This hook diverted the skb to this function, * hooks after this have not been run yet. */ i++; break; } } nf_hook_state_init(&state, hook, NFPROTO_BRIDGE, indev, outdev, sk, net, okfn); ret = nf_hook_slow(skb, &state, e, i); if (ret == 1) ret = okfn(net, sk, skb); return ret; } #ifdef CONFIG_SYSCTL static int brnf_sysctl_call_tables(const struct ctl_table *ctl, int write, void *buffer, size_t *lenp, loff_t *ppos) { int ret; ret = proc_dointvec(ctl, write, buffer, lenp, ppos); if (write && *(int *)(ctl->data)) *(int *)(ctl->data) = 1; return ret; } static struct ctl_table brnf_table[] = { { .procname = "bridge-nf-call-arptables", .maxlen = sizeof(int), .mode = 0644, .proc_handler = brnf_sysctl_call_tables, }, { .procname = "bridge-nf-call-iptables", .maxlen = sizeof(int), .mode = 0644, .proc_handler = brnf_sysctl_call_tables, }, { .procname = "bridge-nf-call-ip6tables", .maxlen = sizeof(int), .mode = 0644, .proc_handler = brnf_sysctl_call_tables, }, { .procname = "bridge-nf-filter-vlan-tagged", .maxlen = sizeof(int), .mode = 0644, .proc_handler = brnf_sysctl_call_tables, }, { .procname = "bridge-nf-filter-pppoe-tagged", .maxlen = sizeof(int), .mode = 0644, .proc_handler = brnf_sysctl_call_tables, }, { .procname = "bridge-nf-pass-vlan-input-dev", .maxlen = sizeof(int), .mode = 0644, .proc_handler = brnf_sysctl_call_tables, }, }; static inline void br_netfilter_sysctl_default(struct brnf_net *brnf) { brnf->call_iptables = 1; brnf->call_ip6tables = 1; brnf->call_arptables = 1; brnf->filter_vlan_tagged = 0; brnf->filter_pppoe_tagged = 0; brnf->pass_vlan_indev = 0; } static int br_netfilter_sysctl_init_net(struct net *net) { struct ctl_table *table = brnf_table; struct brnf_net *brnet; if (!net_eq(net, &init_net)) { table = kmemdup(table, sizeof(brnf_table), GFP_KERNEL); if (!table) return -ENOMEM; } brnet = net_generic(net, brnf_net_id); table[0].data = &brnet->call_arptables; table[1].data = &brnet->call_iptables; table[2].data = &brnet->call_ip6tables; table[3].data = &brnet->filter_vlan_tagged; table[4].data = &brnet->filter_pppoe_tagged; table[5].data = &brnet->pass_vlan_indev; br_netfilter_sysctl_default(brnet); brnet->ctl_hdr = register_net_sysctl_sz(net, "net/bridge", table, ARRAY_SIZE(brnf_table)); if (!brnet->ctl_hdr) { if (!net_eq(net, &init_net)) kfree(table); return -ENOMEM; } return 0; } static void br_netfilter_sysctl_exit_net(struct net *net, struct brnf_net *brnet) { const struct ctl_table *table = brnet->ctl_hdr->ctl_table_arg; unregister_net_sysctl_table(brnet->ctl_hdr); if (!net_eq(net, &init_net)) kfree(table); } static int __net_init brnf_init_net(struct net *net) { return br_netfilter_sysctl_init_net(net); } #endif static void __net_exit brnf_exit_net(struct net *net) { struct brnf_net *brnet; brnet = net_generic(net, brnf_net_id); if (brnet->enabled) { nf_unregister_net_hooks(net, br_nf_ops, ARRAY_SIZE(br_nf_ops)); brnet->enabled = false; } #ifdef CONFIG_SYSCTL br_netfilter_sysctl_exit_net(net, brnet); #endif } static struct pernet_operations brnf_net_ops __read_mostly = { #ifdef CONFIG_SYSCTL .init = brnf_init_net, #endif .exit = brnf_exit_net, .id = &brnf_net_id, .size = sizeof(struct brnf_net), }; static int __init br_netfilter_init(void) { int ret; ret = register_pernet_subsys(&brnf_net_ops); if (ret < 0) return ret; ret = register_netdevice_notifier(&brnf_notifier); if (ret < 0) { unregister_pernet_subsys(&brnf_net_ops); return ret; } RCU_INIT_POINTER(nf_br_ops, &br_ops); printk(KERN_NOTICE "Bridge firewalling registered\n"); return 0; } static void __exit br_netfilter_fini(void) { RCU_INIT_POINTER(nf_br_ops, NULL); unregister_netdevice_notifier(&brnf_notifier); unregister_pernet_subsys(&brnf_net_ops); } module_init(br_netfilter_init); module_exit(br_netfilter_fini); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Lennert Buytenhek <buytenh@gnu.org>"); MODULE_AUTHOR("Bart De Schuymer <bdschuym@pandora.be>"); MODULE_DESCRIPTION("Linux ethernet netfilter firewall bridge");
4076 4095 144 553 237 48 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 /* SPDX-License-Identifier: GPL-2.0-only */ /* * fs/kernfs/kernfs-internal.h - kernfs internal header file * * Copyright (c) 2001-3 Patrick Mochel * Copyright (c) 2007 SUSE Linux Products GmbH * Copyright (c) 2007, 2013 Tejun Heo <teheo@suse.de> */ #ifndef __KERNFS_INTERNAL_H #define __KERNFS_INTERNAL_H #include <linux/lockdep.h> #include <linux/fs.h> #include <linux/mutex.h> #include <linux/rwsem.h> #include <linux/xattr.h> #include <linux/kernfs.h> #include <linux/fs_context.h> struct kernfs_iattrs { kuid_t ia_uid; kgid_t ia_gid; struct timespec64 ia_atime; struct timespec64 ia_mtime; struct timespec64 ia_ctime; struct simple_xattrs xattrs; atomic_t nr_user_xattrs; atomic_t user_xattr_size; }; struct kernfs_root { /* published fields */ struct kernfs_node *kn; unsigned int flags; /* KERNFS_ROOT_* flags */ /* private fields, do not use outside kernfs proper */ struct idr ino_idr; u32 last_id_lowbits; u32 id_highbits; struct kernfs_syscall_ops *syscall_ops; /* list of kernfs_super_info of this root, protected by kernfs_rwsem */ struct list_head supers; wait_queue_head_t deactivate_waitq; struct rw_semaphore kernfs_rwsem; struct rw_semaphore kernfs_iattr_rwsem; struct rw_semaphore kernfs_supers_rwsem; struct rcu_head rcu; }; /* +1 to avoid triggering overflow warning when negating it */ #define KN_DEACTIVATED_BIAS (INT_MIN + 1) /* KERNFS_TYPE_MASK and types are defined in include/linux/kernfs.h */ /** * kernfs_root - find out the kernfs_root a kernfs_node belongs to * @kn: kernfs_node of interest * * Return: the kernfs_root @kn belongs to. */ static inline struct kernfs_root *kernfs_root(struct kernfs_node *kn) { /* if parent exists, it's always a dir; otherwise, @sd is a dir */ if (kn->parent) kn = kn->parent; return kn->dir.root; } /* * mount.c */ struct kernfs_super_info { struct super_block *sb; /* * The root associated with this super_block. Each super_block is * identified by the root and ns it's associated with. */ struct kernfs_root *root; /* * Each sb is associated with one namespace tag, currently the * network namespace of the task which mounted this kernfs * instance. If multiple tags become necessary, make the following * an array and compare kernfs_node tag against every entry. */ const void *ns; /* anchored at kernfs_root->supers, protected by kernfs_rwsem */ struct list_head node; }; #define kernfs_info(SB) ((struct kernfs_super_info *)(SB->s_fs_info)) static inline struct kernfs_node *kernfs_dentry_node(struct dentry *dentry) { if (d_really_is_negative(dentry)) return NULL; return d_inode(dentry)->i_private; } static inline void kernfs_set_rev(struct kernfs_node *parent, struct dentry *dentry) { dentry->d_time = parent->dir.rev; } static inline void kernfs_inc_rev(struct kernfs_node *parent) { parent->dir.rev++; } static inline bool kernfs_dir_changed(struct kernfs_node *parent, struct dentry *dentry) { if (parent->dir.rev != dentry->d_time) return true; return false; } extern const struct super_operations kernfs_sops; extern struct kmem_cache *kernfs_node_cache, *kernfs_iattrs_cache; /* * inode.c */ extern const struct xattr_handler * const kernfs_xattr_handlers[]; void kernfs_evict_inode(struct inode *inode); int kernfs_iop_permission(struct mnt_idmap *idmap, struct inode *inode, int mask); int kernfs_iop_setattr(struct mnt_idmap *idmap, struct dentry *dentry, struct iattr *iattr); int kernfs_iop_getattr(struct mnt_idmap *idmap, const struct path *path, struct kstat *stat, u32 request_mask, unsigned int query_flags); ssize_t kernfs_iop_listxattr(struct dentry *dentry, char *buf, size_t size); int __kernfs_setattr(struct kernfs_node *kn, const struct iattr *iattr); /* * dir.c */ extern const struct dentry_operations kernfs_dops; extern const struct file_operations kernfs_dir_fops; extern const struct inode_operations kernfs_dir_iops; struct kernfs_node *kernfs_get_active(struct kernfs_node *kn); void kernfs_put_active(struct kernfs_node *kn); int kernfs_add_one(struct kernfs_node *kn); struct kernfs_node *kernfs_new_node(struct kernfs_node *parent, const char *name, umode_t mode, kuid_t uid, kgid_t gid, unsigned flags); /* * file.c */ extern const struct file_operations kernfs_file_fops; bool kernfs_should_drain_open_files(struct kernfs_node *kn); void kernfs_drain_open_files(struct kernfs_node *kn); /* * symlink.c */ extern const struct inode_operations kernfs_symlink_iops; /* * kernfs locks */ extern struct kernfs_global_locks *kernfs_locks; #endif /* __KERNFS_INTERNAL_H */
3419 3341 3316 3295 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _LINUX_BSEARCH_H #define _LINUX_BSEARCH_H #include <linux/types.h> static __always_inline void *__inline_bsearch(const void *key, const void *base, size_t num, size_t size, cmp_func_t cmp) { const char *pivot; int result; while (num > 0) { pivot = base + (num >> 1) * size; result = cmp(key, pivot); if (result == 0) return (void *)pivot; if (result > 0) { base = pivot + size; num--; } num >>= 1; } return NULL; } extern void *bsearch(const void *key, const void *base, size_t num, size_t size, cmp_func_t cmp); #endif /* _LINUX_BSEARCH_H */
8 8 8 8 8 8 8 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 // SPDX-License-Identifier: GPL-2.0+ #include <linux/errno.h> #include <linux/ioport.h> #include <linux/module.h> #include <linux/moduleparam.h> #include <linux/serial.h> #include <linux/serial_8250.h> #include "8250.h" #define PORT_RSA_MAX 4 static unsigned long probe_rsa[PORT_RSA_MAX]; static unsigned int probe_rsa_count; static int rsa8250_request_resource(struct uart_8250_port *up) { unsigned long start = UART_RSA_BASE << up->port.regshift; unsigned int size = 8 << up->port.regshift; struct uart_port *port = &up->port; int ret = -EINVAL; switch (port->iotype) { case UPIO_HUB6: case UPIO_PORT: start += port->iobase; if (request_region(start, size, "serial-rsa")) ret = 0; else ret = -EBUSY; break; } return ret; } static void rsa8250_release_resource(struct uart_8250_port *up) { unsigned long offset = UART_RSA_BASE << up->port.regshift; unsigned int size = 8 << up->port.regshift; struct uart_port *port = &up->port; switch (port->iotype) { case UPIO_HUB6: case UPIO_PORT: release_region(port->iobase + offset, size); break; } } static void univ8250_config_port(struct uart_port *port, int flags) { struct uart_8250_port *up = up_to_u8250p(port); unsigned int i; up->probe &= ~UART_PROBE_RSA; if (port->type == PORT_RSA) { if (rsa8250_request_resource(up) == 0) up->probe |= UART_PROBE_RSA; } else if (flags & UART_CONFIG_TYPE) { for (i = 0; i < probe_rsa_count; i++) { if (probe_rsa[i] == up->port.iobase) { if (rsa8250_request_resource(up) == 0) up->probe |= UART_PROBE_RSA; break; } } } univ8250_port_base_ops->config_port(port, flags); if (port->type != PORT_RSA && up->probe & UART_PROBE_RSA) rsa8250_release_resource(up); } static int univ8250_request_port(struct uart_port *port) { struct uart_8250_port *up = up_to_u8250p(port); int ret; ret = univ8250_port_base_ops->request_port(port); if (ret == 0 && port->type == PORT_RSA) { ret = rsa8250_request_resource(up); if (ret < 0) univ8250_port_base_ops->release_port(port); } return ret; } static void univ8250_release_port(struct uart_port *port) { struct uart_8250_port *up = up_to_u8250p(port); if (port->type == PORT_RSA) rsa8250_release_resource(up); univ8250_port_base_ops->release_port(port); } void univ8250_rsa_support(struct uart_ops *ops) { ops->config_port = univ8250_config_port; ops->request_port = univ8250_request_port; ops->release_port = univ8250_release_port; } module_param_hw_array(probe_rsa, ulong, ioport, &probe_rsa_count, 0444); MODULE_PARM_DESC(probe_rsa, "Probe I/O ports for RSA"); #ifdef CONFIG_SERIAL_8250_DEPRECATED_OPTIONS #ifndef MODULE /* * Keep the old "8250" name working as well for the module options so we don't * break people. We need to keep the names identical and the convenient macros * will happily refuse to let us do that by failing the build with redefinition * errors of global variables. So we stick them inside a dummy function to * avoid those conflicts. The options still get parsed, and the redefined * MODULE_PARAM_PREFIX lets us keep the "8250." syntax alive. * * This is hacky. I'm sorry. */ static void __used rsa8250_options(void) { #undef MODULE_PARAM_PREFIX #define MODULE_PARAM_PREFIX "8250_core." __module_param_call(MODULE_PARAM_PREFIX, probe_rsa, &param_array_ops, .arr = &__param_arr_probe_rsa, 0444, -1, 0); } #endif #endif
111 206 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 /* SPDX-License-Identifier: GPL-2.0 */ /* * win_minmax.h: windowed min/max tracker by Kathleen Nichols. * */ #ifndef MINMAX_H #define MINMAX_H #include <linux/types.h> /* A single data point for our parameterized min-max tracker */ struct minmax_sample { u32 t; /* time measurement was taken */ u32 v; /* value measured */ }; /* State for the parameterized min-max tracker */ struct minmax { struct minmax_sample s[3]; }; static inline u32 minmax_get(const struct minmax *m) { return m->s[0].v; } static inline u32 minmax_reset(struct minmax *m, u32 t, u32 meas) { struct minmax_sample val = { .t = t, .v = meas }; m->s[2] = m->s[1] = m->s[0] = val; return m->s[0].v; } u32 minmax_running_max(struct minmax *m, u32 win, u32 t, u32 meas); u32 minmax_running_min(struct minmax *m, u32 win, u32 t, u32 meas); #endif
15 15 15 7 14 13 12 12 8 8 5 8 12 12 7 7 12 21 20 1 19 19 10 19 15 21 1 37 32 8 8 8 4 4 4 4 4 4 4 19 15 4 30 3 3 1 2 1 38 31 3 30 28 27 25 24 24 24 23 5 24 1 23 19 21 10 8 38 38 1 3 3 3 1 2 38 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 // SPDX-License-Identifier: GPL-2.0 #include <linux/syscalls.h> #include <linux/slab.h> #include <linux/fs.h> #include <linux/file.h> #include <linux/mount.h> #include <linux/namei.h> #include <linux/exportfs.h> #include <linux/fs_struct.h> #include <linux/fsnotify.h> #include <linux/personality.h> #include <linux/uaccess.h> #include <linux/compat.h> #include "internal.h" #include "mount.h" static long do_sys_name_to_handle(const struct path *path, struct file_handle __user *ufh, void __user *mnt_id, bool unique_mntid, int fh_flags) { long retval; struct file_handle f_handle; int handle_dwords, handle_bytes; struct file_handle *handle = NULL; /* * We need to make sure whether the file system support decoding of * the file handle if decodeable file handle was requested. */ if (!exportfs_can_encode_fh(path->dentry->d_sb->s_export_op, fh_flags)) return -EOPNOTSUPP; /* * A request to encode a connectable handle for a disconnected dentry * is unexpected since AT_EMPTY_PATH is not allowed. */ if (fh_flags & EXPORT_FH_CONNECTABLE && WARN_ON(path->dentry->d_flags & DCACHE_DISCONNECTED)) return -EINVAL; if (copy_from_user(&f_handle, ufh, sizeof(struct file_handle))) return -EFAULT; if (f_handle.handle_bytes > MAX_HANDLE_SZ) return -EINVAL; handle = kzalloc(struct_size(handle, f_handle, f_handle.handle_bytes), GFP_KERNEL); if (!handle) return -ENOMEM; /* convert handle size to multiple of sizeof(u32) */ handle_dwords = f_handle.handle_bytes >> 2; /* Encode a possibly decodeable/connectable file handle */ retval = exportfs_encode_fh(path->dentry, (struct fid *)handle->f_handle, &handle_dwords, fh_flags); handle->handle_type = retval; /* convert handle size to bytes */ handle_bytes = handle_dwords * sizeof(u32); handle->handle_bytes = handle_bytes; if ((handle->handle_bytes > f_handle.handle_bytes) || (retval == FILEID_INVALID) || (retval < 0)) { /* As per old exportfs_encode_fh documentation * we could return ENOSPC to indicate overflow * But file system returned 255 always. So handle * both the values */ if (retval == FILEID_INVALID || retval == -ENOSPC) retval = -EOVERFLOW; /* * set the handle size to zero so we copy only * non variable part of the file_handle */ handle_bytes = 0; } else { /* * When asked to encode a connectable file handle, encode this * property in the file handle itself, so that we later know * how to decode it. * For sanity, also encode in the file handle if the encoded * object is a directory and verify this during decode, because * decoding directory file handles is quite different than * decoding connectable non-directory file handles. */ if (fh_flags & EXPORT_FH_CONNECTABLE) { handle->handle_type |= FILEID_IS_CONNECTABLE; if (d_is_dir(path->dentry)) fh_flags |= FILEID_IS_DIR; } retval = 0; } /* copy the mount id */ if (unique_mntid) { if (put_user(real_mount(path->mnt)->mnt_id_unique, (u64 __user *) mnt_id)) retval = -EFAULT; } else { if (put_user(real_mount(path->mnt)->mnt_id, (int __user *) mnt_id)) retval = -EFAULT; } /* copy the handle */ if (retval != -EFAULT && copy_to_user(ufh, handle, struct_size(handle, f_handle, handle_bytes))) retval = -EFAULT; kfree(handle); return retval; } /** * sys_name_to_handle_at: convert name to handle * @dfd: directory relative to which name is interpreted if not absolute * @name: name that should be converted to handle. * @handle: resulting file handle * @mnt_id: mount id of the file system containing the file * (u64 if AT_HANDLE_MNT_ID_UNIQUE, otherwise int) * @flag: flag value to indicate whether to follow symlink or not * and whether a decodable file handle is required. * * @handle->handle_size indicate the space available to store the * variable part of the file handle in bytes. If there is not * enough space, the field is updated to return the minimum * value required. */ SYSCALL_DEFINE5(name_to_handle_at, int, dfd, const char __user *, name, struct file_handle __user *, handle, void __user *, mnt_id, int, flag) { struct path path; int lookup_flags; int fh_flags = 0; int err; if (flag & ~(AT_SYMLINK_FOLLOW | AT_EMPTY_PATH | AT_HANDLE_FID | AT_HANDLE_MNT_ID_UNIQUE | AT_HANDLE_CONNECTABLE)) return -EINVAL; /* * AT_HANDLE_FID means there is no intention to decode file handle * AT_HANDLE_CONNECTABLE means there is an intention to decode a * connected fd (with known path), so these flags are conflicting. * AT_EMPTY_PATH could be used along with a dfd that refers to a * disconnected non-directory, which cannot be used to encode a * connectable file handle, because its parent is unknown. */ if (flag & AT_HANDLE_CONNECTABLE && flag & (AT_HANDLE_FID | AT_EMPTY_PATH)) return -EINVAL; else if (flag & AT_HANDLE_FID) fh_flags |= EXPORT_FH_FID; else if (flag & AT_HANDLE_CONNECTABLE) fh_flags |= EXPORT_FH_CONNECTABLE; lookup_flags = (flag & AT_SYMLINK_FOLLOW) ? LOOKUP_FOLLOW : 0; if (flag & AT_EMPTY_PATH) lookup_flags |= LOOKUP_EMPTY; err = user_path_at(dfd, name, lookup_flags, &path); if (!err) { err = do_sys_name_to_handle(&path, handle, mnt_id, flag & AT_HANDLE_MNT_ID_UNIQUE, fh_flags); path_put(&path); } return err; } static int get_path_from_fd(int fd, struct path *root) { if (fd == AT_FDCWD) { struct fs_struct *fs = current->fs; spin_lock(&fs->lock); *root = fs->pwd; path_get(root); spin_unlock(&fs->lock); } else { CLASS(fd, f)(fd); if (fd_empty(f)) return -EBADF; *root = fd_file(f)->f_path; path_get(root); } return 0; } static int vfs_dentry_acceptable(void *context, struct dentry *dentry) { struct handle_to_path_ctx *ctx = context; struct user_namespace *user_ns = current_user_ns(); struct dentry *d, *root = ctx->root.dentry; struct mnt_idmap *idmap = mnt_idmap(ctx->root.mnt); int retval = 0; if (!root) return 1; /* Old permission model with global CAP_DAC_READ_SEARCH. */ if (!ctx->flags) return 1; /* * It's racy as we're not taking rename_lock but we're able to ignore * permissions and we just need an approximation whether we were able * to follow a path to the file. * * It's also potentially expensive on some filesystems especially if * there is a deep path. */ d = dget(dentry); while (d != root && !IS_ROOT(d)) { struct dentry *parent = dget_parent(d); /* * We know that we have the ability to override DAC permissions * as we've verified this earlier via CAP_DAC_READ_SEARCH. But * we also need to make sure that there aren't any unmapped * inodes in the path that would prevent us from reaching the * file. */ if (!privileged_wrt_inode_uidgid(user_ns, idmap, d_inode(parent))) { dput(d); dput(parent); return retval; } dput(d); d = parent; } if (!(ctx->flags & HANDLE_CHECK_SUBTREE) || d == root) retval = 1; /* * exportfs_decode_fh_raw() does not call acceptable() callback with * a disconnected directory dentry, so we should have reached either * mount fd directory or sb root. */ if (ctx->fh_flags & EXPORT_FH_DIR_ONLY) WARN_ON_ONCE(d != root && d != root->d_sb->s_root); dput(d); return retval; } static int do_handle_to_path(struct file_handle *handle, struct path *path, struct handle_to_path_ctx *ctx) { int handle_dwords; struct vfsmount *mnt = ctx->root.mnt; struct dentry *dentry; /* change the handle size to multiple of sizeof(u32) */ handle_dwords = handle->handle_bytes >> 2; dentry = exportfs_decode_fh_raw(mnt, (struct fid *)handle->f_handle, handle_dwords, handle->handle_type, ctx->fh_flags, vfs_dentry_acceptable, ctx); if (IS_ERR_OR_NULL(dentry)) { if (dentry == ERR_PTR(-ENOMEM)) return -ENOMEM; return -ESTALE; } path->dentry = dentry; path->mnt = mntget(mnt); return 0; } static inline int may_decode_fh(struct handle_to_path_ctx *ctx, unsigned int o_flags) { struct path *root = &ctx->root; if (capable(CAP_DAC_READ_SEARCH)) return 0; /* * Allow relaxed permissions of file handles if the caller has * the ability to mount the filesystem or create a bind-mount of * the provided @mountdirfd. * * In both cases the caller may be able to get an unobstructed * way to the encoded file handle. If the caller is only able to * create a bind-mount we need to verify that there are no * locked mounts on top of it that could prevent us from getting * to the encoded file. * * In principle, locked mounts can prevent the caller from * mounting the filesystem but that only applies to procfs and * sysfs neither of which support decoding file handles. * * Restrict to O_DIRECTORY to provide a deterministic API that * avoids a confusing api in the face of disconnected non-dir * dentries. * * There's only one dentry for each directory inode (VFS rule)... */ if (!(o_flags & O_DIRECTORY)) return -EPERM; if (ns_capable(root->mnt->mnt_sb->s_user_ns, CAP_SYS_ADMIN)) ctx->flags = HANDLE_CHECK_PERMS; else if (is_mounted(root->mnt) && ns_capable(real_mount(root->mnt)->mnt_ns->user_ns, CAP_SYS_ADMIN) && !has_locked_children(real_mount(root->mnt), root->dentry)) ctx->flags = HANDLE_CHECK_PERMS | HANDLE_CHECK_SUBTREE; else return -EPERM; /* Are we able to override DAC permissions? */ if (!ns_capable(current_user_ns(), CAP_DAC_READ_SEARCH)) return -EPERM; ctx->fh_flags = EXPORT_FH_DIR_ONLY; return 0; } static int handle_to_path(int mountdirfd, struct file_handle __user *ufh, struct path *path, unsigned int o_flags) { int retval = 0; struct file_handle f_handle; struct file_handle *handle = NULL; struct handle_to_path_ctx ctx = {}; const struct export_operations *eops; retval = get_path_from_fd(mountdirfd, &ctx.root); if (retval) goto out_err; eops = ctx.root.mnt->mnt_sb->s_export_op; if (eops && eops->permission) retval = eops->permission(&ctx, o_flags); else retval = may_decode_fh(&ctx, o_flags); if (retval) goto out_path; if (copy_from_user(&f_handle, ufh, sizeof(struct file_handle))) { retval = -EFAULT; goto out_path; } if ((f_handle.handle_bytes > MAX_HANDLE_SZ) || (f_handle.handle_bytes == 0)) { retval = -EINVAL; goto out_path; } if (f_handle.handle_type < 0 || FILEID_USER_FLAGS(f_handle.handle_type) & ~FILEID_VALID_USER_FLAGS) { retval = -EINVAL; goto out_path; } handle = kmalloc(struct_size(handle, f_handle, f_handle.handle_bytes), GFP_KERNEL); if (!handle) { retval = -ENOMEM; goto out_path; } /* copy the full handle */ *handle = f_handle; if (copy_from_user(&handle->f_handle, &ufh->f_handle, f_handle.handle_bytes)) { retval = -EFAULT; goto out_handle; } /* * If handle was encoded with AT_HANDLE_CONNECTABLE, verify that we * are decoding an fd with connected path, which is accessible from * the mount fd path. */ if (f_handle.handle_type & FILEID_IS_CONNECTABLE) { ctx.fh_flags |= EXPORT_FH_CONNECTABLE; ctx.flags |= HANDLE_CHECK_SUBTREE; } if (f_handle.handle_type & FILEID_IS_DIR) ctx.fh_flags |= EXPORT_FH_DIR_ONLY; /* Filesystem code should not be exposed to user flags */ handle->handle_type &= ~FILEID_USER_FLAGS_MASK; retval = do_handle_to_path(handle, path, &ctx); out_handle: kfree(handle); out_path: path_put(&ctx.root); out_err: return retval; } static long do_handle_open(int mountdirfd, struct file_handle __user *ufh, int open_flag) { long retval = 0; struct path path __free(path_put) = {}; struct file *file; const struct export_operations *eops; retval = handle_to_path(mountdirfd, ufh, &path, open_flag); if (retval) return retval; CLASS(get_unused_fd, fd)(O_CLOEXEC); if (fd < 0) return fd; eops = path.mnt->mnt_sb->s_export_op; if (eops->open) file = eops->open(&path, open_flag); else file = file_open_root(&path, "", open_flag, 0); if (IS_ERR(file)) return PTR_ERR(file); fd_install(fd, file); return take_fd(fd); } /** * sys_open_by_handle_at: Open the file handle * @mountdirfd: directory file descriptor * @handle: file handle to be opened * @flags: open flags. * * @mountdirfd indicate the directory file descriptor * of the mount point. file handle is decoded relative * to the vfsmount pointed by the @mountdirfd. @flags * value is same as the open(2) flags. */ SYSCALL_DEFINE3(open_by_handle_at, int, mountdirfd, struct file_handle __user *, handle, int, flags) { long ret; if (force_o_largefile()) flags |= O_LARGEFILE; ret = do_handle_open(mountdirfd, handle, flags); return ret; } #ifdef CONFIG_COMPAT /* * Exactly like fs/open.c:sys_open_by_handle_at(), except that it * doesn't set the O_LARGEFILE flag. */ COMPAT_SYSCALL_DEFINE3(open_by_handle_at, int, mountdirfd, struct file_handle __user *, handle, int, flags) { return do_handle_open(mountdirfd, handle, flags); } #endif
24 2 24 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 // SPDX-License-Identifier: GPL-2.0-or-later /* * Handle bridge arp/nd proxy/suppress * * Copyright (C) 2017 Cumulus Networks * Copyright (c) 2017 Roopa Prabhu <roopa@cumulusnetworks.com> * * Authors: * Roopa Prabhu <roopa@cumulusnetworks.com> */ #include <linux/kernel.h> #include <linux/netdevice.h> #include <linux/etherdevice.h> #include <linux/neighbour.h> #include <net/arp.h> #include <linux/if_vlan.h> #include <linux/inetdevice.h> #include <net/addrconf.h> #include <net/ipv6_stubs.h> #if IS_ENABLED(CONFIG_IPV6) #include <net/ip6_checksum.h> #endif #include "br_private.h" void br_recalculate_neigh_suppress_enabled(struct net_bridge *br) { struct net_bridge_port *p; bool neigh_suppress = false; list_for_each_entry(p, &br->port_list, list) { if (p->flags & (BR_NEIGH_SUPPRESS | BR_NEIGH_VLAN_SUPPRESS)) { neigh_suppress = true; break; } } br_opt_toggle(br, BROPT_NEIGH_SUPPRESS_ENABLED, neigh_suppress); } #if IS_ENABLED(CONFIG_INET) static void br_arp_send(struct net_bridge *br, struct net_bridge_port *p, struct net_device *dev, __be32 dest_ip, __be32 src_ip, const unsigned char *dest_hw, const unsigned char *src_hw, const unsigned char *target_hw, __be16 vlan_proto, u16 vlan_tci) { struct net_bridge_vlan_group *vg; struct sk_buff *skb; u16 pvid; netdev_dbg(dev, "arp send dev %s dst %pI4 dst_hw %pM src %pI4 src_hw %pM\n", dev->name, &dest_ip, dest_hw, &src_ip, src_hw); if (!vlan_tci) { arp_send(ARPOP_REPLY, ETH_P_ARP, dest_ip, dev, src_ip, dest_hw, src_hw, target_hw); return; } skb = arp_create(ARPOP_REPLY, ETH_P_ARP, dest_ip, dev, src_ip, dest_hw, src_hw, target_hw); if (!skb) return; if (p) vg = nbp_vlan_group_rcu(p); else vg = br_vlan_group_rcu(br); pvid = br_get_pvid(vg); if (pvid == (vlan_tci & VLAN_VID_MASK)) vlan_tci = 0; if (vlan_tci) __vlan_hwaccel_put_tag(skb, vlan_proto, vlan_tci); if (p) { arp_xmit(skb); } else { skb_reset_mac_header(skb); __skb_pull(skb, skb_network_offset(skb)); skb->ip_summed = CHECKSUM_UNNECESSARY; skb->pkt_type = PACKET_HOST; netif_rx(skb); } } static int br_chk_addr_ip(struct net_device *dev, struct netdev_nested_priv *priv) { __be32 ip = *(__be32 *)priv->data; struct in_device *in_dev; __be32 addr = 0; in_dev = __in_dev_get_rcu(dev); if (in_dev) addr = inet_confirm_addr(dev_net(dev), in_dev, 0, ip, RT_SCOPE_HOST); if (addr == ip) return 1; return 0; } static bool br_is_local_ip(struct net_device *dev, __be32 ip) { struct netdev_nested_priv priv = { .data = (void *)&ip, }; if (br_chk_addr_ip(dev, &priv)) return true; /* check if ip is configured on upper dev */ if (netdev_walk_all_upper_dev_rcu(dev, br_chk_addr_ip, &priv)) return true; return false; } void br_do_proxy_suppress_arp(struct sk_buff *skb, struct net_bridge *br, u16 vid, struct net_bridge_port *p) { struct net_device *dev = br->dev; struct net_device *vlandev = dev; struct neighbour *n; struct arphdr *parp; u8 *arpptr, *sha; __be32 sip, tip; BR_INPUT_SKB_CB(skb)->proxyarp_replied = 0; if ((dev->flags & IFF_NOARP) || !pskb_may_pull(skb, arp_hdr_len(dev))) return; parp = arp_hdr(skb); if (parp->ar_pro != htons(ETH_P_IP) || parp->ar_hln != dev->addr_len || parp->ar_pln != 4) return; arpptr = (u8 *)parp + sizeof(struct arphdr); sha = arpptr; arpptr += dev->addr_len; /* sha */ memcpy(&sip, arpptr, sizeof(sip)); arpptr += sizeof(sip); arpptr += dev->addr_len; /* tha */ memcpy(&tip, arpptr, sizeof(tip)); if (ipv4_is_loopback(tip) || ipv4_is_multicast(tip)) return; if (br_opt_get(br, BROPT_NEIGH_SUPPRESS_ENABLED)) { if (br_is_neigh_suppress_enabled(p, vid)) return; if (parp->ar_op != htons(ARPOP_RREQUEST) && parp->ar_op != htons(ARPOP_RREPLY) && (ipv4_is_zeronet(sip) || sip == tip)) { /* prevent flooding to neigh suppress ports */ BR_INPUT_SKB_CB(skb)->proxyarp_replied = 1; return; } } if (parp->ar_op != htons(ARPOP_REQUEST)) return; if (vid != 0) { vlandev = __vlan_find_dev_deep_rcu(br->dev, skb->vlan_proto, vid); if (!vlandev) return; } if (br_opt_get(br, BROPT_NEIGH_SUPPRESS_ENABLED) && br_is_local_ip(vlandev, tip)) { /* its our local ip, so don't proxy reply * and don't forward to neigh suppress ports */ BR_INPUT_SKB_CB(skb)->proxyarp_replied = 1; return; } n = neigh_lookup(&arp_tbl, &tip, vlandev); if (n) { struct net_bridge_fdb_entry *f; if (!(READ_ONCE(n->nud_state) & NUD_VALID)) { neigh_release(n); return; } f = br_fdb_find_rcu(br, n->ha, vid); if (f) { bool replied = false; if ((p && (p->flags & BR_PROXYARP)) || (f->dst && (f->dst->flags & BR_PROXYARP_WIFI)) || br_is_neigh_suppress_enabled(f->dst, vid)) { if (!vid) br_arp_send(br, p, skb->dev, sip, tip, sha, n->ha, sha, 0, 0); else br_arp_send(br, p, skb->dev, sip, tip, sha, n->ha, sha, skb->vlan_proto, skb_vlan_tag_get(skb)); replied = true; } /* If we have replied or as long as we know the * mac, indicate to arp replied */ if (replied || br_opt_get(br, BROPT_NEIGH_SUPPRESS_ENABLED)) BR_INPUT_SKB_CB(skb)->proxyarp_replied = 1; } neigh_release(n); } } #endif #if IS_ENABLED(CONFIG_IPV6) struct nd_msg *br_is_nd_neigh_msg(const struct sk_buff *skb, struct nd_msg *msg) { struct nd_msg *m; m = skb_header_pointer(skb, skb_network_offset(skb) + sizeof(struct ipv6hdr), sizeof(*msg), msg); if (!m) return NULL; if (m->icmph.icmp6_code != 0 || (m->icmph.icmp6_type != NDISC_NEIGHBOUR_SOLICITATION && m->icmph.icmp6_type != NDISC_NEIGHBOUR_ADVERTISEMENT)) return NULL; return m; } static void br_nd_send(struct net_bridge *br, struct net_bridge_port *p, struct sk_buff *request, struct neighbour *n, __be16 vlan_proto, u16 vlan_tci, struct nd_msg *ns) { struct net_device *dev = request->dev; struct net_bridge_vlan_group *vg; struct sk_buff *reply; struct nd_msg *na; struct ipv6hdr *pip6; int na_olen = 8; /* opt hdr + ETH_ALEN for target */ int ns_olen; int i, len; u8 *daddr; u16 pvid; if (!dev) return; len = LL_RESERVED_SPACE(dev) + sizeof(struct ipv6hdr) + sizeof(*na) + na_olen + dev->needed_tailroom; reply = alloc_skb(len, GFP_ATOMIC); if (!reply) return; reply->protocol = htons(ETH_P_IPV6); reply->dev = dev; skb_reserve(reply, LL_RESERVED_SPACE(dev)); skb_push(reply, sizeof(struct ethhdr)); skb_set_mac_header(reply, 0); daddr = eth_hdr(request)->h_source; /* Do we need option processing ? */ ns_olen = request->len - (skb_network_offset(request) + sizeof(struct ipv6hdr)) - sizeof(*ns); for (i = 0; i < ns_olen - 1; i += (ns->opt[i + 1] << 3)) { if (!ns->opt[i + 1]) { kfree_skb(reply); return; } if (ns->opt[i] == ND_OPT_SOURCE_LL_ADDR) { daddr = ns->opt + i + sizeof(struct nd_opt_hdr); break; } } /* Ethernet header */ ether_addr_copy(eth_hdr(reply)->h_dest, daddr); ether_addr_copy(eth_hdr(reply)->h_source, n->ha); eth_hdr(reply)->h_proto = htons(ETH_P_IPV6); reply->protocol = htons(ETH_P_IPV6); skb_pull(reply, sizeof(struct ethhdr)); skb_set_network_header(reply, 0); skb_put(reply, sizeof(struct ipv6hdr)); /* IPv6 header */ pip6 = ipv6_hdr(reply); memset(pip6, 0, sizeof(struct ipv6hdr)); pip6->version = 6; pip6->priority = ipv6_hdr(request)->priority; pip6->nexthdr = IPPROTO_ICMPV6; pip6->hop_limit = 255; pip6->daddr = ipv6_hdr(request)->saddr; pip6->saddr = *(struct in6_addr *)n->primary_key; skb_pull(reply, sizeof(struct ipv6hdr)); skb_set_transport_header(reply, 0); na = (struct nd_msg *)skb_put(reply, sizeof(*na) + na_olen); /* Neighbor Advertisement */ memset(na, 0, sizeof(*na) + na_olen); na->icmph.icmp6_type = NDISC_NEIGHBOUR_ADVERTISEMENT; na->icmph.icmp6_router = (n->flags & NTF_ROUTER) ? 1 : 0; na->icmph.icmp6_override = 1; na->icmph.icmp6_solicited = 1; na->target = ns->target; ether_addr_copy(&na->opt[2], n->ha); na->opt[0] = ND_OPT_TARGET_LL_ADDR; na->opt[1] = na_olen >> 3; na->icmph.icmp6_cksum = csum_ipv6_magic(&pip6->saddr, &pip6->daddr, sizeof(*na) + na_olen, IPPROTO_ICMPV6, csum_partial(na, sizeof(*na) + na_olen, 0)); pip6->payload_len = htons(sizeof(*na) + na_olen); skb_push(reply, sizeof(struct ipv6hdr)); skb_push(reply, sizeof(struct ethhdr)); reply->ip_summed = CHECKSUM_UNNECESSARY; if (p) vg = nbp_vlan_group_rcu(p); else vg = br_vlan_group_rcu(br); pvid = br_get_pvid(vg); if (pvid == (vlan_tci & VLAN_VID_MASK)) vlan_tci = 0; if (vlan_tci) __vlan_hwaccel_put_tag(reply, vlan_proto, vlan_tci); netdev_dbg(dev, "nd send dev %s dst %pI6 dst_hw %pM src %pI6 src_hw %pM\n", dev->name, &pip6->daddr, daddr, &pip6->saddr, n->ha); if (p) { dev_queue_xmit(reply); } else { skb_reset_mac_header(reply); __skb_pull(reply, skb_network_offset(reply)); reply->ip_summed = CHECKSUM_UNNECESSARY; reply->pkt_type = PACKET_HOST; netif_rx(reply); } } static int br_chk_addr_ip6(struct net_device *dev, struct netdev_nested_priv *priv) { struct in6_addr *addr = (struct in6_addr *)priv->data; if (ipv6_chk_addr(dev_net(dev), addr, dev, 0)) return 1; return 0; } static bool br_is_local_ip6(struct net_device *dev, struct in6_addr *addr) { struct netdev_nested_priv priv = { .data = (void *)addr, }; if (br_chk_addr_ip6(dev, &priv)) return true; /* check if ip is configured on upper dev */ if (netdev_walk_all_upper_dev_rcu(dev, br_chk_addr_ip6, &priv)) return true; return false; } void br_do_suppress_nd(struct sk_buff *skb, struct net_bridge *br, u16 vid, struct net_bridge_port *p, struct nd_msg *msg) { struct net_device *dev = br->dev; struct net_device *vlandev = NULL; struct in6_addr *saddr, *daddr; struct ipv6hdr *iphdr; struct neighbour *n; BR_INPUT_SKB_CB(skb)->proxyarp_replied = 0; if (br_is_neigh_suppress_enabled(p, vid)) return; if (msg->icmph.icmp6_type == NDISC_NEIGHBOUR_ADVERTISEMENT && !msg->icmph.icmp6_solicited) { /* prevent flooding to neigh suppress ports */ BR_INPUT_SKB_CB(skb)->proxyarp_replied = 1; return; } if (msg->icmph.icmp6_type != NDISC_NEIGHBOUR_SOLICITATION) return; iphdr = ipv6_hdr(skb); saddr = &iphdr->saddr; daddr = &iphdr->daddr; if (ipv6_addr_any(saddr) || !ipv6_addr_cmp(saddr, daddr)) { /* prevent flooding to neigh suppress ports */ BR_INPUT_SKB_CB(skb)->proxyarp_replied = 1; return; } if (vid != 0) { /* build neigh table lookup on the vlan device */ vlandev = __vlan_find_dev_deep_rcu(br->dev, skb->vlan_proto, vid); if (!vlandev) return; } else { vlandev = dev; } if (br_is_local_ip6(vlandev, &msg->target)) { /* its our own ip, so don't proxy reply * and don't forward to arp suppress ports */ BR_INPUT_SKB_CB(skb)->proxyarp_replied = 1; return; } n = neigh_lookup(ipv6_stub->nd_tbl, &msg->target, vlandev); if (n) { struct net_bridge_fdb_entry *f; if (!(READ_ONCE(n->nud_state) & NUD_VALID)) { neigh_release(n); return; } f = br_fdb_find_rcu(br, n->ha, vid); if (f) { bool replied = false; if (br_is_neigh_suppress_enabled(f->dst, vid)) { if (vid != 0) br_nd_send(br, p, skb, n, skb->vlan_proto, skb_vlan_tag_get(skb), msg); else br_nd_send(br, p, skb, n, 0, 0, msg); replied = true; } /* If we have replied or as long as we know the * mac, indicate to NEIGH_SUPPRESS ports that we * have replied */ if (replied || br_opt_get(br, BROPT_NEIGH_SUPPRESS_ENABLED)) BR_INPUT_SKB_CB(skb)->proxyarp_replied = 1; } neigh_release(n); } } #endif bool br_is_neigh_suppress_enabled(const struct net_bridge_port *p, u16 vid) { if (!p) return false; if (!vid) return !!(p->flags & BR_NEIGH_SUPPRESS); if (p->flags & BR_NEIGH_VLAN_SUPPRESS) { struct net_bridge_vlan_group *vg = nbp_vlan_group_rcu(p); struct net_bridge_vlan *v; v = br_vlan_find(vg, vid); if (!v) return false; return !!(v->priv_flags & BR_VLFLAG_NEIGH_SUPPRESS_ENABLED); } else { return !!(p->flags & BR_NEIGH_SUPPRESS); } }
31 13 26 6 12 1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 #ifndef LLC_PDU_H #define LLC_PDU_H /* * Copyright (c) 1997 by Procom Technology,Inc. * 2001-2003 by Arnaldo Carvalho de Melo <acme@conectiva.com.br> * * This program can be redistributed or modified under the terms of the * GNU General Public License as published by the Free Software Foundation. * This program is distributed without any warranty or implied warranty * of merchantability or fitness for a particular purpose. * * See the GNU General Public License for more details. */ #include <linux/if_ether.h> /* Lengths of frame formats */ #define LLC_PDU_LEN_I 4 /* header and 2 control bytes */ #define LLC_PDU_LEN_S 4 #define LLC_PDU_LEN_U 3 /* header and 1 control byte */ /* header and 1 control byte and XID info */ #define LLC_PDU_LEN_U_XID (LLC_PDU_LEN_U + sizeof(struct llc_xid_info)) /* Known SAP addresses */ #define LLC_GLOBAL_SAP 0xFF #define LLC_NULL_SAP 0x00 /* not network-layer visible */ #define LLC_MGMT_INDIV 0x02 /* station LLC mgmt indiv addr */ #define LLC_MGMT_GRP 0x03 /* station LLC mgmt group addr */ #define LLC_RDE_SAP 0xA6 /* route ... */ /* SAP field bit masks */ #define LLC_ISO_RESERVED_SAP 0x02 #define LLC_SAP_GROUP_DSAP 0x01 #define LLC_SAP_RESP_SSAP 0x01 /* Group/individual DSAP indicator is DSAP field */ #define LLC_PDU_GROUP_DSAP_MASK 0x01 #define LLC_PDU_IS_GROUP_DSAP(pdu) \ ((pdu->dsap & LLC_PDU_GROUP_DSAP_MASK) ? 0 : 1) #define LLC_PDU_IS_INDIV_DSAP(pdu) \ (!(pdu->dsap & LLC_PDU_GROUP_DSAP_MASK) ? 0 : 1) /* Command/response PDU indicator in SSAP field */ #define LLC_PDU_CMD_RSP_MASK 0x01 #define LLC_PDU_CMD 0 #define LLC_PDU_RSP 1 #define LLC_PDU_IS_CMD(pdu) ((pdu->ssap & LLC_PDU_RSP) ? 0 : 1) #define LLC_PDU_IS_RSP(pdu) ((pdu->ssap & LLC_PDU_RSP) ? 1 : 0) /* Get PDU type from 2 lowest-order bits of control field first byte */ #define LLC_PDU_TYPE_I_MASK 0x01 /* 16-bit control field */ #define LLC_PDU_TYPE_S_MASK 0x03 #define LLC_PDU_TYPE_U_MASK 0x03 /* 8-bit control field */ #define LLC_PDU_TYPE_MASK 0x03 #define LLC_PDU_TYPE_I 0 /* first bit */ #define LLC_PDU_TYPE_S 1 /* first two bits */ #define LLC_PDU_TYPE_U 3 /* first two bits */ #define LLC_PDU_TYPE_U_XID 4 /* private type for detecting XID commands */ #define LLC_PDU_TYPE_IS_I(pdu) \ ((!(pdu->ctrl_1 & LLC_PDU_TYPE_I_MASK)) ? 1 : 0) #define LLC_PDU_TYPE_IS_U(pdu) \ (((pdu->ctrl_1 & LLC_PDU_TYPE_U_MASK) == LLC_PDU_TYPE_U) ? 1 : 0) #define LLC_PDU_TYPE_IS_S(pdu) \ (((pdu->ctrl_1 & LLC_PDU_TYPE_S_MASK) == LLC_PDU_TYPE_S) ? 1 : 0) /* U-format PDU control field masks */ #define LLC_U_PF_BIT_MASK 0x10 /* P/F bit mask */ #define LLC_U_PF_IS_1(pdu) ((pdu->ctrl_1 & LLC_U_PF_BIT_MASK) ? 1 : 0) #define LLC_U_PF_IS_0(pdu) ((!(pdu->ctrl_1 & LLC_U_PF_BIT_MASK)) ? 1 : 0) #define LLC_U_PDU_CMD_MASK 0xEC /* cmd/rsp mask */ #define LLC_U_PDU_CMD(pdu) (pdu->ctrl_1 & LLC_U_PDU_CMD_MASK) #define LLC_U_PDU_RSP(pdu) (pdu->ctrl_1 & LLC_U_PDU_CMD_MASK) #define LLC_1_PDU_CMD_UI 0x00 /* Type 1 cmds/rsps */ #define LLC_1_PDU_CMD_XID 0xAC #define LLC_1_PDU_CMD_TEST 0xE0 #define LLC_2_PDU_CMD_SABME 0x6C /* Type 2 cmds/rsps */ #define LLC_2_PDU_CMD_DISC 0x40 #define LLC_2_PDU_RSP_UA 0x60 #define LLC_2_PDU_RSP_DM 0x0C #define LLC_2_PDU_RSP_FRMR 0x84 /* Type 1 operations */ /* XID information field bit masks */ /* LLC format identifier (byte 1) */ #define LLC_XID_FMT_ID 0x81 /* first byte must be this */ /* LLC types/classes identifier (byte 2) */ #define LLC_XID_CLASS_ZEROS_MASK 0xE0 /* these must be zeros */ #define LLC_XID_CLASS_MASK 0x1F /* AND with byte to get below */ #define LLC_XID_NULL_CLASS_1 0x01 /* if NULL LSAP...use these */ #define LLC_XID_NULL_CLASS_2 0x03 #define LLC_XID_NULL_CLASS_3 0x05 #define LLC_XID_NULL_CLASS_4 0x07 #define LLC_XID_NNULL_TYPE_1 0x01 /* if non-NULL LSAP...use these */ #define LLC_XID_NNULL_TYPE_2 0x02 #define LLC_XID_NNULL_TYPE_3 0x04 #define LLC_XID_NNULL_TYPE_1_2 0x03 #define LLC_XID_NNULL_TYPE_1_3 0x05 #define LLC_XID_NNULL_TYPE_2_3 0x06 #define LLC_XID_NNULL_ALL 0x07 /* Sender Receive Window (byte 3) */ #define LLC_XID_RW_MASK 0xFE /* AND with value to get below */ #define LLC_XID_MIN_RW 0x02 /* lowest-order bit always zero */ /* Type 2 operations */ #define LLC_2_SEQ_NBR_MODULO ((u8) 128) /* I-PDU masks ('ctrl' is I-PDU control word) */ #define LLC_I_GET_NS(pdu) (u8)((pdu->ctrl_1 & 0xFE) >> 1) #define LLC_I_GET_NR(pdu) (u8)((pdu->ctrl_2 & 0xFE) >> 1) #define LLC_I_PF_BIT_MASK 0x01 #define LLC_I_PF_IS_0(pdu) ((!(pdu->ctrl_2 & LLC_I_PF_BIT_MASK)) ? 1 : 0) #define LLC_I_PF_IS_1(pdu) ((pdu->ctrl_2 & LLC_I_PF_BIT_MASK) ? 1 : 0) /* S-PDU supervisory commands and responses */ #define LLC_S_PDU_CMD_MASK 0x0C #define LLC_S_PDU_CMD(pdu) (pdu->ctrl_1 & LLC_S_PDU_CMD_MASK) #define LLC_S_PDU_RSP(pdu) (pdu->ctrl_1 & LLC_S_PDU_CMD_MASK) #define LLC_2_PDU_CMD_RR 0x00 /* rx ready cmd */ #define LLC_2_PDU_RSP_RR 0x00 /* rx ready rsp */ #define LLC_2_PDU_CMD_REJ 0x08 /* reject PDU cmd */ #define LLC_2_PDU_RSP_REJ 0x08 /* reject PDU rsp */ #define LLC_2_PDU_CMD_RNR 0x04 /* rx not ready cmd */ #define LLC_2_PDU_RSP_RNR 0x04 /* rx not ready rsp */ #define LLC_S_PF_BIT_MASK 0x01 #define LLC_S_PF_IS_0(pdu) ((!(pdu->ctrl_2 & LLC_S_PF_BIT_MASK)) ? 1 : 0) #define LLC_S_PF_IS_1(pdu) ((pdu->ctrl_2 & LLC_S_PF_BIT_MASK) ? 1 : 0) #define PDU_SUPV_GET_Nr(pdu) ((pdu->ctrl_2 & 0xFE) >> 1) #define PDU_GET_NEXT_Vr(sn) (((sn) + 1) & ~LLC_2_SEQ_NBR_MODULO) /* FRMR information field macros */ #define FRMR_INFO_LENGTH 5 /* 5 bytes of information */ /* * info is pointer to FRMR info field structure; 'rej_ctrl' is byte pointer * (if U-PDU) or word pointer to rejected PDU control field */ #define FRMR_INFO_SET_REJ_CNTRL(info,rej_ctrl) \ info->rej_pdu_ctrl = ((*((u8 *) rej_ctrl) & \ LLC_PDU_TYPE_U) != LLC_PDU_TYPE_U ? \ (u16)*((u16 *) rej_ctrl) : \ (((u16) *((u8 *) rej_ctrl)) & 0x00FF)) /* * Info is pointer to FRMR info field structure; 'vs' is a byte containing * send state variable value in low-order 7 bits (insure the lowest-order * bit remains zero (0)) */ #define FRMR_INFO_SET_Vs(info,vs) (info->curr_ssv = (((u8) vs) << 1)) #define FRMR_INFO_SET_Vr(info,vr) (info->curr_rsv = (((u8) vr) << 1)) /* * Info is pointer to FRMR info field structure; 'cr' is a byte containing * the C/R bit value in the low-order bit */ #define FRMR_INFO_SET_C_R_BIT(info, cr) (info->curr_rsv |= (((u8) cr) & 0x01)) /* * In the remaining five macros, 'info' is pointer to FRMR info field * structure; 'ind' is a byte containing the bit value to set in the * lowest-order bit) */ #define FRMR_INFO_SET_INVALID_PDU_CTRL_IND(info, ind) \ (info->ind_bits = ((info->ind_bits & 0xFE) | (((u8) ind) & 0x01))) #define FRMR_INFO_SET_INVALID_PDU_INFO_IND(info, ind) \ (info->ind_bits = ( (info->ind_bits & 0xFD) | (((u8) ind) & 0x02))) #define FRMR_INFO_SET_PDU_INFO_2LONG_IND(info, ind) \ (info->ind_bits = ( (info->ind_bits & 0xFB) | (((u8) ind) & 0x04))) #define FRMR_INFO_SET_PDU_INVALID_Nr_IND(info, ind) \ (info->ind_bits = ( (info->ind_bits & 0xF7) | (((u8) ind) & 0x08))) #define FRMR_INFO_SET_PDU_INVALID_Ns_IND(info, ind) \ (info->ind_bits = ( (info->ind_bits & 0xEF) | (((u8) ind) & 0x10))) /* Sequence-numbered PDU format (4 bytes in length) */ struct llc_pdu_sn { u8 dsap; u8 ssap; u8 ctrl_1; u8 ctrl_2; } __packed; static inline struct llc_pdu_sn *llc_pdu_sn_hdr(struct sk_buff *skb) { return (struct llc_pdu_sn *)skb_network_header(skb); } /* Un-numbered PDU format (3 bytes in length) */ struct llc_pdu_un { u8 dsap; u8 ssap; u8 ctrl_1; } __packed; static inline struct llc_pdu_un *llc_pdu_un_hdr(struct sk_buff *skb) { return (struct llc_pdu_un *)skb_network_header(skb); } /** * llc_pdu_header_init - initializes pdu header * @skb: input skb that header must be set into it. * @type: type of PDU (U, I or S). * @ssap: source sap. * @dsap: destination sap. * @cr: command/response bit (0 or 1). * * This function sets DSAP, SSAP and command/Response bit in LLC header. */ static inline void llc_pdu_header_init(struct sk_buff *skb, u8 type, u8 ssap, u8 dsap, u8 cr) { int hlen = 4; /* default value for I and S types */ struct llc_pdu_un *pdu; switch (type) { case LLC_PDU_TYPE_U: hlen = 3; break; case LLC_PDU_TYPE_U_XID: hlen = 6; break; } skb_push(skb, hlen); skb_reset_network_header(skb); pdu = llc_pdu_un_hdr(skb); pdu->dsap = dsap; pdu->ssap = ssap; pdu->ssap |= cr; } /** * llc_pdu_decode_sa - extracts, source address (MAC) of input frame * @skb: input skb that source address must be extracted from it. * @sa: pointer to source address (6 byte array). * * This function extracts source address(MAC) of input frame. */ static inline void llc_pdu_decode_sa(struct sk_buff *skb, u8 *sa) { memcpy(sa, eth_hdr(skb)->h_source, ETH_ALEN); } /** * llc_pdu_decode_da - extracts dest address of input frame * @skb: input skb that destination address must be extracted from it * @da: pointer to destination address (6 byte array). * * This function extracts destination address(MAC) of input frame. */ static inline void llc_pdu_decode_da(struct sk_buff *skb, u8 *da) { memcpy(da, eth_hdr(skb)->h_dest, ETH_ALEN); } /** * llc_pdu_decode_ssap - extracts source SAP of input frame * @skb: input skb that source SAP must be extracted from it. * @ssap: source SAP (output argument). * * This function extracts source SAP of input frame. Right bit of SSAP is * command/response bit. */ static inline void llc_pdu_decode_ssap(struct sk_buff *skb, u8 *ssap) { *ssap = llc_pdu_un_hdr(skb)->ssap & 0xFE; } /** * llc_pdu_decode_dsap - extracts dest SAP of input frame * @skb: input skb that destination SAP must be extracted from it. * @dsap: destination SAP (output argument). * * This function extracts destination SAP of input frame. right bit of * DSAP designates individual/group SAP. */ static inline void llc_pdu_decode_dsap(struct sk_buff *skb, u8 *dsap) { *dsap = llc_pdu_un_hdr(skb)->dsap & 0xFE; } /** * llc_pdu_init_as_ui_cmd - sets LLC header as UI PDU * @skb: input skb that header must be set into it. * * This function sets third byte of LLC header as a UI PDU. */ static inline void llc_pdu_init_as_ui_cmd(struct sk_buff *skb) { struct llc_pdu_un *pdu = llc_pdu_un_hdr(skb); pdu->ctrl_1 = LLC_PDU_TYPE_U; pdu->ctrl_1 |= LLC_1_PDU_CMD_UI; } /** * llc_pdu_init_as_test_cmd - sets PDU as TEST * @skb: Address of the skb to build * * Sets a PDU as TEST */ static inline void llc_pdu_init_as_test_cmd(struct sk_buff *skb) { struct llc_pdu_un *pdu = llc_pdu_un_hdr(skb); pdu->ctrl_1 = LLC_PDU_TYPE_U; pdu->ctrl_1 |= LLC_1_PDU_CMD_TEST; pdu->ctrl_1 |= LLC_U_PF_BIT_MASK; } /** * llc_pdu_init_as_test_rsp - build TEST response PDU * @skb: Address of the skb to build * @ev_skb: The received TEST command PDU frame * * Builds a pdu frame as a TEST response. */ static inline void llc_pdu_init_as_test_rsp(struct sk_buff *skb, struct sk_buff *ev_skb) { struct llc_pdu_un *pdu = llc_pdu_un_hdr(skb); pdu->ctrl_1 = LLC_PDU_TYPE_U; pdu->ctrl_1 |= LLC_1_PDU_CMD_TEST; pdu->ctrl_1 |= LLC_U_PF_BIT_MASK; if (ev_skb->protocol == htons(ETH_P_802_2)) { struct llc_pdu_un *ev_pdu = llc_pdu_un_hdr(ev_skb); int dsize; dsize = ntohs(eth_hdr(ev_skb)->h_proto) - 3; memcpy(((u8 *)pdu) + 3, ((u8 *)ev_pdu) + 3, dsize); skb_put(skb, dsize); } } /* LLC Type 1 XID command/response information fields format */ struct llc_xid_info { u8 fmt_id; /* always 0x81 for LLC */ u8 type; /* different if NULL/non-NULL LSAP */ u8 rw; /* sender receive window */ } __packed; /** * llc_pdu_init_as_xid_cmd - sets bytes 3, 4 & 5 of LLC header as XID * @skb: input skb that header must be set into it. * @svcs_supported: The class of the LLC (I or II) * @rx_window: The size of the receive window of the LLC * * This function sets third,fourth,fifth and sixth bytes of LLC header as * a XID PDU. */ static inline void llc_pdu_init_as_xid_cmd(struct sk_buff *skb, u8 svcs_supported, u8 rx_window) { struct llc_xid_info *xid_info; struct llc_pdu_un *pdu = llc_pdu_un_hdr(skb); pdu->ctrl_1 = LLC_PDU_TYPE_U; pdu->ctrl_1 |= LLC_1_PDU_CMD_XID; pdu->ctrl_1 |= LLC_U_PF_BIT_MASK; xid_info = (struct llc_xid_info *)(((u8 *)&pdu->ctrl_1) + 1); xid_info->fmt_id = LLC_XID_FMT_ID; /* 0x81 */ xid_info->type = svcs_supported; xid_info->rw = rx_window << 1; /* size of receive window */ /* no need to push/put since llc_pdu_header_init() has already * pushed 3 + 3 bytes */ } /** * llc_pdu_init_as_xid_rsp - builds XID response PDU * @skb: Address of the skb to build * @svcs_supported: The class of the LLC (I or II) * @rx_window: The size of the receive window of the LLC * * Builds a pdu frame as an XID response. */ static inline void llc_pdu_init_as_xid_rsp(struct sk_buff *skb, u8 svcs_supported, u8 rx_window) { struct llc_xid_info *xid_info; struct llc_pdu_un *pdu = llc_pdu_un_hdr(skb); pdu->ctrl_1 = LLC_PDU_TYPE_U; pdu->ctrl_1 |= LLC_1_PDU_CMD_XID; pdu->ctrl_1 |= LLC_U_PF_BIT_MASK; xid_info = (struct llc_xid_info *)(((u8 *)&pdu->ctrl_1) + 1); xid_info->fmt_id = LLC_XID_FMT_ID; xid_info->type = svcs_supported; xid_info->rw = rx_window << 1; skb_put(skb, sizeof(struct llc_xid_info)); } /* LLC Type 2 FRMR response information field format */ struct llc_frmr_info { u16 rej_pdu_ctrl; /* bits 1-8 if U-PDU */ u8 curr_ssv; /* current send state variable val */ u8 curr_rsv; /* current receive state variable */ u8 ind_bits; /* indicator bits set with macro */ } __packed; void llc_pdu_set_cmd_rsp(struct sk_buff *skb, u8 type); void llc_pdu_set_pf_bit(struct sk_buff *skb, u8 bit_value); void llc_pdu_decode_pf_bit(struct sk_buff *skb, u8 *pf_bit); void llc_pdu_init_as_disc_cmd(struct sk_buff *skb, u8 p_bit); void llc_pdu_init_as_i_cmd(struct sk_buff *skb, u8 p_bit, u8 ns, u8 nr); void llc_pdu_init_as_rej_cmd(struct sk_buff *skb, u8 p_bit, u8 nr); void llc_pdu_init_as_rnr_cmd(struct sk_buff *skb, u8 p_bit, u8 nr); void llc_pdu_init_as_rr_cmd(struct sk_buff *skb, u8 p_bit, u8 nr); void llc_pdu_init_as_sabme_cmd(struct sk_buff *skb, u8 p_bit); void llc_pdu_init_as_dm_rsp(struct sk_buff *skb, u8 f_bit); void llc_pdu_init_as_frmr_rsp(struct sk_buff *skb, struct llc_pdu_sn *prev_pdu, u8 f_bit, u8 vs, u8 vr, u8 vzyxw); void llc_pdu_init_as_rr_rsp(struct sk_buff *skb, u8 f_bit, u8 nr); void llc_pdu_init_as_rej_rsp(struct sk_buff *skb, u8 f_bit, u8 nr); void llc_pdu_init_as_rnr_rsp(struct sk_buff *skb, u8 f_bit, u8 nr); void llc_pdu_init_as_ua_rsp(struct sk_buff *skb, u8 f_bit); #endif /* LLC_PDU_H */
14 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 /* SPDX-License-Identifier: GPL-2.0-or-later */ #ifndef _LINUX_RSTREASON_H #define _LINUX_RSTREASON_H #include <net/dropreason-core.h> #include <uapi/linux/mptcp.h> #define DEFINE_RST_REASON(FN, FNe) \ FN(NOT_SPECIFIED) \ FN(NO_SOCKET) \ FN(TCP_INVALID_ACK_SEQUENCE) \ FN(TCP_RFC7323_PAWS) \ FN(TCP_TOO_OLD_ACK) \ FN(TCP_ACK_UNSENT_DATA) \ FN(TCP_FLAGS) \ FN(TCP_OLD_ACK) \ FN(TCP_ABORT_ON_DATA) \ FN(TCP_TIMEWAIT_SOCKET) \ FN(INVALID_SYN) \ FN(TCP_ABORT_ON_CLOSE) \ FN(TCP_ABORT_ON_LINGER) \ FN(TCP_ABORT_ON_MEMORY) \ FN(TCP_STATE) \ FN(TCP_KEEPALIVE_TIMEOUT) \ FN(TCP_DISCONNECT_WITH_DATA) \ FN(MPTCP_RST_EUNSPEC) \ FN(MPTCP_RST_EMPTCP) \ FN(MPTCP_RST_ERESOURCE) \ FN(MPTCP_RST_EPROHIBIT) \ FN(MPTCP_RST_EWQ2BIG) \ FN(MPTCP_RST_EBADPERF) \ FN(MPTCP_RST_EMIDDLEBOX) \ FN(ERROR) \ FNe(MAX) /** * enum sk_rst_reason - the reasons of socket reset * * The reasons of sk reset, which are used in DCCP/TCP/MPTCP protocols. * * There are three parts in order: * 1) skb drop reasons: relying on drop reasons for such as passive reset * 2) independent reset reasons: such as active reset reasons * 3) reset reasons in MPTCP: only for MPTCP use */ enum sk_rst_reason { /* Refer to include/net/dropreason-core.h * Rely on skb drop reasons because it indicates exactly why RST * could happen. */ /** @SK_RST_REASON_NOT_SPECIFIED: reset reason is not specified */ SK_RST_REASON_NOT_SPECIFIED, /** @SK_RST_REASON_NO_SOCKET: no valid socket that can be used */ SK_RST_REASON_NO_SOCKET, /** * @SK_RST_REASON_TCP_INVALID_ACK_SEQUENCE: Not acceptable ACK SEQ * field because ack sequence is not in the window between snd_una * and snd_nxt */ SK_RST_REASON_TCP_INVALID_ACK_SEQUENCE, /** * @SK_RST_REASON_TCP_RFC7323_PAWS: PAWS check, corresponding to * LINUX_MIB_PAWSESTABREJECTED, LINUX_MIB_PAWSACTIVEREJECTED */ SK_RST_REASON_TCP_RFC7323_PAWS, /** @SK_RST_REASON_TCP_TOO_OLD_ACK: TCP ACK is too old */ SK_RST_REASON_TCP_TOO_OLD_ACK, /** * @SK_RST_REASON_TCP_ACK_UNSENT_DATA: TCP ACK for data we haven't * sent yet */ SK_RST_REASON_TCP_ACK_UNSENT_DATA, /** @SK_RST_REASON_TCP_FLAGS: TCP flags invalid */ SK_RST_REASON_TCP_FLAGS, /** @SK_RST_REASON_TCP_OLD_ACK: TCP ACK is old, but in window */ SK_RST_REASON_TCP_OLD_ACK, /** * @SK_RST_REASON_TCP_ABORT_ON_DATA: abort on data * corresponding to LINUX_MIB_TCPABORTONDATA */ SK_RST_REASON_TCP_ABORT_ON_DATA, /* Here start with the independent reasons */ /** @SK_RST_REASON_TCP_TIMEWAIT_SOCKET: happen on the timewait socket */ SK_RST_REASON_TCP_TIMEWAIT_SOCKET, /** * @SK_RST_REASON_INVALID_SYN: receive bad syn packet * RFC 793 says if the state is not CLOSED/LISTEN/SYN-SENT then * "fourth, check the SYN bit,...If the SYN is in the window it is * an error, send a reset" */ SK_RST_REASON_INVALID_SYN, /** * @SK_RST_REASON_TCP_ABORT_ON_CLOSE: abort on close * corresponding to LINUX_MIB_TCPABORTONCLOSE */ SK_RST_REASON_TCP_ABORT_ON_CLOSE, /** * @SK_RST_REASON_TCP_ABORT_ON_LINGER: abort on linger * corresponding to LINUX_MIB_TCPABORTONLINGER */ SK_RST_REASON_TCP_ABORT_ON_LINGER, /** * @SK_RST_REASON_TCP_ABORT_ON_MEMORY: abort on memory * corresponding to LINUX_MIB_TCPABORTONMEMORY */ SK_RST_REASON_TCP_ABORT_ON_MEMORY, /** * @SK_RST_REASON_TCP_STATE: abort on tcp state * Please see RFC 9293 for all possible reset conditions */ SK_RST_REASON_TCP_STATE, /** * @SK_RST_REASON_TCP_KEEPALIVE_TIMEOUT: time to timeout * When we have already run out of all the chances, which means * keepalive timeout, we have to reset the connection */ SK_RST_REASON_TCP_KEEPALIVE_TIMEOUT, /** * @SK_RST_REASON_TCP_DISCONNECT_WITH_DATA: disconnect when write * queue is not empty * It means user has written data into the write queue when doing * disconnecting, so we have to send an RST. */ SK_RST_REASON_TCP_DISCONNECT_WITH_DATA, /* Copy from include/uapi/linux/mptcp.h. * These reset fields will not be changed since they adhere to * RFC 8684. So do not touch them. I'm going to list each definition * of them respectively. */ /** * @SK_RST_REASON_MPTCP_RST_EUNSPEC: Unspecified error. * This is the default error; it implies that the subflow is no * longer available. The presence of this option shows that the * RST was generated by an MPTCP-aware device. */ SK_RST_REASON_MPTCP_RST_EUNSPEC, /** * @SK_RST_REASON_MPTCP_RST_EMPTCP: MPTCP-specific error. * An error has been detected in the processing of MPTCP options. * This is the usual reason code to return in the cases where a RST * is being sent to close a subflow because of an invalid response. */ SK_RST_REASON_MPTCP_RST_EMPTCP, /** * @SK_RST_REASON_MPTCP_RST_ERESOURCE: Lack of resources. * This code indicates that the sending host does not have enough * resources to support the terminated subflow. */ SK_RST_REASON_MPTCP_RST_ERESOURCE, /** * @SK_RST_REASON_MPTCP_RST_EPROHIBIT: Administratively prohibited. * This code indicates that the requested subflow is prohibited by * the policies of the sending host. */ SK_RST_REASON_MPTCP_RST_EPROHIBIT, /** * @SK_RST_REASON_MPTCP_RST_EWQ2BIG: Too much outstanding data. * This code indicates that there is an excessive amount of data * that needs to be transmitted over the terminated subflow while * having already been acknowledged over one or more other subflows. * This may occur if a path has been unavailable for a short period * and it is more efficient to reset and start again than it is to * retransmit the queued data. */ SK_RST_REASON_MPTCP_RST_EWQ2BIG, /** * @SK_RST_REASON_MPTCP_RST_EBADPERF: Unacceptable performance. * This code indicates that the performance of this subflow was * too low compared to the other subflows of this Multipath TCP * connection. */ SK_RST_REASON_MPTCP_RST_EBADPERF, /** * @SK_RST_REASON_MPTCP_RST_EMIDDLEBOX: Middlebox interference. * Middlebox interference has been detected over this subflow, * making MPTCP signaling invalid. For example, this may be sent * if the checksum does not validate. */ SK_RST_REASON_MPTCP_RST_EMIDDLEBOX, /** @SK_RST_REASON_ERROR: unexpected error happens */ SK_RST_REASON_ERROR, /** * @SK_RST_REASON_MAX: Maximum of socket reset reasons. * It shouldn't be used as a real 'reason'. */ SK_RST_REASON_MAX, }; /* Convert skb drop reasons to enum sk_rst_reason type */ static inline enum sk_rst_reason sk_rst_convert_drop_reason(enum skb_drop_reason reason) { switch (reason) { case SKB_DROP_REASON_NOT_SPECIFIED: return SK_RST_REASON_NOT_SPECIFIED; case SKB_DROP_REASON_NO_SOCKET: return SK_RST_REASON_NO_SOCKET; case SKB_DROP_REASON_TCP_INVALID_ACK_SEQUENCE: return SK_RST_REASON_TCP_INVALID_ACK_SEQUENCE; case SKB_DROP_REASON_TCP_RFC7323_PAWS: return SK_RST_REASON_TCP_RFC7323_PAWS; case SKB_DROP_REASON_TCP_TOO_OLD_ACK: return SK_RST_REASON_TCP_TOO_OLD_ACK; case SKB_DROP_REASON_TCP_ACK_UNSENT_DATA: return SK_RST_REASON_TCP_ACK_UNSENT_DATA; case SKB_DROP_REASON_TCP_FLAGS: return SK_RST_REASON_TCP_FLAGS; case SKB_DROP_REASON_TCP_OLD_ACK: return SK_RST_REASON_TCP_OLD_ACK; case SKB_DROP_REASON_TCP_ABORT_ON_DATA: return SK_RST_REASON_TCP_ABORT_ON_DATA; default: /* If we don't have our own corresponding reason */ return SK_RST_REASON_NOT_SPECIFIED; } } #endif
2 2 2 2 2 2 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 // SPDX-License-Identifier: GPL-2.0-only /* * CAN driver for esd electronics gmbh CAN-USB/2, CAN-USB/3 and CAN-USB/Micro * * Copyright (C) 2010-2012 esd electronic system design gmbh, Matthias Fuchs <socketcan@esd.eu> * Copyright (C) 2022-2024 esd electronics gmbh, Frank Jungclaus <frank.jungclaus@esd.eu> */ #include <linux/can.h> #include <linux/can/dev.h> #include <linux/can/error.h> #include <linux/ethtool.h> #include <linux/module.h> #include <linux/netdevice.h> #include <linux/signal.h> #include <linux/slab.h> #include <linux/units.h> #include <linux/usb.h> MODULE_AUTHOR("Matthias Fuchs <socketcan@esd.eu>"); MODULE_AUTHOR("Frank Jungclaus <frank.jungclaus@esd.eu>"); MODULE_DESCRIPTION("CAN driver for esd electronics gmbh CAN-USB/2, CAN-USB/3 and CAN-USB/Micro interfaces"); MODULE_LICENSE("GPL v2"); /* USB vendor and product ID */ #define ESD_USB_ESDGMBH_VENDOR_ID 0x0ab4 #define ESD_USB_CANUSB2_PRODUCT_ID 0x0010 #define ESD_USB_CANUSBM_PRODUCT_ID 0x0011 #define ESD_USB_CANUSB3_PRODUCT_ID 0x0014 /* CAN controller clock frequencies */ #define ESD_USB_2_CAN_CLOCK (60 * MEGA) /* Hz */ #define ESD_USB_M_CAN_CLOCK (36 * MEGA) /* Hz */ #define ESD_USB_3_CAN_CLOCK (80 * MEGA) /* Hz */ /* Maximum number of CAN nets */ #define ESD_USB_MAX_NETS 2 /* USB commands */ #define ESD_USB_CMD_VERSION 1 /* also used for VERSION_REPLY */ #define ESD_USB_CMD_CAN_RX 2 /* device to host only */ #define ESD_USB_CMD_CAN_TX 3 /* also used for TX_DONE */ #define ESD_USB_CMD_SETBAUD 4 /* also used for SETBAUD_REPLY */ #define ESD_USB_CMD_TS 5 /* also used for TS_REPLY */ #define ESD_USB_CMD_IDADD 6 /* also used for IDADD_REPLY */ /* esd CAN message flags - dlc field */ #define ESD_USB_RTR BIT(4) #define ESD_USB_NO_BRS BIT(4) #define ESD_USB_ESI BIT(5) #define ESD_USB_FD BIT(7) /* esd CAN message flags - id field */ #define ESD_USB_EXTID BIT(29) #define ESD_USB_EVENT BIT(30) #define ESD_USB_IDMASK GENMASK(28, 0) /* esd CAN event ids */ #define ESD_USB_EV_CAN_ERROR_EXT 2 /* CAN controller specific diagnostic data */ /* baudrate message flags */ #define ESD_USB_LOM BIT(30) /* Listen Only Mode */ #define ESD_USB_UBR BIT(31) /* User Bit Rate (controller BTR) in bits 0..27 */ #define ESD_USB_NO_BAUDRATE GENMASK(30, 0) /* bit rate unconfigured */ /* bit timing esd CAN-USB */ #define ESD_USB_2_TSEG1_SHIFT 16 #define ESD_USB_2_TSEG2_SHIFT 20 #define ESD_USB_2_SJW_SHIFT 14 #define ESD_USB_M_SJW_SHIFT 24 #define ESD_USB_TRIPLE_SAMPLES BIT(23) /* Transmitter Delay Compensation */ #define ESD_USB_3_TDC_MODE_AUTO 0 /* esd IDADD message */ #define ESD_USB_ID_ENABLE BIT(7) #define ESD_USB_MAX_ID_SEGMENT 64 /* SJA1000 ECC register (emulated by usb firmware) */ #define ESD_USB_SJA1000_ECC_SEG GENMASK(4, 0) #define ESD_USB_SJA1000_ECC_DIR BIT(5) #define ESD_USB_SJA1000_ECC_ERR BIT(2, 1) #define ESD_USB_SJA1000_ECC_BIT 0x00 #define ESD_USB_SJA1000_ECC_FORM BIT(6) #define ESD_USB_SJA1000_ECC_STUFF BIT(7) #define ESD_USB_SJA1000_ECC_MASK GENMASK(7, 6) /* esd bus state event codes */ #define ESD_USB_BUSSTATE_MASK GENMASK(7, 6) #define ESD_USB_BUSSTATE_WARN BIT(6) #define ESD_USB_BUSSTATE_ERRPASSIVE BIT(7) #define ESD_USB_BUSSTATE_BUSOFF GENMASK(7, 6) #define ESD_USB_RX_BUFFER_SIZE 1024 #define ESD_USB_MAX_RX_URBS 4 #define ESD_USB_MAX_TX_URBS 16 /* must be power of 2 */ /* Modes for CAN-USB/3, to be used for esd_usb_3_set_baudrate_msg_x.mode */ #define ESD_USB_3_BAUDRATE_MODE_DISABLE 0 /* remove from bus */ #define ESD_USB_3_BAUDRATE_MODE_INDEX 1 /* ESD (CiA) bit rate idx */ #define ESD_USB_3_BAUDRATE_MODE_BTR_CTRL 2 /* BTR values (controller)*/ #define ESD_USB_3_BAUDRATE_MODE_BTR_CANONICAL 3 /* BTR values (canonical) */ #define ESD_USB_3_BAUDRATE_MODE_NUM 4 /* numerical bit rate */ #define ESD_USB_3_BAUDRATE_MODE_AUTOBAUD 5 /* autobaud */ /* Flags for CAN-USB/3, to be used for esd_usb_3_set_baudrate_msg_x.flags */ #define ESD_USB_3_BAUDRATE_FLAG_FD BIT(0) /* enable CAN FD mode */ #define ESD_USB_3_BAUDRATE_FLAG_LOM BIT(1) /* enable listen only mode */ #define ESD_USB_3_BAUDRATE_FLAG_STM BIT(2) /* enable self test mode */ #define ESD_USB_3_BAUDRATE_FLAG_TRS BIT(3) /* enable triple sampling */ #define ESD_USB_3_BAUDRATE_FLAG_TXP BIT(4) /* enable transmit pause */ struct esd_usb_header_msg { u8 len; /* total message length in 32bit words */ u8 cmd; u8 rsvd[2]; }; struct esd_usb_version_msg { u8 len; /* total message length in 32bit words */ u8 cmd; u8 rsvd; u8 flags; __le32 drv_version; }; struct esd_usb_version_reply_msg { u8 len; /* total message length in 32bit words */ u8 cmd; u8 nets; u8 features; __le32 version; u8 name[16]; __le32 rsvd; __le32 ts; }; struct esd_usb_rx_msg { u8 len; /* total message length in 32bit words */ u8 cmd; u8 net; u8 dlc; __le32 ts; __le32 id; /* upper 3 bits contain flags */ union { u8 data[CAN_MAX_DLEN]; u8 data_fd[CANFD_MAX_DLEN]; struct { u8 status; /* CAN Controller Status */ u8 ecc; /* Error Capture Register */ u8 rec; /* RX Error Counter */ u8 tec; /* TX Error Counter */ } ev_can_err_ext; /* For ESD_EV_CAN_ERROR_EXT */ }; }; struct esd_usb_tx_msg { u8 len; /* total message length in 32bit words */ u8 cmd; u8 net; u8 dlc; u32 hnd; /* opaque handle, not used by device */ __le32 id; /* upper 3 bits contain flags */ union { u8 data[CAN_MAX_DLEN]; u8 data_fd[CANFD_MAX_DLEN]; }; }; struct esd_usb_tx_done_msg { u8 len; /* total message length in 32bit words */ u8 cmd; u8 net; u8 status; u32 hnd; /* opaque handle, not used by device */ __le32 ts; }; struct esd_usb_id_filter_msg { u8 len; /* total message length in 32bit words */ u8 cmd; u8 net; u8 option; __le32 mask[ESD_USB_MAX_ID_SEGMENT + 1]; /* +1 for 29bit extended IDs */ }; struct esd_usb_set_baudrate_msg { u8 len; /* total message length in 32bit words */ u8 cmd; u8 net; u8 rsvd; __le32 baud; }; /* CAN-USB/3 baudrate configuration, used for nominal as well as for data bit rate */ struct esd_usb_3_baudrate_cfg { __le16 brp; /* bit rate pre-scaler */ __le16 tseg1; /* time segment before sample point */ __le16 tseg2; /* time segment after sample point */ __le16 sjw; /* synchronization jump Width */ }; /* In principle, the esd CAN-USB/3 supports Transmitter Delay Compensation (TDC), * but currently only the automatic TDC mode is supported by this driver. * An implementation for manual TDC configuration will follow. * * For information about struct esd_usb_3_tdc_cfg, see * NTCAN Application Developers Manual, 6.2.25 NTCAN_TDC_CFG + related chapters * https://esd.eu/fileadmin/esd/docs/manuals/NTCAN_Part1_Function_API_Manual_en_56.pdf */ struct esd_usb_3_tdc_cfg { u8 tdc_mode; /* transmitter delay compensation mode */ u8 ssp_offset; /* secondary sample point offset in mtq */ s8 ssp_shift; /* secondary sample point shift in mtq */ u8 tdc_filter; /* TDC filter in mtq */ }; /* Extended version of the above set_baudrate_msg for a CAN-USB/3 * to define the CAN bit timing configuration of the CAN controller in * CAN FD mode as well as in Classical CAN mode. * * The payload of this command is a NTCAN_BAUDRATE_X structure according to * esd electronics gmbh, NTCAN Application Developers Manual, 6.2.15 NTCAN_BAUDRATE_X * https://esd.eu/fileadmin/esd/docs/manuals/NTCAN_Part1_Function_API_Manual_en_56.pdf */ struct esd_usb_3_set_baudrate_msg_x { u8 len; /* total message length in 32bit words */ u8 cmd; u8 net; u8 rsvd; /*reserved */ /* Payload ... */ __le16 mode; /* mode word, see ESD_USB_3_BAUDRATE_MODE_xxx */ __le16 flags; /* control flags, see ESD_USB_3_BAUDRATE_FLAG_xxx */ struct esd_usb_3_tdc_cfg tdc; /* TDC configuration */ struct esd_usb_3_baudrate_cfg nom; /* nominal bit rate */ struct esd_usb_3_baudrate_cfg data; /* data bit rate */ }; /* Main message type used between library and application */ union __packed esd_usb_msg { struct esd_usb_header_msg hdr; struct esd_usb_version_msg version; struct esd_usb_version_reply_msg version_reply; struct esd_usb_rx_msg rx; struct esd_usb_tx_msg tx; struct esd_usb_tx_done_msg txdone; struct esd_usb_set_baudrate_msg setbaud; struct esd_usb_3_set_baudrate_msg_x setbaud_x; struct esd_usb_id_filter_msg filter; }; static struct usb_device_id esd_usb_table[] = { {USB_DEVICE(ESD_USB_ESDGMBH_VENDOR_ID, ESD_USB_CANUSB2_PRODUCT_ID)}, {USB_DEVICE(ESD_USB_ESDGMBH_VENDOR_ID, ESD_USB_CANUSBM_PRODUCT_ID)}, {USB_DEVICE(ESD_USB_ESDGMBH_VENDOR_ID, ESD_USB_CANUSB3_PRODUCT_ID)}, {} }; MODULE_DEVICE_TABLE(usb, esd_usb_table); struct esd_usb_net_priv; struct esd_tx_urb_context { struct esd_usb_net_priv *priv; u32 echo_index; }; struct esd_usb { struct usb_device *udev; struct esd_usb_net_priv *nets[ESD_USB_MAX_NETS]; struct usb_anchor rx_submitted; int net_count; u32 version; int rxinitdone; void *rxbuf[ESD_USB_MAX_RX_URBS]; dma_addr_t rxbuf_dma[ESD_USB_MAX_RX_URBS]; }; struct esd_usb_net_priv { struct can_priv can; /* must be the first member */ atomic_t active_tx_jobs; struct usb_anchor tx_submitted; struct esd_tx_urb_context tx_contexts[ESD_USB_MAX_TX_URBS]; struct esd_usb *usb; struct net_device *netdev; int index; u8 old_state; struct can_berr_counter bec; }; static void esd_usb_rx_event(struct esd_usb_net_priv *priv, union esd_usb_msg *msg) { struct net_device_stats *stats = &priv->netdev->stats; struct can_frame *cf; struct sk_buff *skb; u32 id = le32_to_cpu(msg->rx.id) & ESD_USB_IDMASK; if (id == ESD_USB_EV_CAN_ERROR_EXT) { u8 state = msg->rx.ev_can_err_ext.status; u8 ecc = msg->rx.ev_can_err_ext.ecc; priv->bec.rxerr = msg->rx.ev_can_err_ext.rec; priv->bec.txerr = msg->rx.ev_can_err_ext.tec; netdev_dbg(priv->netdev, "CAN_ERR_EV_EXT: dlc=%#02x state=%02x ecc=%02x rec=%02x tec=%02x\n", msg->rx.dlc, state, ecc, priv->bec.rxerr, priv->bec.txerr); /* if berr-reporting is off, only pass through on state change ... */ if (!(priv->can.ctrlmode & CAN_CTRLMODE_BERR_REPORTING) && state == priv->old_state) return; skb = alloc_can_err_skb(priv->netdev, &cf); if (!skb) stats->rx_dropped++; if (state != priv->old_state) { enum can_state tx_state, rx_state; enum can_state new_state = CAN_STATE_ERROR_ACTIVE; priv->old_state = state; switch (state & ESD_USB_BUSSTATE_MASK) { case ESD_USB_BUSSTATE_BUSOFF: new_state = CAN_STATE_BUS_OFF; can_bus_off(priv->netdev); break; case ESD_USB_BUSSTATE_WARN: new_state = CAN_STATE_ERROR_WARNING; break; case ESD_USB_BUSSTATE_ERRPASSIVE: new_state = CAN_STATE_ERROR_PASSIVE; break; default: new_state = CAN_STATE_ERROR_ACTIVE; priv->bec.txerr = 0; priv->bec.rxerr = 0; break; } if (new_state != priv->can.state) { tx_state = (priv->bec.txerr >= priv->bec.rxerr) ? new_state : 0; rx_state = (priv->bec.txerr <= priv->bec.rxerr) ? new_state : 0; can_change_state(priv->netdev, cf, tx_state, rx_state); } } else if (skb) { priv->can.can_stats.bus_error++; stats->rx_errors++; cf->can_id |= CAN_ERR_PROT | CAN_ERR_BUSERROR; switch (ecc & ESD_USB_SJA1000_ECC_MASK) { case ESD_USB_SJA1000_ECC_BIT: cf->data[2] |= CAN_ERR_PROT_BIT; break; case ESD_USB_SJA1000_ECC_FORM: cf->data[2] |= CAN_ERR_PROT_FORM; break; case ESD_USB_SJA1000_ECC_STUFF: cf->data[2] |= CAN_ERR_PROT_STUFF; break; default: break; } /* Error occurred during transmission? */ if (!(ecc & ESD_USB_SJA1000_ECC_DIR)) cf->data[2] |= CAN_ERR_PROT_TX; /* Bit stream position in CAN frame as the error was detected */ cf->data[3] = ecc & ESD_USB_SJA1000_ECC_SEG; } if (skb) { cf->can_id |= CAN_ERR_CNT; cf->data[6] = priv->bec.txerr; cf->data[7] = priv->bec.rxerr; netif_rx(skb); } } } static void esd_usb_rx_can_msg(struct esd_usb_net_priv *priv, union esd_usb_msg *msg) { struct net_device_stats *stats = &priv->netdev->stats; struct can_frame *cf; struct canfd_frame *cfd; struct sk_buff *skb; u32 id; u8 len; if (!netif_device_present(priv->netdev)) return; id = le32_to_cpu(msg->rx.id); if (id & ESD_USB_EVENT) { esd_usb_rx_event(priv, msg); } else { if (msg->rx.dlc & ESD_USB_FD) { skb = alloc_canfd_skb(priv->netdev, &cfd); } else { skb = alloc_can_skb(priv->netdev, &cf); cfd = (struct canfd_frame *)cf; } if (skb == NULL) { stats->rx_dropped++; return; } cfd->can_id = id & ESD_USB_IDMASK; if (msg->rx.dlc & ESD_USB_FD) { /* masking by 0x0F is already done within can_fd_dlc2len() */ cfd->len = can_fd_dlc2len(msg->rx.dlc); len = cfd->len; if ((msg->rx.dlc & ESD_USB_NO_BRS) == 0) cfd->flags |= CANFD_BRS; if (msg->rx.dlc & ESD_USB_ESI) cfd->flags |= CANFD_ESI; } else { can_frame_set_cc_len(cf, msg->rx.dlc & ~ESD_USB_RTR, priv->can.ctrlmode); len = cf->len; if (msg->rx.dlc & ESD_USB_RTR) { cf->can_id |= CAN_RTR_FLAG; len = 0; } } if (id & ESD_USB_EXTID) cfd->can_id |= CAN_EFF_FLAG; memcpy(cfd->data, msg->rx.data_fd, len); stats->rx_bytes += len; stats->rx_packets++; netif_rx(skb); } } static void esd_usb_tx_done_msg(struct esd_usb_net_priv *priv, union esd_usb_msg *msg) { struct net_device_stats *stats = &priv->netdev->stats; struct net_device *netdev = priv->netdev; struct esd_tx_urb_context *context; if (!netif_device_present(netdev)) return; context = &priv->tx_contexts[msg->txdone.hnd & (ESD_USB_MAX_TX_URBS - 1)]; if (!msg->txdone.status) { stats->tx_packets++; stats->tx_bytes += can_get_echo_skb(netdev, context->echo_index, NULL); } else { stats->tx_errors++; can_free_echo_skb(netdev, context->echo_index, NULL); } /* Release context */ context->echo_index = ESD_USB_MAX_TX_URBS; atomic_dec(&priv->active_tx_jobs); netif_wake_queue(netdev); } static void esd_usb_read_bulk_callback(struct urb *urb) { struct esd_usb *dev = urb->context; int retval; int pos = 0; int i; switch (urb->status) { case 0: /* success */ break; case -ENOENT: case -EPIPE: case -EPROTO: case -ESHUTDOWN: return; default: dev_info(dev->udev->dev.parent, "Rx URB aborted (%d)\n", urb->status); goto resubmit_urb; } while (pos < urb->actual_length) { union esd_usb_msg *msg; msg = (union esd_usb_msg *)(urb->transfer_buffer + pos); switch (msg->hdr.cmd) { case ESD_USB_CMD_CAN_RX: if (msg->rx.net >= dev->net_count) { dev_err(dev->udev->dev.parent, "format error\n"); break; } esd_usb_rx_can_msg(dev->nets[msg->rx.net], msg); break; case ESD_USB_CMD_CAN_TX: if (msg->txdone.net >= dev->net_count) { dev_err(dev->udev->dev.parent, "format error\n"); break; } esd_usb_tx_done_msg(dev->nets[msg->txdone.net], msg); break; } pos += msg->hdr.len * sizeof(u32); /* convert to # of bytes */ if (pos > urb->actual_length) { dev_err(dev->udev->dev.parent, "format error\n"); break; } } resubmit_urb: usb_fill_bulk_urb(urb, dev->udev, usb_rcvbulkpipe(dev->udev, 1), urb->transfer_buffer, ESD_USB_RX_BUFFER_SIZE, esd_usb_read_bulk_callback, dev); retval = usb_submit_urb(urb, GFP_ATOMIC); if (retval == -ENODEV) { for (i = 0; i < dev->net_count; i++) { if (dev->nets[i]) netif_device_detach(dev->nets[i]->netdev); } } else if (retval) { dev_err(dev->udev->dev.parent, "failed resubmitting read bulk urb: %d\n", retval); } } /* callback for bulk IN urb */ static void esd_usb_write_bulk_callback(struct urb *urb) { struct esd_tx_urb_context *context = urb->context; struct esd_usb_net_priv *priv; struct net_device *netdev; size_t size = sizeof(union esd_usb_msg); WARN_ON(!context); priv = context->priv; netdev = priv->netdev; /* free up our allocated buffer */ usb_free_coherent(urb->dev, size, urb->transfer_buffer, urb->transfer_dma); if (!netif_device_present(netdev)) return; if (urb->status) netdev_info(netdev, "Tx URB aborted (%d)\n", urb->status); netif_trans_update(netdev); } static ssize_t firmware_show(struct device *d, struct device_attribute *attr, char *buf) { struct usb_interface *intf = to_usb_interface(d); struct esd_usb *dev = usb_get_intfdata(intf); return sprintf(buf, "%d.%d.%d\n", (dev->version >> 12) & 0xf, (dev->version >> 8) & 0xf, dev->version & 0xff); } static DEVICE_ATTR_RO(firmware); static ssize_t hardware_show(struct device *d, struct device_attribute *attr, char *buf) { struct usb_interface *intf = to_usb_interface(d); struct esd_usb *dev = usb_get_intfdata(intf); return sprintf(buf, "%d.%d.%d\n", (dev->version >> 28) & 0xf, (dev->version >> 24) & 0xf, (dev->version >> 16) & 0xff); } static DEVICE_ATTR_RO(hardware); static ssize_t nets_show(struct device *d, struct device_attribute *attr, char *buf) { struct usb_interface *intf = to_usb_interface(d); struct esd_usb *dev = usb_get_intfdata(intf); return sprintf(buf, "%d", dev->net_count); } static DEVICE_ATTR_RO(nets); static int esd_usb_send_msg(struct esd_usb *dev, union esd_usb_msg *msg) { int actual_length; return usb_bulk_msg(dev->udev, usb_sndbulkpipe(dev->udev, 2), msg, msg->hdr.len * sizeof(u32), /* convert to # of bytes */ &actual_length, 1000); } static int esd_usb_wait_msg(struct esd_usb *dev, union esd_usb_msg *msg) { int actual_length; return usb_bulk_msg(dev->udev, usb_rcvbulkpipe(dev->udev, 1), msg, sizeof(*msg), &actual_length, 1000); } static int esd_usb_setup_rx_urbs(struct esd_usb *dev) { int i, err = 0; if (dev->rxinitdone) return 0; for (i = 0; i < ESD_USB_MAX_RX_URBS; i++) { struct urb *urb = NULL; u8 *buf = NULL; dma_addr_t buf_dma; /* create a URB, and a buffer for it */ urb = usb_alloc_urb(0, GFP_KERNEL); if (!urb) { err = -ENOMEM; break; } buf = usb_alloc_coherent(dev->udev, ESD_USB_RX_BUFFER_SIZE, GFP_KERNEL, &buf_dma); if (!buf) { dev_warn(dev->udev->dev.parent, "No memory left for USB buffer\n"); err = -ENOMEM; goto freeurb; } urb->transfer_dma = buf_dma; usb_fill_bulk_urb(urb, dev->udev, usb_rcvbulkpipe(dev->udev, 1), buf, ESD_USB_RX_BUFFER_SIZE, esd_usb_read_bulk_callback, dev); urb->transfer_flags |= URB_NO_TRANSFER_DMA_MAP; usb_anchor_urb(urb, &dev->rx_submitted); err = usb_submit_urb(urb, GFP_KERNEL); if (err) { usb_unanchor_urb(urb); usb_free_coherent(dev->udev, ESD_USB_RX_BUFFER_SIZE, buf, urb->transfer_dma); goto freeurb; } dev->rxbuf[i] = buf; dev->rxbuf_dma[i] = buf_dma; freeurb: /* Drop reference, USB core will take care of freeing it */ usb_free_urb(urb); if (err) break; } /* Did we submit any URBs */ if (i == 0) { dev_err(dev->udev->dev.parent, "couldn't setup read URBs\n"); return err; } /* Warn if we've couldn't transmit all the URBs */ if (i < ESD_USB_MAX_RX_URBS) { dev_warn(dev->udev->dev.parent, "rx performance may be slow\n"); } dev->rxinitdone = 1; return 0; } /* Start interface */ static int esd_usb_start(struct esd_usb_net_priv *priv) { struct esd_usb *dev = priv->usb; struct net_device *netdev = priv->netdev; union esd_usb_msg *msg; int err, i; msg = kmalloc(sizeof(*msg), GFP_KERNEL); if (!msg) { err = -ENOMEM; goto out; } /* Enable all IDs * The IDADD message takes up to 64 32 bit bitmasks (2048 bits). * Each bit represents one 11 bit CAN identifier. A set bit * enables reception of the corresponding CAN identifier. A cleared * bit disabled this identifier. An additional bitmask value * following the CAN 2.0A bits is used to enable reception of * extended CAN frames. Only the LSB of this final mask is checked * for the complete 29 bit ID range. The IDADD message also allows * filter configuration for an ID subset. In this case you can add * the number of the starting bitmask (0..64) to the filter.option * field followed by only some bitmasks. */ msg->hdr.cmd = ESD_USB_CMD_IDADD; msg->hdr.len = sizeof(struct esd_usb_id_filter_msg) / sizeof(u32); /* # of 32bit words */ msg->filter.net = priv->index; msg->filter.option = ESD_USB_ID_ENABLE; /* start with segment 0 */ for (i = 0; i < ESD_USB_MAX_ID_SEGMENT; i++) msg->filter.mask[i] = cpu_to_le32(GENMASK(31, 0)); /* enable 29bit extended IDs */ msg->filter.mask[ESD_USB_MAX_ID_SEGMENT] = cpu_to_le32(BIT(0)); err = esd_usb_send_msg(dev, msg); if (err) goto out; err = esd_usb_setup_rx_urbs(dev); if (err) goto out; priv->can.state = CAN_STATE_ERROR_ACTIVE; out: if (err == -ENODEV) netif_device_detach(netdev); if (err) netdev_err(netdev, "couldn't start device: %d\n", err); kfree(msg); return err; } static void unlink_all_urbs(struct esd_usb *dev) { struct esd_usb_net_priv *priv; int i, j; usb_kill_anchored_urbs(&dev->rx_submitted); for (i = 0; i < ESD_USB_MAX_RX_URBS; ++i) usb_free_coherent(dev->udev, ESD_USB_RX_BUFFER_SIZE, dev->rxbuf[i], dev->rxbuf_dma[i]); for (i = 0; i < dev->net_count; i++) { priv = dev->nets[i]; if (priv) { usb_kill_anchored_urbs(&priv->tx_submitted); atomic_set(&priv->active_tx_jobs, 0); for (j = 0; j < ESD_USB_MAX_TX_URBS; j++) priv->tx_contexts[j].echo_index = ESD_USB_MAX_TX_URBS; } } } static int esd_usb_open(struct net_device *netdev) { struct esd_usb_net_priv *priv = netdev_priv(netdev); int err; /* common open */ err = open_candev(netdev); if (err) return err; /* finally start device */ err = esd_usb_start(priv); if (err) { netdev_warn(netdev, "couldn't start device: %d\n", err); close_candev(netdev); return err; } netif_start_queue(netdev); return 0; } static netdev_tx_t esd_usb_start_xmit(struct sk_buff *skb, struct net_device *netdev) { struct esd_usb_net_priv *priv = netdev_priv(netdev); struct esd_usb *dev = priv->usb; struct esd_tx_urb_context *context = NULL; struct net_device_stats *stats = &netdev->stats; struct canfd_frame *cfd = (struct canfd_frame *)skb->data; union esd_usb_msg *msg; struct urb *urb; u8 *buf; int i, err; int ret = NETDEV_TX_OK; size_t size = sizeof(union esd_usb_msg); if (can_dev_dropped_skb(netdev, skb)) return NETDEV_TX_OK; /* create a URB, and a buffer for it, and copy the data to the URB */ urb = usb_alloc_urb(0, GFP_ATOMIC); if (!urb) { stats->tx_dropped++; dev_kfree_skb(skb); goto nourbmem; } buf = usb_alloc_coherent(dev->udev, size, GFP_ATOMIC, &urb->transfer_dma); if (!buf) { netdev_err(netdev, "No memory left for USB buffer\n"); stats->tx_dropped++; dev_kfree_skb(skb); goto nobufmem; } msg = (union esd_usb_msg *)buf; /* minimal length as # of 32bit words */ msg->hdr.len = offsetof(struct esd_usb_tx_msg, data) / sizeof(u32); msg->hdr.cmd = ESD_USB_CMD_CAN_TX; msg->tx.net = priv->index; if (can_is_canfd_skb(skb)) { msg->tx.dlc = can_fd_len2dlc(cfd->len); msg->tx.dlc |= ESD_USB_FD; if ((cfd->flags & CANFD_BRS) == 0) msg->tx.dlc |= ESD_USB_NO_BRS; } else { msg->tx.dlc = can_get_cc_dlc((struct can_frame *)cfd, priv->can.ctrlmode); if (cfd->can_id & CAN_RTR_FLAG) msg->tx.dlc |= ESD_USB_RTR; } msg->tx.id = cpu_to_le32(cfd->can_id & CAN_ERR_MASK); if (cfd->can_id & CAN_EFF_FLAG) msg->tx.id |= cpu_to_le32(ESD_USB_EXTID); memcpy(msg->tx.data_fd, cfd->data, cfd->len); /* round up, then divide by 4 to add the payload length as # of 32bit words */ msg->hdr.len += DIV_ROUND_UP(cfd->len, sizeof(u32)); for (i = 0; i < ESD_USB_MAX_TX_URBS; i++) { if (priv->tx_contexts[i].echo_index == ESD_USB_MAX_TX_URBS) { context = &priv->tx_contexts[i]; break; } } /* This may never happen */ if (!context) { netdev_warn(netdev, "couldn't find free context\n"); ret = NETDEV_TX_BUSY; goto releasebuf; } context->priv = priv; context->echo_index = i; /* hnd must not be 0 - MSB is stripped in txdone handling */ msg->tx.hnd = BIT(31) | i; /* returned in TX done message */ usb_fill_bulk_urb(urb, dev->udev, usb_sndbulkpipe(dev->udev, 2), buf, msg->hdr.len * sizeof(u32), /* convert to # of bytes */ esd_usb_write_bulk_callback, context); urb->transfer_flags |= URB_NO_TRANSFER_DMA_MAP; usb_anchor_urb(urb, &priv->tx_submitted); can_put_echo_skb(skb, netdev, context->echo_index, 0); atomic_inc(&priv->active_tx_jobs); /* Slow down tx path */ if (atomic_read(&priv->active_tx_jobs) >= ESD_USB_MAX_TX_URBS) netif_stop_queue(netdev); err = usb_submit_urb(urb, GFP_ATOMIC); if (err) { can_free_echo_skb(netdev, context->echo_index, NULL); atomic_dec(&priv->active_tx_jobs); usb_unanchor_urb(urb); stats->tx_dropped++; if (err == -ENODEV) netif_device_detach(netdev); else netdev_warn(netdev, "failed tx_urb %d\n", err); goto releasebuf; } netif_trans_update(netdev); /* Release our reference to this URB, the USB core will eventually free * it entirely. */ usb_free_urb(urb); return NETDEV_TX_OK; releasebuf: usb_free_coherent(dev->udev, size, buf, urb->transfer_dma); nobufmem: usb_free_urb(urb); nourbmem: return ret; } static int esd_usb_close(struct net_device *netdev) { struct esd_usb_net_priv *priv = netdev_priv(netdev); union esd_usb_msg *msg; int i; msg = kmalloc(sizeof(*msg), GFP_KERNEL); if (!msg) return -ENOMEM; /* Disable all IDs (see esd_usb_start()) */ msg->hdr.cmd = ESD_USB_CMD_IDADD; msg->hdr.len = sizeof(struct esd_usb_id_filter_msg) / sizeof(u32);/* # of 32bit words */ msg->filter.net = priv->index; msg->filter.option = ESD_USB_ID_ENABLE; /* start with segment 0 */ for (i = 0; i <= ESD_USB_MAX_ID_SEGMENT; i++) msg->filter.mask[i] = 0; if (esd_usb_send_msg(priv->usb, msg) < 0) netdev_err(netdev, "sending idadd message failed\n"); /* set CAN controller to reset mode */ msg->hdr.len = sizeof(struct esd_usb_set_baudrate_msg) / sizeof(u32); /* # of 32bit words */ msg->hdr.cmd = ESD_USB_CMD_SETBAUD; msg->setbaud.net = priv->index; msg->setbaud.rsvd = 0; msg->setbaud.baud = cpu_to_le32(ESD_USB_NO_BAUDRATE); if (esd_usb_send_msg(priv->usb, msg) < 0) netdev_err(netdev, "sending setbaud message failed\n"); priv->can.state = CAN_STATE_STOPPED; netif_stop_queue(netdev); close_candev(netdev); kfree(msg); return 0; } static const struct net_device_ops esd_usb_netdev_ops = { .ndo_open = esd_usb_open, .ndo_stop = esd_usb_close, .ndo_start_xmit = esd_usb_start_xmit, .ndo_change_mtu = can_change_mtu, }; static const struct ethtool_ops esd_usb_ethtool_ops = { .get_ts_info = ethtool_op_get_ts_info, }; static const struct can_bittiming_const esd_usb_2_bittiming_const = { .name = "esd_usb_2", .tseg1_min = 1, .tseg1_max = 16, .tseg2_min = 1, .tseg2_max = 8, .sjw_max = 4, .brp_min = 1, .brp_max = 1024, .brp_inc = 1, }; static int esd_usb_2_set_bittiming(struct net_device *netdev) { const struct can_bittiming_const *btc = &esd_usb_2_bittiming_const; struct esd_usb_net_priv *priv = netdev_priv(netdev); struct can_bittiming *bt = &priv->can.bittiming; union esd_usb_msg *msg; int err; u32 canbtr; int sjw_shift; canbtr = ESD_USB_UBR; if (priv->can.ctrlmode & CAN_CTRLMODE_LISTENONLY) canbtr |= ESD_USB_LOM; canbtr |= (bt->brp - 1) & (btc->brp_max - 1); if (le16_to_cpu(priv->usb->udev->descriptor.idProduct) == ESD_USB_CANUSBM_PRODUCT_ID) sjw_shift = ESD_USB_M_SJW_SHIFT; else sjw_shift = ESD_USB_2_SJW_SHIFT; canbtr |= ((bt->sjw - 1) & (btc->sjw_max - 1)) << sjw_shift; canbtr |= ((bt->prop_seg + bt->phase_seg1 - 1) & (btc->tseg1_max - 1)) << ESD_USB_2_TSEG1_SHIFT; canbtr |= ((bt->phase_seg2 - 1) & (btc->tseg2_max - 1)) << ESD_USB_2_TSEG2_SHIFT; if (priv->can.ctrlmode & CAN_CTRLMODE_3_SAMPLES) canbtr |= ESD_USB_TRIPLE_SAMPLES; msg = kmalloc(sizeof(*msg), GFP_KERNEL); if (!msg) return -ENOMEM; msg->hdr.len = sizeof(struct esd_usb_set_baudrate_msg) / sizeof(u32); /* # of 32bit words */ msg->hdr.cmd = ESD_USB_CMD_SETBAUD; msg->setbaud.net = priv->index; msg->setbaud.rsvd = 0; msg->setbaud.baud = cpu_to_le32(canbtr); netdev_dbg(netdev, "setting BTR=%#x\n", canbtr); err = esd_usb_send_msg(priv->usb, msg); kfree(msg); return err; } /* Nominal bittiming constants, see * Microchip SAM E70/S70/V70/V71, Data Sheet, Rev. G - 07/2022 * 48.6.8 MCAN Nominal Bit Timing and Prescaler Register */ static const struct can_bittiming_const esd_usb_3_nom_bittiming_const = { .name = "esd_usb_3", .tseg1_min = 2, .tseg1_max = 256, .tseg2_min = 2, .tseg2_max = 128, .sjw_max = 128, .brp_min = 1, .brp_max = 512, .brp_inc = 1, }; /* Data bittiming constants, see * Microchip SAM E70/S70/V70/V71, Data Sheet, Rev. G - 07/2022 * 48.6.4 MCAN Data Bit Timing and Prescaler Register */ static const struct can_bittiming_const esd_usb_3_data_bittiming_const = { .name = "esd_usb_3", .tseg1_min = 2, .tseg1_max = 32, .tseg2_min = 1, .tseg2_max = 16, .sjw_max = 8, .brp_min = 1, .brp_max = 32, .brp_inc = 1, }; static int esd_usb_3_set_bittiming(struct net_device *netdev) { const struct can_bittiming_const *nom_btc = &esd_usb_3_nom_bittiming_const; const struct can_bittiming_const *data_btc = &esd_usb_3_data_bittiming_const; struct esd_usb_net_priv *priv = netdev_priv(netdev); struct can_bittiming *nom_bt = &priv->can.bittiming; struct can_bittiming *data_bt = &priv->can.data_bittiming; struct esd_usb_3_set_baudrate_msg_x *baud_x; union esd_usb_msg *msg; u16 flags = 0; int err; msg = kmalloc(sizeof(*msg), GFP_KERNEL); if (!msg) return -ENOMEM; baud_x = &msg->setbaud_x; /* Canonical is the most reasonable mode for SocketCAN on CAN-USB/3 ... */ baud_x->mode = cpu_to_le16(ESD_USB_3_BAUDRATE_MODE_BTR_CANONICAL); if (priv->can.ctrlmode & CAN_CTRLMODE_LISTENONLY) flags |= ESD_USB_3_BAUDRATE_FLAG_LOM; baud_x->nom.brp = cpu_to_le16(nom_bt->brp & (nom_btc->brp_max - 1)); baud_x->nom.sjw = cpu_to_le16(nom_bt->sjw & (nom_btc->sjw_max - 1)); baud_x->nom.tseg1 = cpu_to_le16((nom_bt->prop_seg + nom_bt->phase_seg1) & (nom_btc->tseg1_max - 1)); baud_x->nom.tseg2 = cpu_to_le16(nom_bt->phase_seg2 & (nom_btc->tseg2_max - 1)); if (priv->can.ctrlmode & CAN_CTRLMODE_FD) { baud_x->data.brp = cpu_to_le16(data_bt->brp & (data_btc->brp_max - 1)); baud_x->data.sjw = cpu_to_le16(data_bt->sjw & (data_btc->sjw_max - 1)); baud_x->data.tseg1 = cpu_to_le16((data_bt->prop_seg + data_bt->phase_seg1) & (data_btc->tseg1_max - 1)); baud_x->data.tseg2 = cpu_to_le16(data_bt->phase_seg2 & (data_btc->tseg2_max - 1)); flags |= ESD_USB_3_BAUDRATE_FLAG_FD; } /* Currently this driver only supports the automatic TDC mode */ baud_x->tdc.tdc_mode = ESD_USB_3_TDC_MODE_AUTO; baud_x->tdc.ssp_offset = 0; baud_x->tdc.ssp_shift = 0; baud_x->tdc.tdc_filter = 0; baud_x->flags = cpu_to_le16(flags); baud_x->net = priv->index; baud_x->rsvd = 0; /* set len as # of 32bit words */ msg->hdr.len = sizeof(struct esd_usb_3_set_baudrate_msg_x) / sizeof(u32); msg->hdr.cmd = ESD_USB_CMD_SETBAUD; netdev_dbg(netdev, "ctrlmode=%#x/%#x, esd-net=%u, esd-mode=%#x, esd-flags=%#x\n", priv->can.ctrlmode, priv->can.ctrlmode_supported, priv->index, le16_to_cpu(baud_x->mode), flags); err = esd_usb_send_msg(priv->usb, msg); kfree(msg); return err; } static int esd_usb_get_berr_counter(const struct net_device *netdev, struct can_berr_counter *bec) { struct esd_usb_net_priv *priv = netdev_priv(netdev); bec->txerr = priv->bec.txerr; bec->rxerr = priv->bec.rxerr; return 0; } static int esd_usb_set_mode(struct net_device *netdev, enum can_mode mode) { switch (mode) { case CAN_MODE_START: netif_wake_queue(netdev); break; default: return -EOPNOTSUPP; } return 0; } static int esd_usb_probe_one_net(struct usb_interface *intf, int index) { struct esd_usb *dev = usb_get_intfdata(intf); struct net_device *netdev; struct esd_usb_net_priv *priv; int err = 0; int i; netdev = alloc_candev(sizeof(*priv), ESD_USB_MAX_TX_URBS); if (!netdev) { dev_err(&intf->dev, "couldn't alloc candev\n"); err = -ENOMEM; goto done; } priv = netdev_priv(netdev); init_usb_anchor(&priv->tx_submitted); atomic_set(&priv->active_tx_jobs, 0); for (i = 0; i < ESD_USB_MAX_TX_URBS; i++) priv->tx_contexts[i].echo_index = ESD_USB_MAX_TX_URBS; priv->usb = dev; priv->netdev = netdev; priv->index = index; priv->can.state = CAN_STATE_STOPPED; priv->can.ctrlmode_supported = CAN_CTRLMODE_LISTENONLY | CAN_CTRLMODE_CC_LEN8_DLC | CAN_CTRLMODE_BERR_REPORTING; switch (le16_to_cpu(dev->udev->descriptor.idProduct)) { case ESD_USB_CANUSB3_PRODUCT_ID: priv->can.clock.freq = ESD_USB_3_CAN_CLOCK; priv->can.ctrlmode_supported |= CAN_CTRLMODE_FD; priv->can.bittiming_const = &esd_usb_3_nom_bittiming_const; priv->can.data_bittiming_const = &esd_usb_3_data_bittiming_const; priv->can.do_set_bittiming = esd_usb_3_set_bittiming; priv->can.do_set_data_bittiming = esd_usb_3_set_bittiming; break; case ESD_USB_CANUSBM_PRODUCT_ID: priv->can.clock.freq = ESD_USB_M_CAN_CLOCK; priv->can.bittiming_const = &esd_usb_2_bittiming_const; priv->can.do_set_bittiming = esd_usb_2_set_bittiming; break; case ESD_USB_CANUSB2_PRODUCT_ID: default: priv->can.clock.freq = ESD_USB_2_CAN_CLOCK; priv->can.ctrlmode_supported |= CAN_CTRLMODE_3_SAMPLES; priv->can.bittiming_const = &esd_usb_2_bittiming_const; priv->can.do_set_bittiming = esd_usb_2_set_bittiming; break; } priv->can.do_set_mode = esd_usb_set_mode; priv->can.do_get_berr_counter = esd_usb_get_berr_counter; netdev->flags |= IFF_ECHO; /* we support local echo */ netdev->netdev_ops = &esd_usb_netdev_ops; netdev->ethtool_ops = &esd_usb_ethtool_ops; SET_NETDEV_DEV(netdev, &intf->dev); netdev->dev_id = index; err = register_candev(netdev); if (err) { dev_err(&intf->dev, "couldn't register CAN device: %d\n", err); free_candev(netdev); err = -ENOMEM; goto done; } dev->nets[index] = priv; netdev_info(netdev, "device %s registered\n", netdev->name); done: return err; } /* probe function for new USB devices * * check version information and number of available * CAN interfaces */ static int esd_usb_probe(struct usb_interface *intf, const struct usb_device_id *id) { struct esd_usb *dev; union esd_usb_msg *msg; int i, err; dev = kzalloc(sizeof(*dev), GFP_KERNEL); if (!dev) { err = -ENOMEM; goto done; } dev->udev = interface_to_usbdev(intf); init_usb_anchor(&dev->rx_submitted); usb_set_intfdata(intf, dev); msg = kmalloc(sizeof(*msg), GFP_KERNEL); if (!msg) { err = -ENOMEM; goto free_msg; } /* query number of CAN interfaces (nets) */ msg->hdr.cmd = ESD_USB_CMD_VERSION; msg->hdr.len = sizeof(struct esd_usb_version_msg) / sizeof(u32); /* # of 32bit words */ msg->version.rsvd = 0; msg->version.flags = 0; msg->version.drv_version = 0; err = esd_usb_send_msg(dev, msg); if (err < 0) { dev_err(&intf->dev, "sending version message failed\n"); goto free_msg; } err = esd_usb_wait_msg(dev, msg); if (err < 0) { dev_err(&intf->dev, "no version message answer\n"); goto free_msg; } dev->net_count = (int)msg->version_reply.nets; dev->version = le32_to_cpu(msg->version_reply.version); if (device_create_file(&intf->dev, &dev_attr_firmware)) dev_err(&intf->dev, "Couldn't create device file for firmware\n"); if (device_create_file(&intf->dev, &dev_attr_hardware)) dev_err(&intf->dev, "Couldn't create device file for hardware\n"); if (device_create_file(&intf->dev, &dev_attr_nets)) dev_err(&intf->dev, "Couldn't create device file for nets\n"); /* do per device probing */ for (i = 0; i < dev->net_count; i++) esd_usb_probe_one_net(intf, i); free_msg: kfree(msg); if (err) kfree(dev); done: return err; } /* called by the usb core when the device is removed from the system */ static void esd_usb_disconnect(struct usb_interface *intf) { struct esd_usb *dev = usb_get_intfdata(intf); struct net_device *netdev; int i; device_remove_file(&intf->dev, &dev_attr_firmware); device_remove_file(&intf->dev, &dev_attr_hardware); device_remove_file(&intf->dev, &dev_attr_nets); usb_set_intfdata(intf, NULL); if (dev) { for (i = 0; i < dev->net_count; i++) { if (dev->nets[i]) { netdev = dev->nets[i]->netdev; unregister_netdev(netdev); free_candev(netdev); } } unlink_all_urbs(dev); kfree(dev); } } /* usb specific object needed to register this driver with the usb subsystem */ static struct usb_driver esd_usb_driver = { .name = KBUILD_MODNAME, .probe = esd_usb_probe, .disconnect = esd_usb_disconnect, .id_table = esd_usb_table, }; module_usb_driver(esd_usb_driver);
240 295 116 116 116 153 154 147 147 179 180 180 183 183 176 176 32 32 13 13 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (C) 2016 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. */ #include <linux/kernel.h> #include <linux/init.h> #include <linux/module.h> #include <linux/cache.h> #include <linux/random.h> #include <linux/hrtimer.h> #include <linux/ktime.h> #include <linux/string.h> #include <linux/net.h> #include <linux/siphash.h> #include <net/secure_seq.h> #if IS_ENABLED(CONFIG_IPV6) || IS_ENABLED(CONFIG_INET) #include <linux/in6.h> #include <net/tcp.h> static siphash_aligned_key_t net_secret; static siphash_aligned_key_t ts_secret; #define EPHEMERAL_PORT_SHUFFLE_PERIOD (10 * HZ) static __always_inline void net_secret_init(void) { net_get_random_once(&net_secret, sizeof(net_secret)); } static __always_inline void ts_secret_init(void) { net_get_random_once(&ts_secret, sizeof(ts_secret)); } #endif #ifdef CONFIG_INET static u32 seq_scale(u32 seq) { /* * As close as possible to RFC 793, which * suggests using a 250 kHz clock. * Further reading shows this assumes 2 Mb/s networks. * For 10 Mb/s Ethernet, a 1 MHz clock is appropriate. * For 10 Gb/s Ethernet, a 1 GHz clock should be ok, but * we also need to limit the resolution so that the u32 seq * overlaps less than one time per MSL (2 minutes). * Choosing a clock of 64 ns period is OK. (period of 274 s) */ return seq + (ktime_get_real_ns() >> 6); } #endif #if IS_ENABLED(CONFIG_IPV6) u32 secure_tcpv6_ts_off(const struct net *net, const __be32 *saddr, const __be32 *daddr) { const struct { struct in6_addr saddr; struct in6_addr daddr; } __aligned(SIPHASH_ALIGNMENT) combined = { .saddr = *(struct in6_addr *)saddr, .daddr = *(struct in6_addr *)daddr, }; if (READ_ONCE(net->ipv4.sysctl_tcp_timestamps) != 1) return 0; ts_secret_init(); return siphash(&combined, offsetofend(typeof(combined), daddr), &ts_secret); } EXPORT_SYMBOL(secure_tcpv6_ts_off); u32 secure_tcpv6_seq(const __be32 *saddr, const __be32 *daddr, __be16 sport, __be16 dport) { const struct { struct in6_addr saddr; struct in6_addr daddr; __be16 sport; __be16 dport; } __aligned(SIPHASH_ALIGNMENT) combined = { .saddr = *(struct in6_addr *)saddr, .daddr = *(struct in6_addr *)daddr, .sport = sport, .dport = dport }; u32 hash; net_secret_init(); hash = siphash(&combined, offsetofend(typeof(combined), dport), &net_secret); return seq_scale(hash); } EXPORT_SYMBOL(secure_tcpv6_seq); u64 secure_ipv6_port_ephemeral(const __be32 *saddr, const __be32 *daddr, __be16 dport) { const struct { struct in6_addr saddr; struct in6_addr daddr; unsigned int timeseed; __be16 dport; } __aligned(SIPHASH_ALIGNMENT) combined = { .saddr = *(struct in6_addr *)saddr, .daddr = *(struct in6_addr *)daddr, .timeseed = jiffies / EPHEMERAL_PORT_SHUFFLE_PERIOD, .dport = dport, }; net_secret_init(); return siphash(&combined, offsetofend(typeof(combined), dport), &net_secret); } EXPORT_SYMBOL(secure_ipv6_port_ephemeral); #endif #ifdef CONFIG_INET u32 secure_tcp_ts_off(const struct net *net, __be32 saddr, __be32 daddr) { if (READ_ONCE(net->ipv4.sysctl_tcp_timestamps) != 1) return 0; ts_secret_init(); return siphash_2u32((__force u32)saddr, (__force u32)daddr, &ts_secret); } /* secure_tcp_seq_and_tsoff(a, b, 0, d) == secure_ipv4_port_ephemeral(a, b, d), * but fortunately, `sport' cannot be 0 in any circumstances. If this changes, * it would be easy enough to have the former function use siphash_4u32, passing * the arguments as separate u32. */ u32 secure_tcp_seq(__be32 saddr, __be32 daddr, __be16 sport, __be16 dport) { u32 hash; net_secret_init(); hash = siphash_3u32((__force u32)saddr, (__force u32)daddr, (__force u32)sport << 16 | (__force u32)dport, &net_secret); return seq_scale(hash); } EXPORT_SYMBOL_GPL(secure_tcp_seq); u64 secure_ipv4_port_ephemeral(__be32 saddr, __be32 daddr, __be16 dport) { net_secret_init(); return siphash_4u32((__force u32)saddr, (__force u32)daddr, (__force u16)dport, jiffies / EPHEMERAL_PORT_SHUFFLE_PERIOD, &net_secret); } EXPORT_SYMBOL_GPL(secure_ipv4_port_ephemeral); #endif #if IS_ENABLED(CONFIG_IP_DCCP) u64 secure_dccp_sequence_number(__be32 saddr, __be32 daddr, __be16 sport, __be16 dport) { u64 seq; net_secret_init(); seq = siphash_3u32((__force u32)saddr, (__force u32)daddr, (__force u32)sport << 16 | (__force u32)dport, &net_secret); seq += ktime_get_real_ns(); seq &= (1ull << 48) - 1; return seq; } EXPORT_SYMBOL(secure_dccp_sequence_number); #if IS_ENABLED(CONFIG_IPV6) u64 secure_dccpv6_sequence_number(__be32 *saddr, __be32 *daddr, __be16 sport, __be16 dport) { const struct { struct in6_addr saddr; struct in6_addr daddr; __be16 sport; __be16 dport; } __aligned(SIPHASH_ALIGNMENT) combined = { .saddr = *(struct in6_addr *)saddr, .daddr = *(struct in6_addr *)daddr, .sport = sport, .dport = dport }; u64 seq; net_secret_init(); seq = siphash(&combined, offsetofend(typeof(combined), dport), &net_secret); seq += ktime_get_real_ns(); seq &= (1ull << 48) - 1; return seq; } EXPORT_SYMBOL(secure_dccpv6_sequence_number); #endif #endif
9 9 9 3 3 15 15 15 15 2 2 2 8 8 8 8 5 5 5 5 5 5 6 6 6 6 8 13 13 3 2 2 1 1 2 2 2 8 7 7 1419 18 11 11 11 7 13 13 13 1421 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (C) 2007, 2008, 2009 Siemens AG */ #include <linux/slab.h> #include <linux/kernel.h> #include <linux/module.h> #include <linux/device.h> #include <net/cfg802154.h> #include <net/rtnetlink.h> #include "ieee802154.h" #include "nl802154.h" #include "sysfs.h" #include "core.h" /* name for sysfs, %d is appended */ #define PHY_NAME "phy" /* RCU-protected (and RTNL for writers) */ LIST_HEAD(cfg802154_rdev_list); int cfg802154_rdev_list_generation; struct wpan_phy *wpan_phy_find(const char *str) { struct device *dev; if (WARN_ON(!str)) return NULL; dev = class_find_device_by_name(&wpan_phy_class, str); if (!dev) return NULL; return container_of(dev, struct wpan_phy, dev); } EXPORT_SYMBOL(wpan_phy_find); struct wpan_phy_iter_data { int (*fn)(struct wpan_phy *phy, void *data); void *data; }; static int wpan_phy_iter(struct device *dev, void *_data) { struct wpan_phy_iter_data *wpid = _data; struct wpan_phy *phy = container_of(dev, struct wpan_phy, dev); return wpid->fn(phy, wpid->data); } int wpan_phy_for_each(int (*fn)(struct wpan_phy *phy, void *data), void *data) { struct wpan_phy_iter_data wpid = { .fn = fn, .data = data, }; return class_for_each_device(&wpan_phy_class, NULL, &wpid, wpan_phy_iter); } EXPORT_SYMBOL(wpan_phy_for_each); struct cfg802154_registered_device * cfg802154_rdev_by_wpan_phy_idx(int wpan_phy_idx) { struct cfg802154_registered_device *result = NULL, *rdev; ASSERT_RTNL(); list_for_each_entry(rdev, &cfg802154_rdev_list, list) { if (rdev->wpan_phy_idx == wpan_phy_idx) { result = rdev; break; } } return result; } struct wpan_phy *wpan_phy_idx_to_wpan_phy(int wpan_phy_idx) { struct cfg802154_registered_device *rdev; ASSERT_RTNL(); rdev = cfg802154_rdev_by_wpan_phy_idx(wpan_phy_idx); if (!rdev) return NULL; return &rdev->wpan_phy; } struct wpan_phy * wpan_phy_new(const struct cfg802154_ops *ops, size_t priv_size) { static atomic_t wpan_phy_counter = ATOMIC_INIT(0); struct cfg802154_registered_device *rdev; size_t alloc_size; alloc_size = sizeof(*rdev) + priv_size; rdev = kzalloc(alloc_size, GFP_KERNEL); if (!rdev) return NULL; rdev->ops = ops; rdev->wpan_phy_idx = atomic_inc_return(&wpan_phy_counter); if (unlikely(rdev->wpan_phy_idx < 0)) { /* ugh, wrapped! */ atomic_dec(&wpan_phy_counter); kfree(rdev); return NULL; } /* atomic_inc_return makes it start at 1, make it start at 0 */ rdev->wpan_phy_idx--; INIT_LIST_HEAD(&rdev->wpan_dev_list); device_initialize(&rdev->wpan_phy.dev); dev_set_name(&rdev->wpan_phy.dev, PHY_NAME "%d", rdev->wpan_phy_idx); rdev->wpan_phy.dev.class = &wpan_phy_class; rdev->wpan_phy.dev.platform_data = rdev; wpan_phy_net_set(&rdev->wpan_phy, &init_net); init_waitqueue_head(&rdev->dev_wait); init_waitqueue_head(&rdev->wpan_phy.sync_txq); spin_lock_init(&rdev->wpan_phy.queue_lock); return &rdev->wpan_phy; } EXPORT_SYMBOL(wpan_phy_new); int wpan_phy_register(struct wpan_phy *phy) { struct cfg802154_registered_device *rdev = wpan_phy_to_rdev(phy); int ret; rtnl_lock(); ret = device_add(&phy->dev); if (ret) { rtnl_unlock(); return ret; } list_add_rcu(&rdev->list, &cfg802154_rdev_list); cfg802154_rdev_list_generation++; /* TODO phy registered lock */ rtnl_unlock(); /* TODO nl802154 phy notify */ return 0; } EXPORT_SYMBOL(wpan_phy_register); void wpan_phy_unregister(struct wpan_phy *phy) { struct cfg802154_registered_device *rdev = wpan_phy_to_rdev(phy); wait_event(rdev->dev_wait, ({ int __count; rtnl_lock(); __count = rdev->opencount; rtnl_unlock(); __count == 0; })); rtnl_lock(); /* TODO nl802154 phy notify */ /* TODO phy registered lock */ WARN_ON(!list_empty(&rdev->wpan_dev_list)); /* First remove the hardware from everywhere, this makes * it impossible to find from userspace. */ list_del_rcu(&rdev->list); synchronize_rcu(); cfg802154_rdev_list_generation++; device_del(&phy->dev); rtnl_unlock(); } EXPORT_SYMBOL(wpan_phy_unregister); void wpan_phy_free(struct wpan_phy *phy) { put_device(&phy->dev); } EXPORT_SYMBOL(wpan_phy_free); static void cfg802154_free_peer_structures(struct wpan_dev *wpan_dev) { struct ieee802154_pan_device *child, *tmp; mutex_lock(&wpan_dev->association_lock); kfree(wpan_dev->parent); wpan_dev->parent = NULL; list_for_each_entry_safe(child, tmp, &wpan_dev->children, node) { list_del(&child->node); kfree(child); } wpan_dev->nchildren = 0; mutex_unlock(&wpan_dev->association_lock); } int cfg802154_switch_netns(struct cfg802154_registered_device *rdev, struct net *net) { struct wpan_dev *wpan_dev; int err = 0; list_for_each_entry(wpan_dev, &rdev->wpan_dev_list, list) { if (!wpan_dev->netdev) continue; wpan_dev->netdev->netns_local = false; err = dev_change_net_namespace(wpan_dev->netdev, net, "wpan%d"); if (err) break; wpan_dev->netdev->netns_local = true; } if (err) { /* failed -- clean up to old netns */ net = wpan_phy_net(&rdev->wpan_phy); list_for_each_entry_continue_reverse(wpan_dev, &rdev->wpan_dev_list, list) { if (!wpan_dev->netdev) continue; wpan_dev->netdev->netns_local = false; err = dev_change_net_namespace(wpan_dev->netdev, net, "wpan%d"); WARN_ON(err); wpan_dev->netdev->netns_local = true; } return err; } wpan_phy_net_set(&rdev->wpan_phy, net); err = device_rename(&rdev->wpan_phy.dev, dev_name(&rdev->wpan_phy.dev)); WARN_ON(err); return 0; } void cfg802154_dev_free(struct cfg802154_registered_device *rdev) { kfree(rdev); } static void cfg802154_update_iface_num(struct cfg802154_registered_device *rdev, int iftype, int num) { ASSERT_RTNL(); rdev->num_running_ifaces += num; } static int cfg802154_netdev_notifier_call(struct notifier_block *nb, unsigned long state, void *ptr) { struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct wpan_dev *wpan_dev = dev->ieee802154_ptr; struct cfg802154_registered_device *rdev; if (!wpan_dev) return NOTIFY_DONE; rdev = wpan_phy_to_rdev(wpan_dev->wpan_phy); /* TODO WARN_ON unspec type */ switch (state) { /* TODO NETDEV_DEVTYPE */ case NETDEV_REGISTER: dev->netns_local = true; wpan_dev->identifier = ++rdev->wpan_dev_id; list_add_rcu(&wpan_dev->list, &rdev->wpan_dev_list); rdev->devlist_generation++; mutex_init(&wpan_dev->association_lock); INIT_LIST_HEAD(&wpan_dev->children); wpan_dev->max_associations = SZ_16K; wpan_dev->netdev = dev; break; case NETDEV_DOWN: cfg802154_update_iface_num(rdev, wpan_dev->iftype, -1); rdev->opencount--; wake_up(&rdev->dev_wait); break; case NETDEV_UP: cfg802154_update_iface_num(rdev, wpan_dev->iftype, 1); rdev->opencount++; break; case NETDEV_UNREGISTER: cfg802154_free_peer_structures(wpan_dev); /* It is possible to get NETDEV_UNREGISTER * multiple times. To detect that, check * that the interface is still on the list * of registered interfaces, and only then * remove and clean it up. */ if (!list_empty(&wpan_dev->list)) { list_del_rcu(&wpan_dev->list); rdev->devlist_generation++; } /* synchronize (so that we won't find this netdev * from other code any more) and then clear the list * head so that the above code can safely check for * !list_empty() to avoid double-cleanup. */ synchronize_rcu(); INIT_LIST_HEAD(&wpan_dev->list); break; default: return NOTIFY_DONE; } return NOTIFY_OK; } static struct notifier_block cfg802154_netdev_notifier = { .notifier_call = cfg802154_netdev_notifier_call, }; static void __net_exit cfg802154_pernet_exit(struct net *net) { struct cfg802154_registered_device *rdev; rtnl_lock(); list_for_each_entry(rdev, &cfg802154_rdev_list, list) { if (net_eq(wpan_phy_net(&rdev->wpan_phy), net)) WARN_ON(cfg802154_switch_netns(rdev, &init_net)); } rtnl_unlock(); } static struct pernet_operations cfg802154_pernet_ops = { .exit = cfg802154_pernet_exit, }; static int __init wpan_phy_class_init(void) { int rc; rc = register_pernet_device(&cfg802154_pernet_ops); if (rc) goto err; rc = wpan_phy_sysfs_init(); if (rc) goto err_sysfs; rc = register_netdevice_notifier(&cfg802154_netdev_notifier); if (rc) goto err_nl; rc = ieee802154_nl_init(); if (rc) goto err_notifier; rc = nl802154_init(); if (rc) goto err_ieee802154_nl; return 0; err_ieee802154_nl: ieee802154_nl_exit(); err_notifier: unregister_netdevice_notifier(&cfg802154_netdev_notifier); err_nl: wpan_phy_sysfs_exit(); err_sysfs: unregister_pernet_device(&cfg802154_pernet_ops); err: return rc; } subsys_initcall(wpan_phy_class_init); static void __exit wpan_phy_class_exit(void) { nl802154_exit(); ieee802154_nl_exit(); unregister_netdevice_notifier(&cfg802154_netdev_notifier); wpan_phy_sysfs_exit(); unregister_pernet_device(&cfg802154_pernet_ops); } module_exit(wpan_phy_class_exit); MODULE_LICENSE("GPL v2"); MODULE_DESCRIPTION("IEEE 802.15.4 configuration interface"); MODULE_AUTHOR("Dmitry Eremin-Solenikov");
6 6 2 2 4 4 4 4 4 7 7 7 7 18 18 18 18 18 18 18 18 18 18 18 18 18 18 18 18 18 18 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 // SPDX-License-Identifier: GPL-2.0 /* * DMABUF System heap exporter * * Copyright (C) 2011 Google, Inc. * Copyright (C) 2019, 2020 Linaro Ltd. * * Portions based off of Andrew Davis' SRAM heap: * Copyright (C) 2019 Texas Instruments Incorporated - http://www.ti.com/ * Andrew F. Davis <afd@ti.com> */ #include <linux/dma-buf.h> #include <linux/dma-mapping.h> #include <linux/dma-heap.h> #include <linux/err.h> #include <linux/highmem.h> #include <linux/mm.h> #include <linux/module.h> #include <linux/scatterlist.h> #include <linux/slab.h> #include <linux/vmalloc.h> static struct dma_heap *sys_heap; struct system_heap_buffer { struct dma_heap *heap; struct list_head attachments; struct mutex lock; unsigned long len; struct sg_table sg_table; int vmap_cnt; void *vaddr; }; struct dma_heap_attachment { struct device *dev; struct sg_table *table; struct list_head list; bool mapped; }; #define LOW_ORDER_GFP (GFP_HIGHUSER | __GFP_ZERO) #define HIGH_ORDER_GFP (((GFP_HIGHUSER | __GFP_ZERO | __GFP_NOWARN \ | __GFP_NORETRY) & ~__GFP_RECLAIM) \ | __GFP_COMP) static gfp_t order_flags[] = {HIGH_ORDER_GFP, HIGH_ORDER_GFP, LOW_ORDER_GFP}; /* * The selection of the orders used for allocation (1MB, 64K, 4K) is designed * to match with the sizes often found in IOMMUs. Using order 4 pages instead * of order 0 pages can significantly improve the performance of many IOMMUs * by reducing TLB pressure and time spent updating page tables. */ static const unsigned int orders[] = {8, 4, 0}; #define NUM_ORDERS ARRAY_SIZE(orders) static struct sg_table *dup_sg_table(struct sg_table *table) { struct sg_table *new_table; int ret, i; struct scatterlist *sg, *new_sg; new_table = kzalloc(sizeof(*new_table), GFP_KERNEL); if (!new_table) return ERR_PTR(-ENOMEM); ret = sg_alloc_table(new_table, table->orig_nents, GFP_KERNEL); if (ret) { kfree(new_table); return ERR_PTR(-ENOMEM); } new_sg = new_table->sgl; for_each_sgtable_sg(table, sg, i) { sg_set_page(new_sg, sg_page(sg), sg->length, sg->offset); new_sg = sg_next(new_sg); } return new_table; } static int system_heap_attach(struct dma_buf *dmabuf, struct dma_buf_attachment *attachment) { struct system_heap_buffer *buffer = dmabuf->priv; struct dma_heap_attachment *a; struct sg_table *table; a = kzalloc(sizeof(*a), GFP_KERNEL); if (!a) return -ENOMEM; table = dup_sg_table(&buffer->sg_table); if (IS_ERR(table)) { kfree(a); return -ENOMEM; } a->table = table; a->dev = attachment->dev; INIT_LIST_HEAD(&a->list); a->mapped = false; attachment->priv = a; mutex_lock(&buffer->lock); list_add(&a->list, &buffer->attachments); mutex_unlock(&buffer->lock); return 0; } static void system_heap_detach(struct dma_buf *dmabuf, struct dma_buf_attachment *attachment) { struct system_heap_buffer *buffer = dmabuf->priv; struct dma_heap_attachment *a = attachment->priv; mutex_lock(&buffer->lock); list_del(&a->list); mutex_unlock(&buffer->lock); sg_free_table(a->table); kfree(a->table); kfree(a); } static struct sg_table *system_heap_map_dma_buf(struct dma_buf_attachment *attachment, enum dma_data_direction direction) { struct dma_heap_attachment *a = attachment->priv; struct sg_table *table = a->table; int ret; ret = dma_map_sgtable(attachment->dev, table, direction, 0); if (ret) return ERR_PTR(ret); a->mapped = true; return table; } static void system_heap_unmap_dma_buf(struct dma_buf_attachment *attachment, struct sg_table *table, enum dma_data_direction direction) { struct dma_heap_attachment *a = attachment->priv; a->mapped = false; dma_unmap_sgtable(attachment->dev, table, direction, 0); } static int system_heap_dma_buf_begin_cpu_access(struct dma_buf *dmabuf, enum dma_data_direction direction) { struct system_heap_buffer *buffer = dmabuf->priv; struct dma_heap_attachment *a; mutex_lock(&buffer->lock); if (buffer->vmap_cnt) invalidate_kernel_vmap_range(buffer->vaddr, buffer->len); list_for_each_entry(a, &buffer->attachments, list) { if (!a->mapped) continue; dma_sync_sgtable_for_cpu(a->dev, a->table, direction); } mutex_unlock(&buffer->lock); return 0; } static int system_heap_dma_buf_end_cpu_access(struct dma_buf *dmabuf, enum dma_data_direction direction) { struct system_heap_buffer *buffer = dmabuf->priv; struct dma_heap_attachment *a; mutex_lock(&buffer->lock); if (buffer->vmap_cnt) flush_kernel_vmap_range(buffer->vaddr, buffer->len); list_for_each_entry(a, &buffer->attachments, list) { if (!a->mapped) continue; dma_sync_sgtable_for_device(a->dev, a->table, direction); } mutex_unlock(&buffer->lock); return 0; } static int system_heap_mmap(struct dma_buf *dmabuf, struct vm_area_struct *vma) { struct system_heap_buffer *buffer = dmabuf->priv; struct sg_table *table = &buffer->sg_table; unsigned long addr = vma->vm_start; struct sg_page_iter piter; int ret; for_each_sgtable_page(table, &piter, vma->vm_pgoff) { struct page *page = sg_page_iter_page(&piter); ret = remap_pfn_range(vma, addr, page_to_pfn(page), PAGE_SIZE, vma->vm_page_prot); if (ret) return ret; addr += PAGE_SIZE; if (addr >= vma->vm_end) return 0; } return 0; } static void *system_heap_do_vmap(struct system_heap_buffer *buffer) { struct sg_table *table = &buffer->sg_table; int npages = PAGE_ALIGN(buffer->len) / PAGE_SIZE; struct page **pages = vmalloc(sizeof(struct page *) * npages); struct page **tmp = pages; struct sg_page_iter piter; void *vaddr; if (!pages) return ERR_PTR(-ENOMEM); for_each_sgtable_page(table, &piter, 0) { WARN_ON(tmp - pages >= npages); *tmp++ = sg_page_iter_page(&piter); } vaddr = vmap(pages, npages, VM_MAP, PAGE_KERNEL); vfree(pages); if (!vaddr) return ERR_PTR(-ENOMEM); return vaddr; } static int system_heap_vmap(struct dma_buf *dmabuf, struct iosys_map *map) { struct system_heap_buffer *buffer = dmabuf->priv; void *vaddr; int ret = 0; mutex_lock(&buffer->lock); if (buffer->vmap_cnt) { buffer->vmap_cnt++; iosys_map_set_vaddr(map, buffer->vaddr); goto out; } vaddr = system_heap_do_vmap(buffer); if (IS_ERR(vaddr)) { ret = PTR_ERR(vaddr); goto out; } buffer->vaddr = vaddr; buffer->vmap_cnt++; iosys_map_set_vaddr(map, buffer->vaddr); out: mutex_unlock(&buffer->lock); return ret; } static void system_heap_vunmap(struct dma_buf *dmabuf, struct iosys_map *map) { struct system_heap_buffer *buffer = dmabuf->priv; mutex_lock(&buffer->lock); if (!--buffer->vmap_cnt) { vunmap(buffer->vaddr); buffer->vaddr = NULL; } mutex_unlock(&buffer->lock); iosys_map_clear(map); } static void system_heap_dma_buf_release(struct dma_buf *dmabuf) { struct system_heap_buffer *buffer = dmabuf->priv; struct sg_table *table; struct scatterlist *sg; int i; table = &buffer->sg_table; for_each_sgtable_sg(table, sg, i) { struct page *page = sg_page(sg); __free_pages(page, compound_order(page)); } sg_free_table(table); kfree(buffer); } static const struct dma_buf_ops system_heap_buf_ops = { .attach = system_heap_attach, .detach = system_heap_detach, .map_dma_buf = system_heap_map_dma_buf, .unmap_dma_buf = system_heap_unmap_dma_buf, .begin_cpu_access = system_heap_dma_buf_begin_cpu_access, .end_cpu_access = system_heap_dma_buf_end_cpu_access, .mmap = system_heap_mmap, .vmap = system_heap_vmap, .vunmap = system_heap_vunmap, .release = system_heap_dma_buf_release, }; static struct page *alloc_largest_available(unsigned long size, unsigned int max_order) { struct page *page; int i; for (i = 0; i < NUM_ORDERS; i++) { if (size < (PAGE_SIZE << orders[i])) continue; if (max_order < orders[i]) continue; page = alloc_pages(order_flags[i], orders[i]); if (!page) continue; return page; } return NULL; } static struct dma_buf *system_heap_allocate(struct dma_heap *heap, unsigned long len, u32 fd_flags, u64 heap_flags) { struct system_heap_buffer *buffer; DEFINE_DMA_BUF_EXPORT_INFO(exp_info); unsigned long size_remaining = len; unsigned int max_order = orders[0]; struct dma_buf *dmabuf; struct sg_table *table; struct scatterlist *sg; struct list_head pages; struct page *page, *tmp_page; int i, ret = -ENOMEM; buffer = kzalloc(sizeof(*buffer), GFP_KERNEL); if (!buffer) return ERR_PTR(-ENOMEM); INIT_LIST_HEAD(&buffer->attachments); mutex_init(&buffer->lock); buffer->heap = heap; buffer->len = len; INIT_LIST_HEAD(&pages); i = 0; while (size_remaining > 0) { /* * Avoid trying to allocate memory if the process * has been killed by SIGKILL */ if (fatal_signal_pending(current)) { ret = -EINTR; goto free_buffer; } page = alloc_largest_available(size_remaining, max_order); if (!page) goto free_buffer; list_add_tail(&page->lru, &pages); size_remaining -= page_size(page); max_order = compound_order(page); i++; } table = &buffer->sg_table; if (sg_alloc_table(table, i, GFP_KERNEL)) goto free_buffer; sg = table->sgl; list_for_each_entry_safe(page, tmp_page, &pages, lru) { sg_set_page(sg, page, page_size(page), 0); sg = sg_next(sg); list_del(&page->lru); } /* create the dmabuf */ exp_info.exp_name = dma_heap_get_name(heap); exp_info.ops = &system_heap_buf_ops; exp_info.size = buffer->len; exp_info.flags = fd_flags; exp_info.priv = buffer; dmabuf = dma_buf_export(&exp_info); if (IS_ERR(dmabuf)) { ret = PTR_ERR(dmabuf); goto free_pages; } return dmabuf; free_pages: for_each_sgtable_sg(table, sg, i) { struct page *p = sg_page(sg); __free_pages(p, compound_order(p)); } sg_free_table(table); free_buffer: list_for_each_entry_safe(page, tmp_page, &pages, lru) __free_pages(page, compound_order(page)); kfree(buffer); return ERR_PTR(ret); } static const struct dma_heap_ops system_heap_ops = { .allocate = system_heap_allocate, }; static int __init system_heap_create(void) { struct dma_heap_export_info exp_info; exp_info.name = "system"; exp_info.ops = &system_heap_ops; exp_info.priv = NULL; sys_heap = dma_heap_add(&exp_info); if (IS_ERR(sys_heap)) return PTR_ERR(sys_heap); return 0; } module_init(system_heap_create);
13 13 13 13 13 13 13 13 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 /* * Copyright (c) 2006 Intel Corporation. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenIB.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ #include <linux/completion.h> #include <linux/dma-mapping.h> #include <linux/err.h> #include <linux/interrupt.h> #include <linux/export.h> #include <linux/slab.h> #include <linux/bitops.h> #include <linux/random.h> #include <rdma/ib_cache.h> #include "sa.h" static int mcast_add_one(struct ib_device *device); static void mcast_remove_one(struct ib_device *device, void *client_data); static struct ib_client mcast_client = { .name = "ib_multicast", .add = mcast_add_one, .remove = mcast_remove_one }; static struct ib_sa_client sa_client; static struct workqueue_struct *mcast_wq; static union ib_gid mgid0; struct mcast_device; struct mcast_port { struct mcast_device *dev; spinlock_t lock; struct rb_root table; refcount_t refcount; struct completion comp; u32 port_num; }; struct mcast_device { struct ib_device *device; struct ib_event_handler event_handler; int start_port; int end_port; struct mcast_port port[]; }; enum mcast_state { MCAST_JOINING, MCAST_MEMBER, MCAST_ERROR, }; enum mcast_group_state { MCAST_IDLE, MCAST_BUSY, MCAST_GROUP_ERROR, MCAST_PKEY_EVENT }; enum { MCAST_INVALID_PKEY_INDEX = 0xFFFF }; struct mcast_member; struct mcast_group { struct ib_sa_mcmember_rec rec; struct rb_node node; struct mcast_port *port; spinlock_t lock; struct work_struct work; struct list_head pending_list; struct list_head active_list; struct mcast_member *last_join; int members[NUM_JOIN_MEMBERSHIP_TYPES]; atomic_t refcount; enum mcast_group_state state; struct ib_sa_query *query; u16 pkey_index; u8 leave_state; int retries; }; struct mcast_member { struct ib_sa_multicast multicast; struct ib_sa_client *client; struct mcast_group *group; struct list_head list; enum mcast_state state; refcount_t refcount; struct completion comp; }; static void join_handler(int status, struct ib_sa_mcmember_rec *rec, void *context); static void leave_handler(int status, struct ib_sa_mcmember_rec *rec, void *context); static struct mcast_group *mcast_find(struct mcast_port *port, union ib_gid *mgid) { struct rb_node *node = port->table.rb_node; struct mcast_group *group; int ret; while (node) { group = rb_entry(node, struct mcast_group, node); ret = memcmp(mgid->raw, group->rec.mgid.raw, sizeof *mgid); if (!ret) return group; if (ret < 0) node = node->rb_left; else node = node->rb_right; } return NULL; } static struct mcast_group *mcast_insert(struct mcast_port *port, struct mcast_group *group, int allow_duplicates) { struct rb_node **link = &port->table.rb_node; struct rb_node *parent = NULL; struct mcast_group *cur_group; int ret; while (*link) { parent = *link; cur_group = rb_entry(parent, struct mcast_group, node); ret = memcmp(group->rec.mgid.raw, cur_group->rec.mgid.raw, sizeof group->rec.mgid); if (ret < 0) link = &(*link)->rb_left; else if (ret > 0) link = &(*link)->rb_right; else if (allow_duplicates) link = &(*link)->rb_left; else return cur_group; } rb_link_node(&group->node, parent, link); rb_insert_color(&group->node, &port->table); return NULL; } static void deref_port(struct mcast_port *port) { if (refcount_dec_and_test(&port->refcount)) complete(&port->comp); } static void release_group(struct mcast_group *group) { struct mcast_port *port = group->port; unsigned long flags; spin_lock_irqsave(&port->lock, flags); if (atomic_dec_and_test(&group->refcount)) { rb_erase(&group->node, &port->table); spin_unlock_irqrestore(&port->lock, flags); kfree(group); deref_port(port); } else spin_unlock_irqrestore(&port->lock, flags); } static void deref_member(struct mcast_member *member) { if (refcount_dec_and_test(&member->refcount)) complete(&member->comp); } static void queue_join(struct mcast_member *member) { struct mcast_group *group = member->group; unsigned long flags; spin_lock_irqsave(&group->lock, flags); list_add_tail(&member->list, &group->pending_list); if (group->state == MCAST_IDLE) { group->state = MCAST_BUSY; atomic_inc(&group->refcount); queue_work(mcast_wq, &group->work); } spin_unlock_irqrestore(&group->lock, flags); } /* * A multicast group has four types of members: full member, non member, * sendonly non member and sendonly full member. * We need to keep track of the number of members of each * type based on their join state. Adjust the number of members the belong to * the specified join states. */ static void adjust_membership(struct mcast_group *group, u8 join_state, int inc) { int i; for (i = 0; i < NUM_JOIN_MEMBERSHIP_TYPES; i++, join_state >>= 1) if (join_state & 0x1) group->members[i] += inc; } /* * If a multicast group has zero members left for a particular join state, but * the group is still a member with the SA, we need to leave that join state. * Determine which join states we still belong to, but that do not have any * active members. */ static u8 get_leave_state(struct mcast_group *group) { u8 leave_state = 0; int i; for (i = 0; i < NUM_JOIN_MEMBERSHIP_TYPES; i++) if (!group->members[i]) leave_state |= (0x1 << i); return leave_state & group->rec.join_state; } static int check_selector(ib_sa_comp_mask comp_mask, ib_sa_comp_mask selector_mask, ib_sa_comp_mask value_mask, u8 selector, u8 src_value, u8 dst_value) { int err; if (!(comp_mask & selector_mask) || !(comp_mask & value_mask)) return 0; switch (selector) { case IB_SA_GT: err = (src_value <= dst_value); break; case IB_SA_LT: err = (src_value >= dst_value); break; case IB_SA_EQ: err = (src_value != dst_value); break; default: err = 0; break; } return err; } static int cmp_rec(struct ib_sa_mcmember_rec *src, struct ib_sa_mcmember_rec *dst, ib_sa_comp_mask comp_mask) { /* MGID must already match */ if (comp_mask & IB_SA_MCMEMBER_REC_PORT_GID && memcmp(&src->port_gid, &dst->port_gid, sizeof src->port_gid)) return -EINVAL; if (comp_mask & IB_SA_MCMEMBER_REC_QKEY && src->qkey != dst->qkey) return -EINVAL; if (comp_mask & IB_SA_MCMEMBER_REC_MLID && src->mlid != dst->mlid) return -EINVAL; if (check_selector(comp_mask, IB_SA_MCMEMBER_REC_MTU_SELECTOR, IB_SA_MCMEMBER_REC_MTU, dst->mtu_selector, src->mtu, dst->mtu)) return -EINVAL; if (comp_mask & IB_SA_MCMEMBER_REC_TRAFFIC_CLASS && src->traffic_class != dst->traffic_class) return -EINVAL; if (comp_mask & IB_SA_MCMEMBER_REC_PKEY && src->pkey != dst->pkey) return -EINVAL; if (check_selector(comp_mask, IB_SA_MCMEMBER_REC_RATE_SELECTOR, IB_SA_MCMEMBER_REC_RATE, dst->rate_selector, src->rate, dst->rate)) return -EINVAL; if (check_selector(comp_mask, IB_SA_MCMEMBER_REC_PACKET_LIFE_TIME_SELECTOR, IB_SA_MCMEMBER_REC_PACKET_LIFE_TIME, dst->packet_life_time_selector, src->packet_life_time, dst->packet_life_time)) return -EINVAL; if (comp_mask & IB_SA_MCMEMBER_REC_SL && src->sl != dst->sl) return -EINVAL; if (comp_mask & IB_SA_MCMEMBER_REC_FLOW_LABEL && src->flow_label != dst->flow_label) return -EINVAL; if (comp_mask & IB_SA_MCMEMBER_REC_HOP_LIMIT && src->hop_limit != dst->hop_limit) return -EINVAL; if (comp_mask & IB_SA_MCMEMBER_REC_SCOPE && src->scope != dst->scope) return -EINVAL; /* join_state checked separately, proxy_join ignored */ return 0; } static int send_join(struct mcast_group *group, struct mcast_member *member) { struct mcast_port *port = group->port; int ret; group->last_join = member; ret = ib_sa_mcmember_rec_query(&sa_client, port->dev->device, port->port_num, IB_MGMT_METHOD_SET, &member->multicast.rec, member->multicast.comp_mask, 3000, GFP_KERNEL, join_handler, group, &group->query); return (ret > 0) ? 0 : ret; } static int send_leave(struct mcast_group *group, u8 leave_state) { struct mcast_port *port = group->port; struct ib_sa_mcmember_rec rec; int ret; rec = group->rec; rec.join_state = leave_state; group->leave_state = leave_state; ret = ib_sa_mcmember_rec_query(&sa_client, port->dev->device, port->port_num, IB_SA_METHOD_DELETE, &rec, IB_SA_MCMEMBER_REC_MGID | IB_SA_MCMEMBER_REC_PORT_GID | IB_SA_MCMEMBER_REC_JOIN_STATE, 3000, GFP_KERNEL, leave_handler, group, &group->query); return (ret > 0) ? 0 : ret; } static void join_group(struct mcast_group *group, struct mcast_member *member, u8 join_state) { member->state = MCAST_MEMBER; adjust_membership(group, join_state, 1); group->rec.join_state |= join_state; member->multicast.rec = group->rec; member->multicast.rec.join_state = join_state; list_move(&member->list, &group->active_list); } static int fail_join(struct mcast_group *group, struct mcast_member *member, int status) { spin_lock_irq(&group->lock); list_del_init(&member->list); spin_unlock_irq(&group->lock); return member->multicast.callback(status, &member->multicast); } static void process_group_error(struct mcast_group *group) { struct mcast_member *member; int ret = 0; u16 pkey_index; if (group->state == MCAST_PKEY_EVENT) ret = ib_find_pkey(group->port->dev->device, group->port->port_num, be16_to_cpu(group->rec.pkey), &pkey_index); spin_lock_irq(&group->lock); if (group->state == MCAST_PKEY_EVENT && !ret && group->pkey_index == pkey_index) goto out; while (!list_empty(&group->active_list)) { member = list_entry(group->active_list.next, struct mcast_member, list); refcount_inc(&member->refcount); list_del_init(&member->list); adjust_membership(group, member->multicast.rec.join_state, -1); member->state = MCAST_ERROR; spin_unlock_irq(&group->lock); ret = member->multicast.callback(-ENETRESET, &member->multicast); deref_member(member); if (ret) ib_sa_free_multicast(&member->multicast); spin_lock_irq(&group->lock); } group->rec.join_state = 0; out: group->state = MCAST_BUSY; spin_unlock_irq(&group->lock); } static void mcast_work_handler(struct work_struct *work) { struct mcast_group *group; struct mcast_member *member; struct ib_sa_multicast *multicast; int status, ret; u8 join_state; group = container_of(work, typeof(*group), work); retest: spin_lock_irq(&group->lock); while (!list_empty(&group->pending_list) || (group->state != MCAST_BUSY)) { if (group->state != MCAST_BUSY) { spin_unlock_irq(&group->lock); process_group_error(group); goto retest; } member = list_entry(group->pending_list.next, struct mcast_member, list); multicast = &member->multicast; join_state = multicast->rec.join_state; refcount_inc(&member->refcount); if (join_state == (group->rec.join_state & join_state)) { status = cmp_rec(&group->rec, &multicast->rec, multicast->comp_mask); if (!status) join_group(group, member, join_state); else list_del_init(&member->list); spin_unlock_irq(&group->lock); ret = multicast->callback(status, multicast); } else { spin_unlock_irq(&group->lock); status = send_join(group, member); if (!status) { deref_member(member); return; } ret = fail_join(group, member, status); } deref_member(member); if (ret) ib_sa_free_multicast(&member->multicast); spin_lock_irq(&group->lock); } join_state = get_leave_state(group); if (join_state) { group->rec.join_state &= ~join_state; spin_unlock_irq(&group->lock); if (send_leave(group, join_state)) goto retest; } else { group->state = MCAST_IDLE; spin_unlock_irq(&group->lock); release_group(group); } } /* * Fail a join request if it is still active - at the head of the pending queue. */ static void process_join_error(struct mcast_group *group, int status) { struct mcast_member *member; int ret; spin_lock_irq(&group->lock); member = list_entry(group->pending_list.next, struct mcast_member, list); if (group->last_join == member) { refcount_inc(&member->refcount); list_del_init(&member->list); spin_unlock_irq(&group->lock); ret = member->multicast.callback(status, &member->multicast); deref_member(member); if (ret) ib_sa_free_multicast(&member->multicast); } else spin_unlock_irq(&group->lock); } static void join_handler(int status, struct ib_sa_mcmember_rec *rec, void *context) { struct mcast_group *group = context; u16 pkey_index = MCAST_INVALID_PKEY_INDEX; if (status) process_join_error(group, status); else { int mgids_changed, is_mgid0; if (ib_find_pkey(group->port->dev->device, group->port->port_num, be16_to_cpu(rec->pkey), &pkey_index)) pkey_index = MCAST_INVALID_PKEY_INDEX; spin_lock_irq(&group->port->lock); if (group->state == MCAST_BUSY && group->pkey_index == MCAST_INVALID_PKEY_INDEX) group->pkey_index = pkey_index; mgids_changed = memcmp(&rec->mgid, &group->rec.mgid, sizeof(group->rec.mgid)); group->rec = *rec; if (mgids_changed) { rb_erase(&group->node, &group->port->table); is_mgid0 = !memcmp(&mgid0, &group->rec.mgid, sizeof(mgid0)); mcast_insert(group->port, group, is_mgid0); } spin_unlock_irq(&group->port->lock); } mcast_work_handler(&group->work); } static void leave_handler(int status, struct ib_sa_mcmember_rec *rec, void *context) { struct mcast_group *group = context; if (status && group->retries > 0 && !send_leave(group, group->leave_state)) group->retries--; else mcast_work_handler(&group->work); } static struct mcast_group *acquire_group(struct mcast_port *port, union ib_gid *mgid, gfp_t gfp_mask) { struct mcast_group *group, *cur_group; unsigned long flags; int is_mgid0; is_mgid0 = !memcmp(&mgid0, mgid, sizeof mgid0); if (!is_mgid0) { spin_lock_irqsave(&port->lock, flags); group = mcast_find(port, mgid); if (group) goto found; spin_unlock_irqrestore(&port->lock, flags); } group = kzalloc(sizeof *group, gfp_mask); if (!group) return NULL; group->retries = 3; group->port = port; group->rec.mgid = *mgid; group->pkey_index = MCAST_INVALID_PKEY_INDEX; INIT_LIST_HEAD(&group->pending_list); INIT_LIST_HEAD(&group->active_list); INIT_WORK(&group->work, mcast_work_handler); spin_lock_init(&group->lock); spin_lock_irqsave(&port->lock, flags); cur_group = mcast_insert(port, group, is_mgid0); if (cur_group) { kfree(group); group = cur_group; } else refcount_inc(&port->refcount); found: atomic_inc(&group->refcount); spin_unlock_irqrestore(&port->lock, flags); return group; } /* * We serialize all join requests to a single group to make our lives much * easier. Otherwise, two users could try to join the same group * simultaneously, with different configurations, one could leave while the * join is in progress, etc., which makes locking around error recovery * difficult. */ struct ib_sa_multicast * ib_sa_join_multicast(struct ib_sa_client *client, struct ib_device *device, u32 port_num, struct ib_sa_mcmember_rec *rec, ib_sa_comp_mask comp_mask, gfp_t gfp_mask, int (*callback)(int status, struct ib_sa_multicast *multicast), void *context) { struct mcast_device *dev; struct mcast_member *member; struct ib_sa_multicast *multicast; int ret; dev = ib_get_client_data(device, &mcast_client); if (!dev) return ERR_PTR(-ENODEV); member = kmalloc(sizeof *member, gfp_mask); if (!member) return ERR_PTR(-ENOMEM); ib_sa_client_get(client); member->client = client; member->multicast.rec = *rec; member->multicast.comp_mask = comp_mask; member->multicast.callback = callback; member->multicast.context = context; init_completion(&member->comp); refcount_set(&member->refcount, 1); member->state = MCAST_JOINING; member->group = acquire_group(&dev->port[port_num - dev->start_port], &rec->mgid, gfp_mask); if (!member->group) { ret = -ENOMEM; goto err; } /* * The user will get the multicast structure in their callback. They * could then free the multicast structure before we can return from * this routine. So we save the pointer to return before queuing * any callback. */ multicast = &member->multicast; queue_join(member); return multicast; err: ib_sa_client_put(client); kfree(member); return ERR_PTR(ret); } EXPORT_SYMBOL(ib_sa_join_multicast); void ib_sa_free_multicast(struct ib_sa_multicast *multicast) { struct mcast_member *member; struct mcast_group *group; member = container_of(multicast, struct mcast_member, multicast); group = member->group; spin_lock_irq(&group->lock); if (member->state == MCAST_MEMBER) adjust_membership(group, multicast->rec.join_state, -1); list_del_init(&member->list); if (group->state == MCAST_IDLE) { group->state = MCAST_BUSY; spin_unlock_irq(&group->lock); /* Continue to hold reference on group until callback */ queue_work(mcast_wq, &group->work); } else { spin_unlock_irq(&group->lock); release_group(group); } deref_member(member); wait_for_completion(&member->comp); ib_sa_client_put(member->client); kfree(member); } EXPORT_SYMBOL(ib_sa_free_multicast); int ib_sa_get_mcmember_rec(struct ib_device *device, u32 port_num, union ib_gid *mgid, struct ib_sa_mcmember_rec *rec) { struct mcast_device *dev; struct mcast_port *port; struct mcast_group *group; unsigned long flags; int ret = 0; dev = ib_get_client_data(device, &mcast_client); if (!dev) return -ENODEV; port = &dev->port[port_num - dev->start_port]; spin_lock_irqsave(&port->lock, flags); group = mcast_find(port, mgid); if (group) *rec = group->rec; else ret = -EADDRNOTAVAIL; spin_unlock_irqrestore(&port->lock, flags); return ret; } EXPORT_SYMBOL(ib_sa_get_mcmember_rec); /** * ib_init_ah_from_mcmember - Initialize AH attribute from multicast * member record and gid of the device. * @device: RDMA device * @port_num: Port of the rdma device to consider * @rec: Multicast member record to use * @ndev: Optional netdevice, applicable only for RoCE * @gid_type: GID type to consider * @ah_attr: AH attribute to fillup on successful completion * * ib_init_ah_from_mcmember() initializes AH attribute based on multicast * member record and other device properties. On success the caller is * responsible to call rdma_destroy_ah_attr on the ah_attr. Returns 0 on * success or appropriate error code. * */ int ib_init_ah_from_mcmember(struct ib_device *device, u32 port_num, struct ib_sa_mcmember_rec *rec, struct net_device *ndev, enum ib_gid_type gid_type, struct rdma_ah_attr *ah_attr) { const struct ib_gid_attr *sgid_attr; /* GID table is not based on the netdevice for IB link layer, * so ignore ndev during search. */ if (rdma_protocol_ib(device, port_num)) ndev = NULL; else if (!rdma_protocol_roce(device, port_num)) return -EINVAL; sgid_attr = rdma_find_gid_by_port(device, &rec->port_gid, gid_type, port_num, ndev); if (IS_ERR(sgid_attr)) return PTR_ERR(sgid_attr); memset(ah_attr, 0, sizeof(*ah_attr)); ah_attr->type = rdma_ah_find_type(device, port_num); rdma_ah_set_dlid(ah_attr, be16_to_cpu(rec->mlid)); rdma_ah_set_sl(ah_attr, rec->sl); rdma_ah_set_port_num(ah_attr, port_num); rdma_ah_set_static_rate(ah_attr, rec->rate); rdma_move_grh_sgid_attr(ah_attr, &rec->mgid, be32_to_cpu(rec->flow_label), rec->hop_limit, rec->traffic_class, sgid_attr); return 0; } EXPORT_SYMBOL(ib_init_ah_from_mcmember); static void mcast_groups_event(struct mcast_port *port, enum mcast_group_state state) { struct mcast_group *group; struct rb_node *node; unsigned long flags; spin_lock_irqsave(&port->lock, flags); for (node = rb_first(&port->table); node; node = rb_next(node)) { group = rb_entry(node, struct mcast_group, node); spin_lock(&group->lock); if (group->state == MCAST_IDLE) { atomic_inc(&group->refcount); queue_work(mcast_wq, &group->work); } if (group->state != MCAST_GROUP_ERROR) group->state = state; spin_unlock(&group->lock); } spin_unlock_irqrestore(&port->lock, flags); } static void mcast_event_handler(struct ib_event_handler *handler, struct ib_event *event) { struct mcast_device *dev; int index; dev = container_of(handler, struct mcast_device, event_handler); if (!rdma_cap_ib_mcast(dev->device, event->element.port_num)) return; index = event->element.port_num - dev->start_port; switch (event->event) { case IB_EVENT_PORT_ERR: case IB_EVENT_LID_CHANGE: case IB_EVENT_CLIENT_REREGISTER: mcast_groups_event(&dev->port[index], MCAST_GROUP_ERROR); break; case IB_EVENT_PKEY_CHANGE: mcast_groups_event(&dev->port[index], MCAST_PKEY_EVENT); break; default: break; } } static int mcast_add_one(struct ib_device *device) { struct mcast_device *dev; struct mcast_port *port; int i; int count = 0; dev = kmalloc(struct_size(dev, port, device->phys_port_cnt), GFP_KERNEL); if (!dev) return -ENOMEM; dev->start_port = rdma_start_port(device); dev->end_port = rdma_end_port(device); for (i = 0; i <= dev->end_port - dev->start_port; i++) { if (!rdma_cap_ib_mcast(device, dev->start_port + i)) continue; port = &dev->port[i]; port->dev = dev; port->port_num = dev->start_port + i; spin_lock_init(&port->lock); port->table = RB_ROOT; init_completion(&port->comp); refcount_set(&port->refcount, 1); ++count; } if (!count) { kfree(dev); return -EOPNOTSUPP; } dev->device = device; ib_set_client_data(device, &mcast_client, dev); INIT_IB_EVENT_HANDLER(&dev->event_handler, device, mcast_event_handler); ib_register_event_handler(&dev->event_handler); return 0; } static void mcast_remove_one(struct ib_device *device, void *client_data) { struct mcast_device *dev = client_data; struct mcast_port *port; int i; ib_unregister_event_handler(&dev->event_handler); flush_workqueue(mcast_wq); for (i = 0; i <= dev->end_port - dev->start_port; i++) { if (rdma_cap_ib_mcast(device, dev->start_port + i)) { port = &dev->port[i]; deref_port(port); wait_for_completion(&port->comp); } } kfree(dev); } int mcast_init(void) { int ret; mcast_wq = alloc_ordered_workqueue("ib_mcast", WQ_MEM_RECLAIM); if (!mcast_wq) return -ENOMEM; ib_sa_register_client(&sa_client); ret = ib_register_client(&mcast_client); if (ret) goto err; return 0; err: ib_sa_unregister_client(&sa_client); destroy_workqueue(mcast_wq); return ret; } void mcast_cleanup(void) { ib_unregister_client(&mcast_client); ib_sa_unregister_client(&sa_client); destroy_workqueue(mcast_wq); }
7533 102 27312 15772 28642 8415 10917 393 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 /* SPDX-License-Identifier: GPL-2.0 */ /* * This header provides generic wrappers for memory access instrumentation that * the compiler cannot emit for: KASAN, KCSAN, KMSAN. */ #ifndef _LINUX_INSTRUMENTED_H #define _LINUX_INSTRUMENTED_H #include <linux/compiler.h> #include <linux/kasan-checks.h> #include <linux/kcsan-checks.h> #include <linux/kmsan-checks.h> #include <linux/types.h> /** * instrument_read - instrument regular read access * @v: address of access * @size: size of access * * Instrument a regular read access. The instrumentation should be inserted * before the actual read happens. */ static __always_inline void instrument_read(const volatile void *v, size_t size) { kasan_check_read(v, size); kcsan_check_read(v, size); } /** * instrument_write - instrument regular write access * @v: address of access * @size: size of access * * Instrument a regular write access. The instrumentation should be inserted * before the actual write happens. */ static __always_inline void instrument_write(const volatile void *v, size_t size) { kasan_check_write(v, size); kcsan_check_write(v, size); } /** * instrument_read_write - instrument regular read-write access * @v: address of access * @size: size of access * * Instrument a regular write access. The instrumentation should be inserted * before the actual write happens. */ static __always_inline void instrument_read_write(const volatile void *v, size_t size) { kasan_check_write(v, size); kcsan_check_read_write(v, size); } /** * instrument_atomic_read - instrument atomic read access * @v: address of access * @size: size of access * * Instrument an atomic read access. The instrumentation should be inserted * before the actual read happens. */ static __always_inline void instrument_atomic_read(const volatile void *v, size_t size) { kasan_check_read(v, size); kcsan_check_atomic_read(v, size); } /** * instrument_atomic_write - instrument atomic write access * @v: address of access * @size: size of access * * Instrument an atomic write access. The instrumentation should be inserted * before the actual write happens. */ static __always_inline void instrument_atomic_write(const volatile void *v, size_t size) { kasan_check_write(v, size); kcsan_check_atomic_write(v, size); } /** * instrument_atomic_read_write - instrument atomic read-write access * @v: address of access * @size: size of access * * Instrument an atomic read-write access. The instrumentation should be * inserted before the actual write happens. */ static __always_inline void instrument_atomic_read_write(const volatile void *v, size_t size) { kasan_check_write(v, size); kcsan_check_atomic_read_write(v, size); } /** * instrument_copy_to_user - instrument reads of copy_to_user * @to: destination address * @from: source address * @n: number of bytes to copy * * Instrument reads from kernel memory, that are due to copy_to_user (and * variants). The instrumentation must be inserted before the accesses. */ static __always_inline void instrument_copy_to_user(void __user *to, const void *from, unsigned long n) { kasan_check_read(from, n); kcsan_check_read(from, n); kmsan_copy_to_user(to, from, n, 0); } /** * instrument_copy_from_user_before - add instrumentation before copy_from_user * @to: destination address * @from: source address * @n: number of bytes to copy * * Instrument writes to kernel memory, that are due to copy_from_user (and * variants). The instrumentation should be inserted before the accesses. */ static __always_inline void instrument_copy_from_user_before(const void *to, const void __user *from, unsigned long n) { kasan_check_write(to, n); kcsan_check_write(to, n); } /** * instrument_copy_from_user_after - add instrumentation after copy_from_user * @to: destination address * @from: source address * @n: number of bytes to copy * @left: number of bytes not copied (as returned by copy_from_user) * * Instrument writes to kernel memory, that are due to copy_from_user (and * variants). The instrumentation should be inserted after the accesses. */ static __always_inline void instrument_copy_from_user_after(const void *to, const void __user *from, unsigned long n, unsigned long left) { kmsan_unpoison_memory(to, n - left); } /** * instrument_memcpy_before - add instrumentation before non-instrumented memcpy * @to: destination address * @from: source address * @n: number of bytes to copy * * Instrument memory accesses that happen in custom memcpy implementations. The * instrumentation should be inserted before the memcpy call. */ static __always_inline void instrument_memcpy_before(void *to, const void *from, unsigned long n) { kasan_check_write(to, n); kasan_check_read(from, n); kcsan_check_write(to, n); kcsan_check_read(from, n); } /** * instrument_memcpy_after - add instrumentation after non-instrumented memcpy * @to: destination address * @from: source address * @n: number of bytes to copy * @left: number of bytes not copied (if known) * * Instrument memory accesses that happen in custom memcpy implementations. The * instrumentation should be inserted after the memcpy call. */ static __always_inline void instrument_memcpy_after(void *to, const void *from, unsigned long n, unsigned long left) { kmsan_memmove(to, from, n - left); } /** * instrument_get_user() - add instrumentation to get_user()-like macros * @to: destination variable, may not be address-taken * * get_user() and friends are fragile, so it may depend on the implementation * whether the instrumentation happens before or after the data is copied from * the userspace. */ #define instrument_get_user(to) \ ({ \ u64 __tmp = (u64)(to); \ kmsan_unpoison_memory(&__tmp, sizeof(__tmp)); \ to = __tmp; \ }) /** * instrument_put_user() - add instrumentation to put_user()-like macros * @from: source address * @ptr: userspace pointer to copy to * @size: number of bytes to copy * * put_user() and friends are fragile, so it may depend on the implementation * whether the instrumentation happens before or after the data is copied from * the userspace. */ #define instrument_put_user(from, ptr, size) \ ({ \ kmsan_copy_to_user(ptr, &from, sizeof(from), 0); \ }) #endif /* _LINUX_INSTRUMENTED_H */
14 14 14 14 9 9 9 9 9 5 5 1 5 3 2 1 10 10 1 2 2 1 2 2 3 10 10 3 3 3 3 3 3 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 3 3 3 2 3 3 3 3 1 2 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 // SPDX-License-Identifier: GPL-2.0-or-later /* * PPP synchronous tty channel driver for Linux. * * This is a ppp channel driver that can be used with tty device drivers * that are frame oriented, such as synchronous HDLC devices. * * Complete PPP frames without encoding/decoding are exchanged between * the channel driver and the device driver. * * The async map IOCTL codes are implemented to keep the user mode * applications happy if they call them. Synchronous PPP does not use * the async maps. * * Copyright 1999 Paul Mackerras. * * Also touched by the grubby hands of Paul Fulghum paulkf@microgate.com * * This driver provides the encapsulation and framing for sending * and receiving PPP frames over sync serial lines. It relies on * the generic PPP layer to give it frames to send and to process * received frames. It implements the PPP line discipline. * * Part of the code in this driver was inspired by the old async-only * PPP driver, written by Michael Callahan and Al Longyear, and * subsequently hacked by Paul Mackerras. * * ==FILEVERSION 20040616== */ #include <linux/module.h> #include <linux/kernel.h> #include <linux/skbuff.h> #include <linux/tty.h> #include <linux/netdevice.h> #include <linux/poll.h> #include <linux/ppp_defs.h> #include <linux/ppp-ioctl.h> #include <linux/ppp_channel.h> #include <linux/spinlock.h> #include <linux/completion.h> #include <linux/init.h> #include <linux/interrupt.h> #include <linux/slab.h> #include <linux/refcount.h> #include <linux/unaligned.h> #include <linux/uaccess.h> #define PPP_VERSION "2.4.2" /* Structure for storing local state. */ struct syncppp { struct tty_struct *tty; unsigned int flags; unsigned int rbits; int mru; spinlock_t xmit_lock; spinlock_t recv_lock; unsigned long xmit_flags; u32 xaccm[8]; u32 raccm; unsigned int bytes_sent; unsigned int bytes_rcvd; struct sk_buff *tpkt; unsigned long last_xmit; struct sk_buff_head rqueue; struct tasklet_struct tsk; refcount_t refcnt; struct completion dead_cmp; struct ppp_channel chan; /* interface to generic ppp layer */ }; /* Bit numbers in xmit_flags */ #define XMIT_WAKEUP 0 #define XMIT_FULL 1 /* Bits in rbits */ #define SC_RCV_BITS (SC_RCV_B7_1|SC_RCV_B7_0|SC_RCV_ODDP|SC_RCV_EVNP) #define PPPSYNC_MAX_RQLEN 32 /* arbitrary */ /* * Prototypes. */ static struct sk_buff* ppp_sync_txmunge(struct syncppp *ap, struct sk_buff *); static int ppp_sync_send(struct ppp_channel *chan, struct sk_buff *skb); static int ppp_sync_ioctl(struct ppp_channel *chan, unsigned int cmd, unsigned long arg); static void ppp_sync_process(struct tasklet_struct *t); static int ppp_sync_push(struct syncppp *ap); static void ppp_sync_flush_output(struct syncppp *ap); static void ppp_sync_input(struct syncppp *ap, const u8 *buf, const u8 *flags, int count); static const struct ppp_channel_ops sync_ops = { .start_xmit = ppp_sync_send, .ioctl = ppp_sync_ioctl, }; /* * Utility procedure to print a buffer in hex/ascii */ static void ppp_print_buffer (const char *name, const __u8 *buf, int count) { if (name != NULL) printk(KERN_DEBUG "ppp_synctty: %s, count = %d\n", name, count); print_hex_dump_bytes("", DUMP_PREFIX_NONE, buf, count); } /* * Routines implementing the synchronous PPP line discipline. */ /* * We have a potential race on dereferencing tty->disc_data, * because the tty layer provides no locking at all - thus one * cpu could be running ppp_synctty_receive while another * calls ppp_synctty_close, which zeroes tty->disc_data and * frees the memory that ppp_synctty_receive is using. The best * way to fix this is to use a rwlock in the tty struct, but for now * we use a single global rwlock for all ttys in ppp line discipline. * * FIXME: Fixed in tty_io nowadays. */ static DEFINE_RWLOCK(disc_data_lock); static struct syncppp *sp_get(struct tty_struct *tty) { struct syncppp *ap; read_lock(&disc_data_lock); ap = tty->disc_data; if (ap != NULL) refcount_inc(&ap->refcnt); read_unlock(&disc_data_lock); return ap; } static void sp_put(struct syncppp *ap) { if (refcount_dec_and_test(&ap->refcnt)) complete(&ap->dead_cmp); } /* * Called when a tty is put into sync-PPP line discipline. */ static int ppp_sync_open(struct tty_struct *tty) { struct syncppp *ap; int err; int speed; if (tty->ops->write == NULL) return -EOPNOTSUPP; ap = kzalloc(sizeof(*ap), GFP_KERNEL); err = -ENOMEM; if (!ap) goto out; /* initialize the syncppp structure */ ap->tty = tty; ap->mru = PPP_MRU; spin_lock_init(&ap->xmit_lock); spin_lock_init(&ap->recv_lock); ap->xaccm[0] = ~0U; ap->xaccm[3] = 0x60000000U; ap->raccm = ~0U; skb_queue_head_init(&ap->rqueue); tasklet_setup(&ap->tsk, ppp_sync_process); refcount_set(&ap->refcnt, 1); init_completion(&ap->dead_cmp); ap->chan.private = ap; ap->chan.ops = &sync_ops; ap->chan.mtu = PPP_MRU; ap->chan.hdrlen = 2; /* for A/C bytes */ speed = tty_get_baud_rate(tty); ap->chan.speed = speed; err = ppp_register_channel(&ap->chan); if (err) goto out_free; tty->disc_data = ap; tty->receive_room = 65536; return 0; out_free: kfree(ap); out: return err; } /* * Called when the tty is put into another line discipline * or it hangs up. We have to wait for any cpu currently * executing in any of the other ppp_synctty_* routines to * finish before we can call ppp_unregister_channel and free * the syncppp struct. This routine must be called from * process context, not interrupt or softirq context. */ static void ppp_sync_close(struct tty_struct *tty) { struct syncppp *ap; write_lock_irq(&disc_data_lock); ap = tty->disc_data; tty->disc_data = NULL; write_unlock_irq(&disc_data_lock); if (!ap) return; /* * We have now ensured that nobody can start using ap from now * on, but we have to wait for all existing users to finish. * Note that ppp_unregister_channel ensures that no calls to * our channel ops (i.e. ppp_sync_send/ioctl) are in progress * by the time it returns. */ if (!refcount_dec_and_test(&ap->refcnt)) wait_for_completion(&ap->dead_cmp); tasklet_kill(&ap->tsk); ppp_unregister_channel(&ap->chan); skb_queue_purge(&ap->rqueue); kfree_skb(ap->tpkt); kfree(ap); } /* * Called on tty hangup in process context. * * Wait for I/O to driver to complete and unregister PPP channel. * This is already done by the close routine, so just call that. */ static void ppp_sync_hangup(struct tty_struct *tty) { ppp_sync_close(tty); } /* * Read does nothing - no data is ever available this way. * Pppd reads and writes packets via /dev/ppp instead. */ static ssize_t ppp_sync_read(struct tty_struct *tty, struct file *file, u8 *buf, size_t count, void **cookie, unsigned long offset) { return -EAGAIN; } /* * Write on the tty does nothing, the packets all come in * from the ppp generic stuff. */ static ssize_t ppp_sync_write(struct tty_struct *tty, struct file *file, const u8 *buf, size_t count) { return -EAGAIN; } static int ppp_synctty_ioctl(struct tty_struct *tty, unsigned int cmd, unsigned long arg) { struct syncppp *ap = sp_get(tty); int __user *p = (int __user *)arg; int err, val; if (!ap) return -ENXIO; err = -EFAULT; switch (cmd) { case PPPIOCGCHAN: err = -EFAULT; if (put_user(ppp_channel_index(&ap->chan), p)) break; err = 0; break; case PPPIOCGUNIT: err = -EFAULT; if (put_user(ppp_unit_number(&ap->chan), p)) break; err = 0; break; case TCFLSH: /* flush our buffers and the serial port's buffer */ if (arg == TCIOFLUSH || arg == TCOFLUSH) ppp_sync_flush_output(ap); err = n_tty_ioctl_helper(tty, cmd, arg); break; case FIONREAD: val = 0; if (put_user(val, p)) break; err = 0; break; default: err = tty_mode_ioctl(tty, cmd, arg); break; } sp_put(ap); return err; } /* May sleep, don't call from interrupt level or with interrupts disabled */ static void ppp_sync_receive(struct tty_struct *tty, const u8 *buf, const u8 *cflags, size_t count) { struct syncppp *ap = sp_get(tty); unsigned long flags; if (!ap) return; spin_lock_irqsave(&ap->recv_lock, flags); ppp_sync_input(ap, buf, cflags, count); spin_unlock_irqrestore(&ap->recv_lock, flags); if (!skb_queue_empty(&ap->rqueue)) tasklet_schedule(&ap->tsk); sp_put(ap); tty_unthrottle(tty); } static void ppp_sync_wakeup(struct tty_struct *tty) { struct syncppp *ap = sp_get(tty); clear_bit(TTY_DO_WRITE_WAKEUP, &tty->flags); if (!ap) return; set_bit(XMIT_WAKEUP, &ap->xmit_flags); tasklet_schedule(&ap->tsk); sp_put(ap); } static struct tty_ldisc_ops ppp_sync_ldisc = { .owner = THIS_MODULE, .num = N_SYNC_PPP, .name = "pppsync", .open = ppp_sync_open, .close = ppp_sync_close, .hangup = ppp_sync_hangup, .read = ppp_sync_read, .write = ppp_sync_write, .ioctl = ppp_synctty_ioctl, .receive_buf = ppp_sync_receive, .write_wakeup = ppp_sync_wakeup, }; static int __init ppp_sync_init(void) { int err; err = tty_register_ldisc(&ppp_sync_ldisc); if (err != 0) printk(KERN_ERR "PPP_sync: error %d registering line disc.\n", err); return err; } /* * The following routines provide the PPP channel interface. */ static int ppp_sync_ioctl(struct ppp_channel *chan, unsigned int cmd, unsigned long arg) { struct syncppp *ap = chan->private; int err, val; u32 accm[8]; void __user *argp = (void __user *)arg; u32 __user *p = argp; err = -EFAULT; switch (cmd) { case PPPIOCGFLAGS: val = ap->flags | ap->rbits; if (put_user(val, (int __user *) argp)) break; err = 0; break; case PPPIOCSFLAGS: if (get_user(val, (int __user *) argp)) break; ap->flags = val & ~SC_RCV_BITS; spin_lock_irq(&ap->recv_lock); ap->rbits = val & SC_RCV_BITS; spin_unlock_irq(&ap->recv_lock); err = 0; break; case PPPIOCGASYNCMAP: if (put_user(ap->xaccm[0], p)) break; err = 0; break; case PPPIOCSASYNCMAP: if (get_user(ap->xaccm[0], p)) break; err = 0; break; case PPPIOCGRASYNCMAP: if (put_user(ap->raccm, p)) break; err = 0; break; case PPPIOCSRASYNCMAP: if (get_user(ap->raccm, p)) break; err = 0; break; case PPPIOCGXASYNCMAP: if (copy_to_user(argp, ap->xaccm, sizeof(ap->xaccm))) break; err = 0; break; case PPPIOCSXASYNCMAP: if (copy_from_user(accm, argp, sizeof(accm))) break; accm[2] &= ~0x40000000U; /* can't escape 0x5e */ accm[3] |= 0x60000000U; /* must escape 0x7d, 0x7e */ memcpy(ap->xaccm, accm, sizeof(ap->xaccm)); err = 0; break; case PPPIOCGMRU: if (put_user(ap->mru, (int __user *) argp)) break; err = 0; break; case PPPIOCSMRU: if (get_user(val, (int __user *) argp)) break; if (val > U16_MAX) { err = -EINVAL; break; } if (val < PPP_MRU) val = PPP_MRU; ap->mru = val; err = 0; break; default: err = -ENOTTY; } return err; } /* * This is called at softirq level to deliver received packets * to the ppp_generic code, and to tell the ppp_generic code * if we can accept more output now. */ static void ppp_sync_process(struct tasklet_struct *t) { struct syncppp *ap = from_tasklet(ap, t, tsk); struct sk_buff *skb; /* process received packets */ while ((skb = skb_dequeue(&ap->rqueue)) != NULL) { if (skb->len == 0) { /* zero length buffers indicate error */ ppp_input_error(&ap->chan, 0); kfree_skb(skb); } else ppp_input(&ap->chan, skb); } /* try to push more stuff out */ if (test_bit(XMIT_WAKEUP, &ap->xmit_flags) && ppp_sync_push(ap)) ppp_output_wakeup(&ap->chan); } /* * Procedures for encapsulation and framing. */ static struct sk_buff* ppp_sync_txmunge(struct syncppp *ap, struct sk_buff *skb) { int proto; unsigned char *data; int islcp; data = skb->data; proto = get_unaligned_be16(data); /* LCP packets with codes between 1 (configure-request) * and 7 (code-reject) must be sent as though no options * have been negotiated. */ islcp = proto == PPP_LCP && 1 <= data[2] && data[2] <= 7; /* compress protocol field if option enabled */ if (data[0] == 0 && (ap->flags & SC_COMP_PROT) && !islcp) skb_pull(skb,1); /* prepend address/control fields if necessary */ if ((ap->flags & SC_COMP_AC) == 0 || islcp) { if (skb_headroom(skb) < 2) { struct sk_buff *npkt = dev_alloc_skb(skb->len + 2); if (npkt == NULL) { kfree_skb(skb); return NULL; } skb_reserve(npkt,2); skb_copy_from_linear_data(skb, skb_put(npkt, skb->len), skb->len); consume_skb(skb); skb = npkt; } skb_push(skb,2); skb->data[0] = PPP_ALLSTATIONS; skb->data[1] = PPP_UI; } ap->last_xmit = jiffies; if (skb && ap->flags & SC_LOG_OUTPKT) ppp_print_buffer ("send buffer", skb->data, skb->len); return skb; } /* * Transmit-side routines. */ /* * Send a packet to the peer over an sync tty line. * Returns 1 iff the packet was accepted. * If the packet was not accepted, we will call ppp_output_wakeup * at some later time. */ static int ppp_sync_send(struct ppp_channel *chan, struct sk_buff *skb) { struct syncppp *ap = chan->private; ppp_sync_push(ap); if (test_and_set_bit(XMIT_FULL, &ap->xmit_flags)) return 0; /* already full */ skb = ppp_sync_txmunge(ap, skb); if (skb != NULL) ap->tpkt = skb; else clear_bit(XMIT_FULL, &ap->xmit_flags); ppp_sync_push(ap); return 1; } /* * Push as much data as possible out to the tty. */ static int ppp_sync_push(struct syncppp *ap) { int sent, done = 0; struct tty_struct *tty = ap->tty; int tty_stuffed = 0; if (!spin_trylock_bh(&ap->xmit_lock)) return 0; for (;;) { if (test_and_clear_bit(XMIT_WAKEUP, &ap->xmit_flags)) tty_stuffed = 0; if (!tty_stuffed && ap->tpkt) { set_bit(TTY_DO_WRITE_WAKEUP, &tty->flags); sent = tty->ops->write(tty, ap->tpkt->data, ap->tpkt->len); if (sent < 0) goto flush; /* error, e.g. loss of CD */ if (sent < ap->tpkt->len) { tty_stuffed = 1; } else { consume_skb(ap->tpkt); ap->tpkt = NULL; clear_bit(XMIT_FULL, &ap->xmit_flags); done = 1; } continue; } /* haven't made any progress */ spin_unlock_bh(&ap->xmit_lock); if (!(test_bit(XMIT_WAKEUP, &ap->xmit_flags) || (!tty_stuffed && ap->tpkt))) break; if (!spin_trylock_bh(&ap->xmit_lock)) break; } return done; flush: if (ap->tpkt) { kfree_skb(ap->tpkt); ap->tpkt = NULL; clear_bit(XMIT_FULL, &ap->xmit_flags); done = 1; } spin_unlock_bh(&ap->xmit_lock); return done; } /* * Flush output from our internal buffers. * Called for the TCFLSH ioctl. */ static void ppp_sync_flush_output(struct syncppp *ap) { int done = 0; spin_lock_bh(&ap->xmit_lock); if (ap->tpkt != NULL) { kfree_skb(ap->tpkt); ap->tpkt = NULL; clear_bit(XMIT_FULL, &ap->xmit_flags); done = 1; } spin_unlock_bh(&ap->xmit_lock); if (done) ppp_output_wakeup(&ap->chan); } /* * Receive-side routines. */ /* called when the tty driver has data for us. * * Data is frame oriented: each call to ppp_sync_input is considered * a whole frame. If the 1st flag byte is non-zero then the whole * frame is considered to be in error and is tossed. */ static void ppp_sync_input(struct syncppp *ap, const u8 *buf, const u8 *flags, int count) { struct sk_buff *skb; unsigned char *p; if (count == 0) return; if (ap->flags & SC_LOG_INPKT) ppp_print_buffer ("receive buffer", buf, count); /* stuff the chars in the skb */ skb = dev_alloc_skb(ap->mru + PPP_HDRLEN + 2); if (!skb) { printk(KERN_ERR "PPPsync: no memory (input pkt)\n"); goto err; } /* Try to get the payload 4-byte aligned */ if (buf[0] != PPP_ALLSTATIONS) skb_reserve(skb, 2 + (buf[0] & 1)); if (flags && *flags) { /* error flag set, ignore frame */ goto err; } else if (count > skb_tailroom(skb)) { /* packet overflowed MRU */ goto err; } skb_put_data(skb, buf, count); /* strip address/control field if present */ p = skb->data; if (skb->len >= 2 && p[0] == PPP_ALLSTATIONS && p[1] == PPP_UI) { /* chop off address/control */ if (skb->len < 3) goto err; p = skb_pull(skb, 2); } /* PPP packet length should be >= 2 bytes when protocol field is not * compressed. */ if (!(p[0] & 0x01) && skb->len < 2) goto err; /* queue the frame to be processed */ skb_queue_tail(&ap->rqueue, skb); return; err: /* queue zero length packet as error indication */ if (skb || (skb = dev_alloc_skb(0))) { skb_trim(skb, 0); skb_queue_tail(&ap->rqueue, skb); } } static void __exit ppp_sync_cleanup(void) { tty_unregister_ldisc(&ppp_sync_ldisc); } module_init(ppp_sync_init); module_exit(ppp_sync_cleanup); MODULE_DESCRIPTION("PPP synchronous TTY channel module"); MODULE_LICENSE("GPL"); MODULE_ALIAS_LDISC(N_SYNC_PPP);
3 1 1 1 2 3 1 3 1 3 1 3 7 6 6 6 6 6 6 6 6 6 6 6 6 6 5 1 6 4 6 2 6 3 6 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 /* * xxHash - Extremely Fast Hash algorithm * Copyright (C) 2012-2016, Yann Collet. * * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are * met: * * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following disclaimer * in the documentation and/or other materials provided with the * distribution. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * This program is free software; you can redistribute it and/or modify it under * the terms of the GNU General Public License version 2 as published by the * Free Software Foundation. This program is dual-licensed; you may select * either version 2 of the GNU General Public License ("GPL") or BSD license * ("BSD"). * * You can contact the author at: * - xxHash homepage: https://cyan4973.github.io/xxHash/ * - xxHash source repository: https://github.com/Cyan4973/xxHash */ #include <linux/unaligned.h> #include <linux/errno.h> #include <linux/compiler.h> #include <linux/kernel.h> #include <linux/module.h> #include <linux/string.h> #include <linux/xxhash.h> /*-************************************* * Macros **************************************/ #define xxh_rotl32(x, r) ((x << r) | (x >> (32 - r))) #define xxh_rotl64(x, r) ((x << r) | (x >> (64 - r))) #ifdef __LITTLE_ENDIAN # define XXH_CPU_LITTLE_ENDIAN 1 #else # define XXH_CPU_LITTLE_ENDIAN 0 #endif /*-************************************* * Constants **************************************/ static const uint32_t PRIME32_1 = 2654435761U; static const uint32_t PRIME32_2 = 2246822519U; static const uint32_t PRIME32_3 = 3266489917U; static const uint32_t PRIME32_4 = 668265263U; static const uint32_t PRIME32_5 = 374761393U; static const uint64_t PRIME64_1 = 11400714785074694791ULL; static const uint64_t PRIME64_2 = 14029467366897019727ULL; static const uint64_t PRIME64_3 = 1609587929392839161ULL; static const uint64_t PRIME64_4 = 9650029242287828579ULL; static const uint64_t PRIME64_5 = 2870177450012600261ULL; /*-************************** * Utils ***************************/ void xxh32_copy_state(struct xxh32_state *dst, const struct xxh32_state *src) { memcpy(dst, src, sizeof(*dst)); } EXPORT_SYMBOL(xxh32_copy_state); void xxh64_copy_state(struct xxh64_state *dst, const struct xxh64_state *src) { memcpy(dst, src, sizeof(*dst)); } EXPORT_SYMBOL(xxh64_copy_state); /*-*************************** * Simple Hash Functions ****************************/ static uint32_t xxh32_round(uint32_t seed, const uint32_t input) { seed += input * PRIME32_2; seed = xxh_rotl32(seed, 13); seed *= PRIME32_1; return seed; } uint32_t xxh32(const void *input, const size_t len, const uint32_t seed) { const uint8_t *p = (const uint8_t *)input; const uint8_t *b_end = p + len; uint32_t h32; if (len >= 16) { const uint8_t *const limit = b_end - 16; uint32_t v1 = seed + PRIME32_1 + PRIME32_2; uint32_t v2 = seed + PRIME32_2; uint32_t v3 = seed + 0; uint32_t v4 = seed - PRIME32_1; do { v1 = xxh32_round(v1, get_unaligned_le32(p)); p += 4; v2 = xxh32_round(v2, get_unaligned_le32(p)); p += 4; v3 = xxh32_round(v3, get_unaligned_le32(p)); p += 4; v4 = xxh32_round(v4, get_unaligned_le32(p)); p += 4; } while (p <= limit); h32 = xxh_rotl32(v1, 1) + xxh_rotl32(v2, 7) + xxh_rotl32(v3, 12) + xxh_rotl32(v4, 18); } else { h32 = seed + PRIME32_5; } h32 += (uint32_t)len; while (p + 4 <= b_end) { h32 += get_unaligned_le32(p) * PRIME32_3; h32 = xxh_rotl32(h32, 17) * PRIME32_4; p += 4; } while (p < b_end) { h32 += (*p) * PRIME32_5; h32 = xxh_rotl32(h32, 11) * PRIME32_1; p++; } h32 ^= h32 >> 15; h32 *= PRIME32_2; h32 ^= h32 >> 13; h32 *= PRIME32_3; h32 ^= h32 >> 16; return h32; } EXPORT_SYMBOL(xxh32); static uint64_t xxh64_round(uint64_t acc, const uint64_t input) { acc += input * PRIME64_2; acc = xxh_rotl64(acc, 31); acc *= PRIME64_1; return acc; } static uint64_t xxh64_merge_round(uint64_t acc, uint64_t val) { val = xxh64_round(0, val); acc ^= val; acc = acc * PRIME64_1 + PRIME64_4; return acc; } uint64_t xxh64(const void *input, const size_t len, const uint64_t seed) { const uint8_t *p = (const uint8_t *)input; const uint8_t *const b_end = p + len; uint64_t h64; if (len >= 32) { const uint8_t *const limit = b_end - 32; uint64_t v1 = seed + PRIME64_1 + PRIME64_2; uint64_t v2 = seed + PRIME64_2; uint64_t v3 = seed + 0; uint64_t v4 = seed - PRIME64_1; do { v1 = xxh64_round(v1, get_unaligned_le64(p)); p += 8; v2 = xxh64_round(v2, get_unaligned_le64(p)); p += 8; v3 = xxh64_round(v3, get_unaligned_le64(p)); p += 8; v4 = xxh64_round(v4, get_unaligned_le64(p)); p += 8; } while (p <= limit); h64 = xxh_rotl64(v1, 1) + xxh_rotl64(v2, 7) + xxh_rotl64(v3, 12) + xxh_rotl64(v4, 18); h64 = xxh64_merge_round(h64, v1); h64 = xxh64_merge_round(h64, v2); h64 = xxh64_merge_round(h64, v3); h64 = xxh64_merge_round(h64, v4); } else { h64 = seed + PRIME64_5; } h64 += (uint64_t)len; while (p + 8 <= b_end) { const uint64_t k1 = xxh64_round(0, get_unaligned_le64(p)); h64 ^= k1; h64 = xxh_rotl64(h64, 27) * PRIME64_1 + PRIME64_4; p += 8; } if (p + 4 <= b_end) { h64 ^= (uint64_t)(get_unaligned_le32(p)) * PRIME64_1; h64 = xxh_rotl64(h64, 23) * PRIME64_2 + PRIME64_3; p += 4; } while (p < b_end) { h64 ^= (*p) * PRIME64_5; h64 = xxh_rotl64(h64, 11) * PRIME64_1; p++; } h64 ^= h64 >> 33; h64 *= PRIME64_2; h64 ^= h64 >> 29; h64 *= PRIME64_3; h64 ^= h64 >> 32; return h64; } EXPORT_SYMBOL(xxh64); /*-************************************************** * Advanced Hash Functions ***************************************************/ void xxh32_reset(struct xxh32_state *statePtr, const uint32_t seed) { /* use a local state for memcpy() to avoid strict-aliasing warnings */ struct xxh32_state state; memset(&state, 0, sizeof(state)); state.v1 = seed + PRIME32_1 + PRIME32_2; state.v2 = seed + PRIME32_2; state.v3 = seed + 0; state.v4 = seed - PRIME32_1; memcpy(statePtr, &state, sizeof(state)); } EXPORT_SYMBOL(xxh32_reset); void xxh64_reset(struct xxh64_state *statePtr, const uint64_t seed) { /* use a local state for memcpy() to avoid strict-aliasing warnings */ struct xxh64_state state; memset(&state, 0, sizeof(state)); state.v1 = seed + PRIME64_1 + PRIME64_2; state.v2 = seed + PRIME64_2; state.v3 = seed + 0; state.v4 = seed - PRIME64_1; memcpy(statePtr, &state, sizeof(state)); } EXPORT_SYMBOL(xxh64_reset); int xxh32_update(struct xxh32_state *state, const void *input, const size_t len) { const uint8_t *p = (const uint8_t *)input; const uint8_t *const b_end = p + len; if (input == NULL) return -EINVAL; state->total_len_32 += (uint32_t)len; state->large_len |= (len >= 16) | (state->total_len_32 >= 16); if (state->memsize + len < 16) { /* fill in tmp buffer */ memcpy((uint8_t *)(state->mem32) + state->memsize, input, len); state->memsize += (uint32_t)len; return 0; } if (state->memsize) { /* some data left from previous update */ const uint32_t *p32 = state->mem32; memcpy((uint8_t *)(state->mem32) + state->memsize, input, 16 - state->memsize); state->v1 = xxh32_round(state->v1, get_unaligned_le32(p32)); p32++; state->v2 = xxh32_round(state->v2, get_unaligned_le32(p32)); p32++; state->v3 = xxh32_round(state->v3, get_unaligned_le32(p32)); p32++; state->v4 = xxh32_round(state->v4, get_unaligned_le32(p32)); p32++; p += 16-state->memsize; state->memsize = 0; } if (p <= b_end - 16) { const uint8_t *const limit = b_end - 16; uint32_t v1 = state->v1; uint32_t v2 = state->v2; uint32_t v3 = state->v3; uint32_t v4 = state->v4; do { v1 = xxh32_round(v1, get_unaligned_le32(p)); p += 4; v2 = xxh32_round(v2, get_unaligned_le32(p)); p += 4; v3 = xxh32_round(v3, get_unaligned_le32(p)); p += 4; v4 = xxh32_round(v4, get_unaligned_le32(p)); p += 4; } while (p <= limit); state->v1 = v1; state->v2 = v2; state->v3 = v3; state->v4 = v4; } if (p < b_end) { memcpy(state->mem32, p, (size_t)(b_end-p)); state->memsize = (uint32_t)(b_end-p); } return 0; } EXPORT_SYMBOL(xxh32_update); uint32_t xxh32_digest(const struct xxh32_state *state) { const uint8_t *p = (const uint8_t *)state->mem32; const uint8_t *const b_end = (const uint8_t *)(state->mem32) + state->memsize; uint32_t h32; if (state->large_len) { h32 = xxh_rotl32(state->v1, 1) + xxh_rotl32(state->v2, 7) + xxh_rotl32(state->v3, 12) + xxh_rotl32(state->v4, 18); } else { h32 = state->v3 /* == seed */ + PRIME32_5; } h32 += state->total_len_32; while (p + 4 <= b_end) { h32 += get_unaligned_le32(p) * PRIME32_3; h32 = xxh_rotl32(h32, 17) * PRIME32_4; p += 4; } while (p < b_end) { h32 += (*p) * PRIME32_5; h32 = xxh_rotl32(h32, 11) * PRIME32_1; p++; } h32 ^= h32 >> 15; h32 *= PRIME32_2; h32 ^= h32 >> 13; h32 *= PRIME32_3; h32 ^= h32 >> 16; return h32; } EXPORT_SYMBOL(xxh32_digest); int xxh64_update(struct xxh64_state *state, const void *input, const size_t len) { const uint8_t *p = (const uint8_t *)input; const uint8_t *const b_end = p + len; if (input == NULL) return -EINVAL; state->total_len += len; if (state->memsize + len < 32) { /* fill in tmp buffer */ memcpy(((uint8_t *)state->mem64) + state->memsize, input, len); state->memsize += (uint32_t)len; return 0; } if (state->memsize) { /* tmp buffer is full */ uint64_t *p64 = state->mem64; memcpy(((uint8_t *)p64) + state->memsize, input, 32 - state->memsize); state->v1 = xxh64_round(state->v1, get_unaligned_le64(p64)); p64++; state->v2 = xxh64_round(state->v2, get_unaligned_le64(p64)); p64++; state->v3 = xxh64_round(state->v3, get_unaligned_le64(p64)); p64++; state->v4 = xxh64_round(state->v4, get_unaligned_le64(p64)); p += 32 - state->memsize; state->memsize = 0; } if (p + 32 <= b_end) { const uint8_t *const limit = b_end - 32; uint64_t v1 = state->v1; uint64_t v2 = state->v2; uint64_t v3 = state->v3; uint64_t v4 = state->v4; do { v1 = xxh64_round(v1, get_unaligned_le64(p)); p += 8; v2 = xxh64_round(v2, get_unaligned_le64(p)); p += 8; v3 = xxh64_round(v3, get_unaligned_le64(p)); p += 8; v4 = xxh64_round(v4, get_unaligned_le64(p)); p += 8; } while (p <= limit); state->v1 = v1; state->v2 = v2; state->v3 = v3; state->v4 = v4; } if (p < b_end) { memcpy(state->mem64, p, (size_t)(b_end-p)); state->memsize = (uint32_t)(b_end - p); } return 0; } EXPORT_SYMBOL(xxh64_update); uint64_t xxh64_digest(const struct xxh64_state *state) { const uint8_t *p = (const uint8_t *)state->mem64; const uint8_t *const b_end = (const uint8_t *)state->mem64 + state->memsize; uint64_t h64; if (state->total_len >= 32) { const uint64_t v1 = state->v1; const uint64_t v2 = state->v2; const uint64_t v3 = state->v3; const uint64_t v4 = state->v4; h64 = xxh_rotl64(v1, 1) + xxh_rotl64(v2, 7) + xxh_rotl64(v3, 12) + xxh_rotl64(v4, 18); h64 = xxh64_merge_round(h64, v1); h64 = xxh64_merge_round(h64, v2); h64 = xxh64_merge_round(h64, v3); h64 = xxh64_merge_round(h64, v4); } else { h64 = state->v3 + PRIME64_5; } h64 += (uint64_t)state->total_len; while (p + 8 <= b_end) { const uint64_t k1 = xxh64_round(0, get_unaligned_le64(p)); h64 ^= k1; h64 = xxh_rotl64(h64, 27) * PRIME64_1 + PRIME64_4; p += 8; } if (p + 4 <= b_end) { h64 ^= (uint64_t)(get_unaligned_le32(p)) * PRIME64_1; h64 = xxh_rotl64(h64, 23) * PRIME64_2 + PRIME64_3; p += 4; } while (p < b_end) { h64 ^= (*p) * PRIME64_5; h64 = xxh_rotl64(h64, 11) * PRIME64_1; p++; } h64 ^= h64 >> 33; h64 *= PRIME64_2; h64 ^= h64 >> 29; h64 *= PRIME64_3; h64 ^= h64 >> 32; return h64; } EXPORT_SYMBOL(xxh64_digest); MODULE_LICENSE("Dual BSD/GPL"); MODULE_DESCRIPTION("xxHash");
1 2 10 10 10 10 10 10 10 10 10 10 10 15 3 12 1 11 10 1 15 15 15 5 10 10 1 10 10 15 10 10 1 1 10 10 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 // SPDX-License-Identifier: GPL-2.0-only /* * linux/net/netfilter/xt_IDLETIMER.c * * Netfilter module to trigger a timer when packet matches. * After timer expires a kevent will be sent. * * Copyright (C) 2004, 2010 Nokia Corporation * Written by Timo Teras <ext-timo.teras@nokia.com> * * Converted to x_tables and reworked for upstream inclusion * by Luciano Coelho <luciano.coelho@nokia.com> * * Contact: Luciano Coelho <luciano.coelho@nokia.com> */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/module.h> #include <linux/timer.h> #include <linux/alarmtimer.h> #include <linux/list.h> #include <linux/mutex.h> #include <linux/netfilter.h> #include <linux/netfilter/x_tables.h> #include <linux/netfilter/xt_IDLETIMER.h> #include <linux/kdev_t.h> #include <linux/kobject.h> #include <linux/workqueue.h> #include <linux/sysfs.h> struct idletimer_tg { struct list_head entry; struct alarm alarm; struct timer_list timer; struct work_struct work; struct kobject *kobj; struct device_attribute attr; unsigned int refcnt; u8 timer_type; }; static LIST_HEAD(idletimer_tg_list); static DEFINE_MUTEX(list_mutex); static struct kobject *idletimer_tg_kobj; static struct idletimer_tg *__idletimer_tg_find_by_label(const char *label) { struct idletimer_tg *entry; list_for_each_entry(entry, &idletimer_tg_list, entry) { if (!strcmp(label, entry->attr.attr.name)) return entry; } return NULL; } static ssize_t idletimer_tg_show(struct device *dev, struct device_attribute *attr, char *buf) { struct idletimer_tg *timer; unsigned long expires = 0; struct timespec64 ktimespec = {}; long time_diff = 0; mutex_lock(&list_mutex); timer = __idletimer_tg_find_by_label(attr->attr.name); if (timer) { if (timer->timer_type & XT_IDLETIMER_ALARM) { ktime_t expires_alarm = alarm_expires_remaining(&timer->alarm); ktimespec = ktime_to_timespec64(expires_alarm); time_diff = ktimespec.tv_sec; } else { expires = timer->timer.expires; time_diff = jiffies_to_msecs(expires - jiffies) / 1000; } } mutex_unlock(&list_mutex); if (time_after(expires, jiffies) || ktimespec.tv_sec > 0) return sysfs_emit(buf, "%ld\n", time_diff); return sysfs_emit(buf, "0\n"); } static void idletimer_tg_work(struct work_struct *work) { struct idletimer_tg *timer = container_of(work, struct idletimer_tg, work); sysfs_notify(idletimer_tg_kobj, NULL, timer->attr.attr.name); } static void idletimer_tg_expired(struct timer_list *t) { struct idletimer_tg *timer = from_timer(timer, t, timer); pr_debug("timer %s expired\n", timer->attr.attr.name); schedule_work(&timer->work); } static void idletimer_tg_alarmproc(struct alarm *alarm, ktime_t now) { struct idletimer_tg *timer = alarm->data; pr_debug("alarm %s expired\n", timer->attr.attr.name); schedule_work(&timer->work); } static int idletimer_check_sysfs_name(const char *name, unsigned int size) { int ret; ret = xt_check_proc_name(name, size); if (ret < 0) return ret; if (!strcmp(name, "power") || !strcmp(name, "subsystem") || !strcmp(name, "uevent")) return -EINVAL; return 0; } static int idletimer_tg_create(struct idletimer_tg_info *info) { int ret; info->timer = kzalloc(sizeof(*info->timer), GFP_KERNEL); if (!info->timer) { ret = -ENOMEM; goto out; } ret = idletimer_check_sysfs_name(info->label, sizeof(info->label)); if (ret < 0) goto out_free_timer; sysfs_attr_init(&info->timer->attr.attr); info->timer->attr.attr.name = kstrdup(info->label, GFP_KERNEL); if (!info->timer->attr.attr.name) { ret = -ENOMEM; goto out_free_timer; } info->timer->attr.attr.mode = 0444; info->timer->attr.show = idletimer_tg_show; ret = sysfs_create_file(idletimer_tg_kobj, &info->timer->attr.attr); if (ret < 0) { pr_debug("couldn't add file to sysfs"); goto out_free_attr; } list_add(&info->timer->entry, &idletimer_tg_list); timer_setup(&info->timer->timer, idletimer_tg_expired, 0); info->timer->refcnt = 1; INIT_WORK(&info->timer->work, idletimer_tg_work); mod_timer(&info->timer->timer, msecs_to_jiffies(info->timeout * 1000) + jiffies); return 0; out_free_attr: kfree(info->timer->attr.attr.name); out_free_timer: kfree(info->timer); out: return ret; } static int idletimer_tg_create_v1(struct idletimer_tg_info_v1 *info) { int ret; info->timer = kmalloc(sizeof(*info->timer), GFP_KERNEL); if (!info->timer) { ret = -ENOMEM; goto out; } ret = idletimer_check_sysfs_name(info->label, sizeof(info->label)); if (ret < 0) goto out_free_timer; sysfs_attr_init(&info->timer->attr.attr); info->timer->attr.attr.name = kstrdup(info->label, GFP_KERNEL); if (!info->timer->attr.attr.name) { ret = -ENOMEM; goto out_free_timer; } info->timer->attr.attr.mode = 0444; info->timer->attr.show = idletimer_tg_show; ret = sysfs_create_file(idletimer_tg_kobj, &info->timer->attr.attr); if (ret < 0) { pr_debug("couldn't add file to sysfs"); goto out_free_attr; } /* notify userspace */ kobject_uevent(idletimer_tg_kobj,KOBJ_ADD); list_add(&info->timer->entry, &idletimer_tg_list); pr_debug("timer type value is %u", info->timer_type); info->timer->timer_type = info->timer_type; info->timer->refcnt = 1; INIT_WORK(&info->timer->work, idletimer_tg_work); if (info->timer->timer_type & XT_IDLETIMER_ALARM) { ktime_t tout; alarm_init(&info->timer->alarm, ALARM_BOOTTIME, idletimer_tg_alarmproc); info->timer->alarm.data = info->timer; tout = ktime_set(info->timeout, 0); alarm_start_relative(&info->timer->alarm, tout); } else { timer_setup(&info->timer->timer, idletimer_tg_expired, 0); mod_timer(&info->timer->timer, msecs_to_jiffies(info->timeout * 1000) + jiffies); } return 0; out_free_attr: kfree(info->timer->attr.attr.name); out_free_timer: kfree(info->timer); out: return ret; } /* * The actual xt_tables plugin. */ static unsigned int idletimer_tg_target(struct sk_buff *skb, const struct xt_action_param *par) { const struct idletimer_tg_info *info = par->targinfo; pr_debug("resetting timer %s, timeout period %u\n", info->label, info->timeout); mod_timer(&info->timer->timer, msecs_to_jiffies(info->timeout * 1000) + jiffies); return XT_CONTINUE; } /* * The actual xt_tables plugin. */ static unsigned int idletimer_tg_target_v1(struct sk_buff *skb, const struct xt_action_param *par) { const struct idletimer_tg_info_v1 *info = par->targinfo; pr_debug("resetting timer %s, timeout period %u\n", info->label, info->timeout); if (info->timer->timer_type & XT_IDLETIMER_ALARM) { ktime_t tout = ktime_set(info->timeout, 0); alarm_start_relative(&info->timer->alarm, tout); } else { mod_timer(&info->timer->timer, msecs_to_jiffies(info->timeout * 1000) + jiffies); } return XT_CONTINUE; } static int idletimer_tg_helper(struct idletimer_tg_info *info) { if (info->timeout == 0) { pr_debug("timeout value is zero\n"); return -EINVAL; } if (info->timeout >= INT_MAX / 1000) { pr_debug("timeout value is too big\n"); return -EINVAL; } if (info->label[0] == '\0' || strnlen(info->label, MAX_IDLETIMER_LABEL_SIZE) == MAX_IDLETIMER_LABEL_SIZE) { pr_debug("label is empty or not nul-terminated\n"); return -EINVAL; } return 0; } static int idletimer_tg_checkentry(const struct xt_tgchk_param *par) { struct idletimer_tg_info *info = par->targinfo; int ret; pr_debug("checkentry targinfo%s\n", info->label); ret = idletimer_tg_helper(info); if(ret < 0) { pr_debug("checkentry helper return invalid\n"); return -EINVAL; } mutex_lock(&list_mutex); info->timer = __idletimer_tg_find_by_label(info->label); if (info->timer) { info->timer->refcnt++; mod_timer(&info->timer->timer, msecs_to_jiffies(info->timeout * 1000) + jiffies); pr_debug("increased refcnt of timer %s to %u\n", info->label, info->timer->refcnt); } else { ret = idletimer_tg_create(info); if (ret < 0) { pr_debug("failed to create timer\n"); mutex_unlock(&list_mutex); return ret; } } mutex_unlock(&list_mutex); return 0; } static int idletimer_tg_checkentry_v1(const struct xt_tgchk_param *par) { struct idletimer_tg_info_v1 *info = par->targinfo; int ret; pr_debug("checkentry targinfo%s\n", info->label); if (info->send_nl_msg) return -EOPNOTSUPP; ret = idletimer_tg_helper((struct idletimer_tg_info *)info); if(ret < 0) { pr_debug("checkentry helper return invalid\n"); return -EINVAL; } if (info->timer_type > XT_IDLETIMER_ALARM) { pr_debug("invalid value for timer type\n"); return -EINVAL; } mutex_lock(&list_mutex); info->timer = __idletimer_tg_find_by_label(info->label); if (info->timer) { if (info->timer->timer_type != info->timer_type) { pr_debug("Adding/Replacing rule with same label and different timer type is not allowed\n"); mutex_unlock(&list_mutex); return -EINVAL; } info->timer->refcnt++; if (info->timer_type & XT_IDLETIMER_ALARM) { /* calculate remaining expiry time */ ktime_t tout = alarm_expires_remaining(&info->timer->alarm); struct timespec64 ktimespec = ktime_to_timespec64(tout); if (ktimespec.tv_sec > 0) { pr_debug("time_expiry_remaining %lld\n", ktimespec.tv_sec); alarm_start_relative(&info->timer->alarm, tout); } } else { mod_timer(&info->timer->timer, msecs_to_jiffies(info->timeout * 1000) + jiffies); } pr_debug("increased refcnt of timer %s to %u\n", info->label, info->timer->refcnt); } else { ret = idletimer_tg_create_v1(info); if (ret < 0) { pr_debug("failed to create timer\n"); mutex_unlock(&list_mutex); return ret; } } mutex_unlock(&list_mutex); return 0; } static void idletimer_tg_destroy(const struct xt_tgdtor_param *par) { const struct idletimer_tg_info *info = par->targinfo; pr_debug("destroy targinfo %s\n", info->label); mutex_lock(&list_mutex); if (--info->timer->refcnt > 0) { pr_debug("decreased refcnt of timer %s to %u\n", info->label, info->timer->refcnt); mutex_unlock(&list_mutex); return; } pr_debug("deleting timer %s\n", info->label); list_del(&info->timer->entry); mutex_unlock(&list_mutex); timer_shutdown_sync(&info->timer->timer); cancel_work_sync(&info->timer->work); sysfs_remove_file(idletimer_tg_kobj, &info->timer->attr.attr); kfree(info->timer->attr.attr.name); kfree(info->timer); } static void idletimer_tg_destroy_v1(const struct xt_tgdtor_param *par) { const struct idletimer_tg_info_v1 *info = par->targinfo; pr_debug("destroy targinfo %s\n", info->label); mutex_lock(&list_mutex); if (--info->timer->refcnt > 0) { pr_debug("decreased refcnt of timer %s to %u\n", info->label, info->timer->refcnt); mutex_unlock(&list_mutex); return; } pr_debug("deleting timer %s\n", info->label); list_del(&info->timer->entry); mutex_unlock(&list_mutex); if (info->timer->timer_type & XT_IDLETIMER_ALARM) { alarm_cancel(&info->timer->alarm); } else { timer_shutdown_sync(&info->timer->timer); } cancel_work_sync(&info->timer->work); sysfs_remove_file(idletimer_tg_kobj, &info->timer->attr.attr); kfree(info->timer->attr.attr.name); kfree(info->timer); } static struct xt_target idletimer_tg[] __read_mostly = { { .name = "IDLETIMER", .family = NFPROTO_IPV4, .target = idletimer_tg_target, .targetsize = sizeof(struct idletimer_tg_info), .usersize = offsetof(struct idletimer_tg_info, timer), .checkentry = idletimer_tg_checkentry, .destroy = idletimer_tg_destroy, .me = THIS_MODULE, }, { .name = "IDLETIMER", .family = NFPROTO_IPV4, .revision = 1, .target = idletimer_tg_target_v1, .targetsize = sizeof(struct idletimer_tg_info_v1), .usersize = offsetof(struct idletimer_tg_info_v1, timer), .checkentry = idletimer_tg_checkentry_v1, .destroy = idletimer_tg_destroy_v1, .me = THIS_MODULE, }, #if IS_ENABLED(CONFIG_IP6_NF_IPTABLES) { .name = "IDLETIMER", .family = NFPROTO_IPV6, .target = idletimer_tg_target, .targetsize = sizeof(struct idletimer_tg_info), .usersize = offsetof(struct idletimer_tg_info, timer), .checkentry = idletimer_tg_checkentry, .destroy = idletimer_tg_destroy, .me = THIS_MODULE, }, { .name = "IDLETIMER", .family = NFPROTO_IPV6, .revision = 1, .target = idletimer_tg_target_v1, .targetsize = sizeof(struct idletimer_tg_info_v1), .usersize = offsetof(struct idletimer_tg_info_v1, timer), .checkentry = idletimer_tg_checkentry_v1, .destroy = idletimer_tg_destroy_v1, .me = THIS_MODULE, }, #endif }; static struct class *idletimer_tg_class; static struct device *idletimer_tg_device; static int __init idletimer_tg_init(void) { int err; idletimer_tg_class = class_create("xt_idletimer"); err = PTR_ERR(idletimer_tg_class); if (IS_ERR(idletimer_tg_class)) { pr_debug("couldn't register device class\n"); goto out; } idletimer_tg_device = device_create(idletimer_tg_class, NULL, MKDEV(0, 0), NULL, "timers"); err = PTR_ERR(idletimer_tg_device); if (IS_ERR(idletimer_tg_device)) { pr_debug("couldn't register system device\n"); goto out_class; } idletimer_tg_kobj = &idletimer_tg_device->kobj; err = xt_register_targets(idletimer_tg, ARRAY_SIZE(idletimer_tg)); if (err < 0) { pr_debug("couldn't register xt target\n"); goto out_dev; } return 0; out_dev: device_destroy(idletimer_tg_class, MKDEV(0, 0)); out_class: class_destroy(idletimer_tg_class); out: return err; } static void __exit idletimer_tg_exit(void) { xt_unregister_targets(idletimer_tg, ARRAY_SIZE(idletimer_tg)); device_destroy(idletimer_tg_class, MKDEV(0, 0)); class_destroy(idletimer_tg_class); } module_init(idletimer_tg_init); module_exit(idletimer_tg_exit); MODULE_AUTHOR("Timo Teras <ext-timo.teras@nokia.com>"); MODULE_AUTHOR("Luciano Coelho <luciano.coelho@nokia.com>"); MODULE_DESCRIPTION("Xtables: idle time monitor"); MODULE_LICENSE("GPL v2"); MODULE_ALIAS("ipt_IDLETIMER"); MODULE_ALIAS("ip6t_IDLETIMER");
2 2 2 2 2 2 2 2 2 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 // SPDX-License-Identifier: GPL-2.0-only #define KMSG_COMPONENT "IPVS" #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt #include <linux/module.h> #include <linux/spinlock.h> #include <linux/interrupt.h> #include <asm/string.h> #include <linux/kmod.h> #include <linux/sysctl.h> #include <net/ip_vs.h> /* IPVS pe list */ static LIST_HEAD(ip_vs_pe); /* semaphore for IPVS PEs. */ static DEFINE_MUTEX(ip_vs_pe_mutex); /* Get pe in the pe list by name */ struct ip_vs_pe *__ip_vs_pe_getbyname(const char *pe_name) { struct ip_vs_pe *pe; IP_VS_DBG(10, "%s(): pe_name \"%s\"\n", __func__, pe_name); rcu_read_lock(); list_for_each_entry_rcu(pe, &ip_vs_pe, n_list) { /* Test and get the modules atomically */ if (pe->module && !try_module_get(pe->module)) { /* This pe is just deleted */ continue; } if (strcmp(pe_name, pe->name)==0) { /* HIT */ rcu_read_unlock(); return pe; } module_put(pe->module); } rcu_read_unlock(); return NULL; } /* Lookup pe and try to load it if it doesn't exist */ struct ip_vs_pe *ip_vs_pe_getbyname(const char *name) { struct ip_vs_pe *pe; /* Search for the pe by name */ pe = __ip_vs_pe_getbyname(name); /* If pe not found, load the module and search again */ if (!pe) { request_module("ip_vs_pe_%s", name); pe = __ip_vs_pe_getbyname(name); } return pe; } /* Register a pe in the pe list */ int register_ip_vs_pe(struct ip_vs_pe *pe) { struct ip_vs_pe *tmp; /* increase the module use count */ if (!ip_vs_use_count_inc()) return -ENOENT; mutex_lock(&ip_vs_pe_mutex); /* Make sure that the pe with this name doesn't exist * in the pe list. */ list_for_each_entry(tmp, &ip_vs_pe, n_list) { if (strcmp(tmp->name, pe->name) == 0) { mutex_unlock(&ip_vs_pe_mutex); ip_vs_use_count_dec(); pr_err("%s(): [%s] pe already existed " "in the system\n", __func__, pe->name); return -EINVAL; } } /* Add it into the d-linked pe list */ list_add_rcu(&pe->n_list, &ip_vs_pe); mutex_unlock(&ip_vs_pe_mutex); pr_info("[%s] pe registered.\n", pe->name); return 0; } EXPORT_SYMBOL_GPL(register_ip_vs_pe); /* Unregister a pe from the pe list */ int unregister_ip_vs_pe(struct ip_vs_pe *pe) { mutex_lock(&ip_vs_pe_mutex); /* Remove it from the d-linked pe list */ list_del_rcu(&pe->n_list); mutex_unlock(&ip_vs_pe_mutex); /* decrease the module use count */ ip_vs_use_count_dec(); pr_info("[%s] pe unregistered.\n", pe->name); return 0; } EXPORT_SYMBOL_GPL(unregister_ip_vs_pe);
80 66 68 69 78 67 1 5 65 65 3 3 4 24 23 66 61 64 2 2 2 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _LINUX_DCCP_H #define _LINUX_DCCP_H #include <linux/in.h> #include <linux/interrupt.h> #include <linux/ktime.h> #include <linux/list.h> #include <linux/uio.h> #include <linux/workqueue.h> #include <net/inet_connection_sock.h> #include <net/inet_sock.h> #include <net/inet_timewait_sock.h> #include <net/tcp_states.h> #include <uapi/linux/dccp.h> enum dccp_state { DCCP_OPEN = TCP_ESTABLISHED, DCCP_REQUESTING = TCP_SYN_SENT, DCCP_LISTEN = TCP_LISTEN, DCCP_RESPOND = TCP_SYN_RECV, /* * States involved in closing a DCCP connection: * 1) ACTIVE_CLOSEREQ is entered by a server sending a CloseReq. * * 2) CLOSING can have three different meanings (RFC 4340, 8.3): * a. Client has performed active-close, has sent a Close to the server * from state OPEN or PARTOPEN, and is waiting for the final Reset * (in this case, SOCK_DONE == 1). * b. Client is asked to perform passive-close, by receiving a CloseReq * in (PART)OPEN state. It sends a Close and waits for final Reset * (in this case, SOCK_DONE == 0). * c. Server performs an active-close as in (a), keeps TIMEWAIT state. * * 3) The following intermediate states are employed to give passively * closing nodes a chance to process their unread data: * - PASSIVE_CLOSE (from OPEN => CLOSED) and * - PASSIVE_CLOSEREQ (from (PART)OPEN to CLOSING; case (b) above). */ DCCP_ACTIVE_CLOSEREQ = TCP_FIN_WAIT1, DCCP_PASSIVE_CLOSE = TCP_CLOSE_WAIT, /* any node receiving a Close */ DCCP_CLOSING = TCP_CLOSING, DCCP_TIME_WAIT = TCP_TIME_WAIT, DCCP_CLOSED = TCP_CLOSE, DCCP_NEW_SYN_RECV = TCP_NEW_SYN_RECV, DCCP_PARTOPEN = TCP_MAX_STATES, DCCP_PASSIVE_CLOSEREQ, /* clients receiving CloseReq */ DCCP_MAX_STATES }; enum { DCCPF_OPEN = TCPF_ESTABLISHED, DCCPF_REQUESTING = TCPF_SYN_SENT, DCCPF_LISTEN = TCPF_LISTEN, DCCPF_RESPOND = TCPF_SYN_RECV, DCCPF_ACTIVE_CLOSEREQ = TCPF_FIN_WAIT1, DCCPF_CLOSING = TCPF_CLOSING, DCCPF_TIME_WAIT = TCPF_TIME_WAIT, DCCPF_CLOSED = TCPF_CLOSE, DCCPF_NEW_SYN_RECV = TCPF_NEW_SYN_RECV, DCCPF_PARTOPEN = (1 << DCCP_PARTOPEN), }; static inline struct dccp_hdr *dccp_hdr(const struct sk_buff *skb) { return (struct dccp_hdr *)skb_transport_header(skb); } static inline struct dccp_hdr *dccp_zeroed_hdr(struct sk_buff *skb, int headlen) { skb_push(skb, headlen); skb_reset_transport_header(skb); return memset(skb_transport_header(skb), 0, headlen); } static inline struct dccp_hdr_ext *dccp_hdrx(const struct dccp_hdr *dh) { return (struct dccp_hdr_ext *)((unsigned char *)dh + sizeof(*dh)); } static inline unsigned int __dccp_basic_hdr_len(const struct dccp_hdr *dh) { return sizeof(*dh) + (dh->dccph_x ? sizeof(struct dccp_hdr_ext) : 0); } static inline unsigned int dccp_basic_hdr_len(const struct sk_buff *skb) { const struct dccp_hdr *dh = dccp_hdr(skb); return __dccp_basic_hdr_len(dh); } static inline __u64 dccp_hdr_seq(const struct dccp_hdr *dh) { __u64 seq_nr = ntohs(dh->dccph_seq); if (dh->dccph_x != 0) seq_nr = (seq_nr << 32) + ntohl(dccp_hdrx(dh)->dccph_seq_low); else seq_nr += (u32)dh->dccph_seq2 << 16; return seq_nr; } static inline struct dccp_hdr_request *dccp_hdr_request(struct sk_buff *skb) { return (struct dccp_hdr_request *)(skb_transport_header(skb) + dccp_basic_hdr_len(skb)); } static inline struct dccp_hdr_ack_bits *dccp_hdr_ack_bits(const struct sk_buff *skb) { return (struct dccp_hdr_ack_bits *)(skb_transport_header(skb) + dccp_basic_hdr_len(skb)); } static inline u64 dccp_hdr_ack_seq(const struct sk_buff *skb) { const struct dccp_hdr_ack_bits *dhack = dccp_hdr_ack_bits(skb); return ((u64)ntohs(dhack->dccph_ack_nr_high) << 32) + ntohl(dhack->dccph_ack_nr_low); } static inline struct dccp_hdr_response *dccp_hdr_response(struct sk_buff *skb) { return (struct dccp_hdr_response *)(skb_transport_header(skb) + dccp_basic_hdr_len(skb)); } static inline struct dccp_hdr_reset *dccp_hdr_reset(struct sk_buff *skb) { return (struct dccp_hdr_reset *)(skb_transport_header(skb) + dccp_basic_hdr_len(skb)); } static inline unsigned int __dccp_hdr_len(const struct dccp_hdr *dh) { return __dccp_basic_hdr_len(dh) + dccp_packet_hdr_len(dh->dccph_type); } static inline unsigned int dccp_hdr_len(const struct sk_buff *skb) { return __dccp_hdr_len(dccp_hdr(skb)); } /** * struct dccp_request_sock - represent DCCP-specific connection request * @dreq_inet_rsk: structure inherited from * @dreq_iss: initial sequence number, sent on the first Response (RFC 4340, 7.1) * @dreq_gss: greatest sequence number sent (for retransmitted Responses) * @dreq_isr: initial sequence number received in the first Request * @dreq_gsr: greatest sequence number received (for retransmitted Request(s)) * @dreq_service: service code present on the Request (there is just one) * @dreq_featneg: feature negotiation options for this connection * The following two fields are analogous to the ones in dccp_sock: * @dreq_timestamp_echo: last received timestamp to echo (13.1) * @dreq_timestamp_echo: the time of receiving the last @dreq_timestamp_echo */ struct dccp_request_sock { struct inet_request_sock dreq_inet_rsk; __u64 dreq_iss; __u64 dreq_gss; __u64 dreq_isr; __u64 dreq_gsr; __be32 dreq_service; spinlock_t dreq_lock; struct list_head dreq_featneg; __u32 dreq_timestamp_echo; __u32 dreq_timestamp_time; }; static inline struct dccp_request_sock *dccp_rsk(const struct request_sock *req) { return (struct dccp_request_sock *)req; } extern struct inet_timewait_death_row dccp_death_row; extern int dccp_parse_options(struct sock *sk, struct dccp_request_sock *dreq, struct sk_buff *skb); struct dccp_options_received { u64 dccpor_ndp:48; u32 dccpor_timestamp; u32 dccpor_timestamp_echo; u32 dccpor_elapsed_time; }; struct ccid; enum dccp_role { DCCP_ROLE_UNDEFINED, DCCP_ROLE_LISTEN, DCCP_ROLE_CLIENT, DCCP_ROLE_SERVER, }; struct dccp_service_list { __u32 dccpsl_nr; __be32 dccpsl_list[]; }; #define DCCP_SERVICE_INVALID_VALUE htonl((__u32)-1) #define DCCP_SERVICE_CODE_IS_ABSENT 0 static inline bool dccp_list_has_service(const struct dccp_service_list *sl, const __be32 service) { if (likely(sl != NULL)) { u32 i = sl->dccpsl_nr; while (i--) if (sl->dccpsl_list[i] == service) return true; } return false; } struct dccp_ackvec; /** * struct dccp_sock - DCCP socket state * * @dccps_swl - sequence number window low * @dccps_swh - sequence number window high * @dccps_awl - acknowledgement number window low * @dccps_awh - acknowledgement number window high * @dccps_iss - initial sequence number sent * @dccps_isr - initial sequence number received * @dccps_osr - first OPEN sequence number received * @dccps_gss - greatest sequence number sent * @dccps_gsr - greatest valid sequence number received * @dccps_gar - greatest valid ack number received on a non-Sync; initialized to %dccps_iss * @dccps_service - first (passive sock) or unique (active sock) service code * @dccps_service_list - second .. last service code on passive socket * @dccps_timestamp_echo - latest timestamp received on a TIMESTAMP option * @dccps_timestamp_time - time of receiving latest @dccps_timestamp_echo * @dccps_l_ack_ratio - feature-local Ack Ratio * @dccps_r_ack_ratio - feature-remote Ack Ratio * @dccps_l_seq_win - local Sequence Window (influences ack number validity) * @dccps_r_seq_win - remote Sequence Window (influences seq number validity) * @dccps_pcslen - sender partial checksum coverage (via sockopt) * @dccps_pcrlen - receiver partial checksum coverage (via sockopt) * @dccps_send_ndp_count - local Send NDP Count feature (7.7.2) * @dccps_ndp_count - number of Non Data Packets since last data packet * @dccps_mss_cache - current value of MSS (path MTU minus header sizes) * @dccps_rate_last - timestamp for rate-limiting DCCP-Sync (RFC 4340, 7.5.4) * @dccps_featneg - tracks feature-negotiation state (mostly during handshake) * @dccps_hc_rx_ackvec - rx half connection ack vector * @dccps_hc_rx_ccid - CCID used for the receiver (or receiving half-connection) * @dccps_hc_tx_ccid - CCID used for the sender (or sending half-connection) * @dccps_options_received - parsed set of retrieved options * @dccps_qpolicy - TX dequeueing policy, one of %dccp_packet_dequeueing_policy * @dccps_tx_qlen - maximum length of the TX queue * @dccps_role - role of this sock, one of %dccp_role * @dccps_hc_rx_insert_options - receiver wants to add options when acking * @dccps_hc_tx_insert_options - sender wants to add options when sending * @dccps_server_timewait - server holds timewait state on close (RFC 4340, 8.3) * @dccps_sync_scheduled - flag which signals "send out-of-band message soon" * @dccps_xmitlet - tasklet scheduled by the TX CCID to dequeue data packets * @dccps_xmit_timer - used by the TX CCID to delay sending (rate-based pacing) * @dccps_syn_rtt - RTT sample from Request/Response exchange (in usecs) */ struct dccp_sock { /* inet_connection_sock has to be the first member of dccp_sock */ struct inet_connection_sock dccps_inet_connection; #define dccps_syn_rtt dccps_inet_connection.icsk_ack.lrcvtime __u64 dccps_swl; __u64 dccps_swh; __u64 dccps_awl; __u64 dccps_awh; __u64 dccps_iss; __u64 dccps_isr; __u64 dccps_osr; __u64 dccps_gss; __u64 dccps_gsr; __u64 dccps_gar; __be32 dccps_service; __u32 dccps_mss_cache; struct dccp_service_list *dccps_service_list; __u32 dccps_timestamp_echo; __u32 dccps_timestamp_time; __u16 dccps_l_ack_ratio; __u16 dccps_r_ack_ratio; __u64 dccps_l_seq_win:48; __u64 dccps_r_seq_win:48; __u8 dccps_pcslen:4; __u8 dccps_pcrlen:4; __u8 dccps_send_ndp_count:1; __u64 dccps_ndp_count:48; unsigned long dccps_rate_last; struct list_head dccps_featneg; struct dccp_ackvec *dccps_hc_rx_ackvec; struct ccid *dccps_hc_rx_ccid; struct ccid *dccps_hc_tx_ccid; struct dccp_options_received dccps_options_received; __u8 dccps_qpolicy; __u32 dccps_tx_qlen; enum dccp_role dccps_role:2; __u8 dccps_hc_rx_insert_options:1; __u8 dccps_hc_tx_insert_options:1; __u8 dccps_server_timewait:1; __u8 dccps_sync_scheduled:1; struct tasklet_struct dccps_xmitlet; struct timer_list dccps_xmit_timer; }; #define dccp_sk(ptr) container_of_const(ptr, struct dccp_sock, \ dccps_inet_connection.icsk_inet.sk) static inline const char *dccp_role(const struct sock *sk) { switch (dccp_sk(sk)->dccps_role) { case DCCP_ROLE_UNDEFINED: return "undefined"; case DCCP_ROLE_LISTEN: return "listen"; case DCCP_ROLE_SERVER: return "server"; case DCCP_ROLE_CLIENT: return "client"; } return NULL; } extern void dccp_syn_ack_timeout(const struct request_sock *req); #endif /* _LINUX_DCCP_H */
5 5 5 5 5 2 2 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 4 1 4 2 4 1 3 1 2 2 5 5 5 5 5 5 5 5 5 3 2 3 3 3 3 2 1 3 3 3 3 3 3 3 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 // SPDX-License-Identifier: GPL-2.0-or-later /* procfs files for key database enumeration * * Copyright (C) 2004 Red Hat, Inc. All Rights Reserved. * Written by David Howells (dhowells@redhat.com) */ #include <linux/init.h> #include <linux/sched.h> #include <linux/fs.h> #include <linux/proc_fs.h> #include <linux/seq_file.h> #include <asm/errno.h> #include "internal.h" static void *proc_keys_start(struct seq_file *p, loff_t *_pos); static void *proc_keys_next(struct seq_file *p, void *v, loff_t *_pos); static void proc_keys_stop(struct seq_file *p, void *v); static int proc_keys_show(struct seq_file *m, void *v); static const struct seq_operations proc_keys_ops = { .start = proc_keys_start, .next = proc_keys_next, .stop = proc_keys_stop, .show = proc_keys_show, }; static void *proc_key_users_start(struct seq_file *p, loff_t *_pos); static void *proc_key_users_next(struct seq_file *p, void *v, loff_t *_pos); static void proc_key_users_stop(struct seq_file *p, void *v); static int proc_key_users_show(struct seq_file *m, void *v); static const struct seq_operations proc_key_users_ops = { .start = proc_key_users_start, .next = proc_key_users_next, .stop = proc_key_users_stop, .show = proc_key_users_show, }; /* * Declare the /proc files. */ static int __init key_proc_init(void) { struct proc_dir_entry *p; p = proc_create_seq("keys", 0, NULL, &proc_keys_ops); if (!p) panic("Cannot create /proc/keys\n"); p = proc_create_seq("key-users", 0, NULL, &proc_key_users_ops); if (!p) panic("Cannot create /proc/key-users\n"); return 0; } __initcall(key_proc_init); /* * Implement "/proc/keys" to provide a list of the keys on the system that * grant View permission to the caller. */ static struct rb_node *key_serial_next(struct seq_file *p, struct rb_node *n) { struct user_namespace *user_ns = seq_user_ns(p); n = rb_next(n); while (n) { struct key *key = rb_entry(n, struct key, serial_node); if (kuid_has_mapping(user_ns, key->user->uid)) break; n = rb_next(n); } return n; } static struct key *find_ge_key(struct seq_file *p, key_serial_t id) { struct user_namespace *user_ns = seq_user_ns(p); struct rb_node *n = key_serial_tree.rb_node; struct key *minkey = NULL; while (n) { struct key *key = rb_entry(n, struct key, serial_node); if (id < key->serial) { if (!minkey || minkey->serial > key->serial) minkey = key; n = n->rb_left; } else if (id > key->serial) { n = n->rb_right; } else { minkey = key; break; } key = NULL; } if (!minkey) return NULL; for (;;) { if (kuid_has_mapping(user_ns, minkey->user->uid)) return minkey; n = rb_next(&minkey->serial_node); if (!n) return NULL; minkey = rb_entry(n, struct key, serial_node); } } static void *proc_keys_start(struct seq_file *p, loff_t *_pos) __acquires(key_serial_lock) { key_serial_t pos = *_pos; struct key *key; spin_lock(&key_serial_lock); if (*_pos > INT_MAX) return NULL; key = find_ge_key(p, pos); if (!key) return NULL; *_pos = key->serial; return &key->serial_node; } static inline key_serial_t key_node_serial(struct rb_node *n) { struct key *key = rb_entry(n, struct key, serial_node); return key->serial; } static void *proc_keys_next(struct seq_file *p, void *v, loff_t *_pos) { struct rb_node *n; n = key_serial_next(p, v); if (n) *_pos = key_node_serial(n); else (*_pos)++; return n; } static void proc_keys_stop(struct seq_file *p, void *v) __releases(key_serial_lock) { spin_unlock(&key_serial_lock); } static int proc_keys_show(struct seq_file *m, void *v) { struct rb_node *_p = v; struct key *key = rb_entry(_p, struct key, serial_node); unsigned long flags; key_ref_t key_ref, skey_ref; time64_t now, expiry; char xbuf[16]; short state; u64 timo; int rc; struct keyring_search_context ctx = { .index_key = key->index_key, .cred = m->file->f_cred, .match_data.cmp = lookup_user_key_possessed, .match_data.raw_data = key, .match_data.lookup_type = KEYRING_SEARCH_LOOKUP_DIRECT, .flags = (KEYRING_SEARCH_NO_STATE_CHECK | KEYRING_SEARCH_RECURSE), }; key_ref = make_key_ref(key, 0); /* determine if the key is possessed by this process (a test we can * skip if the key does not indicate the possessor can view it */ if (key->perm & KEY_POS_VIEW) { rcu_read_lock(); skey_ref = search_cred_keyrings_rcu(&ctx); rcu_read_unlock(); if (!IS_ERR(skey_ref)) { key_ref_put(skey_ref); key_ref = make_key_ref(key, 1); } } /* check whether the current task is allowed to view the key */ rc = key_task_permission(key_ref, ctx.cred, KEY_NEED_VIEW); if (rc < 0) return 0; now = ktime_get_real_seconds(); rcu_read_lock(); /* come up with a suitable timeout value */ expiry = READ_ONCE(key->expiry); if (expiry == TIME64_MAX) { memcpy(xbuf, "perm", 5); } else if (now >= expiry) { memcpy(xbuf, "expd", 5); } else { timo = expiry - now; if (timo < 60) sprintf(xbuf, "%llus", timo); else if (timo < 60*60) sprintf(xbuf, "%llum", div_u64(timo, 60)); else if (timo < 60*60*24) sprintf(xbuf, "%lluh", div_u64(timo, 60 * 60)); else if (timo < 60*60*24*7) sprintf(xbuf, "%llud", div_u64(timo, 60 * 60 * 24)); else sprintf(xbuf, "%lluw", div_u64(timo, 60 * 60 * 24 * 7)); } state = key_read_state(key); #define showflag(FLAGS, LETTER, FLAG) \ ((FLAGS & (1 << FLAG)) ? LETTER : '-') flags = READ_ONCE(key->flags); seq_printf(m, "%08x %c%c%c%c%c%c%c %5d %4s %08x %5d %5d %-9.9s ", key->serial, state != KEY_IS_UNINSTANTIATED ? 'I' : '-', showflag(flags, 'R', KEY_FLAG_REVOKED), showflag(flags, 'D', KEY_FLAG_DEAD), showflag(flags, 'Q', KEY_FLAG_IN_QUOTA), showflag(flags, 'U', KEY_FLAG_USER_CONSTRUCT), state < 0 ? 'N' : '-', showflag(flags, 'i', KEY_FLAG_INVALIDATED), refcount_read(&key->usage), xbuf, key->perm, from_kuid_munged(seq_user_ns(m), key->uid), from_kgid_munged(seq_user_ns(m), key->gid), key->type->name); #undef showflag if (key->type->describe) key->type->describe(key, m); seq_putc(m, '\n'); rcu_read_unlock(); return 0; } static struct rb_node *__key_user_next(struct user_namespace *user_ns, struct rb_node *n) { while (n) { struct key_user *user = rb_entry(n, struct key_user, node); if (kuid_has_mapping(user_ns, user->uid)) break; n = rb_next(n); } return n; } static struct rb_node *key_user_next(struct user_namespace *user_ns, struct rb_node *n) { return __key_user_next(user_ns, rb_next(n)); } static struct rb_node *key_user_first(struct user_namespace *user_ns, struct rb_root *r) { struct rb_node *n = rb_first(r); return __key_user_next(user_ns, n); } static void *proc_key_users_start(struct seq_file *p, loff_t *_pos) __acquires(key_user_lock) { struct rb_node *_p; loff_t pos = *_pos; spin_lock(&key_user_lock); _p = key_user_first(seq_user_ns(p), &key_user_tree); while (pos > 0 && _p) { pos--; _p = key_user_next(seq_user_ns(p), _p); } return _p; } static void *proc_key_users_next(struct seq_file *p, void *v, loff_t *_pos) { (*_pos)++; return key_user_next(seq_user_ns(p), (struct rb_node *)v); } static void proc_key_users_stop(struct seq_file *p, void *v) __releases(key_user_lock) { spin_unlock(&key_user_lock); } static int proc_key_users_show(struct seq_file *m, void *v) { struct rb_node *_p = v; struct key_user *user = rb_entry(_p, struct key_user, node); unsigned maxkeys = uid_eq(user->uid, GLOBAL_ROOT_UID) ? key_quota_root_maxkeys : key_quota_maxkeys; unsigned maxbytes = uid_eq(user->uid, GLOBAL_ROOT_UID) ? key_quota_root_maxbytes : key_quota_maxbytes; seq_printf(m, "%5u: %5d %d/%d %d/%d %d/%d\n", from_kuid_munged(seq_user_ns(m), user->uid), refcount_read(&user->usage), atomic_read(&user->nkeys), atomic_read(&user->nikeys), user->qnkeys, maxkeys, user->qnbytes, maxbytes); return 0; }
14 14 14 14 14 14 14 14 14 14 14 14 14 14 14 14 14 14 14 14 14 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 // SPDX-License-Identifier: GPL-2.0 /* * linux/net/sunrpc/auth_unix.c * * UNIX-style authentication; no AUTH_SHORT support * * Copyright (C) 1996, Olaf Kirch <okir@monad.swb.de> */ #include <linux/slab.h> #include <linux/types.h> #include <linux/sched.h> #include <linux/module.h> #include <linux/mempool.h> #include <linux/sunrpc/clnt.h> #include <linux/sunrpc/auth.h> #include <linux/user_namespace.h> #if IS_ENABLED(CONFIG_SUNRPC_DEBUG) # define RPCDBG_FACILITY RPCDBG_AUTH #endif static struct rpc_auth unix_auth; static const struct rpc_credops unix_credops; static mempool_t *unix_pool; static struct rpc_auth * unx_create(const struct rpc_auth_create_args *args, struct rpc_clnt *clnt) { refcount_inc(&unix_auth.au_count); return &unix_auth; } static void unx_destroy(struct rpc_auth *auth) { } /* * Lookup AUTH_UNIX creds for current process */ static struct rpc_cred *unx_lookup_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags) { struct rpc_cred *ret; ret = kmalloc(sizeof(*ret), rpc_task_gfp_mask()); if (!ret) { if (!(flags & RPCAUTH_LOOKUP_ASYNC)) return ERR_PTR(-ENOMEM); ret = mempool_alloc(unix_pool, GFP_NOWAIT); if (!ret) return ERR_PTR(-ENOMEM); } rpcauth_init_cred(ret, acred, auth, &unix_credops); ret->cr_flags = 1UL << RPCAUTH_CRED_UPTODATE; return ret; } static void unx_free_cred_callback(struct rcu_head *head) { struct rpc_cred *rpc_cred = container_of(head, struct rpc_cred, cr_rcu); put_cred(rpc_cred->cr_cred); mempool_free(rpc_cred, unix_pool); } static void unx_destroy_cred(struct rpc_cred *cred) { call_rcu(&cred->cr_rcu, unx_free_cred_callback); } /* * Match credentials against current the auth_cred. */ static int unx_match(struct auth_cred *acred, struct rpc_cred *cred, int flags) { unsigned int groups = 0; unsigned int i; if (cred->cr_cred == acred->cred) return 1; if (!uid_eq(cred->cr_cred->fsuid, acred->cred->fsuid) || !gid_eq(cred->cr_cred->fsgid, acred->cred->fsgid)) return 0; if (acred->cred->group_info != NULL) groups = acred->cred->group_info->ngroups; if (groups > UNX_NGROUPS) groups = UNX_NGROUPS; if (cred->cr_cred->group_info == NULL) return groups == 0; if (groups != cred->cr_cred->group_info->ngroups) return 0; for (i = 0; i < groups ; i++) if (!gid_eq(cred->cr_cred->group_info->gid[i], acred->cred->group_info->gid[i])) return 0; return 1; } /* * Marshal credentials. * Maybe we should keep a cached credential for performance reasons. */ static int unx_marshal(struct rpc_task *task, struct xdr_stream *xdr) { struct rpc_clnt *clnt = task->tk_client; struct rpc_cred *cred = task->tk_rqstp->rq_cred; __be32 *p, *cred_len, *gidarr_len; int i; struct group_info *gi = cred->cr_cred->group_info; struct user_namespace *userns = clnt->cl_cred ? clnt->cl_cred->user_ns : &init_user_ns; /* Credential */ p = xdr_reserve_space(xdr, 3 * sizeof(*p)); if (!p) goto marshal_failed; *p++ = rpc_auth_unix; cred_len = p++; *p++ = xdr_zero; /* stamp */ if (xdr_stream_encode_opaque(xdr, clnt->cl_nodename, clnt->cl_nodelen) < 0) goto marshal_failed; p = xdr_reserve_space(xdr, 3 * sizeof(*p)); if (!p) goto marshal_failed; *p++ = cpu_to_be32(from_kuid_munged(userns, cred->cr_cred->fsuid)); *p++ = cpu_to_be32(from_kgid_munged(userns, cred->cr_cred->fsgid)); gidarr_len = p++; if (gi) for (i = 0; i < UNX_NGROUPS && i < gi->ngroups; i++) *p++ = cpu_to_be32(from_kgid_munged(userns, gi->gid[i])); *gidarr_len = cpu_to_be32(p - gidarr_len - 1); *cred_len = cpu_to_be32((p - cred_len - 1) << 2); p = xdr_reserve_space(xdr, (p - gidarr_len - 1) << 2); if (!p) goto marshal_failed; /* Verifier */ p = xdr_reserve_space(xdr, 2 * sizeof(*p)); if (!p) goto marshal_failed; *p++ = rpc_auth_null; *p = xdr_zero; return 0; marshal_failed: return -EMSGSIZE; } /* * Refresh credentials. This is a no-op for AUTH_UNIX */ static int unx_refresh(struct rpc_task *task) { set_bit(RPCAUTH_CRED_UPTODATE, &task->tk_rqstp->rq_cred->cr_flags); return 0; } static int unx_validate(struct rpc_task *task, struct xdr_stream *xdr) { struct rpc_auth *auth = task->tk_rqstp->rq_cred->cr_auth; __be32 *p; u32 size; p = xdr_inline_decode(xdr, 2 * sizeof(*p)); if (!p) return -EIO; switch (*p++) { case rpc_auth_null: case rpc_auth_unix: case rpc_auth_short: break; default: return -EIO; } size = be32_to_cpup(p); if (size > RPC_MAX_AUTH_SIZE) return -EIO; p = xdr_inline_decode(xdr, size); if (!p) return -EIO; auth->au_verfsize = XDR_QUADLEN(size) + 2; auth->au_rslack = XDR_QUADLEN(size) + 2; auth->au_ralign = XDR_QUADLEN(size) + 2; return 0; } int __init rpc_init_authunix(void) { unix_pool = mempool_create_kmalloc_pool(16, sizeof(struct rpc_cred)); return unix_pool ? 0 : -ENOMEM; } void rpc_destroy_authunix(void) { mempool_destroy(unix_pool); } const struct rpc_authops authunix_ops = { .owner = THIS_MODULE, .au_flavor = RPC_AUTH_UNIX, .au_name = "UNIX", .create = unx_create, .destroy = unx_destroy, .lookup_cred = unx_lookup_cred, }; static struct rpc_auth unix_auth = { .au_cslack = UNX_CALLSLACK, .au_rslack = NUL_REPLYSLACK, .au_verfsize = NUL_REPLYSLACK, .au_ops = &authunix_ops, .au_flavor = RPC_AUTH_UNIX, .au_count = REFCOUNT_INIT(1), }; static const struct rpc_credops unix_credops = { .cr_name = "AUTH_UNIX", .crdestroy = unx_destroy_cred, .crmatch = unx_match, .crmarshal = unx_marshal, .crwrap_req = rpcauth_wrap_req_encode, .crrefresh = unx_refresh, .crvalidate = unx_validate, .crunwrap_resp = rpcauth_unwrap_resp_decode, };
162 160 162 162 162 105 161 58 58 58 248 248 248 248 248 42 42 42 244 244 244 247 134 247 157 285 35 281 279 279 95 194 194 285 5 5 5 5 5 5 5 5 261 261 261 261 261 261 261 257 261 4 251 250 6 261 247 246 246 245 244 242 1 3 5 4 4 4 4 4 162 1 162 40 162 161 162 162 162 162 162 161 162 6 5 5 2 2 2 8 8 5 5 1 1 4 3 8 5 4 4 1 3 3 3 1 5 253 253 248 40 247 253 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 // SPDX-License-Identifier: GPL-2.0-only /* * Fd transport layer. Includes deprecated socket layer. * * Copyright (C) 2006 by Russ Cox <rsc@swtch.com> * Copyright (C) 2004-2005 by Latchesar Ionkov <lucho@ionkov.net> * Copyright (C) 2004-2008 by Eric Van Hensbergen <ericvh@gmail.com> * Copyright (C) 1997-2002 by Ron Minnich <rminnich@sarnoff.com> */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/in.h> #include <linux/module.h> #include <linux/net.h> #include <linux/ipv6.h> #include <linux/kthread.h> #include <linux/errno.h> #include <linux/kernel.h> #include <linux/un.h> #include <linux/uaccess.h> #include <linux/inet.h> #include <linux/file.h> #include <linux/parser.h> #include <linux/slab.h> #include <linux/seq_file.h> #include <net/9p/9p.h> #include <net/9p/client.h> #include <net/9p/transport.h> #include <linux/syscalls.h> /* killme */ #define P9_PORT 564 #define MAX_SOCK_BUF (1024*1024) #define MAXPOLLWADDR 2 static struct p9_trans_module p9_tcp_trans; static struct p9_trans_module p9_fd_trans; /** * struct p9_fd_opts - per-transport options * @rfd: file descriptor for reading (trans=fd) * @wfd: file descriptor for writing (trans=fd) * @port: port to connect to (trans=tcp) * @privport: port is privileged */ struct p9_fd_opts { int rfd; int wfd; u16 port; bool privport; }; /* * Option Parsing (code inspired by NFS code) * - a little lazy - parse all fd-transport options */ enum { /* Options that take integer arguments */ Opt_port, Opt_rfdno, Opt_wfdno, Opt_err, /* Options that take no arguments */ Opt_privport, }; static const match_table_t tokens = { {Opt_port, "port=%u"}, {Opt_rfdno, "rfdno=%u"}, {Opt_wfdno, "wfdno=%u"}, {Opt_privport, "privport"}, {Opt_err, NULL}, }; enum { Rworksched = 1, /* read work scheduled or running */ Rpending = 2, /* can read */ Wworksched = 4, /* write work scheduled or running */ Wpending = 8, /* can write */ }; struct p9_poll_wait { struct p9_conn *conn; wait_queue_entry_t wait; wait_queue_head_t *wait_addr; }; /** * struct p9_conn - fd mux connection state information * @mux_list: list link for mux to manage multiple connections (?) * @client: reference to client instance for this connection * @err: error state * @req_lock: lock protecting req_list and requests statuses * @req_list: accounting for requests which have been sent * @unsent_req_list: accounting for requests that haven't been sent * @rreq: read request * @wreq: write request * @tmp_buf: temporary buffer to read in header * @rc: temporary fcall for reading current frame * @wpos: write position for current frame * @wsize: amount of data to write for current frame * @wbuf: current write buffer * @poll_pending_link: pending links to be polled per conn * @poll_wait: array of wait_q's for various worker threads * @pt: poll state * @rq: current read work * @wq: current write work * @wsched: ???? * */ struct p9_conn { struct list_head mux_list; struct p9_client *client; int err; spinlock_t req_lock; struct list_head req_list; struct list_head unsent_req_list; struct p9_req_t *rreq; struct p9_req_t *wreq; char tmp_buf[P9_HDRSZ]; struct p9_fcall rc; int wpos; int wsize; char *wbuf; struct list_head poll_pending_link; struct p9_poll_wait poll_wait[MAXPOLLWADDR]; poll_table pt; struct work_struct rq; struct work_struct wq; unsigned long wsched; }; /** * struct p9_trans_fd - transport state * @rd: reference to file to read from * @wr: reference of file to write to * @conn: connection state reference * */ struct p9_trans_fd { struct file *rd; struct file *wr; struct p9_conn conn; }; static void p9_poll_workfn(struct work_struct *work); static DEFINE_SPINLOCK(p9_poll_lock); static LIST_HEAD(p9_poll_pending_list); static DECLARE_WORK(p9_poll_work, p9_poll_workfn); static unsigned int p9_ipport_resv_min = P9_DEF_MIN_RESVPORT; static unsigned int p9_ipport_resv_max = P9_DEF_MAX_RESVPORT; static void p9_mux_poll_stop(struct p9_conn *m) { unsigned long flags; int i; for (i = 0; i < ARRAY_SIZE(m->poll_wait); i++) { struct p9_poll_wait *pwait = &m->poll_wait[i]; if (pwait->wait_addr) { remove_wait_queue(pwait->wait_addr, &pwait->wait); pwait->wait_addr = NULL; } } spin_lock_irqsave(&p9_poll_lock, flags); list_del_init(&m->poll_pending_link); spin_unlock_irqrestore(&p9_poll_lock, flags); flush_work(&p9_poll_work); } /** * p9_conn_cancel - cancel all pending requests with error * @m: mux data * @err: error code * */ static void p9_conn_cancel(struct p9_conn *m, int err) { struct p9_req_t *req, *rtmp; LIST_HEAD(cancel_list); p9_debug(P9_DEBUG_ERROR, "mux %p err %d\n", m, err); spin_lock(&m->req_lock); if (m->err) { spin_unlock(&m->req_lock); return; } m->err = err; list_for_each_entry_safe(req, rtmp, &m->req_list, req_list) { list_move(&req->req_list, &cancel_list); WRITE_ONCE(req->status, REQ_STATUS_ERROR); } list_for_each_entry_safe(req, rtmp, &m->unsent_req_list, req_list) { list_move(&req->req_list, &cancel_list); WRITE_ONCE(req->status, REQ_STATUS_ERROR); } spin_unlock(&m->req_lock); list_for_each_entry_safe(req, rtmp, &cancel_list, req_list) { p9_debug(P9_DEBUG_ERROR, "call back req %p\n", req); list_del(&req->req_list); if (!req->t_err) req->t_err = err; p9_client_cb(m->client, req, REQ_STATUS_ERROR); } } static __poll_t p9_fd_poll(struct p9_client *client, struct poll_table_struct *pt, int *err) { __poll_t ret; struct p9_trans_fd *ts = NULL; if (client && client->status == Connected) ts = client->trans; if (!ts) { if (err) *err = -EREMOTEIO; return EPOLLERR; } ret = vfs_poll(ts->rd, pt); if (ts->rd != ts->wr) ret = (ret & ~EPOLLOUT) | (vfs_poll(ts->wr, pt) & ~EPOLLIN); return ret; } /** * p9_fd_read- read from a fd * @client: client instance * @v: buffer to receive data into * @len: size of receive buffer * */ static int p9_fd_read(struct p9_client *client, void *v, int len) { int ret; struct p9_trans_fd *ts = NULL; loff_t pos; if (client && client->status != Disconnected) ts = client->trans; if (!ts) return -EREMOTEIO; if (!(ts->rd->f_flags & O_NONBLOCK)) p9_debug(P9_DEBUG_ERROR, "blocking read ...\n"); pos = ts->rd->f_pos; ret = kernel_read(ts->rd, v, len, &pos); if (ret <= 0 && ret != -ERESTARTSYS && ret != -EAGAIN) client->status = Disconnected; return ret; } /** * p9_read_work - called when there is some data to be read from a transport * @work: container of work to be done * */ static void p9_read_work(struct work_struct *work) { __poll_t n; int err; struct p9_conn *m; m = container_of(work, struct p9_conn, rq); if (m->err < 0) return; p9_debug(P9_DEBUG_TRANS, "start mux %p pos %zd\n", m, m->rc.offset); if (!m->rc.sdata) { m->rc.sdata = m->tmp_buf; m->rc.offset = 0; m->rc.capacity = P9_HDRSZ; /* start by reading header */ } clear_bit(Rpending, &m->wsched); p9_debug(P9_DEBUG_TRANS, "read mux %p pos %zd size: %zd = %zd\n", m, m->rc.offset, m->rc.capacity, m->rc.capacity - m->rc.offset); err = p9_fd_read(m->client, m->rc.sdata + m->rc.offset, m->rc.capacity - m->rc.offset); p9_debug(P9_DEBUG_TRANS, "mux %p got %d bytes\n", m, err); if (err == -EAGAIN) goto end_clear; if (err <= 0) goto error; m->rc.offset += err; /* header read in */ if ((!m->rreq) && (m->rc.offset == m->rc.capacity)) { p9_debug(P9_DEBUG_TRANS, "got new header\n"); /* Header size */ m->rc.size = P9_HDRSZ; err = p9_parse_header(&m->rc, &m->rc.size, NULL, NULL, 0); if (err) { p9_debug(P9_DEBUG_ERROR, "error parsing header: %d\n", err); goto error; } p9_debug(P9_DEBUG_TRANS, "mux %p pkt: size: %d bytes tag: %d\n", m, m->rc.size, m->rc.tag); m->rreq = p9_tag_lookup(m->client, m->rc.tag); if (!m->rreq || (m->rreq->status != REQ_STATUS_SENT)) { p9_debug(P9_DEBUG_ERROR, "Unexpected packet tag %d\n", m->rc.tag); err = -EIO; goto error; } if (m->rc.size > m->rreq->rc.capacity) { p9_debug(P9_DEBUG_ERROR, "requested packet size too big: %d for tag %d with capacity %zd\n", m->rc.size, m->rc.tag, m->rreq->rc.capacity); err = -EIO; goto error; } if (!m->rreq->rc.sdata) { p9_debug(P9_DEBUG_ERROR, "No recv fcall for tag %d (req %p), disconnecting!\n", m->rc.tag, m->rreq); p9_req_put(m->client, m->rreq); m->rreq = NULL; err = -EIO; goto error; } m->rc.sdata = m->rreq->rc.sdata; memcpy(m->rc.sdata, m->tmp_buf, m->rc.capacity); m->rc.capacity = m->rc.size; } /* packet is read in * not an else because some packets (like clunk) have no payload */ if ((m->rreq) && (m->rc.offset == m->rc.capacity)) { p9_debug(P9_DEBUG_TRANS, "got new packet\n"); m->rreq->rc.size = m->rc.offset; spin_lock(&m->req_lock); if (m->rreq->status == REQ_STATUS_SENT) { list_del(&m->rreq->req_list); p9_client_cb(m->client, m->rreq, REQ_STATUS_RCVD); } else if (m->rreq->status == REQ_STATUS_FLSHD) { /* Ignore replies associated with a cancelled request. */ p9_debug(P9_DEBUG_TRANS, "Ignore replies associated with a cancelled request\n"); } else { spin_unlock(&m->req_lock); p9_debug(P9_DEBUG_ERROR, "Request tag %d errored out while we were reading the reply\n", m->rc.tag); err = -EIO; goto error; } spin_unlock(&m->req_lock); m->rc.sdata = NULL; m->rc.offset = 0; m->rc.capacity = 0; p9_req_put(m->client, m->rreq); m->rreq = NULL; } end_clear: clear_bit(Rworksched, &m->wsched); if (!list_empty(&m->req_list)) { if (test_and_clear_bit(Rpending, &m->wsched)) n = EPOLLIN; else n = p9_fd_poll(m->client, NULL, NULL); if ((n & EPOLLIN) && !test_and_set_bit(Rworksched, &m->wsched)) { p9_debug(P9_DEBUG_TRANS, "sched read work %p\n", m); schedule_work(&m->rq); } } return; error: p9_conn_cancel(m, err); clear_bit(Rworksched, &m->wsched); } /** * p9_fd_write - write to a socket * @client: client instance * @v: buffer to send data from * @len: size of send buffer * */ static int p9_fd_write(struct p9_client *client, void *v, int len) { ssize_t ret; struct p9_trans_fd *ts = NULL; if (client && client->status != Disconnected) ts = client->trans; if (!ts) return -EREMOTEIO; if (!(ts->wr->f_flags & O_NONBLOCK)) p9_debug(P9_DEBUG_ERROR, "blocking write ...\n"); ret = kernel_write(ts->wr, v, len, &ts->wr->f_pos); if (ret <= 0 && ret != -ERESTARTSYS && ret != -EAGAIN) client->status = Disconnected; return ret; } /** * p9_write_work - called when a transport can send some data * @work: container for work to be done * */ static void p9_write_work(struct work_struct *work) { __poll_t n; int err; struct p9_conn *m; struct p9_req_t *req; m = container_of(work, struct p9_conn, wq); if (m->err < 0) { clear_bit(Wworksched, &m->wsched); return; } if (!m->wsize) { spin_lock(&m->req_lock); if (list_empty(&m->unsent_req_list)) { clear_bit(Wworksched, &m->wsched); spin_unlock(&m->req_lock); return; } req = list_entry(m->unsent_req_list.next, struct p9_req_t, req_list); WRITE_ONCE(req->status, REQ_STATUS_SENT); p9_debug(P9_DEBUG_TRANS, "move req %p\n", req); list_move_tail(&req->req_list, &m->req_list); m->wbuf = req->tc.sdata; m->wsize = req->tc.size; m->wpos = 0; p9_req_get(req); m->wreq = req; spin_unlock(&m->req_lock); } p9_debug(P9_DEBUG_TRANS, "mux %p pos %d size %d\n", m, m->wpos, m->wsize); clear_bit(Wpending, &m->wsched); err = p9_fd_write(m->client, m->wbuf + m->wpos, m->wsize - m->wpos); p9_debug(P9_DEBUG_TRANS, "mux %p sent %d bytes\n", m, err); if (err == -EAGAIN) goto end_clear; if (err < 0) goto error; else if (err == 0) { err = -EREMOTEIO; goto error; } m->wpos += err; if (m->wpos == m->wsize) { m->wpos = m->wsize = 0; p9_req_put(m->client, m->wreq); m->wreq = NULL; } end_clear: clear_bit(Wworksched, &m->wsched); if (m->wsize || !list_empty(&m->unsent_req_list)) { if (test_and_clear_bit(Wpending, &m->wsched)) n = EPOLLOUT; else n = p9_fd_poll(m->client, NULL, NULL); if ((n & EPOLLOUT) && !test_and_set_bit(Wworksched, &m->wsched)) { p9_debug(P9_DEBUG_TRANS, "sched write work %p\n", m); schedule_work(&m->wq); } } return; error: p9_conn_cancel(m, err); clear_bit(Wworksched, &m->wsched); } static int p9_pollwake(wait_queue_entry_t *wait, unsigned int mode, int sync, void *key) { struct p9_poll_wait *pwait = container_of(wait, struct p9_poll_wait, wait); struct p9_conn *m = pwait->conn; unsigned long flags; spin_lock_irqsave(&p9_poll_lock, flags); if (list_empty(&m->poll_pending_link)) list_add_tail(&m->poll_pending_link, &p9_poll_pending_list); spin_unlock_irqrestore(&p9_poll_lock, flags); schedule_work(&p9_poll_work); return 1; } /** * p9_pollwait - add poll task to the wait queue * @filp: file pointer being polled * @wait_address: wait_q to block on * @p: poll state * * called by files poll operation to add v9fs-poll task to files wait queue */ static void p9_pollwait(struct file *filp, wait_queue_head_t *wait_address, poll_table *p) { struct p9_conn *m = container_of(p, struct p9_conn, pt); struct p9_poll_wait *pwait = NULL; int i; for (i = 0; i < ARRAY_SIZE(m->poll_wait); i++) { if (m->poll_wait[i].wait_addr == NULL) { pwait = &m->poll_wait[i]; break; } } if (!pwait) { p9_debug(P9_DEBUG_ERROR, "not enough wait_address slots\n"); return; } pwait->conn = m; pwait->wait_addr = wait_address; init_waitqueue_func_entry(&pwait->wait, p9_pollwake); add_wait_queue(wait_address, &pwait->wait); } /** * p9_conn_create - initialize the per-session mux data * @client: client instance * * Note: Creates the polling task if this is the first session. */ static void p9_conn_create(struct p9_client *client) { __poll_t n; struct p9_trans_fd *ts = client->trans; struct p9_conn *m = &ts->conn; p9_debug(P9_DEBUG_TRANS, "client %p msize %d\n", client, client->msize); INIT_LIST_HEAD(&m->mux_list); m->client = client; spin_lock_init(&m->req_lock); INIT_LIST_HEAD(&m->req_list); INIT_LIST_HEAD(&m->unsent_req_list); INIT_WORK(&m->rq, p9_read_work); INIT_WORK(&m->wq, p9_write_work); INIT_LIST_HEAD(&m->poll_pending_link); init_poll_funcptr(&m->pt, p9_pollwait); n = p9_fd_poll(client, &m->pt, NULL); if (n & EPOLLIN) { p9_debug(P9_DEBUG_TRANS, "mux %p can read\n", m); set_bit(Rpending, &m->wsched); } if (n & EPOLLOUT) { p9_debug(P9_DEBUG_TRANS, "mux %p can write\n", m); set_bit(Wpending, &m->wsched); } } /** * p9_poll_mux - polls a mux and schedules read or write works if necessary * @m: connection to poll * */ static void p9_poll_mux(struct p9_conn *m) { __poll_t n; int err = -ECONNRESET; if (m->err < 0) return; n = p9_fd_poll(m->client, NULL, &err); if (n & (EPOLLERR | EPOLLHUP | EPOLLNVAL)) { p9_debug(P9_DEBUG_TRANS, "error mux %p err %d\n", m, n); p9_conn_cancel(m, err); } if (n & EPOLLIN) { set_bit(Rpending, &m->wsched); p9_debug(P9_DEBUG_TRANS, "mux %p can read\n", m); if (!test_and_set_bit(Rworksched, &m->wsched)) { p9_debug(P9_DEBUG_TRANS, "sched read work %p\n", m); schedule_work(&m->rq); } } if (n & EPOLLOUT) { set_bit(Wpending, &m->wsched); p9_debug(P9_DEBUG_TRANS, "mux %p can write\n", m); if ((m->wsize || !list_empty(&m->unsent_req_list)) && !test_and_set_bit(Wworksched, &m->wsched)) { p9_debug(P9_DEBUG_TRANS, "sched write work %p\n", m); schedule_work(&m->wq); } } } /** * p9_fd_request - send 9P request * The function can sleep until the request is scheduled for sending. * The function can be interrupted. Return from the function is not * a guarantee that the request is sent successfully. * * @client: client instance * @req: request to be sent * */ static int p9_fd_request(struct p9_client *client, struct p9_req_t *req) { __poll_t n; struct p9_trans_fd *ts = client->trans; struct p9_conn *m = &ts->conn; p9_debug(P9_DEBUG_TRANS, "mux %p task %p tcall %p id %d\n", m, current, &req->tc, req->tc.id); spin_lock(&m->req_lock); if (m->err < 0) { spin_unlock(&m->req_lock); return m->err; } WRITE_ONCE(req->status, REQ_STATUS_UNSENT); list_add_tail(&req->req_list, &m->unsent_req_list); spin_unlock(&m->req_lock); if (test_and_clear_bit(Wpending, &m->wsched)) n = EPOLLOUT; else n = p9_fd_poll(m->client, NULL, NULL); if (n & EPOLLOUT && !test_and_set_bit(Wworksched, &m->wsched)) schedule_work(&m->wq); return 0; } static int p9_fd_cancel(struct p9_client *client, struct p9_req_t *req) { struct p9_trans_fd *ts = client->trans; struct p9_conn *m = &ts->conn; int ret = 1; p9_debug(P9_DEBUG_TRANS, "client %p req %p\n", client, req); spin_lock(&m->req_lock); if (req->status == REQ_STATUS_UNSENT) { list_del(&req->req_list); WRITE_ONCE(req->status, REQ_STATUS_FLSHD); p9_req_put(client, req); ret = 0; } spin_unlock(&m->req_lock); return ret; } static int p9_fd_cancelled(struct p9_client *client, struct p9_req_t *req) { struct p9_trans_fd *ts = client->trans; struct p9_conn *m = &ts->conn; p9_debug(P9_DEBUG_TRANS, "client %p req %p\n", client, req); spin_lock(&m->req_lock); /* Ignore cancelled request if message has been received * before lock. */ if (req->status == REQ_STATUS_RCVD) { spin_unlock(&m->req_lock); return 0; } /* we haven't received a response for oldreq, * remove it from the list. */ list_del(&req->req_list); WRITE_ONCE(req->status, REQ_STATUS_FLSHD); spin_unlock(&m->req_lock); p9_req_put(client, req); return 0; } static int p9_fd_show_options(struct seq_file *m, struct p9_client *clnt) { if (clnt->trans_mod == &p9_tcp_trans) { if (clnt->trans_opts.tcp.port != P9_PORT) seq_printf(m, ",port=%u", clnt->trans_opts.tcp.port); } else if (clnt->trans_mod == &p9_fd_trans) { if (clnt->trans_opts.fd.rfd != ~0) seq_printf(m, ",rfd=%u", clnt->trans_opts.fd.rfd); if (clnt->trans_opts.fd.wfd != ~0) seq_printf(m, ",wfd=%u", clnt->trans_opts.fd.wfd); } return 0; } /** * parse_opts - parse mount options into p9_fd_opts structure * @params: options string passed from mount * @opts: fd transport-specific structure to parse options into * * Returns 0 upon success, -ERRNO upon failure */ static int parse_opts(char *params, struct p9_fd_opts *opts) { char *p; substring_t args[MAX_OPT_ARGS]; int option; char *options, *tmp_options; opts->port = P9_PORT; opts->rfd = ~0; opts->wfd = ~0; opts->privport = false; if (!params) return 0; tmp_options = kstrdup(params, GFP_KERNEL); if (!tmp_options) { p9_debug(P9_DEBUG_ERROR, "failed to allocate copy of option string\n"); return -ENOMEM; } options = tmp_options; while ((p = strsep(&options, ",")) != NULL) { int token; int r; if (!*p) continue; token = match_token(p, tokens, args); if ((token != Opt_err) && (token != Opt_privport)) { r = match_int(&args[0], &option); if (r < 0) { p9_debug(P9_DEBUG_ERROR, "integer field, but no integer?\n"); continue; } } switch (token) { case Opt_port: opts->port = option; break; case Opt_rfdno: opts->rfd = option; break; case Opt_wfdno: opts->wfd = option; break; case Opt_privport: opts->privport = true; break; default: continue; } } kfree(tmp_options); return 0; } static int p9_fd_open(struct p9_client *client, int rfd, int wfd) { struct p9_trans_fd *ts = kzalloc(sizeof(struct p9_trans_fd), GFP_KERNEL); if (!ts) return -ENOMEM; ts->rd = fget(rfd); if (!ts->rd) goto out_free_ts; if (!(ts->rd->f_mode & FMODE_READ)) goto out_put_rd; /* Prevent workers from hanging on IO when fd is a pipe. * It's technically possible for userspace or concurrent mounts to * modify this flag concurrently, which will likely result in a * broken filesystem. However, just having bad flags here should * not crash the kernel or cause any other sort of bug, so mark this * particular data race as intentional so that tooling (like KCSAN) * can allow it and detect further problems. */ data_race(ts->rd->f_flags |= O_NONBLOCK); ts->wr = fget(wfd); if (!ts->wr) goto out_put_rd; if (!(ts->wr->f_mode & FMODE_WRITE)) goto out_put_wr; data_race(ts->wr->f_flags |= O_NONBLOCK); client->trans = ts; client->status = Connected; return 0; out_put_wr: fput(ts->wr); out_put_rd: fput(ts->rd); out_free_ts: kfree(ts); return -EIO; } static int p9_socket_open(struct p9_client *client, struct socket *csocket) { struct p9_trans_fd *p; struct file *file; p = kzalloc(sizeof(struct p9_trans_fd), GFP_KERNEL); if (!p) { sock_release(csocket); return -ENOMEM; } csocket->sk->sk_allocation = GFP_NOIO; csocket->sk->sk_use_task_frag = false; file = sock_alloc_file(csocket, 0, NULL); if (IS_ERR(file)) { pr_err("%s (%d): failed to map fd\n", __func__, task_pid_nr(current)); kfree(p); return PTR_ERR(file); } get_file(file); p->wr = p->rd = file; client->trans = p; client->status = Connected; p->rd->f_flags |= O_NONBLOCK; p9_conn_create(client); return 0; } /** * p9_conn_destroy - cancels all pending requests of mux * @m: mux to destroy * */ static void p9_conn_destroy(struct p9_conn *m) { p9_debug(P9_DEBUG_TRANS, "mux %p prev %p next %p\n", m, m->mux_list.prev, m->mux_list.next); p9_mux_poll_stop(m); cancel_work_sync(&m->rq); if (m->rreq) { p9_req_put(m->client, m->rreq); m->rreq = NULL; } cancel_work_sync(&m->wq); if (m->wreq) { p9_req_put(m->client, m->wreq); m->wreq = NULL; } p9_conn_cancel(m, -ECONNRESET); m->client = NULL; } /** * p9_fd_close - shutdown file descriptor transport * @client: client instance * */ static void p9_fd_close(struct p9_client *client) { struct p9_trans_fd *ts; if (!client) return; ts = client->trans; if (!ts) return; client->status = Disconnected; p9_conn_destroy(&ts->conn); if (ts->rd) fput(ts->rd); if (ts->wr) fput(ts->wr); kfree(ts); } /* * stolen from NFS - maybe should be made a generic function? */ static inline int valid_ipaddr4(const char *buf) { int rc, count, in[4]; rc = sscanf(buf, "%d.%d.%d.%d", &in[0], &in[1], &in[2], &in[3]); if (rc != 4) return -EINVAL; for (count = 0; count < 4; count++) { if (in[count] > 255) return -EINVAL; } return 0; } static int p9_bind_privport(struct socket *sock) { struct sockaddr_in cl; int port, err = -EINVAL; memset(&cl, 0, sizeof(cl)); cl.sin_family = AF_INET; cl.sin_addr.s_addr = htonl(INADDR_ANY); for (port = p9_ipport_resv_max; port >= p9_ipport_resv_min; port--) { cl.sin_port = htons((ushort)port); err = kernel_bind(sock, (struct sockaddr *)&cl, sizeof(cl)); if (err != -EADDRINUSE) break; } return err; } static int p9_fd_create_tcp(struct p9_client *client, const char *addr, char *args) { int err; struct socket *csocket; struct sockaddr_in sin_server; struct p9_fd_opts opts; err = parse_opts(args, &opts); if (err < 0) return err; if (addr == NULL || valid_ipaddr4(addr) < 0) return -EINVAL; csocket = NULL; client->trans_opts.tcp.port = opts.port; client->trans_opts.tcp.privport = opts.privport; sin_server.sin_family = AF_INET; sin_server.sin_addr.s_addr = in_aton(addr); sin_server.sin_port = htons(opts.port); err = __sock_create(current->nsproxy->net_ns, PF_INET, SOCK_STREAM, IPPROTO_TCP, &csocket, 1); if (err) { pr_err("%s (%d): problem creating socket\n", __func__, task_pid_nr(current)); return err; } if (opts.privport) { err = p9_bind_privport(csocket); if (err < 0) { pr_err("%s (%d): problem binding to privport\n", __func__, task_pid_nr(current)); sock_release(csocket); return err; } } err = READ_ONCE(csocket->ops)->connect(csocket, (struct sockaddr *)&sin_server, sizeof(struct sockaddr_in), 0); if (err < 0) { pr_err("%s (%d): problem connecting socket to %s\n", __func__, task_pid_nr(current), addr); sock_release(csocket); return err; } return p9_socket_open(client, csocket); } static int p9_fd_create_unix(struct p9_client *client, const char *addr, char *args) { int err; struct socket *csocket; struct sockaddr_un sun_server; csocket = NULL; if (!addr || !strlen(addr)) return -EINVAL; if (strlen(addr) >= UNIX_PATH_MAX) { pr_err("%s (%d): address too long: %s\n", __func__, task_pid_nr(current), addr); return -ENAMETOOLONG; } sun_server.sun_family = PF_UNIX; strcpy(sun_server.sun_path, addr); err = __sock_create(current->nsproxy->net_ns, PF_UNIX, SOCK_STREAM, 0, &csocket, 1); if (err < 0) { pr_err("%s (%d): problem creating socket\n", __func__, task_pid_nr(current)); return err; } err = READ_ONCE(csocket->ops)->connect(csocket, (struct sockaddr *)&sun_server, sizeof(struct sockaddr_un) - 1, 0); if (err < 0) { pr_err("%s (%d): problem connecting socket: %s: %d\n", __func__, task_pid_nr(current), addr, err); sock_release(csocket); return err; } return p9_socket_open(client, csocket); } static int p9_fd_create(struct p9_client *client, const char *addr, char *args) { int err; struct p9_fd_opts opts; err = parse_opts(args, &opts); if (err < 0) return err; client->trans_opts.fd.rfd = opts.rfd; client->trans_opts.fd.wfd = opts.wfd; if (opts.rfd == ~0 || opts.wfd == ~0) { pr_err("Insufficient options for proto=fd\n"); return -ENOPROTOOPT; } err = p9_fd_open(client, opts.rfd, opts.wfd); if (err < 0) return err; p9_conn_create(client); return 0; } static struct p9_trans_module p9_tcp_trans = { .name = "tcp", .maxsize = MAX_SOCK_BUF, .pooled_rbuffers = false, .def = 0, .create = p9_fd_create_tcp, .close = p9_fd_close, .request = p9_fd_request, .cancel = p9_fd_cancel, .cancelled = p9_fd_cancelled, .show_options = p9_fd_show_options, .owner = THIS_MODULE, }; MODULE_ALIAS_9P("tcp"); static struct p9_trans_module p9_unix_trans = { .name = "unix", .maxsize = MAX_SOCK_BUF, .def = 0, .create = p9_fd_create_unix, .close = p9_fd_close, .request = p9_fd_request, .cancel = p9_fd_cancel, .cancelled = p9_fd_cancelled, .show_options = p9_fd_show_options, .owner = THIS_MODULE, }; MODULE_ALIAS_9P("unix"); static struct p9_trans_module p9_fd_trans = { .name = "fd", .maxsize = MAX_SOCK_BUF, .def = 0, .create = p9_fd_create, .close = p9_fd_close, .request = p9_fd_request, .cancel = p9_fd_cancel, .cancelled = p9_fd_cancelled, .show_options = p9_fd_show_options, .owner = THIS_MODULE, }; MODULE_ALIAS_9P("fd"); /** * p9_poll_workfn - poll worker thread * @work: work queue * * polls all v9fs transports for new events and queues the appropriate * work to the work queue * */ static void p9_poll_workfn(struct work_struct *work) { unsigned long flags; p9_debug(P9_DEBUG_TRANS, "start %p\n", current); spin_lock_irqsave(&p9_poll_lock, flags); while (!list_empty(&p9_poll_pending_list)) { struct p9_conn *conn = list_first_entry(&p9_poll_pending_list, struct p9_conn, poll_pending_link); list_del_init(&conn->poll_pending_link); spin_unlock_irqrestore(&p9_poll_lock, flags); p9_poll_mux(conn); spin_lock_irqsave(&p9_poll_lock, flags); } spin_unlock_irqrestore(&p9_poll_lock, flags); p9_debug(P9_DEBUG_TRANS, "finish\n"); } static int __init p9_trans_fd_init(void) { v9fs_register_trans(&p9_tcp_trans); v9fs_register_trans(&p9_unix_trans); v9fs_register_trans(&p9_fd_trans); return 0; } static void __exit p9_trans_fd_exit(void) { flush_work(&p9_poll_work); v9fs_unregister_trans(&p9_tcp_trans); v9fs_unregister_trans(&p9_unix_trans); v9fs_unregister_trans(&p9_fd_trans); } module_init(p9_trans_fd_init); module_exit(p9_trans_fd_exit); MODULE_AUTHOR("Eric Van Hensbergen <ericvh@gmail.com>"); MODULE_DESCRIPTION("Filedescriptor Transport for 9P"); MODULE_LICENSE("GPL");
2199 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _ASM_X86_TIMEX_H #define _ASM_X86_TIMEX_H #include <asm/processor.h> #include <asm/tsc.h> static inline unsigned long random_get_entropy(void) { if (!IS_ENABLED(CONFIG_X86_TSC) && !cpu_feature_enabled(X86_FEATURE_