Total coverage: 353649 (18%)of 1975179
3 3 2 1 1 1 32 33 33 32 33 19 31 32 2 1 2 1 1 26 14 6 4 5 2 1 2 4 4 3 7 7 6 4 4 2 2 2 1 5 5 1 1 1 5 4 3 1 4 3 1 1 2 2 1 7 6 4 3 2 2 1 3 2 2 2 1 1 1 1 1 1 1 6 2 2 1 2 1 2 1 1 1 2 2 3 1 1 1 1 1 12 32 33 9 33 32 33 33 32 33 32 32 33 1 33 31 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 // SPDX-License-Identifier: GPL-2.0-only /* * Copyright 2002-2005, Instant802 Networks, Inc. * Copyright 2005-2006, Devicescape Software, Inc. * Copyright 2006-2007 Jiri Benc <jbenc@suse.cz> * Copyright 2007 Johannes Berg <johannes@sipsolutions.net> * Copyright 2013-2014 Intel Mobile Communications GmbH * Copyright (C) 2015-2017 Intel Deutschland GmbH * Copyright (C) 2018-2024 Intel Corporation * * element parsing for mac80211 */ #include <net/mac80211.h> #include <linux/netdevice.h> #include <linux/export.h> #include <linux/types.h> #include <linux/slab.h> #include <linux/skbuff.h> #include <linux/etherdevice.h> #include <linux/if_arp.h> #include <linux/bitmap.h> #include <linux/crc32.h> #include <net/net_namespace.h> #include <net/cfg80211.h> #include <net/rtnetlink.h> #include <kunit/visibility.h> #include "ieee80211_i.h" #include "driver-ops.h" #include "rate.h" #include "mesh.h" #include "wme.h" #include "led.h" #include "wep.h" struct ieee80211_elems_parse { /* must be first for kfree to work */ struct ieee802_11_elems elems; /* The basic Multi-Link element in the original elements */ const struct element *ml_basic_elem; /* The reconfiguration Multi-Link element in the original elements */ const struct element *ml_reconf_elem; /* * scratch buffer that can be used for various element parsing related * tasks, e.g., element de-fragmentation etc. */ size_t scratch_len; u8 *scratch_pos; u8 scratch[] __counted_by(scratch_len); }; static void ieee80211_parse_extension_element(u32 *crc, const struct element *elem, struct ieee80211_elems_parse *elems_parse, struct ieee80211_elems_parse_params *params) { struct ieee802_11_elems *elems = &elems_parse->elems; const void *data = elem->data + 1; bool calc_crc = false; u8 len; if (!elem->datalen) return; len = elem->datalen - 1; switch (elem->data[0]) { case WLAN_EID_EXT_HE_MU_EDCA: if (params->mode < IEEE80211_CONN_MODE_HE) break; calc_crc = true; if (len >= sizeof(*elems->mu_edca_param_set)) elems->mu_edca_param_set = data; break; case WLAN_EID_EXT_HE_CAPABILITY: if (params->mode < IEEE80211_CONN_MODE_HE) break; if (ieee80211_he_capa_size_ok(data, len)) { elems->he_cap = data; elems->he_cap_len = len; } break; case WLAN_EID_EXT_HE_OPERATION: if (params->mode < IEEE80211_CONN_MODE_HE) break; calc_crc = true; if (len >= sizeof(*elems->he_operation) && len >= ieee80211_he_oper_size(data) - 1) elems->he_operation = data; break; case WLAN_EID_EXT_UORA: if (params->mode < IEEE80211_CONN_MODE_HE) break; if (len >= 1) elems->uora_element = data; break; case WLAN_EID_EXT_MAX_CHANNEL_SWITCH_TIME: if (len == 3) elems->max_channel_switch_time = data; break; case WLAN_EID_EXT_MULTIPLE_BSSID_CONFIGURATION: if (len >= sizeof(*elems->mbssid_config_ie)) elems->mbssid_config_ie = data; break; case WLAN_EID_EXT_HE_SPR: if (params->mode < IEEE80211_CONN_MODE_HE) break; if (len >= sizeof(*elems->he_spr) && len >= ieee80211_he_spr_size(data) - 1) elems->he_spr = data; break; case WLAN_EID_EXT_HE_6GHZ_CAPA: if (params->mode < IEEE80211_CONN_MODE_HE) break; if (len >= sizeof(*elems->he_6ghz_capa)) elems->he_6ghz_capa = data; break; case WLAN_EID_EXT_EHT_CAPABILITY: if (params->mode < IEEE80211_CONN_MODE_EHT) break; if (ieee80211_eht_capa_size_ok(elems->he_cap, data, len, params->from_ap)) { elems->eht_cap = data; elems->eht_cap_len = len; } break; case WLAN_EID_EXT_EHT_OPERATION: if (params->mode < IEEE80211_CONN_MODE_EHT) break; if (ieee80211_eht_oper_size_ok(data, len)) elems->eht_operation = data; calc_crc = true; break; case WLAN_EID_EXT_EHT_MULTI_LINK: if (params->mode < IEEE80211_CONN_MODE_EHT) break; calc_crc = true; if (ieee80211_mle_size_ok(data, len)) { const struct ieee80211_multi_link_elem *mle = (void *)data; switch (le16_get_bits(mle->control, IEEE80211_ML_CONTROL_TYPE)) { case IEEE80211_ML_CONTROL_TYPE_BASIC: if (elems_parse->ml_basic_elem) { elems->parse_error |= IEEE80211_PARSE_ERR_DUP_NEST_ML_BASIC; break; } elems_parse->ml_basic_elem = elem; break; case IEEE80211_ML_CONTROL_TYPE_RECONF: elems_parse->ml_reconf_elem = elem; break; default: break; } } break; case WLAN_EID_EXT_BANDWIDTH_INDICATION: if (params->mode < IEEE80211_CONN_MODE_EHT) break; if (ieee80211_bandwidth_indication_size_ok(data, len)) elems->bandwidth_indication = data; calc_crc = true; break; case WLAN_EID_EXT_TID_TO_LINK_MAPPING: if (params->mode < IEEE80211_CONN_MODE_EHT) break; calc_crc = true; if (ieee80211_tid_to_link_map_size_ok(data, len) && elems->ttlm_num < ARRAY_SIZE(elems->ttlm)) { elems->ttlm[elems->ttlm_num] = (void *)data; elems->ttlm_num++; } break; } if (crc && calc_crc) *crc = crc32_be(*crc, (void *)elem, elem->datalen + 2); } static void ieee80211_parse_tpe(struct ieee80211_parsed_tpe *tpe, const u8 *data, u8 len) { const struct ieee80211_tx_pwr_env *env = (const void *)data; u8 count, interpret, category; u8 *out, N, *cnt_out = NULL, *N_out = NULL; if (!ieee80211_valid_tpe_element(data, len)) return; count = u8_get_bits(env->info, IEEE80211_TX_PWR_ENV_INFO_COUNT); interpret = u8_get_bits(env->info, IEEE80211_TX_PWR_ENV_INFO_INTERPRET); category = u8_get_bits(env->info, IEEE80211_TX_PWR_ENV_INFO_CATEGORY); switch (interpret) { case IEEE80211_TPE_LOCAL_EIRP: out = tpe->max_local[category].power; cnt_out = &tpe->max_local[category].count; tpe->max_local[category].valid = true; break; case IEEE80211_TPE_REG_CLIENT_EIRP: out = tpe->max_reg_client[category].power; cnt_out = &tpe->max_reg_client[category].count; tpe->max_reg_client[category].valid = true; break; case IEEE80211_TPE_LOCAL_EIRP_PSD: out = tpe->psd_local[category].power; cnt_out = &tpe->psd_local[category].count; N_out = &tpe->psd_local[category].n; tpe->psd_local[category].valid = true; break; case IEEE80211_TPE_REG_CLIENT_EIRP_PSD: out = tpe->psd_reg_client[category].power; cnt_out = &tpe->psd_reg_client[category].count; N_out = &tpe->psd_reg_client[category].n; tpe->psd_reg_client[category].valid = true; break; } switch (interpret) { case IEEE80211_TPE_LOCAL_EIRP: case IEEE80211_TPE_REG_CLIENT_EIRP: /* count was validated <= 3, plus 320 MHz */ BUILD_BUG_ON(IEEE80211_TPE_EIRP_ENTRIES_320MHZ < 5); memcpy(out, env->variable, count + 1); *cnt_out = count + 1; /* separately take 320 MHz if present */ if (count == 3 && len > sizeof(*env) + count + 1) { out[4] = env->variable[4]; *cnt_out = 5; } break; case IEEE80211_TPE_LOCAL_EIRP_PSD: case IEEE80211_TPE_REG_CLIENT_EIRP_PSD: if (!count) { memset(out, env->variable[0], IEEE80211_TPE_PSD_ENTRIES_320MHZ); *cnt_out = IEEE80211_TPE_PSD_ENTRIES_320MHZ; break; } N = 1 << (count - 1); memcpy(out, env->variable, N); *cnt_out = N; *N_out = N; if (len > sizeof(*env) + N) { int K = u8_get_bits(env->variable[N], IEEE80211_TX_PWR_ENV_EXT_COUNT); K = min(K, IEEE80211_TPE_PSD_ENTRIES_320MHZ - N); memcpy(out + N, env->variable + N + 1, K); (*cnt_out) += K; } break; } } static u32 _ieee802_11_parse_elems_full(struct ieee80211_elems_parse_params *params, struct ieee80211_elems_parse *elems_parse, const struct element *check_inherit) { struct ieee802_11_elems *elems = &elems_parse->elems; const struct element *elem; bool calc_crc = params->filter != 0; DECLARE_BITMAP(seen_elems, 256); u32 crc = params->crc; bitmap_zero(seen_elems, 256); for_each_element(elem, params->start, params->len) { const struct element *subelem; u8 elem_parse_failed; u8 id = elem->id; u8 elen = elem->datalen; const u8 *pos = elem->data; if (check_inherit && !cfg80211_is_element_inherited(elem, check_inherit)) continue; switch (id) { case WLAN_EID_SSID: case WLAN_EID_SUPP_RATES: case WLAN_EID_FH_PARAMS: case WLAN_EID_DS_PARAMS: case WLAN_EID_CF_PARAMS: case WLAN_EID_TIM: case WLAN_EID_IBSS_PARAMS: case WLAN_EID_CHALLENGE: case WLAN_EID_RSN: case WLAN_EID_ERP_INFO: case WLAN_EID_EXT_SUPP_RATES: case WLAN_EID_HT_CAPABILITY: case WLAN_EID_HT_OPERATION: case WLAN_EID_VHT_CAPABILITY: case WLAN_EID_VHT_OPERATION: case WLAN_EID_MESH_ID: case WLAN_EID_MESH_CONFIG: case WLAN_EID_PEER_MGMT: case WLAN_EID_PREQ: case WLAN_EID_PREP: case WLAN_EID_PERR: case WLAN_EID_RANN: case WLAN_EID_CHANNEL_SWITCH: case WLAN_EID_EXT_CHANSWITCH_ANN: case WLAN_EID_COUNTRY: case WLAN_EID_PWR_CONSTRAINT: case WLAN_EID_TIMEOUT_INTERVAL: case WLAN_EID_SECONDARY_CHANNEL_OFFSET: case WLAN_EID_WIDE_BW_CHANNEL_SWITCH: case WLAN_EID_CHAN_SWITCH_PARAM: case WLAN_EID_EXT_CAPABILITY: case WLAN_EID_CHAN_SWITCH_TIMING: case WLAN_EID_LINK_ID: case WLAN_EID_BSS_MAX_IDLE_PERIOD: case WLAN_EID_RSNX: case WLAN_EID_S1G_BCN_COMPAT: case WLAN_EID_S1G_CAPABILITIES: case WLAN_EID_S1G_OPERATION: case WLAN_EID_AID_RESPONSE: case WLAN_EID_S1G_SHORT_BCN_INTERVAL: /* * not listing WLAN_EID_CHANNEL_SWITCH_WRAPPER -- it seems possible * that if the content gets bigger it might be needed more than once */ if (test_bit(id, seen_elems)) { elems->parse_error |= IEEE80211_PARSE_ERR_DUP_ELEM; continue; } break; } if (calc_crc && id < 64 && (params->filter & (1ULL << id))) crc = crc32_be(crc, pos - 2, elen + 2); elem_parse_failed = 0; switch (id) { case WLAN_EID_LINK_ID: if (elen + 2 < sizeof(struct ieee80211_tdls_lnkie)) { elem_parse_failed = IEEE80211_PARSE_ERR_BAD_ELEM_SIZE; break; } elems->lnk_id = (void *)(pos - 2); break; case WLAN_EID_CHAN_SWITCH_TIMING: if (elen < sizeof(struct ieee80211_ch_switch_timing)) { elem_parse_failed = IEEE80211_PARSE_ERR_BAD_ELEM_SIZE; break; } elems->ch_sw_timing = (void *)pos; break; case WLAN_EID_EXT_CAPABILITY: elems->ext_capab = pos; elems->ext_capab_len = elen; break; case WLAN_EID_SSID: elems->ssid = pos; elems->ssid_len = elen; break; case WLAN_EID_SUPP_RATES: elems->supp_rates = pos; elems->supp_rates_len = elen; break; case WLAN_EID_DS_PARAMS: if (elen >= 1) elems->ds_params = pos; else elem_parse_failed = IEEE80211_PARSE_ERR_BAD_ELEM_SIZE; break; case WLAN_EID_TIM: if (elen >= sizeof(struct ieee80211_tim_ie)) { elems->tim = (void *)pos; elems->tim_len = elen; } else elem_parse_failed = IEEE80211_PARSE_ERR_BAD_ELEM_SIZE; break; case WLAN_EID_VENDOR_SPECIFIC: if (elen >= 4 && pos[0] == 0x00 && pos[1] == 0x50 && pos[2] == 0xf2) { /* Microsoft OUI (00:50:F2) */ if (calc_crc) crc = crc32_be(crc, pos - 2, elen + 2); if (elen >= 5 && pos[3] == 2) { /* OUI Type 2 - WMM IE */ if (pos[4] == 0) { elems->wmm_info = pos; elems->wmm_info_len = elen; } else if (pos[4] == 1) { elems->wmm_param = pos; elems->wmm_param_len = elen; } } } break; case WLAN_EID_RSN: elems->rsn = pos; elems->rsn_len = elen; break; case WLAN_EID_ERP_INFO: if (elen >= 1) elems->erp_info = pos; else elem_parse_failed = IEEE80211_PARSE_ERR_BAD_ELEM_SIZE; break; case WLAN_EID_EXT_SUPP_RATES: elems->ext_supp_rates = pos; elems->ext_supp_rates_len = elen; break; case WLAN_EID_HT_CAPABILITY: if (params->mode < IEEE80211_CONN_MODE_HT) break; if (elen >= sizeof(struct ieee80211_ht_cap)) elems->ht_cap_elem = (void *)pos; else elem_parse_failed = IEEE80211_PARSE_ERR_BAD_ELEM_SIZE; break; case WLAN_EID_HT_OPERATION: if (params->mode < IEEE80211_CONN_MODE_HT) break; if (elen >= sizeof(struct ieee80211_ht_operation)) elems->ht_operation = (void *)pos; else elem_parse_failed = IEEE80211_PARSE_ERR_BAD_ELEM_SIZE; break; case WLAN_EID_VHT_CAPABILITY: if (params->mode < IEEE80211_CONN_MODE_VHT) break; if (elen >= sizeof(struct ieee80211_vht_cap)) elems->vht_cap_elem = (void *)pos; else elem_parse_failed = IEEE80211_PARSE_ERR_BAD_ELEM_SIZE; break; case WLAN_EID_VHT_OPERATION: if (params->mode < IEEE80211_CONN_MODE_VHT) break; if (elen >= sizeof(struct ieee80211_vht_operation)) { elems->vht_operation = (void *)pos; if (calc_crc) crc = crc32_be(crc, pos - 2, elen + 2); break; } elem_parse_failed = IEEE80211_PARSE_ERR_BAD_ELEM_SIZE; break; case WLAN_EID_OPMODE_NOTIF: if (params->mode < IEEE80211_CONN_MODE_VHT) break; if (elen > 0) { elems->opmode_notif = pos; if (calc_crc) crc = crc32_be(crc, pos - 2, elen + 2); break; } elem_parse_failed = IEEE80211_PARSE_ERR_BAD_ELEM_SIZE; break; case WLAN_EID_MESH_ID: elems->mesh_id = pos; elems->mesh_id_len = elen; break; case WLAN_EID_MESH_CONFIG: if (elen >= sizeof(struct ieee80211_meshconf_ie)) elems->mesh_config = (void *)pos; else elem_parse_failed = IEEE80211_PARSE_ERR_BAD_ELEM_SIZE; break; case WLAN_EID_PEER_MGMT: elems->peering = pos; elems->peering_len = elen; break; case WLAN_EID_MESH_AWAKE_WINDOW: if (elen >= 2) elems->awake_window = (void *)pos; break; case WLAN_EID_PREQ: elems->preq = pos; elems->preq_len = elen; break; case WLAN_EID_PREP: elems->prep = pos; elems->prep_len = elen; break; case WLAN_EID_PERR: elems->perr = pos; elems->perr_len = elen; break; case WLAN_EID_RANN: if (elen >= sizeof(struct ieee80211_rann_ie)) elems->rann = (void *)pos; else elem_parse_failed = IEEE80211_PARSE_ERR_BAD_ELEM_SIZE; break; case WLAN_EID_CHANNEL_SWITCH: if (elen != sizeof(struct ieee80211_channel_sw_ie)) { elem_parse_failed = IEEE80211_PARSE_ERR_BAD_ELEM_SIZE; break; } elems->ch_switch_ie = (void *)pos; break; case WLAN_EID_EXT_CHANSWITCH_ANN: if (elen != sizeof(struct ieee80211_ext_chansw_ie)) { elem_parse_failed = IEEE80211_PARSE_ERR_BAD_ELEM_SIZE; break; } elems->ext_chansw_ie = (void *)pos; break; case WLAN_EID_SECONDARY_CHANNEL_OFFSET: if (params->mode < IEEE80211_CONN_MODE_HT) break; if (elen != sizeof(struct ieee80211_sec_chan_offs_ie)) { elem_parse_failed = IEEE80211_PARSE_ERR_BAD_ELEM_SIZE; break; } elems->sec_chan_offs = (void *)pos; break; case WLAN_EID_CHAN_SWITCH_PARAM: if (elen < sizeof(*elems->mesh_chansw_params_ie)) { elem_parse_failed = IEEE80211_PARSE_ERR_BAD_ELEM_SIZE; break; } elems->mesh_chansw_params_ie = (void *)pos; break; case WLAN_EID_WIDE_BW_CHANNEL_SWITCH: if (params->mode < IEEE80211_CONN_MODE_VHT) break; if (!params->action) { elem_parse_failed = IEEE80211_PARSE_ERR_UNEXPECTED_ELEM; break; } if (elen < sizeof(*elems->wide_bw_chansw_ie)) { elem_parse_failed = IEEE80211_PARSE_ERR_BAD_ELEM_SIZE; break; } elems->wide_bw_chansw_ie = (void *)pos; break; case WLAN_EID_CHANNEL_SWITCH_WRAPPER: if (params->mode < IEEE80211_CONN_MODE_VHT) break; if (params->action) { elem_parse_failed = IEEE80211_PARSE_ERR_UNEXPECTED_ELEM; break; } /* * This is a bit tricky, but as we only care about * a few elements, parse them out manually. */ subelem = cfg80211_find_elem(WLAN_EID_WIDE_BW_CHANNEL_SWITCH, pos, elen); if (subelem) { if (subelem->datalen >= sizeof(*elems->wide_bw_chansw_ie)) elems->wide_bw_chansw_ie = (void *)subelem->data; else elem_parse_failed = IEEE80211_PARSE_ERR_BAD_ELEM_SIZE; } if (params->mode < IEEE80211_CONN_MODE_EHT) break; subelem = cfg80211_find_ext_elem(WLAN_EID_EXT_BANDWIDTH_INDICATION, pos, elen); if (subelem) { const void *edata = subelem->data + 1; u8 edatalen = subelem->datalen - 1; if (ieee80211_bandwidth_indication_size_ok(edata, edatalen)) elems->bandwidth_indication = edata; else elem_parse_failed = IEEE80211_PARSE_ERR_BAD_ELEM_SIZE; } subelem = cfg80211_find_ext_elem(WLAN_EID_TX_POWER_ENVELOPE, pos, elen); if (subelem) ieee80211_parse_tpe(&elems->csa_tpe, subelem->data + 1, subelem->datalen - 1); break; case WLAN_EID_COUNTRY: elems->country_elem = pos; elems->country_elem_len = elen; break; case WLAN_EID_PWR_CONSTRAINT: if (elen != 1) { elem_parse_failed = IEEE80211_PARSE_ERR_BAD_ELEM_SIZE; break; } elems->pwr_constr_elem = pos; break; case WLAN_EID_CISCO_VENDOR_SPECIFIC: /* Lots of different options exist, but we only care * about the Dynamic Transmit Power Control element. * First check for the Cisco OUI, then for the DTPC * tag (0x00). */ if (elen < 4) { elem_parse_failed = IEEE80211_PARSE_ERR_BAD_ELEM_SIZE; break; } if (pos[0] != 0x00 || pos[1] != 0x40 || pos[2] != 0x96 || pos[3] != 0x00) break; if (elen != 6) { elem_parse_failed = IEEE80211_PARSE_ERR_BAD_ELEM_SIZE; break; } if (calc_crc) crc = crc32_be(crc, pos - 2, elen + 2); elems->cisco_dtpc_elem = pos; break; case WLAN_EID_ADDBA_EXT: if (elen < sizeof(struct ieee80211_addba_ext_ie)) { elem_parse_failed = IEEE80211_PARSE_ERR_BAD_ELEM_SIZE; break; } elems->addba_ext_ie = (void *)pos; break; case WLAN_EID_TIMEOUT_INTERVAL: if (elen >= sizeof(struct ieee80211_timeout_interval_ie)) elems->timeout_int = (void *)pos; else elem_parse_failed = IEEE80211_PARSE_ERR_BAD_ELEM_SIZE; break; case WLAN_EID_BSS_MAX_IDLE_PERIOD: if (elen >= sizeof(*elems->max_idle_period_ie)) elems->max_idle_period_ie = (void *)pos; break; case WLAN_EID_RSNX: elems->rsnx = pos; elems->rsnx_len = elen; break; case WLAN_EID_TX_POWER_ENVELOPE: if (params->mode < IEEE80211_CONN_MODE_HE) break; ieee80211_parse_tpe(&elems->tpe, pos, elen); break; case WLAN_EID_EXTENSION: ieee80211_parse_extension_element(calc_crc ? &crc : NULL, elem, elems_parse, params); break; case WLAN_EID_S1G_CAPABILITIES: if (params->mode != IEEE80211_CONN_MODE_S1G) break; if (elen >= sizeof(*elems->s1g_capab)) elems->s1g_capab = (void *)pos; else elem_parse_failed = IEEE80211_PARSE_ERR_BAD_ELEM_SIZE; break; case WLAN_EID_S1G_OPERATION: if (params->mode != IEEE80211_CONN_MODE_S1G) break; if (elen == sizeof(*elems->s1g_oper)) elems->s1g_oper = (void *)pos; else elem_parse_failed = IEEE80211_PARSE_ERR_BAD_ELEM_SIZE; break; case WLAN_EID_S1G_BCN_COMPAT: if (params->mode != IEEE80211_CONN_MODE_S1G) break; if (elen == sizeof(*elems->s1g_bcn_compat)) elems->s1g_bcn_compat = (void *)pos; else elem_parse_failed = IEEE80211_PARSE_ERR_BAD_ELEM_SIZE; break; case WLAN_EID_AID_RESPONSE: if (params->mode != IEEE80211_CONN_MODE_S1G) break; if (elen == sizeof(struct ieee80211_aid_response_ie)) elems->aid_resp = (void *)pos; else elem_parse_failed = IEEE80211_PARSE_ERR_BAD_ELEM_SIZE; break; default: break; } if (elem_parse_failed) elems->parse_error |= elem_parse_failed; else __set_bit(id, seen_elems); } if (!for_each_element_completed(elem, params->start, params->len)) elems->parse_error |= IEEE80211_PARSE_ERR_INVALID_END; return crc; } static size_t ieee802_11_find_bssid_profile(const u8 *start, size_t len, struct ieee802_11_elems *elems, struct cfg80211_bss *bss, u8 *nontransmitted_profile) { const struct element *elem, *sub; size_t profile_len = 0; bool found = false; if (!bss || !bss->transmitted_bss) return profile_len; for_each_element_id(elem, WLAN_EID_MULTIPLE_BSSID, start, len) { if (elem->datalen < 2) continue; if (elem->data[0] < 1 || elem->data[0] > 8) continue; for_each_element(sub, elem->data + 1, elem->datalen - 1) { u8 new_bssid[ETH_ALEN]; const u8 *index; if (sub->id != 0 || sub->datalen < 4) { /* not a valid BSS profile */ continue; } if (sub->data[0] != WLAN_EID_NON_TX_BSSID_CAP || sub->data[1] != 2) { /* The first element of the * Nontransmitted BSSID Profile is not * the Nontransmitted BSSID Capability * element. */ continue; } memset(nontransmitted_profile, 0, len); profile_len = cfg80211_merge_profile(start, len, elem, sub, nontransmitted_profile, len); /* found a Nontransmitted BSSID Profile */ index = cfg80211_find_ie(WLAN_EID_MULTI_BSSID_IDX, nontransmitted_profile, profile_len); if (!index || index[1] < 1 || index[2] == 0) { /* Invalid MBSSID Index element */ continue; } cfg80211_gen_new_bssid(bss->transmitted_bss->bssid, elem->data[0], index[2], new_bssid); if (ether_addr_equal(new_bssid, bss->bssid)) { found = true; elems->bssid_index_len = index[1]; elems->bssid_index = (void *)&index[2]; break; } } } return found ? profile_len : 0; } static void ieee80211_mle_get_sta_prof(struct ieee80211_elems_parse *elems_parse, u8 link_id) { struct ieee802_11_elems *elems = &elems_parse->elems; const struct ieee80211_multi_link_elem *ml = elems->ml_basic; ssize_t ml_len = elems->ml_basic_len; const struct element *sub; for_each_mle_subelement(sub, (u8 *)ml, ml_len) { struct ieee80211_mle_per_sta_profile *prof = (void *)sub->data; ssize_t sta_prof_len; u16 control; if (sub->id != IEEE80211_MLE_SUBELEM_PER_STA_PROFILE) continue; if (!ieee80211_mle_basic_sta_prof_size_ok(sub->data, sub->datalen)) return; control = le16_to_cpu(prof->control); if (link_id != u16_get_bits(control, IEEE80211_MLE_STA_CONTROL_LINK_ID)) continue; if (!(control & IEEE80211_MLE_STA_CONTROL_COMPLETE_PROFILE)) return; /* the sub element can be fragmented */ sta_prof_len = cfg80211_defragment_element(sub, (u8 *)ml, ml_len, elems_parse->scratch_pos, elems_parse->scratch + elems_parse->scratch_len - elems_parse->scratch_pos, IEEE80211_MLE_SUBELEM_FRAGMENT); if (sta_prof_len < 0) return; elems->prof = (void *)elems_parse->scratch_pos; elems->sta_prof_len = sta_prof_len; elems_parse->scratch_pos += sta_prof_len; return; } } static void ieee80211_mle_parse_link(struct ieee80211_elems_parse *elems_parse, struct ieee80211_elems_parse_params *params) { struct ieee802_11_elems *elems = &elems_parse->elems; struct ieee80211_mle_per_sta_profile *prof; struct ieee80211_elems_parse_params sub = { .mode = params->mode, .action = params->action, .from_ap = params->from_ap, .link_id = -1, }; ssize_t ml_len = elems->ml_basic_len; const struct element *non_inherit = NULL; const u8 *end; ml_len = cfg80211_defragment_element(elems_parse->ml_basic_elem, elems->ie_start, elems->total_len, elems_parse->scratch_pos, elems_parse->scratch + elems_parse->scratch_len - elems_parse->scratch_pos, WLAN_EID_FRAGMENT); if (ml_len < 0) return; elems->ml_basic = (const void *)elems_parse->scratch_pos; elems->ml_basic_len = ml_len; elems_parse->scratch_pos += ml_len; if (params->link_id == -1) return; ieee80211_mle_get_sta_prof(elems_parse, params->link_id); prof = elems->prof; if (!prof) return; /* check if we have the 4 bytes for the fixed part in assoc response */ if (elems->sta_prof_len < sizeof(*prof) + prof->sta_info_len - 1 + 4) { elems->prof = NULL; elems->sta_prof_len = 0; return; } /* * Skip the capability information and the status code that are expected * as part of the station profile in association response frames. Note * the -1 is because the 'sta_info_len' is accounted to as part of the * per-STA profile, but not part of the 'u8 variable[]' portion. */ sub.start = prof->variable + prof->sta_info_len - 1 + 4; end = (const u8 *)prof + elems->sta_prof_len; sub.len = end - sub.start; non_inherit = cfg80211_find_ext_elem(WLAN_EID_EXT_NON_INHERITANCE, sub.start, sub.len); _ieee802_11_parse_elems_full(&sub, elems_parse, non_inherit); } static void ieee80211_mle_defrag_reconf(struct ieee80211_elems_parse *elems_parse) { struct ieee802_11_elems *elems = &elems_parse->elems; ssize_t ml_len; ml_len = cfg80211_defragment_element(elems_parse->ml_reconf_elem, elems->ie_start, elems->total_len, elems_parse->scratch_pos, elems_parse->scratch + elems_parse->scratch_len - elems_parse->scratch_pos, WLAN_EID_FRAGMENT); if (ml_len < 0) return; elems->ml_reconf = (void *)elems_parse->scratch_pos; elems->ml_reconf_len = ml_len; elems_parse->scratch_pos += ml_len; } struct ieee802_11_elems * ieee802_11_parse_elems_full(struct ieee80211_elems_parse_params *params) { struct ieee80211_elems_parse *elems_parse; struct ieee802_11_elems *elems; const struct element *non_inherit = NULL; u8 *nontransmitted_profile; int nontransmitted_profile_len = 0; size_t scratch_len = 3 * params->len; BUILD_BUG_ON(offsetof(typeof(*elems_parse), elems) != 0); elems_parse = kzalloc(struct_size(elems_parse, scratch, scratch_len), GFP_ATOMIC); if (!elems_parse) return NULL; elems_parse->scratch_len = scratch_len; elems_parse->scratch_pos = elems_parse->scratch; elems = &elems_parse->elems; elems->ie_start = params->start; elems->total_len = params->len; /* set all TPE entries to unlimited (but invalid) */ ieee80211_clear_tpe(&elems->tpe); ieee80211_clear_tpe(&elems->csa_tpe); nontransmitted_profile = elems_parse->scratch_pos; nontransmitted_profile_len = ieee802_11_find_bssid_profile(params->start, params->len, elems, params->bss, nontransmitted_profile); elems_parse->scratch_pos += nontransmitted_profile_len; non_inherit = cfg80211_find_ext_elem(WLAN_EID_EXT_NON_INHERITANCE, nontransmitted_profile, nontransmitted_profile_len); elems->crc = _ieee802_11_parse_elems_full(params, elems_parse, non_inherit); /* Override with nontransmitted profile, if found */ if (nontransmitted_profile_len) { struct ieee80211_elems_parse_params sub = { .mode = params->mode, .start = nontransmitted_profile, .len = nontransmitted_profile_len, .action = params->action, .link_id = params->link_id, }; _ieee802_11_parse_elems_full(&sub, elems_parse, NULL); } ieee80211_mle_parse_link(elems_parse, params); ieee80211_mle_defrag_reconf(elems_parse); if (elems->tim && !elems->parse_error) { const struct ieee80211_tim_ie *tim_ie = elems->tim; elems->dtim_period = tim_ie->dtim_period; elems->dtim_count = tim_ie->dtim_count; } /* Override DTIM period and count if needed */ if (elems->bssid_index && elems->bssid_index_len >= offsetofend(struct ieee80211_bssid_index, dtim_period)) elems->dtim_period = elems->bssid_index->dtim_period; if (elems->bssid_index && elems->bssid_index_len >= offsetofend(struct ieee80211_bssid_index, dtim_count)) elems->dtim_count = elems->bssid_index->dtim_count; return elems; } EXPORT_SYMBOL_IF_KUNIT(ieee802_11_parse_elems_full); int ieee80211_parse_bitrates(enum nl80211_chan_width width, const struct ieee80211_supported_band *sband, const u8 *srates, int srates_len, u32 *rates) { u32 rate_flags = ieee80211_chanwidth_rate_flags(width); struct ieee80211_rate *br; int brate, rate, i, j, count = 0; *rates = 0; for (i = 0; i < srates_len; i++) { rate = srates[i] & 0x7f; for (j = 0; j < sband->n_bitrates; j++) { br = &sband->bitrates[j]; if ((rate_flags & br->flags) != rate_flags) continue; brate = DIV_ROUND_UP(br->bitrate, 5); if (brate == rate) { *rates |= BIT(j); count++; break; } } } return count; }
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _UFS_UFS_H #define _UFS_UFS_H 1 #ifdef pr_fmt #undef pr_fmt #endif #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #define UFS_MAX_GROUP_LOADED 8 #define UFS_CGNO_EMPTY ((unsigned)-1) struct ufs_sb_private_info; struct ufs_cg_private_info; struct ufs_csum; struct ufs_sb_info { struct ufs_sb_private_info * s_uspi; struct ufs_csum * s_csp; unsigned s_bytesex; unsigned s_flags; struct buffer_head ** s_ucg; struct ufs_cg_private_info * s_ucpi[UFS_MAX_GROUP_LOADED]; unsigned s_cgno[UFS_MAX_GROUP_LOADED]; unsigned short s_cg_loaded; unsigned s_mount_opt; struct super_block *sb; int work_queued; /* non-zero if the delayed work is queued */ struct delayed_work sync_work; /* FS sync delayed work */ spinlock_t work_lock; /* protects sync_work and work_queued */ struct mutex s_lock; }; struct ufs_inode_info { union { __fs32 i_data[15]; __u8 i_symlink[2 * 4 * 15]; __fs64 u2_i_data[15]; } i_u1; __u32 i_flags; __u32 i_shadow; __u32 i_unused1; __u32 i_unused2; __u32 i_oeftflag; __u16 i_osync; __u64 i_lastfrag; seqlock_t meta_lock; struct mutex truncate_mutex; __u32 i_dir_start_lookup; struct inode vfs_inode; }; /* mount options */ #define UFS_MOUNT_ONERROR 0x0000000F #define UFS_MOUNT_ONERROR_PANIC 0x00000001 #define UFS_MOUNT_ONERROR_LOCK 0x00000002 #define UFS_MOUNT_ONERROR_UMOUNT 0x00000004 #define UFS_MOUNT_ONERROR_REPAIR 0x00000008 #define UFS_MOUNT_UFSTYPE 0x0000FFF0 #define UFS_MOUNT_UFSTYPE_OLD 0x00000010 #define UFS_MOUNT_UFSTYPE_44BSD 0x00000020 #define UFS_MOUNT_UFSTYPE_SUN 0x00000040 #define UFS_MOUNT_UFSTYPE_NEXTSTEP 0x00000080 #define UFS_MOUNT_UFSTYPE_NEXTSTEP_CD 0x00000100 #define UFS_MOUNT_UFSTYPE_OPENSTEP 0x00000200 #define UFS_MOUNT_UFSTYPE_SUNx86 0x00000400 #define UFS_MOUNT_UFSTYPE_HP 0x00000800 #define UFS_MOUNT_UFSTYPE_UFS2 0x00001000 #define UFS_MOUNT_UFSTYPE_SUNOS 0x00002000 #define ufs_clear_opt(o,opt) o &= ~UFS_MOUNT_##opt #define ufs_set_opt(o,opt) o |= UFS_MOUNT_##opt #define ufs_test_opt(o,opt) ((o) & UFS_MOUNT_##opt) /* * Debug code */ #ifdef CONFIG_UFS_DEBUG # define UFSD(f, a...) { \ pr_debug("UFSD (%s, %d): %s:", \ __FILE__, __LINE__, __func__); \ pr_debug(f, ## a); \ } #else # define UFSD(f, a...) /**/ #endif /* balloc.c */ void ufs_free_fragments (struct inode *, u64 fragment, unsigned count); void ufs_free_blocks (struct inode *, u64 fragment, unsigned count); u64 ufs_new_fragments(struct inode *, void *, u64 fragment, u64 goal, unsigned count, int *err, struct folio *); /* cylinder.c */ extern struct ufs_cg_private_info * ufs_load_cylinder (struct super_block *, unsigned); extern void ufs_put_cylinder (struct super_block *, unsigned); /* dir.c */ extern const struct inode_operations ufs_dir_inode_operations; int ufs_add_link(struct dentry *, struct inode *); ino_t ufs_inode_by_name(struct inode *, const struct qstr *); int ufs_make_empty(struct inode *, struct inode *); struct ufs_dir_entry *ufs_find_entry(struct inode *, const struct qstr *, struct folio **); int ufs_delete_entry(struct inode *, struct ufs_dir_entry *, struct folio *); int ufs_empty_dir(struct inode *); struct ufs_dir_entry *ufs_dotdot(struct inode *, struct folio **); int ufs_set_link(struct inode *dir, struct ufs_dir_entry *de, struct folio *folio, struct inode *inode, bool update_times); /* file.c */ extern const struct inode_operations ufs_file_inode_operations; extern const struct file_operations ufs_file_operations; extern const struct address_space_operations ufs_aops; /* ialloc.c */ extern void ufs_free_inode (struct inode *inode); extern struct inode * ufs_new_inode (struct inode *, umode_t); /* inode.c */ extern struct inode *ufs_iget(struct super_block *, unsigned long); extern int ufs_write_inode (struct inode *, struct writeback_control *); extern int ufs_sync_inode (struct inode *); extern void ufs_evict_inode (struct inode *); extern int ufs_setattr(struct mnt_idmap *idmap, struct dentry *dentry, struct iattr *attr); /* namei.c */ extern const struct file_operations ufs_dir_operations; /* super.c */ extern __printf(3, 4) void ufs_warning(struct super_block *, const char *, const char *, ...); extern __printf(3, 4) void ufs_error(struct super_block *, const char *, const char *, ...); extern __printf(3, 4) void ufs_panic(struct super_block *, const char *, const char *, ...); void ufs_mark_sb_dirty(struct super_block *sb); static inline struct ufs_sb_info *UFS_SB(struct super_block *sb) { return sb->s_fs_info; } static inline struct ufs_inode_info *UFS_I(struct inode *inode) { return container_of(inode, struct ufs_inode_info, vfs_inode); } /* * Give cylinder group number for a file system block. * Give cylinder group block number for a file system block. */ /* #define ufs_dtog(d) ((d) / uspi->s_fpg) */ static inline u64 ufs_dtog(struct ufs_sb_private_info * uspi, u64 b) { do_div(b, uspi->s_fpg); return b; } /* #define ufs_dtogd(d) ((d) % uspi->s_fpg) */ static inline u32 ufs_dtogd(struct ufs_sb_private_info * uspi, u64 b) { return do_div(b, uspi->s_fpg); } #endif /* _UFS_UFS_H */
11307 1043 1032 11312 987 10681 714 10712 10625 11295 11298 11315 11274 11284 11287 11297 59 309 322 59 318 318 322 311 312 306 120 310 311 308 311 2 312 306 311 307 310 309 309 2 2 117 31 103 118 23 23 1 11 10 3 19 22 5 22 17 17 5 17 9 11 11 7 8 8 7 7 7 7 7 11 7 11 9525 1129 258 105 723 592 592 592 591 592 592 376 39 543 513 592 592 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 // SPDX-License-Identifier: GPL-2.0-only /* * 32bit Socket syscall emulation. Based on arch/sparc64/kernel/sys_sparc32.c. * * Copyright (C) 2000 VA Linux Co * Copyright (C) 2000 Don Dugger <n0ano@valinux.com> * Copyright (C) 1999 Arun Sharma <arun.sharma@intel.com> * Copyright (C) 1997,1998 Jakub Jelinek (jj@sunsite.mff.cuni.cz) * Copyright (C) 1997 David S. Miller (davem@caip.rutgers.edu) * Copyright (C) 2000 Hewlett-Packard Co. * Copyright (C) 2000 David Mosberger-Tang <davidm@hpl.hp.com> * Copyright (C) 2000,2001 Andi Kleen, SuSE Labs */ #include <linux/kernel.h> #include <linux/gfp.h> #include <linux/fs.h> #include <linux/types.h> #include <linux/file.h> #include <linux/icmpv6.h> #include <linux/socket.h> #include <linux/syscalls.h> #include <linux/filter.h> #include <linux/compat.h> #include <linux/security.h> #include <linux/audit.h> #include <linux/export.h> #include <net/scm.h> #include <net/sock.h> #include <net/ip.h> #include <net/ipv6.h> #include <linux/uaccess.h> #include <net/compat.h> int __get_compat_msghdr(struct msghdr *kmsg, struct compat_msghdr *msg, struct sockaddr __user **save_addr) { ssize_t err; kmsg->msg_flags = msg->msg_flags; kmsg->msg_namelen = msg->msg_namelen; if (!msg->msg_name) kmsg->msg_namelen = 0; if (kmsg->msg_namelen < 0) return -EINVAL; if (kmsg->msg_namelen > sizeof(struct sockaddr_storage)) kmsg->msg_namelen = sizeof(struct sockaddr_storage); kmsg->msg_control_is_user = true; kmsg->msg_get_inq = 0; kmsg->msg_control_user = compat_ptr(msg->msg_control); kmsg->msg_controllen = msg->msg_controllen; if (save_addr) *save_addr = compat_ptr(msg->msg_name); if (msg->msg_name && kmsg->msg_namelen) { if (!save_addr) { err = move_addr_to_kernel(compat_ptr(msg->msg_name), kmsg->msg_namelen, kmsg->msg_name); if (err < 0) return err; } } else { kmsg->msg_name = NULL; kmsg->msg_namelen = 0; } if (msg->msg_iovlen > UIO_MAXIOV) return -EMSGSIZE; kmsg->msg_iocb = NULL; kmsg->msg_ubuf = NULL; return 0; } int get_compat_msghdr(struct msghdr *kmsg, struct compat_msghdr __user *umsg, struct sockaddr __user **save_addr, struct iovec **iov) { struct compat_msghdr msg; ssize_t err; if (copy_from_user(&msg, umsg, sizeof(*umsg))) return -EFAULT; err = __get_compat_msghdr(kmsg, &msg, save_addr); if (err) return err; err = import_iovec(save_addr ? ITER_DEST : ITER_SOURCE, compat_ptr(msg.msg_iov), msg.msg_iovlen, UIO_FASTIOV, iov, &kmsg->msg_iter); return err < 0 ? err : 0; } /* Bleech... */ #define CMSG_COMPAT_ALIGN(len) ALIGN((len), sizeof(s32)) #define CMSG_COMPAT_DATA(cmsg) \ ((void __user *)((char __user *)(cmsg) + sizeof(struct compat_cmsghdr))) #define CMSG_COMPAT_SPACE(len) \ (sizeof(struct compat_cmsghdr) + CMSG_COMPAT_ALIGN(len)) #define CMSG_COMPAT_LEN(len) \ (sizeof(struct compat_cmsghdr) + (len)) #define CMSG_COMPAT_FIRSTHDR(msg) \ (((msg)->msg_controllen) >= sizeof(struct compat_cmsghdr) ? \ (struct compat_cmsghdr __user *)((msg)->msg_control_user) : \ (struct compat_cmsghdr __user *)NULL) #define CMSG_COMPAT_OK(ucmlen, ucmsg, mhdr) \ ((ucmlen) >= sizeof(struct compat_cmsghdr) && \ (ucmlen) <= (unsigned long) \ ((mhdr)->msg_controllen - \ ((char __user *)(ucmsg) - (char __user *)(mhdr)->msg_control_user))) static inline struct compat_cmsghdr __user *cmsg_compat_nxthdr(struct msghdr *msg, struct compat_cmsghdr __user *cmsg, int cmsg_len) { char __user *ptr = (char __user *)cmsg + CMSG_COMPAT_ALIGN(cmsg_len); if ((unsigned long)(ptr + 1 - (char __user *)msg->msg_control_user) > msg->msg_controllen) return NULL; return (struct compat_cmsghdr __user *)ptr; } /* There is a lot of hair here because the alignment rules (and * thus placement) of cmsg headers and length are different for * 32-bit apps. -DaveM */ int cmsghdr_from_user_compat_to_kern(struct msghdr *kmsg, struct sock *sk, unsigned char *stackbuf, int stackbuf_size) { struct compat_cmsghdr __user *ucmsg; struct cmsghdr *kcmsg, *kcmsg_base; compat_size_t ucmlen; __kernel_size_t kcmlen, tmp; int err = -EFAULT; BUILD_BUG_ON(sizeof(struct compat_cmsghdr) != CMSG_COMPAT_ALIGN(sizeof(struct compat_cmsghdr))); kcmlen = 0; kcmsg_base = kcmsg = (struct cmsghdr *)stackbuf; ucmsg = CMSG_COMPAT_FIRSTHDR(kmsg); while (ucmsg != NULL) { if (get_user(ucmlen, &ucmsg->cmsg_len)) return -EFAULT; /* Catch bogons. */ if (!CMSG_COMPAT_OK(ucmlen, ucmsg, kmsg)) return -EINVAL; tmp = ((ucmlen - sizeof(*ucmsg)) + sizeof(struct cmsghdr)); tmp = CMSG_ALIGN(tmp); kcmlen += tmp; ucmsg = cmsg_compat_nxthdr(kmsg, ucmsg, ucmlen); } if (kcmlen == 0) return -EINVAL; /* The kcmlen holds the 64-bit version of the control length. * It may not be modified as we do not stick it into the kmsg * until we have successfully copied over all of the data * from the user. */ if (kcmlen > stackbuf_size) kcmsg_base = kcmsg = sock_kmalloc(sk, kcmlen, GFP_KERNEL); if (kcmsg == NULL) return -ENOMEM; /* Now copy them over neatly. */ memset(kcmsg, 0, kcmlen); ucmsg = CMSG_COMPAT_FIRSTHDR(kmsg); while (ucmsg != NULL) { struct compat_cmsghdr cmsg; if (copy_from_user(&cmsg, ucmsg, sizeof(cmsg))) goto Efault; if (!CMSG_COMPAT_OK(cmsg.cmsg_len, ucmsg, kmsg)) goto Einval; tmp = ((cmsg.cmsg_len - sizeof(*ucmsg)) + sizeof(struct cmsghdr)); if ((char *)kcmsg_base + kcmlen - (char *)kcmsg < CMSG_ALIGN(tmp)) goto Einval; kcmsg->cmsg_len = tmp; kcmsg->cmsg_level = cmsg.cmsg_level; kcmsg->cmsg_type = cmsg.cmsg_type; tmp = CMSG_ALIGN(tmp); if (copy_from_user(CMSG_DATA(kcmsg), CMSG_COMPAT_DATA(ucmsg), (cmsg.cmsg_len - sizeof(*ucmsg)))) goto Efault; /* Advance. */ kcmsg = (struct cmsghdr *)((char *)kcmsg + tmp); ucmsg = cmsg_compat_nxthdr(kmsg, ucmsg, cmsg.cmsg_len); } /* * check the length of messages copied in is the same as the * what we get from the first loop */ if ((char *)kcmsg - (char *)kcmsg_base != kcmlen) goto Einval; /* Ok, looks like we made it. Hook it up and return success. */ kmsg->msg_control_is_user = false; kmsg->msg_control = kcmsg_base; kmsg->msg_controllen = kcmlen; return 0; Einval: err = -EINVAL; Efault: if (kcmsg_base != (struct cmsghdr *)stackbuf) sock_kfree_s(sk, kcmsg_base, kcmlen); return err; } int put_cmsg_compat(struct msghdr *kmsg, int level, int type, int len, void *data) { struct compat_cmsghdr __user *cm = (struct compat_cmsghdr __user *) kmsg->msg_control_user; struct compat_cmsghdr cmhdr; struct old_timeval32 ctv; struct old_timespec32 cts[3]; int cmlen; if (cm == NULL || kmsg->msg_controllen < sizeof(*cm)) { kmsg->msg_flags |= MSG_CTRUNC; return 0; /* XXX: return error? check spec. */ } if (!COMPAT_USE_64BIT_TIME) { if (level == SOL_SOCKET && type == SO_TIMESTAMP_OLD) { struct __kernel_old_timeval *tv = (struct __kernel_old_timeval *)data; ctv.tv_sec = tv->tv_sec; ctv.tv_usec = tv->tv_usec; data = &ctv; len = sizeof(ctv); } if (level == SOL_SOCKET && (type == SO_TIMESTAMPNS_OLD || type == SO_TIMESTAMPING_OLD)) { int count = type == SO_TIMESTAMPNS_OLD ? 1 : 3; int i; struct __kernel_old_timespec *ts = data; for (i = 0; i < count; i++) { cts[i].tv_sec = ts[i].tv_sec; cts[i].tv_nsec = ts[i].tv_nsec; } data = &cts; len = sizeof(cts[0]) * count; } } cmlen = CMSG_COMPAT_LEN(len); if (kmsg->msg_controllen < cmlen) { kmsg->msg_flags |= MSG_CTRUNC; cmlen = kmsg->msg_controllen; } cmhdr.cmsg_level = level; cmhdr.cmsg_type = type; cmhdr.cmsg_len = cmlen; if (copy_to_user(cm, &cmhdr, sizeof cmhdr)) return -EFAULT; if (copy_to_user(CMSG_COMPAT_DATA(cm), data, cmlen - sizeof(struct compat_cmsghdr))) return -EFAULT; cmlen = CMSG_COMPAT_SPACE(len); if (kmsg->msg_controllen < cmlen) cmlen = kmsg->msg_controllen; kmsg->msg_control_user += cmlen; kmsg->msg_controllen -= cmlen; return 0; } static int scm_max_fds_compat(struct msghdr *msg) { if (msg->msg_controllen <= sizeof(struct compat_cmsghdr)) return 0; return (msg->msg_controllen - sizeof(struct compat_cmsghdr)) / sizeof(int); } void scm_detach_fds_compat(struct msghdr *msg, struct scm_cookie *scm) { struct compat_cmsghdr __user *cm = (struct compat_cmsghdr __user *)msg->msg_control_user; unsigned int o_flags = (msg->msg_flags & MSG_CMSG_CLOEXEC) ? O_CLOEXEC : 0; int fdmax = min_t(int, scm_max_fds_compat(msg), scm->fp->count); int __user *cmsg_data = CMSG_COMPAT_DATA(cm); int err = 0, i; for (i = 0; i < fdmax; i++) { err = scm_recv_one_fd(scm->fp->fp[i], cmsg_data + i, o_flags); if (err < 0) break; } if (i > 0) { int cmlen = CMSG_COMPAT_LEN(i * sizeof(int)); err = put_user(SOL_SOCKET, &cm->cmsg_level); if (!err) err = put_user(SCM_RIGHTS, &cm->cmsg_type); if (!err) err = put_user(cmlen, &cm->cmsg_len); if (!err) { cmlen = CMSG_COMPAT_SPACE(i * sizeof(int)); if (msg->msg_controllen < cmlen) cmlen = msg->msg_controllen; msg->msg_control_user += cmlen; msg->msg_controllen -= cmlen; } } if (i < scm->fp->count || (scm->fp->count && fdmax <= 0)) msg->msg_flags |= MSG_CTRUNC; /* * All of the files that fit in the message have had their usage counts * incremented, so we just free the list. */ __scm_destroy(scm); } /* Argument list sizes for compat_sys_socketcall */ #define AL(x) ((x) * sizeof(u32)) static unsigned char nas[21] = { AL(0), AL(3), AL(3), AL(3), AL(2), AL(3), AL(3), AL(3), AL(4), AL(4), AL(4), AL(6), AL(6), AL(2), AL(5), AL(5), AL(3), AL(3), AL(4), AL(5), AL(4) }; #undef AL static inline long __compat_sys_sendmsg(int fd, struct compat_msghdr __user *msg, unsigned int flags) { return __sys_sendmsg(fd, (struct user_msghdr __user *)msg, flags | MSG_CMSG_COMPAT, false); } COMPAT_SYSCALL_DEFINE3(sendmsg, int, fd, struct compat_msghdr __user *, msg, unsigned int, flags) { return __compat_sys_sendmsg(fd, msg, flags); } static inline long __compat_sys_sendmmsg(int fd, struct compat_mmsghdr __user *mmsg, unsigned int vlen, unsigned int flags) { return __sys_sendmmsg(fd, (struct mmsghdr __user *)mmsg, vlen, flags | MSG_CMSG_COMPAT, false); } COMPAT_SYSCALL_DEFINE4(sendmmsg, int, fd, struct compat_mmsghdr __user *, mmsg, unsigned int, vlen, unsigned int, flags) { return __compat_sys_sendmmsg(fd, mmsg, vlen, flags); } static inline long __compat_sys_recvmsg(int fd, struct compat_msghdr __user *msg, unsigned int flags) { return __sys_recvmsg(fd, (struct user_msghdr __user *)msg, flags | MSG_CMSG_COMPAT, false); } COMPAT_SYSCALL_DEFINE3(recvmsg, int, fd, struct compat_msghdr __user *, msg, unsigned int, flags) { return __compat_sys_recvmsg(fd, msg, flags); } static inline long __compat_sys_recvfrom(int fd, void __user *buf, compat_size_t len, unsigned int flags, struct sockaddr __user *addr, int __user *addrlen) { return __sys_recvfrom(fd, buf, len, flags | MSG_CMSG_COMPAT, addr, addrlen); } COMPAT_SYSCALL_DEFINE4(recv, int, fd, void __user *, buf, compat_size_t, len, unsigned int, flags) { return __compat_sys_recvfrom(fd, buf, len, flags, NULL, NULL); } COMPAT_SYSCALL_DEFINE6(recvfrom, int, fd, void __user *, buf, compat_size_t, len, unsigned int, flags, struct sockaddr __user *, addr, int __user *, addrlen) { return __compat_sys_recvfrom(fd, buf, len, flags, addr, addrlen); } COMPAT_SYSCALL_DEFINE5(recvmmsg_time64, int, fd, struct compat_mmsghdr __user *, mmsg, unsigned int, vlen, unsigned int, flags, struct __kernel_timespec __user *, timeout) { return __sys_recvmmsg(fd, (struct mmsghdr __user *)mmsg, vlen, flags | MSG_CMSG_COMPAT, timeout, NULL); } #ifdef CONFIG_COMPAT_32BIT_TIME COMPAT_SYSCALL_DEFINE5(recvmmsg_time32, int, fd, struct compat_mmsghdr __user *, mmsg, unsigned int, vlen, unsigned int, flags, struct old_timespec32 __user *, timeout) { return __sys_recvmmsg(fd, (struct mmsghdr __user *)mmsg, vlen, flags | MSG_CMSG_COMPAT, NULL, timeout); } #endif COMPAT_SYSCALL_DEFINE2(socketcall, int, call, u32 __user *, args) { u32 a[AUDITSC_ARGS]; unsigned int len; u32 a0, a1; int ret; if (call < SYS_SOCKET || call > SYS_SENDMMSG) return -EINVAL; len = nas[call]; if (len > sizeof(a)) return -EINVAL; if (copy_from_user(a, args, len)) return -EFAULT; ret = audit_socketcall_compat(len / sizeof(a[0]), a); if (ret) return ret; a0 = a[0]; a1 = a[1]; switch (call) { case SYS_SOCKET: ret = __sys_socket(a0, a1, a[2]); break; case SYS_BIND: ret = __sys_bind(a0, compat_ptr(a1), a[2]); break; case SYS_CONNECT: ret = __sys_connect(a0, compat_ptr(a1), a[2]); break; case SYS_LISTEN: ret = __sys_listen(a0, a1); break; case SYS_ACCEPT: ret = __sys_accept4(a0, compat_ptr(a1), compat_ptr(a[2]), 0); break; case SYS_GETSOCKNAME: ret = __sys_getsockname(a0, compat_ptr(a1), compat_ptr(a[2])); break; case SYS_GETPEERNAME: ret = __sys_getpeername(a0, compat_ptr(a1), compat_ptr(a[2])); break; case SYS_SOCKETPAIR: ret = __sys_socketpair(a0, a1, a[2], compat_ptr(a[3])); break; case SYS_SEND: ret = __sys_sendto(a0, compat_ptr(a1), a[2], a[3], NULL, 0); break; case SYS_SENDTO: ret = __sys_sendto(a0, compat_ptr(a1), a[2], a[3], compat_ptr(a[4]), a[5]); break; case SYS_RECV: ret = __compat_sys_recvfrom(a0, compat_ptr(a1), a[2], a[3], NULL, NULL); break; case SYS_RECVFROM: ret = __compat_sys_recvfrom(a0, compat_ptr(a1), a[2], a[3], compat_ptr(a[4]), compat_ptr(a[5])); break; case SYS_SHUTDOWN: ret = __sys_shutdown(a0, a1); break; case SYS_SETSOCKOPT: ret = __sys_setsockopt(a0, a1, a[2], compat_ptr(a[3]), a[4]); break; case SYS_GETSOCKOPT: ret = __sys_getsockopt(a0, a1, a[2], compat_ptr(a[3]), compat_ptr(a[4])); break; case SYS_SENDMSG: ret = __compat_sys_sendmsg(a0, compat_ptr(a1), a[2]); break; case SYS_SENDMMSG: ret = __compat_sys_sendmmsg(a0, compat_ptr(a1), a[2], a[3]); break; case SYS_RECVMSG: ret = __compat_sys_recvmsg(a0, compat_ptr(a1), a[2]); break; case SYS_RECVMMSG: ret = __sys_recvmmsg(a0, compat_ptr(a1), a[2], a[3] | MSG_CMSG_COMPAT, NULL, compat_ptr(a[4])); break; case SYS_ACCEPT4: ret = __sys_accept4(a0, compat_ptr(a1), compat_ptr(a[2]), a[3]); break; default: ret = -EINVAL; break; } return ret; }
1 1 1 7 7 7 7 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 4 4 4 2 1 4 1 1 4 2 3 1 1 1 2 2 2 2 4 4 4 1 1 1 1 1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 // SPDX-License-Identifier: GPL-2.0-or-later /* * * Copyright (C) Jonathan Naylor G4KLX (g4klx@g4klx.demon.co.uk) * Copyright (C) Terry Dawson VK2KTJ (terry@animats.net) */ #include <linux/errno.h> #include <linux/types.h> #include <linux/socket.h> #include <linux/in.h> #include <linux/kernel.h> #include <linux/timer.h> #include <linux/string.h> #include <linux/sockios.h> #include <linux/net.h> #include <linux/slab.h> #include <net/ax25.h> #include <linux/inet.h> #include <linux/netdevice.h> #include <net/arp.h> #include <linux/if_arp.h> #include <linux/skbuff.h> #include <net/sock.h> #include <net/tcp_states.h> #include <linux/uaccess.h> #include <linux/fcntl.h> #include <linux/termios.h> /* For TIOCINQ/OUTQ */ #include <linux/mm.h> #include <linux/interrupt.h> #include <linux/notifier.h> #include <linux/init.h> #include <net/rose.h> #include <linux/seq_file.h> #include <linux/export.h> static unsigned int rose_neigh_no = 1; static struct rose_node *rose_node_list; static DEFINE_SPINLOCK(rose_node_list_lock); static struct rose_neigh *rose_neigh_list; static DEFINE_SPINLOCK(rose_neigh_list_lock); static struct rose_route *rose_route_list; static DEFINE_SPINLOCK(rose_route_list_lock); struct rose_neigh *rose_loopback_neigh; /* * Add a new route to a node, and in the process add the node and the * neighbour if it is new. */ static int __must_check rose_add_node(struct rose_route_struct *rose_route, struct net_device *dev) { struct rose_node *rose_node, *rose_tmpn, *rose_tmpp; struct rose_neigh *rose_neigh; int i, res = 0; spin_lock_bh(&rose_node_list_lock); spin_lock_bh(&rose_neigh_list_lock); rose_node = rose_node_list; while (rose_node != NULL) { if ((rose_node->mask == rose_route->mask) && (rosecmpm(&rose_route->address, &rose_node->address, rose_route->mask) == 0)) break; rose_node = rose_node->next; } if (rose_node != NULL && rose_node->loopback) { res = -EINVAL; goto out; } rose_neigh = rose_neigh_list; while (rose_neigh != NULL) { if (ax25cmp(&rose_route->neighbour, &rose_neigh->callsign) == 0 && rose_neigh->dev == dev) break; rose_neigh = rose_neigh->next; } if (rose_neigh == NULL) { rose_neigh = kmalloc(sizeof(*rose_neigh), GFP_ATOMIC); if (rose_neigh == NULL) { res = -ENOMEM; goto out; } rose_neigh->callsign = rose_route->neighbour; rose_neigh->digipeat = NULL; rose_neigh->ax25 = NULL; rose_neigh->dev = dev; rose_neigh->count = 0; rose_neigh->use = 0; rose_neigh->dce_mode = 0; rose_neigh->loopback = 0; rose_neigh->number = rose_neigh_no++; rose_neigh->restarted = 0; skb_queue_head_init(&rose_neigh->queue); timer_setup(&rose_neigh->ftimer, NULL, 0); timer_setup(&rose_neigh->t0timer, NULL, 0); if (rose_route->ndigis != 0) { rose_neigh->digipeat = kmalloc(sizeof(ax25_digi), GFP_ATOMIC); if (rose_neigh->digipeat == NULL) { kfree(rose_neigh); res = -ENOMEM; goto out; } rose_neigh->digipeat->ndigi = rose_route->ndigis; rose_neigh->digipeat->lastrepeat = -1; for (i = 0; i < rose_route->ndigis; i++) { rose_neigh->digipeat->calls[i] = rose_route->digipeaters[i]; rose_neigh->digipeat->repeated[i] = 0; } } rose_neigh->next = rose_neigh_list; rose_neigh_list = rose_neigh; } /* * This is a new node to be inserted into the list. Find where it needs * to be inserted into the list, and insert it. We want to be sure * to order the list in descending order of mask size to ensure that * later when we are searching this list the first match will be the * best match. */ if (rose_node == NULL) { rose_tmpn = rose_node_list; rose_tmpp = NULL; while (rose_tmpn != NULL) { if (rose_tmpn->mask > rose_route->mask) { rose_tmpp = rose_tmpn; rose_tmpn = rose_tmpn->next; } else { break; } } /* create new node */ rose_node = kmalloc(sizeof(*rose_node), GFP_ATOMIC); if (rose_node == NULL) { res = -ENOMEM; goto out; } rose_node->address = rose_route->address; rose_node->mask = rose_route->mask; rose_node->count = 1; rose_node->loopback = 0; rose_node->neighbour[0] = rose_neigh; if (rose_tmpn == NULL) { if (rose_tmpp == NULL) { /* Empty list */ rose_node_list = rose_node; rose_node->next = NULL; } else { rose_tmpp->next = rose_node; rose_node->next = NULL; } } else { if (rose_tmpp == NULL) { /* 1st node */ rose_node->next = rose_node_list; rose_node_list = rose_node; } else { rose_tmpp->next = rose_node; rose_node->next = rose_tmpn; } } rose_neigh->count++; goto out; } /* We have space, slot it in */ if (rose_node->count < 3) { rose_node->neighbour[rose_node->count] = rose_neigh; rose_node->count++; rose_neigh->count++; } out: spin_unlock_bh(&rose_neigh_list_lock); spin_unlock_bh(&rose_node_list_lock); return res; } /* * Caller is holding rose_node_list_lock. */ static void rose_remove_node(struct rose_node *rose_node) { struct rose_node *s; if ((s = rose_node_list) == rose_node) { rose_node_list = rose_node->next; kfree(rose_node); return; } while (s != NULL && s->next != NULL) { if (s->next == rose_node) { s->next = rose_node->next; kfree(rose_node); return; } s = s->next; } } /* * Caller is holding rose_neigh_list_lock. */ static void rose_remove_neigh(struct rose_neigh *rose_neigh) { struct rose_neigh *s; del_timer_sync(&rose_neigh->ftimer); del_timer_sync(&rose_neigh->t0timer); skb_queue_purge(&rose_neigh->queue); if ((s = rose_neigh_list) == rose_neigh) { rose_neigh_list = rose_neigh->next; if (rose_neigh->ax25) ax25_cb_put(rose_neigh->ax25); kfree(rose_neigh->digipeat); kfree(rose_neigh); return; } while (s != NULL && s->next != NULL) { if (s->next == rose_neigh) { s->next = rose_neigh->next; if (rose_neigh->ax25) ax25_cb_put(rose_neigh->ax25); kfree(rose_neigh->digipeat); kfree(rose_neigh); return; } s = s->next; } } /* * Caller is holding rose_route_list_lock. */ static void rose_remove_route(struct rose_route *rose_route) { struct rose_route *s; if (rose_route->neigh1 != NULL) rose_route->neigh1->use--; if (rose_route->neigh2 != NULL) rose_route->neigh2->use--; if ((s = rose_route_list) == rose_route) { rose_route_list = rose_route->next; kfree(rose_route); return; } while (s != NULL && s->next != NULL) { if (s->next == rose_route) { s->next = rose_route->next; kfree(rose_route); return; } s = s->next; } } /* * "Delete" a node. Strictly speaking remove a route to a node. The node * is only deleted if no routes are left to it. */ static int rose_del_node(struct rose_route_struct *rose_route, struct net_device *dev) { struct rose_node *rose_node; struct rose_neigh *rose_neigh; int i, err = 0; spin_lock_bh(&rose_node_list_lock); spin_lock_bh(&rose_neigh_list_lock); rose_node = rose_node_list; while (rose_node != NULL) { if ((rose_node->mask == rose_route->mask) && (rosecmpm(&rose_route->address, &rose_node->address, rose_route->mask) == 0)) break; rose_node = rose_node->next; } if (rose_node == NULL || rose_node->loopback) { err = -EINVAL; goto out; } rose_neigh = rose_neigh_list; while (rose_neigh != NULL) { if (ax25cmp(&rose_route->neighbour, &rose_neigh->callsign) == 0 && rose_neigh->dev == dev) break; rose_neigh = rose_neigh->next; } if (rose_neigh == NULL) { err = -EINVAL; goto out; } for (i = 0; i < rose_node->count; i++) { if (rose_node->neighbour[i] == rose_neigh) { rose_neigh->count--; if (rose_neigh->count == 0 && rose_neigh->use == 0) rose_remove_neigh(rose_neigh); rose_node->count--; if (rose_node->count == 0) { rose_remove_node(rose_node); } else { switch (i) { case 0: rose_node->neighbour[0] = rose_node->neighbour[1]; fallthrough; case 1: rose_node->neighbour[1] = rose_node->neighbour[2]; break; case 2: break; } } goto out; } } err = -EINVAL; out: spin_unlock_bh(&rose_neigh_list_lock); spin_unlock_bh(&rose_node_list_lock); return err; } /* * Add the loopback neighbour. */ void rose_add_loopback_neigh(void) { struct rose_neigh *sn; rose_loopback_neigh = kmalloc(sizeof(struct rose_neigh), GFP_KERNEL); if (!rose_loopback_neigh) return; sn = rose_loopback_neigh; sn->callsign = null_ax25_address; sn->digipeat = NULL; sn->ax25 = NULL; sn->dev = NULL; sn->count = 0; sn->use = 0; sn->dce_mode = 1; sn->loopback = 1; sn->number = rose_neigh_no++; sn->restarted = 1; skb_queue_head_init(&sn->queue); timer_setup(&sn->ftimer, NULL, 0); timer_setup(&sn->t0timer, NULL, 0); spin_lock_bh(&rose_neigh_list_lock); sn->next = rose_neigh_list; rose_neigh_list = sn; spin_unlock_bh(&rose_neigh_list_lock); } /* * Add a loopback node. */ int rose_add_loopback_node(const rose_address *address) { struct rose_node *rose_node; int err = 0; spin_lock_bh(&rose_node_list_lock); rose_node = rose_node_list; while (rose_node != NULL) { if ((rose_node->mask == 10) && (rosecmpm(address, &rose_node->address, 10) == 0) && rose_node->loopback) break; rose_node = rose_node->next; } if (rose_node != NULL) goto out; if ((rose_node = kmalloc(sizeof(*rose_node), GFP_ATOMIC)) == NULL) { err = -ENOMEM; goto out; } rose_node->address = *address; rose_node->mask = 10; rose_node->count = 1; rose_node->loopback = 1; rose_node->neighbour[0] = rose_loopback_neigh; /* Insert at the head of list. Address is always mask=10 */ rose_node->next = rose_node_list; rose_node_list = rose_node; rose_loopback_neigh->count++; out: spin_unlock_bh(&rose_node_list_lock); return err; } /* * Delete a loopback node. */ void rose_del_loopback_node(const rose_address *address) { struct rose_node *rose_node; spin_lock_bh(&rose_node_list_lock); rose_node = rose_node_list; while (rose_node != NULL) { if ((rose_node->mask == 10) && (rosecmpm(address, &rose_node->address, 10) == 0) && rose_node->loopback) break; rose_node = rose_node->next; } if (rose_node == NULL) goto out; rose_remove_node(rose_node); rose_loopback_neigh->count--; out: spin_unlock_bh(&rose_node_list_lock); } /* * A device has been removed. Remove its routes and neighbours. */ void rose_rt_device_down(struct net_device *dev) { struct rose_neigh *s, *rose_neigh; struct rose_node *t, *rose_node; int i; spin_lock_bh(&rose_node_list_lock); spin_lock_bh(&rose_neigh_list_lock); rose_neigh = rose_neigh_list; while (rose_neigh != NULL) { s = rose_neigh; rose_neigh = rose_neigh->next; if (s->dev != dev) continue; rose_node = rose_node_list; while (rose_node != NULL) { t = rose_node; rose_node = rose_node->next; for (i = 0; i < t->count; i++) { if (t->neighbour[i] != s) continue; t->count--; switch (i) { case 0: t->neighbour[0] = t->neighbour[1]; fallthrough; case 1: t->neighbour[1] = t->neighbour[2]; break; case 2: break; } } if (t->count <= 0) rose_remove_node(t); } rose_remove_neigh(s); } spin_unlock_bh(&rose_neigh_list_lock); spin_unlock_bh(&rose_node_list_lock); } #if 0 /* Currently unused */ /* * A device has been removed. Remove its links. */ void rose_route_device_down(struct net_device *dev) { struct rose_route *s, *rose_route; spin_lock_bh(&rose_route_list_lock); rose_route = rose_route_list; while (rose_route != NULL) { s = rose_route; rose_route = rose_route->next; if (s->neigh1->dev == dev || s->neigh2->dev == dev) rose_remove_route(s); } spin_unlock_bh(&rose_route_list_lock); } #endif /* * Clear all nodes and neighbours out, except for neighbours with * active connections going through them. * Do not clear loopback neighbour and nodes. */ static int rose_clear_routes(void) { struct rose_neigh *s, *rose_neigh; struct rose_node *t, *rose_node; spin_lock_bh(&rose_node_list_lock); spin_lock_bh(&rose_neigh_list_lock); rose_neigh = rose_neigh_list; rose_node = rose_node_list; while (rose_node != NULL) { t = rose_node; rose_node = rose_node->next; if (!t->loopback) rose_remove_node(t); } while (rose_neigh != NULL) { s = rose_neigh; rose_neigh = rose_neigh->next; if (s->use == 0 && !s->loopback) { s->count = 0; rose_remove_neigh(s); } } spin_unlock_bh(&rose_neigh_list_lock); spin_unlock_bh(&rose_node_list_lock); return 0; } /* * Check that the device given is a valid AX.25 interface that is "up". * called with RTNL */ static struct net_device *rose_ax25_dev_find(char *devname) { struct net_device *dev; if ((dev = __dev_get_by_name(&init_net, devname)) == NULL) return NULL; if ((dev->flags & IFF_UP) && dev->type == ARPHRD_AX25) return dev; return NULL; } /* * Find the first active ROSE device, usually "rose0". */ struct net_device *rose_dev_first(void) { struct net_device *dev, *first = NULL; rcu_read_lock(); for_each_netdev_rcu(&init_net, dev) { if ((dev->flags & IFF_UP) && dev->type == ARPHRD_ROSE) if (first == NULL || strncmp(dev->name, first->name, 3) < 0) first = dev; } if (first) dev_hold(first); rcu_read_unlock(); return first; } /* * Find the ROSE device for the given address. */ struct net_device *rose_dev_get(rose_address *addr) { struct net_device *dev; rcu_read_lock(); for_each_netdev_rcu(&init_net, dev) { if ((dev->flags & IFF_UP) && dev->type == ARPHRD_ROSE && rosecmp(addr, (const rose_address *)dev->dev_addr) == 0) { dev_hold(dev); goto out; } } dev = NULL; out: rcu_read_unlock(); return dev; } static int rose_dev_exists(rose_address *addr) { struct net_device *dev; rcu_read_lock(); for_each_netdev_rcu(&init_net, dev) { if ((dev->flags & IFF_UP) && dev->type == ARPHRD_ROSE && rosecmp(addr, (const rose_address *)dev->dev_addr) == 0) goto out; } dev = NULL; out: rcu_read_unlock(); return dev != NULL; } struct rose_route *rose_route_free_lci(unsigned int lci, struct rose_neigh *neigh) { struct rose_route *rose_route; for (rose_route = rose_route_list; rose_route != NULL; rose_route = rose_route->next) if ((rose_route->neigh1 == neigh && rose_route->lci1 == lci) || (rose_route->neigh2 == neigh && rose_route->lci2 == lci)) return rose_route; return NULL; } /* * Find a neighbour or a route given a ROSE address. */ struct rose_neigh *rose_get_neigh(rose_address *addr, unsigned char *cause, unsigned char *diagnostic, int route_frame) { struct rose_neigh *res = NULL; struct rose_node *node; int failed = 0; int i; if (!route_frame) spin_lock_bh(&rose_node_list_lock); for (node = rose_node_list; node != NULL; node = node->next) { if (rosecmpm(addr, &node->address, node->mask) == 0) { for (i = 0; i < node->count; i++) { if (node->neighbour[i]->restarted) { res = node->neighbour[i]; goto out; } } } } if (!route_frame) { /* connect request */ for (node = rose_node_list; node != NULL; node = node->next) { if (rosecmpm(addr, &node->address, node->mask) == 0) { for (i = 0; i < node->count; i++) { if (!rose_ftimer_running(node->neighbour[i])) { res = node->neighbour[i]; goto out; } failed = 1; } } } } if (failed) { *cause = ROSE_OUT_OF_ORDER; *diagnostic = 0; } else { *cause = ROSE_NOT_OBTAINABLE; *diagnostic = 0; } out: if (!route_frame) spin_unlock_bh(&rose_node_list_lock); return res; } /* * Handle the ioctls that control the routing functions. */ int rose_rt_ioctl(unsigned int cmd, void __user *arg) { struct rose_route_struct rose_route; struct net_device *dev; int err; switch (cmd) { case SIOCADDRT: if (copy_from_user(&rose_route, arg, sizeof(struct rose_route_struct))) return -EFAULT; if ((dev = rose_ax25_dev_find(rose_route.device)) == NULL) return -EINVAL; if (rose_dev_exists(&rose_route.address)) /* Can't add routes to ourself */ return -EINVAL; if (rose_route.mask > 10) /* Mask can't be more than 10 digits */ return -EINVAL; if (rose_route.ndigis > AX25_MAX_DIGIS) return -EINVAL; err = rose_add_node(&rose_route, dev); return err; case SIOCDELRT: if (copy_from_user(&rose_route, arg, sizeof(struct rose_route_struct))) return -EFAULT; if ((dev = rose_ax25_dev_find(rose_route.device)) == NULL) return -EINVAL; err = rose_del_node(&rose_route, dev); return err; case SIOCRSCLRRT: return rose_clear_routes(); default: return -EINVAL; } return 0; } static void rose_del_route_by_neigh(struct rose_neigh *rose_neigh) { struct rose_route *rose_route, *s; rose_neigh->restarted = 0; rose_stop_t0timer(rose_neigh); rose_start_ftimer(rose_neigh); skb_queue_purge(&rose_neigh->queue); spin_lock_bh(&rose_route_list_lock); rose_route = rose_route_list; while (rose_route != NULL) { if ((rose_route->neigh1 == rose_neigh && rose_route->neigh2 == rose_neigh) || (rose_route->neigh1 == rose_neigh && rose_route->neigh2 == NULL) || (rose_route->neigh2 == rose_neigh && rose_route->neigh1 == NULL)) { s = rose_route->next; rose_remove_route(rose_route); rose_route = s; continue; } if (rose_route->neigh1 == rose_neigh) { rose_route->neigh1->use--; rose_route->neigh1 = NULL; rose_transmit_clear_request(rose_route->neigh2, rose_route->lci2, ROSE_OUT_OF_ORDER, 0); } if (rose_route->neigh2 == rose_neigh) { rose_route->neigh2->use--; rose_route->neigh2 = NULL; rose_transmit_clear_request(rose_route->neigh1, rose_route->lci1, ROSE_OUT_OF_ORDER, 0); } rose_route = rose_route->next; } spin_unlock_bh(&rose_route_list_lock); } /* * A level 2 link has timed out, therefore it appears to be a poor link, * then don't use that neighbour until it is reset. Blow away all through * routes and connections using this route. */ void rose_link_failed(ax25_cb *ax25, int reason) { struct rose_neigh *rose_neigh; spin_lock_bh(&rose_neigh_list_lock); rose_neigh = rose_neigh_list; while (rose_neigh != NULL) { if (rose_neigh->ax25 == ax25) break; rose_neigh = rose_neigh->next; } if (rose_neigh != NULL) { rose_neigh->ax25 = NULL; ax25_cb_put(ax25); rose_del_route_by_neigh(rose_neigh); rose_kill_by_neigh(rose_neigh); } spin_unlock_bh(&rose_neigh_list_lock); } /* * A device has been "downed" remove its link status. Blow away all * through routes and connections that use this device. */ void rose_link_device_down(struct net_device *dev) { struct rose_neigh *rose_neigh; for (rose_neigh = rose_neigh_list; rose_neigh != NULL; rose_neigh = rose_neigh->next) { if (rose_neigh->dev == dev) { rose_del_route_by_neigh(rose_neigh); rose_kill_by_neigh(rose_neigh); } } } /* * Route a frame to an appropriate AX.25 connection. * A NULL ax25_cb indicates an internally generated frame. */ int rose_route_frame(struct sk_buff *skb, ax25_cb *ax25) { struct rose_neigh *rose_neigh, *new_neigh; struct rose_route *rose_route; struct rose_facilities_struct facilities; rose_address *src_addr, *dest_addr; struct sock *sk; unsigned short frametype; unsigned int lci, new_lci; unsigned char cause, diagnostic; struct net_device *dev; int res = 0; char buf[11]; if (skb->len < ROSE_MIN_LEN) return res; if (!ax25) return rose_loopback_queue(skb, NULL); frametype = skb->data[2]; lci = ((skb->data[0] << 8) & 0xF00) + ((skb->data[1] << 0) & 0x0FF); if (frametype == ROSE_CALL_REQUEST && (skb->len <= ROSE_CALL_REQ_FACILITIES_OFF || skb->data[ROSE_CALL_REQ_ADDR_LEN_OFF] != ROSE_CALL_REQ_ADDR_LEN_VAL)) return res; src_addr = (rose_address *)(skb->data + ROSE_CALL_REQ_SRC_ADDR_OFF); dest_addr = (rose_address *)(skb->data + ROSE_CALL_REQ_DEST_ADDR_OFF); spin_lock_bh(&rose_neigh_list_lock); spin_lock_bh(&rose_route_list_lock); rose_neigh = rose_neigh_list; while (rose_neigh != NULL) { if (ax25cmp(&ax25->dest_addr, &rose_neigh->callsign) == 0 && ax25->ax25_dev->dev == rose_neigh->dev) break; rose_neigh = rose_neigh->next; } if (rose_neigh == NULL) { printk("rose_route : unknown neighbour or device %s\n", ax2asc(buf, &ax25->dest_addr)); goto out; } /* * Obviously the link is working, halt the ftimer. */ rose_stop_ftimer(rose_neigh); /* * LCI of zero is always for us, and its always a restart * frame. */ if (lci == 0) { rose_link_rx_restart(skb, rose_neigh, frametype); goto out; } /* * Find an existing socket. */ if ((sk = rose_find_socket(lci, rose_neigh)) != NULL) { if (frametype == ROSE_CALL_REQUEST) { struct rose_sock *rose = rose_sk(sk); /* Remove an existing unused socket */ rose_clear_queues(sk); rose->cause = ROSE_NETWORK_CONGESTION; rose->diagnostic = 0; rose->neighbour->use--; rose->neighbour = NULL; rose->lci = 0; rose->state = ROSE_STATE_0; sk->sk_state = TCP_CLOSE; sk->sk_err = 0; sk->sk_shutdown |= SEND_SHUTDOWN; if (!sock_flag(sk, SOCK_DEAD)) { sk->sk_state_change(sk); sock_set_flag(sk, SOCK_DEAD); } } else { skb_reset_transport_header(skb); res = rose_process_rx_frame(sk, skb); goto out; } } /* * Is is a Call Request and is it for us ? */ if (frametype == ROSE_CALL_REQUEST) if ((dev = rose_dev_get(dest_addr)) != NULL) { res = rose_rx_call_request(skb, dev, rose_neigh, lci); dev_put(dev); goto out; } if (!sysctl_rose_routing_control) { rose_transmit_clear_request(rose_neigh, lci, ROSE_NOT_OBTAINABLE, 0); goto out; } /* * Route it to the next in line if we have an entry for it. */ rose_route = rose_route_list; while (rose_route != NULL) { if (rose_route->lci1 == lci && rose_route->neigh1 == rose_neigh) { if (frametype == ROSE_CALL_REQUEST) { /* F6FBB - Remove an existing unused route */ rose_remove_route(rose_route); break; } else if (rose_route->neigh2 != NULL) { skb->data[0] &= 0xF0; skb->data[0] |= (rose_route->lci2 >> 8) & 0x0F; skb->data[1] = (rose_route->lci2 >> 0) & 0xFF; rose_transmit_link(skb, rose_route->neigh2); if (frametype == ROSE_CLEAR_CONFIRMATION) rose_remove_route(rose_route); res = 1; goto out; } else { if (frametype == ROSE_CLEAR_CONFIRMATION) rose_remove_route(rose_route); goto out; } } if (rose_route->lci2 == lci && rose_route->neigh2 == rose_neigh) { if (frametype == ROSE_CALL_REQUEST) { /* F6FBB - Remove an existing unused route */ rose_remove_route(rose_route); break; } else if (rose_route->neigh1 != NULL) { skb->data[0] &= 0xF0; skb->data[0] |= (rose_route->lci1 >> 8) & 0x0F; skb->data[1] = (rose_route->lci1 >> 0) & 0xFF; rose_transmit_link(skb, rose_route->neigh1); if (frametype == ROSE_CLEAR_CONFIRMATION) rose_remove_route(rose_route); res = 1; goto out; } else { if (frametype == ROSE_CLEAR_CONFIRMATION) rose_remove_route(rose_route); goto out; } } rose_route = rose_route->next; } /* * We know that: * 1. The frame isn't for us, * 2. It isn't "owned" by any existing route. */ if (frametype != ROSE_CALL_REQUEST) { /* XXX */ res = 0; goto out; } memset(&facilities, 0x00, sizeof(struct rose_facilities_struct)); if (!rose_parse_facilities(skb->data + ROSE_CALL_REQ_FACILITIES_OFF, skb->len - ROSE_CALL_REQ_FACILITIES_OFF, &facilities)) { rose_transmit_clear_request(rose_neigh, lci, ROSE_INVALID_FACILITY, 76); goto out; } /* * Check for routing loops. */ rose_route = rose_route_list; while (rose_route != NULL) { if (rose_route->rand == facilities.rand && rosecmp(src_addr, &rose_route->src_addr) == 0 && ax25cmp(&facilities.dest_call, &rose_route->src_call) == 0 && ax25cmp(&facilities.source_call, &rose_route->dest_call) == 0) { rose_transmit_clear_request(rose_neigh, lci, ROSE_NOT_OBTAINABLE, 120); goto out; } rose_route = rose_route->next; } if ((new_neigh = rose_get_neigh(dest_addr, &cause, &diagnostic, 1)) == NULL) { rose_transmit_clear_request(rose_neigh, lci, cause, diagnostic); goto out; } if ((new_lci = rose_new_lci(new_neigh)) == 0) { rose_transmit_clear_request(rose_neigh, lci, ROSE_NETWORK_CONGESTION, 71); goto out; } if ((rose_route = kmalloc(sizeof(*rose_route), GFP_ATOMIC)) == NULL) { rose_transmit_clear_request(rose_neigh, lci, ROSE_NETWORK_CONGESTION, 120); goto out; } rose_route->lci1 = lci; rose_route->src_addr = *src_addr; rose_route->dest_addr = *dest_addr; rose_route->src_call = facilities.dest_call; rose_route->dest_call = facilities.source_call; rose_route->rand = facilities.rand; rose_route->neigh1 = rose_neigh; rose_route->lci2 = new_lci; rose_route->neigh2 = new_neigh; rose_route->neigh1->use++; rose_route->neigh2->use++; rose_route->next = rose_route_list; rose_route_list = rose_route; skb->data[0] &= 0xF0; skb->data[0] |= (rose_route->lci2 >> 8) & 0x0F; skb->data[1] = (rose_route->lci2 >> 0) & 0xFF; rose_transmit_link(skb, rose_route->neigh2); res = 1; out: spin_unlock_bh(&rose_route_list_lock); spin_unlock_bh(&rose_neigh_list_lock); return res; } #ifdef CONFIG_PROC_FS static void *rose_node_start(struct seq_file *seq, loff_t *pos) __acquires(rose_node_list_lock) { struct rose_node *rose_node; int i = 1; spin_lock_bh(&rose_node_list_lock); if (*pos == 0) return SEQ_START_TOKEN; for (rose_node = rose_node_list; rose_node && i < *pos; rose_node = rose_node->next, ++i); return (i == *pos) ? rose_node : NULL; } static void *rose_node_next(struct seq_file *seq, void *v, loff_t *pos) { ++*pos; return (v == SEQ_START_TOKEN) ? rose_node_list : ((struct rose_node *)v)->next; } static void rose_node_stop(struct seq_file *seq, void *v) __releases(rose_node_list_lock) { spin_unlock_bh(&rose_node_list_lock); } static int rose_node_show(struct seq_file *seq, void *v) { char rsbuf[11]; int i; if (v == SEQ_START_TOKEN) seq_puts(seq, "address mask n neigh neigh neigh\n"); else { const struct rose_node *rose_node = v; seq_printf(seq, "%-10s %04d %d", rose2asc(rsbuf, &rose_node->address), rose_node->mask, rose_node->count); for (i = 0; i < rose_node->count; i++) seq_printf(seq, " %05d", rose_node->neighbour[i]->number); seq_puts(seq, "\n"); } return 0; } const struct seq_operations rose_node_seqops = { .start = rose_node_start, .next = rose_node_next, .stop = rose_node_stop, .show = rose_node_show, }; static void *rose_neigh_start(struct seq_file *seq, loff_t *pos) __acquires(rose_neigh_list_lock) { struct rose_neigh *rose_neigh; int i = 1; spin_lock_bh(&rose_neigh_list_lock); if (*pos == 0) return SEQ_START_TOKEN; for (rose_neigh = rose_neigh_list; rose_neigh && i < *pos; rose_neigh = rose_neigh->next, ++i); return (i == *pos) ? rose_neigh : NULL; } static void *rose_neigh_next(struct seq_file *seq, void *v, loff_t *pos) { ++*pos; return (v == SEQ_START_TOKEN) ? rose_neigh_list : ((struct rose_neigh *)v)->next; } static void rose_neigh_stop(struct seq_file *seq, void *v) __releases(rose_neigh_list_lock) { spin_unlock_bh(&rose_neigh_list_lock); } static int rose_neigh_show(struct seq_file *seq, void *v) { char buf[11]; int i; if (v == SEQ_START_TOKEN) seq_puts(seq, "addr callsign dev count use mode restart t0 tf digipeaters\n"); else { struct rose_neigh *rose_neigh = v; /* if (!rose_neigh->loopback) { */ seq_printf(seq, "%05d %-9s %-4s %3d %3d %3s %3s %3lu %3lu", rose_neigh->number, (rose_neigh->loopback) ? "RSLOOP-0" : ax2asc(buf, &rose_neigh->callsign), rose_neigh->dev ? rose_neigh->dev->name : "???", rose_neigh->count, rose_neigh->use, (rose_neigh->dce_mode) ? "DCE" : "DTE", (rose_neigh->restarted) ? "yes" : "no", ax25_display_timer(&rose_neigh->t0timer) / HZ, ax25_display_timer(&rose_neigh->ftimer) / HZ); if (rose_neigh->digipeat != NULL) { for (i = 0; i < rose_neigh->digipeat->ndigi; i++) seq_printf(seq, " %s", ax2asc(buf, &rose_neigh->digipeat->calls[i])); } seq_puts(seq, "\n"); } return 0; } const struct seq_operations rose_neigh_seqops = { .start = rose_neigh_start, .next = rose_neigh_next, .stop = rose_neigh_stop, .show = rose_neigh_show, }; static void *rose_route_start(struct seq_file *seq, loff_t *pos) __acquires(rose_route_list_lock) { struct rose_route *rose_route; int i = 1; spin_lock_bh(&rose_route_list_lock); if (*pos == 0) return SEQ_START_TOKEN; for (rose_route = rose_route_list; rose_route && i < *pos; rose_route = rose_route->next, ++i); return (i == *pos) ? rose_route : NULL; } static void *rose_route_next(struct seq_file *seq, void *v, loff_t *pos) { ++*pos; return (v == SEQ_START_TOKEN) ? rose_route_list : ((struct rose_route *)v)->next; } static void rose_route_stop(struct seq_file *seq, void *v) __releases(rose_route_list_lock) { spin_unlock_bh(&rose_route_list_lock); } static int rose_route_show(struct seq_file *seq, void *v) { char buf[11], rsbuf[11]; if (v == SEQ_START_TOKEN) seq_puts(seq, "lci address callsign neigh <-> lci address callsign neigh\n"); else { struct rose_route *rose_route = v; if (rose_route->neigh1) seq_printf(seq, "%3.3X %-10s %-9s %05d ", rose_route->lci1, rose2asc(rsbuf, &rose_route->src_addr), ax2asc(buf, &rose_route->src_call), rose_route->neigh1->number); else seq_puts(seq, "000 * * 00000 "); if (rose_route->neigh2) seq_printf(seq, "%3.3X %-10s %-9s %05d\n", rose_route->lci2, rose2asc(rsbuf, &rose_route->dest_addr), ax2asc(buf, &rose_route->dest_call), rose_route->neigh2->number); else seq_puts(seq, "000 * * 00000\n"); } return 0; } struct seq_operations rose_route_seqops = { .start = rose_route_start, .next = rose_route_next, .stop = rose_route_stop, .show = rose_route_show, }; #endif /* CONFIG_PROC_FS */ /* * Release all memory associated with ROSE routing structures. */ void __exit rose_rt_free(void) { struct rose_neigh *s, *rose_neigh = rose_neigh_list; struct rose_node *t, *rose_node = rose_node_list; struct rose_route *u, *rose_route = rose_route_list; while (rose_neigh != NULL) { s = rose_neigh; rose_neigh = rose_neigh->next; rose_remove_neigh(s); } while (rose_node != NULL) { t = rose_node; rose_node = rose_node->next; rose_remove_node(t); } while (rose_route != NULL) { u = rose_route; rose_route = rose_route->next; rose_remove_route(u); } }
6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 7 10 1 9 9 4 4 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 14 14 14 14 14 14 14 14 14 14 3 3 3 3 3 1 1 1 1 1 1 1 1 114 114 114 114 114 114 114 114 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282 2283 2284 2285 2286 2287 2288 2289 2290 2291 2292 2293 2294 2295 2296 2297 2298 2299 2300 2301 2302 2303 2304 2305 2306 2307 2308 2309 2310 2311 2312 2313 2314 2315 2316 2317 2318 2319 2320 2321 2322 2323 2324 2325 2326 2327 2328 2329 2330 2331 2332 2333 2334 2335 2336 2337 2338 2339 2340 2341 2342 2343 2344 2345 2346 2347 2348 2349 2350 2351 2352 2353 2354 2355 2356 2357 2358 2359 2360 2361 2362 2363 2364 2365 2366 2367 2368 2369 2370 2371 2372 2373 2374 2375 2376 2377 2378 2379 2380 2381 2382 2383 2384 2385 2386 2387 2388 // SPDX-License-Identifier: GPL-2.0-only /* * Syscall interface to knfsd. * * Copyright (C) 1995, 1996 Olaf Kirch <okir@monad.swb.de> */ #include <linux/slab.h> #include <linux/namei.h> #include <linux/ctype.h> #include <linux/fs_context.h> #include <linux/sunrpc/svcsock.h> #include <linux/lockd/lockd.h> #include <linux/sunrpc/addr.h> #include <linux/sunrpc/gss_api.h> #include <linux/sunrpc/rpc_pipe_fs.h> #include <linux/sunrpc/svc.h> #include <linux/module.h> #include <linux/fsnotify.h> #include <linux/nfslocalio.h> #include "idmap.h" #include "nfsd.h" #include "cache.h" #include "state.h" #include "netns.h" #include "pnfs.h" #include "filecache.h" #include "trace.h" #include "netlink.h" /* * We have a single directory with several nodes in it. */ enum { NFSD_Root = 1, NFSD_List, NFSD_Export_Stats, NFSD_Export_features, NFSD_Fh, NFSD_FO_UnlockIP, NFSD_FO_UnlockFS, NFSD_Threads, NFSD_Pool_Threads, NFSD_Pool_Stats, NFSD_Reply_Cache_Stats, NFSD_Versions, NFSD_Ports, NFSD_MaxBlkSize, NFSD_MaxConnections, NFSD_Filecache, NFSD_Leasetime, NFSD_Gracetime, NFSD_RecoveryDir, NFSD_V4EndGrace, NFSD_MaxReserved }; /* * write() for these nodes. */ static ssize_t write_filehandle(struct file *file, char *buf, size_t size); static ssize_t write_unlock_ip(struct file *file, char *buf, size_t size); static ssize_t write_unlock_fs(struct file *file, char *buf, size_t size); static ssize_t write_threads(struct file *file, char *buf, size_t size); static ssize_t write_pool_threads(struct file *file, char *buf, size_t size); static ssize_t write_versions(struct file *file, char *buf, size_t size); static ssize_t write_ports(struct file *file, char *buf, size_t size); static ssize_t write_maxblksize(struct file *file, char *buf, size_t size); static ssize_t write_maxconn(struct file *file, char *buf, size_t size); #ifdef CONFIG_NFSD_V4 static ssize_t write_leasetime(struct file *file, char *buf, size_t size); static ssize_t write_gracetime(struct file *file, char *buf, size_t size); #ifdef CONFIG_NFSD_LEGACY_CLIENT_TRACKING static ssize_t write_recoverydir(struct file *file, char *buf, size_t size); #endif static ssize_t write_v4_end_grace(struct file *file, char *buf, size_t size); #endif static ssize_t (*const write_op[])(struct file *, char *, size_t) = { [NFSD_Fh] = write_filehandle, [NFSD_FO_UnlockIP] = write_unlock_ip, [NFSD_FO_UnlockFS] = write_unlock_fs, [NFSD_Threads] = write_threads, [NFSD_Pool_Threads] = write_pool_threads, [NFSD_Versions] = write_versions, [NFSD_Ports] = write_ports, [NFSD_MaxBlkSize] = write_maxblksize, [NFSD_MaxConnections] = write_maxconn, #ifdef CONFIG_NFSD_V4 [NFSD_Leasetime] = write_leasetime, [NFSD_Gracetime] = write_gracetime, #ifdef CONFIG_NFSD_LEGACY_CLIENT_TRACKING [NFSD_RecoveryDir] = write_recoverydir, #endif [NFSD_V4EndGrace] = write_v4_end_grace, #endif }; static ssize_t nfsctl_transaction_write(struct file *file, const char __user *buf, size_t size, loff_t *pos) { ino_t ino = file_inode(file)->i_ino; char *data; ssize_t rv; if (ino >= ARRAY_SIZE(write_op) || !write_op[ino]) return -EINVAL; data = simple_transaction_get(file, buf, size); if (IS_ERR(data)) return PTR_ERR(data); rv = write_op[ino](file, data, size); if (rv < 0) return rv; simple_transaction_set(file, rv); return size; } static ssize_t nfsctl_transaction_read(struct file *file, char __user *buf, size_t size, loff_t *pos) { if (! file->private_data) { /* An attempt to read a transaction file without writing * causes a 0-byte write so that the file can return * state information */ ssize_t rv = nfsctl_transaction_write(file, buf, 0, pos); if (rv < 0) return rv; } return simple_transaction_read(file, buf, size, pos); } static const struct file_operations transaction_ops = { .write = nfsctl_transaction_write, .read = nfsctl_transaction_read, .release = simple_transaction_release, .llseek = default_llseek, }; static int exports_net_open(struct net *net, struct file *file) { int err; struct seq_file *seq; struct nfsd_net *nn = net_generic(net, nfsd_net_id); err = seq_open(file, &nfs_exports_op); if (err) return err; seq = file->private_data; seq->private = nn->svc_export_cache; return 0; } static int exports_nfsd_open(struct inode *inode, struct file *file) { return exports_net_open(inode->i_sb->s_fs_info, file); } static const struct file_operations exports_nfsd_operations = { .open = exports_nfsd_open, .read = seq_read, .llseek = seq_lseek, .release = seq_release, }; static int export_features_show(struct seq_file *m, void *v) { seq_printf(m, "0x%x 0x%x\n", NFSEXP_ALLFLAGS, NFSEXP_SECINFO_FLAGS); return 0; } DEFINE_SHOW_ATTRIBUTE(export_features); static int nfsd_pool_stats_open(struct inode *inode, struct file *file) { struct nfsd_net *nn = net_generic(inode->i_sb->s_fs_info, nfsd_net_id); return svc_pool_stats_open(&nn->nfsd_info, file); } static const struct file_operations pool_stats_operations = { .open = nfsd_pool_stats_open, .read = seq_read, .llseek = seq_lseek, .release = seq_release, }; DEFINE_SHOW_ATTRIBUTE(nfsd_reply_cache_stats); DEFINE_SHOW_ATTRIBUTE(nfsd_file_cache_stats); /*----------------------------------------------------------------------------*/ /* * payload - write methods */ static inline struct net *netns(struct file *file) { return file_inode(file)->i_sb->s_fs_info; } /* * write_unlock_ip - Release all locks used by a client * * Experimental. * * Input: * buf: '\n'-terminated C string containing a * presentation format IP address * size: length of C string in @buf * Output: * On success: returns zero if all specified locks were released; * returns one if one or more locks were not released * On error: return code is negative errno value */ static ssize_t write_unlock_ip(struct file *file, char *buf, size_t size) { struct sockaddr_storage address; struct sockaddr *sap = (struct sockaddr *)&address; size_t salen = sizeof(address); char *fo_path; struct net *net = netns(file); /* sanity check */ if (size == 0) return -EINVAL; if (buf[size-1] != '\n') return -EINVAL; fo_path = buf; if (qword_get(&buf, fo_path, size) < 0) return -EINVAL; if (rpc_pton(net, fo_path, size, sap, salen) == 0) return -EINVAL; trace_nfsd_ctl_unlock_ip(net, buf); return nlmsvc_unlock_all_by_ip(sap); } /* * write_unlock_fs - Release all locks on a local file system * * Experimental. * * Input: * buf: '\n'-terminated C string containing the * absolute pathname of a local file system * size: length of C string in @buf * Output: * On success: returns zero if all specified locks were released; * returns one if one or more locks were not released * On error: return code is negative errno value */ static ssize_t write_unlock_fs(struct file *file, char *buf, size_t size) { struct path path; char *fo_path; int error; /* sanity check */ if (size == 0) return -EINVAL; if (buf[size-1] != '\n') return -EINVAL; fo_path = buf; if (qword_get(&buf, fo_path, size) < 0) return -EINVAL; trace_nfsd_ctl_unlock_fs(netns(file), fo_path); error = kern_path(fo_path, 0, &path); if (error) return error; /* * XXX: Needs better sanity checking. Otherwise we could end up * releasing locks on the wrong file system. * * For example: * 1. Does the path refer to a directory? * 2. Is that directory a mount point, or * 3. Is that directory the root of an exported file system? */ error = nlmsvc_unlock_all_by_sb(path.dentry->d_sb); nfsd4_revoke_states(netns(file), path.dentry->d_sb); path_put(&path); return error; } /* * write_filehandle - Get a variable-length NFS file handle by path * * On input, the buffer contains a '\n'-terminated C string comprised of * three alphanumeric words separated by whitespace. The string may * contain escape sequences. * * Input: * buf: * domain: client domain name * path: export pathname * maxsize: numeric maximum size of * @buf * size: length of C string in @buf * Output: * On success: passed-in buffer filled with '\n'-terminated C * string containing a ASCII hex text version * of the NFS file handle; * return code is the size in bytes of the string * On error: return code is negative errno value */ static ssize_t write_filehandle(struct file *file, char *buf, size_t size) { char *dname, *path; int maxsize; char *mesg = buf; int len; struct auth_domain *dom; struct knfsd_fh fh; if (size == 0) return -EINVAL; if (buf[size-1] != '\n') return -EINVAL; buf[size-1] = 0; dname = mesg; len = qword_get(&mesg, dname, size); if (len <= 0) return -EINVAL; path = dname+len+1; len = qword_get(&mesg, path, size); if (len <= 0) return -EINVAL; len = get_int(&mesg, &maxsize); if (len) return len; if (maxsize < NFS_FHSIZE) return -EINVAL; maxsize = min(maxsize, NFS3_FHSIZE); if (qword_get(&mesg, mesg, size) > 0) return -EINVAL; trace_nfsd_ctl_filehandle(netns(file), dname, path, maxsize); /* we have all the words, they are in buf.. */ dom = unix_domain_find(dname); if (!dom) return -ENOMEM; len = exp_rootfh(netns(file), dom, path, &fh, maxsize); auth_domain_put(dom); if (len) return len; mesg = buf; len = SIMPLE_TRANSACTION_LIMIT; qword_addhex(&mesg, &len, fh.fh_raw, fh.fh_size); mesg[-1] = '\n'; return mesg - buf; } /* * write_threads - Start NFSD, or report the current number of running threads * * Input: * buf: ignored * size: zero * Output: * On success: passed-in buffer filled with '\n'-terminated C * string numeric value representing the number of * running NFSD threads; * return code is the size in bytes of the string * On error: return code is zero * * OR * * Input: * buf: C string containing an unsigned * integer value representing the * number of NFSD threads to start * size: non-zero length of C string in @buf * Output: * On success: NFS service is started; * passed-in buffer filled with '\n'-terminated C * string numeric value representing the number of * running NFSD threads; * return code is the size in bytes of the string * On error: return code is zero or a negative errno value */ static ssize_t write_threads(struct file *file, char *buf, size_t size) { char *mesg = buf; int rv; struct net *net = netns(file); if (size > 0) { int newthreads; rv = get_int(&mesg, &newthreads); if (rv) return rv; if (newthreads < 0) return -EINVAL; trace_nfsd_ctl_threads(net, newthreads); mutex_lock(&nfsd_mutex); rv = nfsd_svc(1, &newthreads, net, file->f_cred, NULL); mutex_unlock(&nfsd_mutex); if (rv < 0) return rv; } else rv = nfsd_nrthreads(net); return scnprintf(buf, SIMPLE_TRANSACTION_LIMIT, "%d\n", rv); } /* * write_pool_threads - Set or report the current number of threads per pool * * Input: * buf: ignored * size: zero * * OR * * Input: * buf: C string containing whitespace- * separated unsigned integer values * representing the number of NFSD * threads to start in each pool * size: non-zero length of C string in @buf * Output: * On success: passed-in buffer filled with '\n'-terminated C * string containing integer values representing the * number of NFSD threads in each pool; * return code is the size in bytes of the string * On error: return code is zero or a negative errno value */ static ssize_t write_pool_threads(struct file *file, char *buf, size_t size) { /* if size > 0, look for an array of number of threads per node * and apply them then write out number of threads per node as reply */ char *mesg = buf; int i; int rv; int len; int npools; int *nthreads; struct net *net = netns(file); mutex_lock(&nfsd_mutex); npools = nfsd_nrpools(net); if (npools == 0) { /* * NFS is shut down. The admin can start it by * writing to the threads file but NOT the pool_threads * file, sorry. Report zero threads. */ mutex_unlock(&nfsd_mutex); strcpy(buf, "0\n"); return strlen(buf); } nthreads = kcalloc(npools, sizeof(int), GFP_KERNEL); rv = -ENOMEM; if (nthreads == NULL) goto out_free; if (size > 0) { for (i = 0; i < npools; i++) { rv = get_int(&mesg, &nthreads[i]); if (rv == -ENOENT) break; /* fewer numbers than pools */ if (rv) goto out_free; /* syntax error */ rv = -EINVAL; if (nthreads[i] < 0) goto out_free; trace_nfsd_ctl_pool_threads(net, i, nthreads[i]); } /* * There must always be a thread in pool 0; the admin * can't shut down NFS completely using pool_threads. */ if (nthreads[0] == 0) nthreads[0] = 1; rv = nfsd_set_nrthreads(i, nthreads, net); if (rv) goto out_free; } rv = nfsd_get_nrthreads(npools, nthreads, net); if (rv) goto out_free; mesg = buf; size = SIMPLE_TRANSACTION_LIMIT; for (i = 0; i < npools && size > 0; i++) { snprintf(mesg, size, "%d%c", nthreads[i], (i == npools-1 ? '\n' : ' ')); len = strlen(mesg); size -= len; mesg += len; } rv = mesg - buf; out_free: kfree(nthreads); mutex_unlock(&nfsd_mutex); return rv; } static ssize_t nfsd_print_version_support(struct nfsd_net *nn, char *buf, int remaining, const char *sep, unsigned vers, int minor) { const char *format = minor < 0 ? "%s%c%u" : "%s%c%u.%u"; bool supported = !!nfsd_vers(nn, vers, NFSD_TEST); if (vers == 4 && minor >= 0 && !nfsd_minorversion(nn, minor, NFSD_TEST)) supported = false; if (minor == 0 && supported) /* * special case for backward compatability. * +4.0 is never reported, it is implied by * +4, unless -4.0 is present. */ return 0; return snprintf(buf, remaining, format, sep, supported ? '+' : '-', vers, minor); } static ssize_t __write_versions(struct file *file, char *buf, size_t size) { char *mesg = buf; char *vers, *minorp, sign; int len, num, remaining; ssize_t tlen = 0; char *sep; struct nfsd_net *nn = net_generic(netns(file), nfsd_net_id); if (size > 0) { if (nn->nfsd_serv) /* Cannot change versions without updating * nn->nfsd_serv->sv_xdrsize, and reallocing * rq_argp and rq_resp */ return -EBUSY; if (buf[size-1] != '\n') return -EINVAL; buf[size-1] = 0; trace_nfsd_ctl_version(netns(file), buf); vers = mesg; len = qword_get(&mesg, vers, size); if (len <= 0) return -EINVAL; do { enum vers_op cmd; unsigned minor; sign = *vers; if (sign == '+' || sign == '-') num = simple_strtol((vers+1), &minorp, 0); else num = simple_strtol(vers, &minorp, 0); if (*minorp == '.') { if (num != 4) return -EINVAL; if (kstrtouint(minorp+1, 0, &minor) < 0) return -EINVAL; } cmd = sign == '-' ? NFSD_CLEAR : NFSD_SET; switch(num) { #ifdef CONFIG_NFSD_V2 case 2: #endif case 3: nfsd_vers(nn, num, cmd); break; case 4: if (*minorp == '.') { if (nfsd_minorversion(nn, minor, cmd) < 0) return -EINVAL; } else if ((cmd == NFSD_SET) != nfsd_vers(nn, num, NFSD_TEST)) { /* * Either we have +4 and no minors are enabled, * or we have -4 and at least one minor is enabled. * In either case, propagate 'cmd' to all minors. */ minor = 0; while (nfsd_minorversion(nn, minor, cmd) >= 0) minor++; } break; default: /* Ignore requests to disable non-existent versions */ if (cmd == NFSD_SET) return -EINVAL; } vers += len + 1; } while ((len = qword_get(&mesg, vers, size)) > 0); /* If all get turned off, turn them back on, as * having no versions is BAD */ nfsd_reset_versions(nn); } /* Now write current state into reply buffer */ sep = ""; remaining = SIMPLE_TRANSACTION_LIMIT; for (num=2 ; num <= 4 ; num++) { int minor; if (!nfsd_vers(nn, num, NFSD_AVAIL)) continue; minor = -1; do { len = nfsd_print_version_support(nn, buf, remaining, sep, num, minor); if (len >= remaining) goto out; remaining -= len; buf += len; tlen += len; minor++; if (len) sep = " "; } while (num == 4 && minor <= NFSD_SUPPORTED_MINOR_VERSION); } out: len = snprintf(buf, remaining, "\n"); if (len >= remaining) return -EINVAL; return tlen + len; } /* * write_versions - Set or report the available NFS protocol versions * * Input: * buf: ignored * size: zero * Output: * On success: passed-in buffer filled with '\n'-terminated C * string containing positive or negative integer * values representing the current status of each * protocol version; * return code is the size in bytes of the string * On error: return code is zero or a negative errno value * * OR * * Input: * buf: C string containing whitespace- * separated positive or negative * integer values representing NFS * protocol versions to enable ("+n") * or disable ("-n") * size: non-zero length of C string in @buf * Output: * On success: status of zero or more protocol versions has * been updated; passed-in buffer filled with * '\n'-terminated C string containing positive * or negative integer values representing the * current status of each protocol version; * return code is the size in bytes of the string * On error: return code is zero or a negative errno value */ static ssize_t write_versions(struct file *file, char *buf, size_t size) { ssize_t rv; mutex_lock(&nfsd_mutex); rv = __write_versions(file, buf, size); mutex_unlock(&nfsd_mutex); return rv; } /* * Zero-length write. Return a list of NFSD's current listener * transports. */ static ssize_t __write_ports_names(char *buf, struct net *net) { struct nfsd_net *nn = net_generic(net, nfsd_net_id); if (nn->nfsd_serv == NULL) return 0; return svc_xprt_names(nn->nfsd_serv, buf, SIMPLE_TRANSACTION_LIMIT); } /* * A single 'fd' number was written, in which case it must be for * a socket of a supported family/protocol, and we use it as an * nfsd listener. */ static ssize_t __write_ports_addfd(char *buf, struct net *net, const struct cred *cred) { char *mesg = buf; int fd, err; struct nfsd_net *nn = net_generic(net, nfsd_net_id); struct svc_serv *serv; err = get_int(&mesg, &fd); if (err != 0 || fd < 0) return -EINVAL; trace_nfsd_ctl_ports_addfd(net, fd); err = nfsd_create_serv(net); if (err != 0) return err; serv = nn->nfsd_serv; err = svc_addsock(serv, net, fd, buf, SIMPLE_TRANSACTION_LIMIT, cred); if (!serv->sv_nrthreads && list_empty(&nn->nfsd_serv->sv_permsocks)) nfsd_destroy_serv(net); return err; } /* * A transport listener is added by writing its transport name and * a port number. */ static ssize_t __write_ports_addxprt(char *buf, struct net *net, const struct cred *cred) { char transport[16]; struct svc_xprt *xprt; int port, err; struct nfsd_net *nn = net_generic(net, nfsd_net_id); struct svc_serv *serv; if (sscanf(buf, "%15s %5u", transport, &port) != 2) return -EINVAL; if (port < 1 || port > USHRT_MAX) return -EINVAL; trace_nfsd_ctl_ports_addxprt(net, transport, port); err = nfsd_create_serv(net); if (err != 0) return err; serv = nn->nfsd_serv; err = svc_xprt_create(serv, transport, net, PF_INET, port, SVC_SOCK_ANONYMOUS, cred); if (err < 0) goto out_err; err = svc_xprt_create(serv, transport, net, PF_INET6, port, SVC_SOCK_ANONYMOUS, cred); if (err < 0 && err != -EAFNOSUPPORT) goto out_close; return 0; out_close: xprt = svc_find_xprt(serv, transport, net, PF_INET, port); if (xprt != NULL) { svc_xprt_close(xprt); svc_xprt_put(xprt); } out_err: if (!serv->sv_nrthreads && list_empty(&nn->nfsd_serv->sv_permsocks)) nfsd_destroy_serv(net); return err; } static ssize_t __write_ports(struct file *file, char *buf, size_t size, struct net *net) { if (size == 0) return __write_ports_names(buf, net); if (isdigit(buf[0])) return __write_ports_addfd(buf, net, file->f_cred); if (isalpha(buf[0])) return __write_ports_addxprt(buf, net, file->f_cred); return -EINVAL; } /* * write_ports - Pass a socket file descriptor or transport name to listen on * * Input: * buf: ignored * size: zero * Output: * On success: passed-in buffer filled with a '\n'-terminated C * string containing a whitespace-separated list of * named NFSD listeners; * return code is the size in bytes of the string * On error: return code is zero or a negative errno value * * OR * * Input: * buf: C string containing an unsigned * integer value representing a bound * but unconnected socket that is to be * used as an NFSD listener; listen(3) * must be called for a SOCK_STREAM * socket, otherwise it is ignored * size: non-zero length of C string in @buf * Output: * On success: NFS service is started; * passed-in buffer filled with a '\n'-terminated C * string containing a unique alphanumeric name of * the listener; * return code is the size in bytes of the string * On error: return code is a negative errno value * * OR * * Input: * buf: C string containing a transport * name and an unsigned integer value * representing the port to listen on, * separated by whitespace * size: non-zero length of C string in @buf * Output: * On success: returns zero; NFS service is started * On error: return code is a negative errno value */ static ssize_t write_ports(struct file *file, char *buf, size_t size) { ssize_t rv; mutex_lock(&nfsd_mutex); rv = __write_ports(file, buf, size, netns(file)); mutex_unlock(&nfsd_mutex); return rv; } int nfsd_max_blksize; /* * write_maxblksize - Set or report the current NFS blksize * * Input: * buf: ignored * size: zero * * OR * * Input: * buf: C string containing an unsigned * integer value representing the new * NFS blksize * size: non-zero length of C string in @buf * Output: * On success: passed-in buffer filled with '\n'-terminated C string * containing numeric value of the current NFS blksize * setting; * return code is the size in bytes of the string * On error: return code is zero or a negative errno value */ static ssize_t write_maxblksize(struct file *file, char *buf, size_t size) { char *mesg = buf; struct nfsd_net *nn = net_generic(netns(file), nfsd_net_id); if (size > 0) { int bsize; int rv = get_int(&mesg, &bsize); if (rv) return rv; trace_nfsd_ctl_maxblksize(netns(file), bsize); /* force bsize into allowed range and * required alignment. */ bsize = max_t(int, bsize, 1024); bsize = min_t(int, bsize, NFSSVC_MAXBLKSIZE); bsize &= ~(1024-1); mutex_lock(&nfsd_mutex); if (nn->nfsd_serv) { mutex_unlock(&nfsd_mutex); return -EBUSY; } nfsd_max_blksize = bsize; mutex_unlock(&nfsd_mutex); } return scnprintf(buf, SIMPLE_TRANSACTION_LIMIT, "%d\n", nfsd_max_blksize); } /* * write_maxconn - Set or report the current max number of connections * * Input: * buf: ignored * size: zero * OR * * Input: * buf: C string containing an unsigned * integer value representing the new * number of max connections * size: non-zero length of C string in @buf * Output: * On success: passed-in buffer filled with '\n'-terminated C string * containing numeric value of max_connections setting * for this net namespace; * return code is the size in bytes of the string * On error: return code is zero or a negative errno value */ static ssize_t write_maxconn(struct file *file, char *buf, size_t size) { char *mesg = buf; struct nfsd_net *nn = net_generic(netns(file), nfsd_net_id); unsigned int maxconn = nn->max_connections; if (size > 0) { int rv = get_uint(&mesg, &maxconn); if (rv) return rv; trace_nfsd_ctl_maxconn(netns(file), maxconn); nn->max_connections = maxconn; } return scnprintf(buf, SIMPLE_TRANSACTION_LIMIT, "%u\n", maxconn); } #ifdef CONFIG_NFSD_V4 static ssize_t __nfsd4_write_time(struct file *file, char *buf, size_t size, time64_t *time, struct nfsd_net *nn) { struct dentry *dentry = file_dentry(file); char *mesg = buf; int rv, i; if (size > 0) { if (nn->nfsd_serv) return -EBUSY; rv = get_int(&mesg, &i); if (rv) return rv; trace_nfsd_ctl_time(netns(file), dentry->d_name.name, dentry->d_name.len, i); /* * Some sanity checking. We don't have a reason for * these particular numbers, but problems with the * extremes are: * - Too short: the briefest network outage may * cause clients to lose all their locks. Also, * the frequent polling may be wasteful. * - Too long: do you really want reboot recovery * to take more than an hour? Or to make other * clients wait an hour before being able to * revoke a dead client's locks? */ if (i < 10 || i > 3600) return -EINVAL; *time = i; } return scnprintf(buf, SIMPLE_TRANSACTION_LIMIT, "%lld\n", *time); } static ssize_t nfsd4_write_time(struct file *file, char *buf, size_t size, time64_t *time, struct nfsd_net *nn) { ssize_t rv; mutex_lock(&nfsd_mutex); rv = __nfsd4_write_time(file, buf, size, time, nn); mutex_unlock(&nfsd_mutex); return rv; } /* * write_leasetime - Set or report the current NFSv4 lease time * * Input: * buf: ignored * size: zero * * OR * * Input: * buf: C string containing an unsigned * integer value representing the new * NFSv4 lease expiry time * size: non-zero length of C string in @buf * Output: * On success: passed-in buffer filled with '\n'-terminated C * string containing unsigned integer value of the * current lease expiry time; * return code is the size in bytes of the string * On error: return code is zero or a negative errno value */ static ssize_t write_leasetime(struct file *file, char *buf, size_t size) { struct nfsd_net *nn = net_generic(netns(file), nfsd_net_id); return nfsd4_write_time(file, buf, size, &nn->nfsd4_lease, nn); } /* * write_gracetime - Set or report current NFSv4 grace period time * * As above, but sets the time of the NFSv4 grace period. * * Note this should never be set to less than the *previous* * lease-period time, but we don't try to enforce this. (In the common * case (a new boot), we don't know what the previous lease time was * anyway.) */ static ssize_t write_gracetime(struct file *file, char *buf, size_t size) { struct nfsd_net *nn = net_generic(netns(file), nfsd_net_id); return nfsd4_write_time(file, buf, size, &nn->nfsd4_grace, nn); } #ifdef CONFIG_NFSD_LEGACY_CLIENT_TRACKING static ssize_t __write_recoverydir(struct file *file, char *buf, size_t size, struct nfsd_net *nn) { char *mesg = buf; char *recdir; int len, status; if (size > 0) { if (nn->nfsd_serv) return -EBUSY; if (size > PATH_MAX || buf[size-1] != '\n') return -EINVAL; buf[size-1] = 0; recdir = mesg; len = qword_get(&mesg, recdir, size); if (len <= 0) return -EINVAL; trace_nfsd_ctl_recoverydir(netns(file), recdir); status = nfs4_reset_recoverydir(recdir); if (status) return status; } return scnprintf(buf, SIMPLE_TRANSACTION_LIMIT, "%s\n", nfs4_recoverydir()); } /* * write_recoverydir - Set or report the pathname of the recovery directory * * Input: * buf: ignored * size: zero * * OR * * Input: * buf: C string containing the pathname * of the directory on a local file * system containing permanent NFSv4 * recovery data * size: non-zero length of C string in @buf * Output: * On success: passed-in buffer filled with '\n'-terminated C string * containing the current recovery pathname setting; * return code is the size in bytes of the string * On error: return code is zero or a negative errno value */ static ssize_t write_recoverydir(struct file *file, char *buf, size_t size) { ssize_t rv; struct nfsd_net *nn = net_generic(netns(file), nfsd_net_id); mutex_lock(&nfsd_mutex); rv = __write_recoverydir(file, buf, size, nn); mutex_unlock(&nfsd_mutex); return rv; } #endif /* * write_v4_end_grace - release grace period for nfsd's v4.x lock manager * * Input: * buf: ignored * size: zero * OR * * Input: * buf: any value * size: non-zero length of C string in @buf * Output: * passed-in buffer filled with "Y" or "N" with a newline * and NULL-terminated C string. This indicates whether * the grace period has ended in the current net * namespace. Return code is the size in bytes of the * string. Writing a string that starts with 'Y', 'y', or * '1' to the file will end the grace period for nfsd's v4 * lock manager. */ static ssize_t write_v4_end_grace(struct file *file, char *buf, size_t size) { struct nfsd_net *nn = net_generic(netns(file), nfsd_net_id); if (size > 0) { switch(buf[0]) { case 'Y': case 'y': case '1': if (!nn->nfsd_serv) return -EBUSY; trace_nfsd_end_grace(netns(file)); nfsd4_end_grace(nn); break; default: return -EINVAL; } } return scnprintf(buf, SIMPLE_TRANSACTION_LIMIT, "%c\n", nn->grace_ended ? 'Y' : 'N'); } #endif /*----------------------------------------------------------------------------*/ /* * populating the filesystem. */ /* Basically copying rpc_get_inode. */ static struct inode *nfsd_get_inode(struct super_block *sb, umode_t mode) { struct inode *inode = new_inode(sb); if (!inode) return NULL; /* Following advice from simple_fill_super documentation: */ inode->i_ino = iunique(sb, NFSD_MaxReserved); inode->i_mode = mode; simple_inode_init_ts(inode); switch (mode & S_IFMT) { case S_IFDIR: inode->i_fop = &simple_dir_operations; inode->i_op = &simple_dir_inode_operations; inc_nlink(inode); break; case S_IFLNK: inode->i_op = &simple_symlink_inode_operations; break; default: break; } return inode; } static int __nfsd_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode, struct nfsdfs_client *ncl) { struct inode *inode; inode = nfsd_get_inode(dir->i_sb, mode); if (!inode) return -ENOMEM; if (ncl) { inode->i_private = ncl; kref_get(&ncl->cl_ref); } d_add(dentry, inode); inc_nlink(dir); fsnotify_mkdir(dir, dentry); return 0; } static struct dentry *nfsd_mkdir(struct dentry *parent, struct nfsdfs_client *ncl, char *name) { struct inode *dir = parent->d_inode; struct dentry *dentry; int ret = -ENOMEM; inode_lock(dir); dentry = d_alloc_name(parent, name); if (!dentry) goto out_err; ret = __nfsd_mkdir(d_inode(parent), dentry, S_IFDIR | 0600, ncl); if (ret) goto out_err; out: inode_unlock(dir); return dentry; out_err: dput(dentry); dentry = ERR_PTR(ret); goto out; } #if IS_ENABLED(CONFIG_SUNRPC_GSS) static int __nfsd_symlink(struct inode *dir, struct dentry *dentry, umode_t mode, const char *content) { struct inode *inode; inode = nfsd_get_inode(dir->i_sb, mode); if (!inode) return -ENOMEM; inode->i_link = (char *)content; inode->i_size = strlen(content); d_add(dentry, inode); inc_nlink(dir); fsnotify_create(dir, dentry); return 0; } /* * @content is assumed to be a NUL-terminated string that lives * longer than the symlink itself. */ static void _nfsd_symlink(struct dentry *parent, const char *name, const char *content) { struct inode *dir = parent->d_inode; struct dentry *dentry; int ret; inode_lock(dir); dentry = d_alloc_name(parent, name); if (!dentry) goto out; ret = __nfsd_symlink(d_inode(parent), dentry, S_IFLNK | 0777, content); if (ret) dput(dentry); out: inode_unlock(dir); } #else static inline void _nfsd_symlink(struct dentry *parent, const char *name, const char *content) { } #endif static void clear_ncl(struct dentry *dentry) { struct inode *inode = d_inode(dentry); struct nfsdfs_client *ncl = inode->i_private; spin_lock(&inode->i_lock); inode->i_private = NULL; spin_unlock(&inode->i_lock); kref_put(&ncl->cl_ref, ncl->cl_release); } struct nfsdfs_client *get_nfsdfs_client(struct inode *inode) { struct nfsdfs_client *nc; spin_lock(&inode->i_lock); nc = inode->i_private; if (nc) kref_get(&nc->cl_ref); spin_unlock(&inode->i_lock); return nc; } /* XXX: cut'n'paste from simple_fill_super; figure out if we could share * code instead. */ static int nfsdfs_create_files(struct dentry *root, const struct tree_descr *files, struct nfsdfs_client *ncl, struct dentry **fdentries) { struct inode *dir = d_inode(root); struct inode *inode; struct dentry *dentry; int i; inode_lock(dir); for (i = 0; files->name && files->name[0]; i++, files++) { dentry = d_alloc_name(root, files->name); if (!dentry) goto out; inode = nfsd_get_inode(d_inode(root)->i_sb, S_IFREG | files->mode); if (!inode) { dput(dentry); goto out; } kref_get(&ncl->cl_ref); inode->i_fop = files->ops; inode->i_private = ncl; d_add(dentry, inode); fsnotify_create(dir, dentry); if (fdentries) fdentries[i] = dentry; } inode_unlock(dir); return 0; out: inode_unlock(dir); return -ENOMEM; } /* on success, returns positive number unique to that client. */ struct dentry *nfsd_client_mkdir(struct nfsd_net *nn, struct nfsdfs_client *ncl, u32 id, const struct tree_descr *files, struct dentry **fdentries) { struct dentry *dentry; char name[11]; int ret; sprintf(name, "%u", id); dentry = nfsd_mkdir(nn->nfsd_client_dir, ncl, name); if (IS_ERR(dentry)) /* XXX: tossing errors? */ return NULL; ret = nfsdfs_create_files(dentry, files, ncl, fdentries); if (ret) { nfsd_client_rmdir(dentry); return NULL; } return dentry; } /* Taken from __rpc_rmdir: */ void nfsd_client_rmdir(struct dentry *dentry) { simple_recursive_removal(dentry, clear_ncl); } static int nfsd_fill_super(struct super_block *sb, struct fs_context *fc) { struct nfsd_net *nn = net_generic(current->nsproxy->net_ns, nfsd_net_id); struct dentry *dentry; int ret; static const struct tree_descr nfsd_files[] = { [NFSD_List] = {"exports", &exports_nfsd_operations, S_IRUGO}, /* Per-export io stats use same ops as exports file */ [NFSD_Export_Stats] = {"export_stats", &exports_nfsd_operations, S_IRUGO}, [NFSD_Export_features] = {"export_features", &export_features_fops, S_IRUGO}, [NFSD_FO_UnlockIP] = {"unlock_ip", &transaction_ops, S_IWUSR|S_IRUSR}, [NFSD_FO_UnlockFS] = {"unlock_filesystem", &transaction_ops, S_IWUSR|S_IRUSR}, [NFSD_Fh] = {"filehandle", &transaction_ops, S_IWUSR|S_IRUSR}, [NFSD_Threads] = {"threads", &transaction_ops, S_IWUSR|S_IRUSR}, [NFSD_Pool_Threads] = {"pool_threads", &transaction_ops, S_IWUSR|S_IRUSR}, [NFSD_Pool_Stats] = {"pool_stats", &pool_stats_operations, S_IRUGO}, [NFSD_Reply_Cache_Stats] = {"reply_cache_stats", &nfsd_reply_cache_stats_fops, S_IRUGO}, [NFSD_Versions] = {"versions", &transaction_ops, S_IWUSR|S_IRUSR}, [NFSD_Ports] = {"portlist", &transaction_ops, S_IWUSR|S_IRUGO}, [NFSD_MaxBlkSize] = {"max_block_size", &transaction_ops, S_IWUSR|S_IRUGO}, [NFSD_MaxConnections] = {"max_connections", &transaction_ops, S_IWUSR|S_IRUGO}, [NFSD_Filecache] = {"filecache", &nfsd_file_cache_stats_fops, S_IRUGO}, #ifdef CONFIG_NFSD_V4 [NFSD_Leasetime] = {"nfsv4leasetime", &transaction_ops, S_IWUSR|S_IRUSR}, [NFSD_Gracetime] = {"nfsv4gracetime", &transaction_ops, S_IWUSR|S_IRUSR}, #ifdef CONFIG_NFSD_LEGACY_CLIENT_TRACKING [NFSD_RecoveryDir] = {"nfsv4recoverydir", &transaction_ops, S_IWUSR|S_IRUSR}, #endif [NFSD_V4EndGrace] = {"v4_end_grace", &transaction_ops, S_IWUSR|S_IRUGO}, #endif /* last one */ {""} }; ret = simple_fill_super(sb, 0x6e667364, nfsd_files); if (ret) return ret; _nfsd_symlink(sb->s_root, "supported_krb5_enctypes", "/proc/net/rpc/gss_krb5_enctypes"); dentry = nfsd_mkdir(sb->s_root, NULL, "clients"); if (IS_ERR(dentry)) return PTR_ERR(dentry); nn->nfsd_client_dir = dentry; return 0; } static int nfsd_fs_get_tree(struct fs_context *fc) { return get_tree_keyed(fc, nfsd_fill_super, get_net(fc->net_ns)); } static void nfsd_fs_free_fc(struct fs_context *fc) { if (fc->s_fs_info) put_net(fc->s_fs_info); } static const struct fs_context_operations nfsd_fs_context_ops = { .free = nfsd_fs_free_fc, .get_tree = nfsd_fs_get_tree, }; static int nfsd_init_fs_context(struct fs_context *fc) { put_user_ns(fc->user_ns); fc->user_ns = get_user_ns(fc->net_ns->user_ns); fc->ops = &nfsd_fs_context_ops; return 0; } static void nfsd_umount(struct super_block *sb) { struct net *net = sb->s_fs_info; nfsd_shutdown_threads(net); kill_litter_super(sb); put_net(net); } static struct file_system_type nfsd_fs_type = { .owner = THIS_MODULE, .name = "nfsd", .init_fs_context = nfsd_init_fs_context, .kill_sb = nfsd_umount, }; MODULE_ALIAS_FS("nfsd"); #ifdef CONFIG_PROC_FS static int exports_proc_open(struct inode *inode, struct file *file) { return exports_net_open(current->nsproxy->net_ns, file); } static const struct proc_ops exports_proc_ops = { .proc_open = exports_proc_open, .proc_read = seq_read, .proc_lseek = seq_lseek, .proc_release = seq_release, }; static int create_proc_exports_entry(void) { struct proc_dir_entry *entry; entry = proc_mkdir("fs/nfs", NULL); if (!entry) return -ENOMEM; entry = proc_create("exports", 0, entry, &exports_proc_ops); if (!entry) { remove_proc_entry("fs/nfs", NULL); return -ENOMEM; } return 0; } #else /* CONFIG_PROC_FS */ static int create_proc_exports_entry(void) { return 0; } #endif unsigned int nfsd_net_id; static int nfsd_genl_rpc_status_compose_msg(struct sk_buff *skb, struct netlink_callback *cb, struct nfsd_genl_rqstp *rqstp) { void *hdr; u32 i; hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, &nfsd_nl_family, 0, NFSD_CMD_RPC_STATUS_GET); if (!hdr) return -ENOBUFS; if (nla_put_be32(skb, NFSD_A_RPC_STATUS_XID, rqstp->rq_xid) || nla_put_u32(skb, NFSD_A_RPC_STATUS_FLAGS, rqstp->rq_flags) || nla_put_u32(skb, NFSD_A_RPC_STATUS_PROG, rqstp->rq_prog) || nla_put_u32(skb, NFSD_A_RPC_STATUS_PROC, rqstp->rq_proc) || nla_put_u8(skb, NFSD_A_RPC_STATUS_VERSION, rqstp->rq_vers) || nla_put_s64(skb, NFSD_A_RPC_STATUS_SERVICE_TIME, ktime_to_us(rqstp->rq_stime), NFSD_A_RPC_STATUS_PAD)) return -ENOBUFS; switch (rqstp->rq_saddr.sa_family) { case AF_INET: { const struct sockaddr_in *s_in, *d_in; s_in = (const struct sockaddr_in *)&rqstp->rq_saddr; d_in = (const struct sockaddr_in *)&rqstp->rq_daddr; if (nla_put_in_addr(skb, NFSD_A_RPC_STATUS_SADDR4, s_in->sin_addr.s_addr) || nla_put_in_addr(skb, NFSD_A_RPC_STATUS_DADDR4, d_in->sin_addr.s_addr) || nla_put_be16(skb, NFSD_A_RPC_STATUS_SPORT, s_in->sin_port) || nla_put_be16(skb, NFSD_A_RPC_STATUS_DPORT, d_in->sin_port)) return -ENOBUFS; break; } case AF_INET6: { const struct sockaddr_in6 *s_in, *d_in; s_in = (const struct sockaddr_in6 *)&rqstp->rq_saddr; d_in = (const struct sockaddr_in6 *)&rqstp->rq_daddr; if (nla_put_in6_addr(skb, NFSD_A_RPC_STATUS_SADDR6, &s_in->sin6_addr) || nla_put_in6_addr(skb, NFSD_A_RPC_STATUS_DADDR6, &d_in->sin6_addr) || nla_put_be16(skb, NFSD_A_RPC_STATUS_SPORT, s_in->sin6_port) || nla_put_be16(skb, NFSD_A_RPC_STATUS_DPORT, d_in->sin6_port)) return -ENOBUFS; break; } } for (i = 0; i < rqstp->rq_opcnt; i++) if (nla_put_u32(skb, NFSD_A_RPC_STATUS_COMPOUND_OPS, rqstp->rq_opnum[i])) return -ENOBUFS; genlmsg_end(skb, hdr); return 0; } /** * nfsd_nl_rpc_status_get_dumpit - Handle rpc_status_get dumpit * @skb: reply buffer * @cb: netlink metadata and command arguments * * Returns the size of the reply or a negative errno. */ int nfsd_nl_rpc_status_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb) { int i, ret, rqstp_index = 0; struct nfsd_net *nn; mutex_lock(&nfsd_mutex); nn = net_generic(sock_net(skb->sk), nfsd_net_id); if (!nn->nfsd_serv) { ret = -ENODEV; goto out_unlock; } rcu_read_lock(); for (i = 0; i < nn->nfsd_serv->sv_nrpools; i++) { struct svc_rqst *rqstp; if (i < cb->args[0]) /* already consumed */ continue; rqstp_index = 0; list_for_each_entry_rcu(rqstp, &nn->nfsd_serv->sv_pools[i].sp_all_threads, rq_all) { struct nfsd_genl_rqstp genl_rqstp; unsigned int status_counter; if (rqstp_index++ < cb->args[1]) /* already consumed */ continue; /* * Acquire rq_status_counter before parsing the rqst * fields. rq_status_counter is set to an odd value in * order to notify the consumers the rqstp fields are * meaningful. */ status_counter = smp_load_acquire(&rqstp->rq_status_counter); if (!(status_counter & 1)) continue; genl_rqstp.rq_xid = rqstp->rq_xid; genl_rqstp.rq_flags = rqstp->rq_flags; genl_rqstp.rq_vers = rqstp->rq_vers; genl_rqstp.rq_prog = rqstp->rq_prog; genl_rqstp.rq_proc = rqstp->rq_proc; genl_rqstp.rq_stime = rqstp->rq_stime; genl_rqstp.rq_opcnt = 0; memcpy(&genl_rqstp.rq_daddr, svc_daddr(rqstp), sizeof(struct sockaddr)); memcpy(&genl_rqstp.rq_saddr, svc_addr(rqstp), sizeof(struct sockaddr)); #ifdef CONFIG_NFSD_V4 if (rqstp->rq_vers == NFS4_VERSION && rqstp->rq_proc == NFSPROC4_COMPOUND) { /* NFSv4 compound */ struct nfsd4_compoundargs *args; int j; args = rqstp->rq_argp; genl_rqstp.rq_opcnt = args->opcnt; for (j = 0; j < genl_rqstp.rq_opcnt; j++) genl_rqstp.rq_opnum[j] = args->ops[j].opnum; } #endif /* CONFIG_NFSD_V4 */ /* * Acquire rq_status_counter before reporting the rqst * fields to the user. */ if (smp_load_acquire(&rqstp->rq_status_counter) != status_counter) continue; ret = nfsd_genl_rpc_status_compose_msg(skb, cb, &genl_rqstp); if (ret) goto out; } } cb->args[0] = i; cb->args[1] = rqstp_index; ret = skb->len; out: rcu_read_unlock(); out_unlock: mutex_unlock(&nfsd_mutex); return ret; } /** * nfsd_nl_threads_set_doit - set the number of running threads * @skb: reply buffer * @info: netlink metadata and command arguments * * Return 0 on success or a negative errno. */ int nfsd_nl_threads_set_doit(struct sk_buff *skb, struct genl_info *info) { int *nthreads, count = 0, nrpools, i, ret = -EOPNOTSUPP, rem; struct net *net = genl_info_net(info); struct nfsd_net *nn = net_generic(net, nfsd_net_id); const struct nlattr *attr; const char *scope = NULL; if (GENL_REQ_ATTR_CHECK(info, NFSD_A_SERVER_THREADS)) return -EINVAL; /* count number of SERVER_THREADS values */ nlmsg_for_each_attr(attr, info->nlhdr, GENL_HDRLEN, rem) { if (nla_type(attr) == NFSD_A_SERVER_THREADS) count++; } mutex_lock(&nfsd_mutex); nrpools = max(count, nfsd_nrpools(net)); nthreads = kcalloc(nrpools, sizeof(int), GFP_KERNEL); if (!nthreads) { ret = -ENOMEM; goto out_unlock; } i = 0; nlmsg_for_each_attr(attr, info->nlhdr, GENL_HDRLEN, rem) { if (nla_type(attr) == NFSD_A_SERVER_THREADS) { nthreads[i++] = nla_get_u32(attr); if (i >= nrpools) break; } } if (info->attrs[NFSD_A_SERVER_GRACETIME] || info->attrs[NFSD_A_SERVER_LEASETIME] || info->attrs[NFSD_A_SERVER_SCOPE]) { ret = -EBUSY; if (nn->nfsd_serv && nn->nfsd_serv->sv_nrthreads) goto out_unlock; ret = -EINVAL; attr = info->attrs[NFSD_A_SERVER_GRACETIME]; if (attr) { u32 gracetime = nla_get_u32(attr); if (gracetime < 10 || gracetime > 3600) goto out_unlock; nn->nfsd4_grace = gracetime; } attr = info->attrs[NFSD_A_SERVER_LEASETIME]; if (attr) { u32 leasetime = nla_get_u32(attr); if (leasetime < 10 || leasetime > 3600) goto out_unlock; nn->nfsd4_lease = leasetime; } attr = info->attrs[NFSD_A_SERVER_SCOPE]; if (attr) scope = nla_data(attr); } ret = nfsd_svc(nrpools, nthreads, net, get_current_cred(), scope); if (ret > 0) ret = 0; out_unlock: mutex_unlock(&nfsd_mutex); kfree(nthreads); return ret; } /** * nfsd_nl_threads_get_doit - get the number of running threads * @skb: reply buffer * @info: netlink metadata and command arguments * * Return 0 on success or a negative errno. */ int nfsd_nl_threads_get_doit(struct sk_buff *skb, struct genl_info *info) { struct net *net = genl_info_net(info); struct nfsd_net *nn = net_generic(net, nfsd_net_id); void *hdr; int err; skb = genlmsg_new(GENLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!skb) return -ENOMEM; hdr = genlmsg_iput(skb, info); if (!hdr) { err = -EMSGSIZE; goto err_free_msg; } mutex_lock(&nfsd_mutex); err = nla_put_u32(skb, NFSD_A_SERVER_GRACETIME, nn->nfsd4_grace) || nla_put_u32(skb, NFSD_A_SERVER_LEASETIME, nn->nfsd4_lease) || nla_put_string(skb, NFSD_A_SERVER_SCOPE, nn->nfsd_name); if (err) goto err_unlock; if (nn->nfsd_serv) { int i; for (i = 0; i < nfsd_nrpools(net); ++i) { struct svc_pool *sp = &nn->nfsd_serv->sv_pools[i]; err = nla_put_u32(skb, NFSD_A_SERVER_THREADS, sp->sp_nrthreads); if (err) goto err_unlock; } } else { err = nla_put_u32(skb, NFSD_A_SERVER_THREADS, 0); if (err) goto err_unlock; } mutex_unlock(&nfsd_mutex); genlmsg_end(skb, hdr); return genlmsg_reply(skb, info); err_unlock: mutex_unlock(&nfsd_mutex); err_free_msg: nlmsg_free(skb); return err; } /** * nfsd_nl_version_set_doit - set the nfs enabled versions * @skb: reply buffer * @info: netlink metadata and command arguments * * Return 0 on success or a negative errno. */ int nfsd_nl_version_set_doit(struct sk_buff *skb, struct genl_info *info) { const struct nlattr *attr; struct nfsd_net *nn; int i, rem; if (GENL_REQ_ATTR_CHECK(info, NFSD_A_SERVER_PROTO_VERSION)) return -EINVAL; mutex_lock(&nfsd_mutex); nn = net_generic(genl_info_net(info), nfsd_net_id); if (nn->nfsd_serv) { mutex_unlock(&nfsd_mutex); return -EBUSY; } /* clear current supported versions. */ nfsd_vers(nn, 2, NFSD_CLEAR); nfsd_vers(nn, 3, NFSD_CLEAR); for (i = 0; i <= NFSD_SUPPORTED_MINOR_VERSION; i++) nfsd_minorversion(nn, i, NFSD_CLEAR); nlmsg_for_each_attr(attr, info->nlhdr, GENL_HDRLEN, rem) { struct nlattr *tb[NFSD_A_VERSION_MAX + 1]; u32 major, minor = 0; bool enabled; if (nla_type(attr) != NFSD_A_SERVER_PROTO_VERSION) continue; if (nla_parse_nested(tb, NFSD_A_VERSION_MAX, attr, nfsd_version_nl_policy, info->extack) < 0) continue; if (!tb[NFSD_A_VERSION_MAJOR]) continue; major = nla_get_u32(tb[NFSD_A_VERSION_MAJOR]); if (tb[NFSD_A_VERSION_MINOR]) minor = nla_get_u32(tb[NFSD_A_VERSION_MINOR]); enabled = nla_get_flag(tb[NFSD_A_VERSION_ENABLED]); switch (major) { case 4: nfsd_minorversion(nn, minor, enabled ? NFSD_SET : NFSD_CLEAR); break; case 3: case 2: if (!minor) nfsd_vers(nn, major, enabled ? NFSD_SET : NFSD_CLEAR); break; default: break; } } mutex_unlock(&nfsd_mutex); return 0; } /** * nfsd_nl_version_get_doit - get the enabled status for all supported nfs versions * @skb: reply buffer * @info: netlink metadata and command arguments * * Return 0 on success or a negative errno. */ int nfsd_nl_version_get_doit(struct sk_buff *skb, struct genl_info *info) { struct nfsd_net *nn; int i, err; void *hdr; skb = genlmsg_new(GENLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!skb) return -ENOMEM; hdr = genlmsg_iput(skb, info); if (!hdr) { err = -EMSGSIZE; goto err_free_msg; } mutex_lock(&nfsd_mutex); nn = net_generic(genl_info_net(info), nfsd_net_id); for (i = 2; i <= 4; i++) { int j; for (j = 0; j <= NFSD_SUPPORTED_MINOR_VERSION; j++) { struct nlattr *attr; /* Don't record any versions the kernel doesn't have * compiled in */ if (!nfsd_support_version(i)) continue; /* NFSv{2,3} does not support minor numbers */ if (i < 4 && j) continue; attr = nla_nest_start(skb, NFSD_A_SERVER_PROTO_VERSION); if (!attr) { err = -EINVAL; goto err_nfsd_unlock; } if (nla_put_u32(skb, NFSD_A_VERSION_MAJOR, i) || nla_put_u32(skb, NFSD_A_VERSION_MINOR, j)) { err = -EINVAL; goto err_nfsd_unlock; } /* Set the enabled flag if the version is enabled */ if (nfsd_vers(nn, i, NFSD_TEST) && (i < 4 || nfsd_minorversion(nn, j, NFSD_TEST)) && nla_put_flag(skb, NFSD_A_VERSION_ENABLED)) { err = -EINVAL; goto err_nfsd_unlock; } nla_nest_end(skb, attr); } } mutex_unlock(&nfsd_mutex); genlmsg_end(skb, hdr); return genlmsg_reply(skb, info); err_nfsd_unlock: mutex_unlock(&nfsd_mutex); err_free_msg: nlmsg_free(skb); return err; } /** * nfsd_nl_listener_set_doit - set the nfs running sockets * @skb: reply buffer * @info: netlink metadata and command arguments * * Return 0 on success or a negative errno. */ int nfsd_nl_listener_set_doit(struct sk_buff *skb, struct genl_info *info) { struct net *net = genl_info_net(info); struct svc_xprt *xprt, *tmp; const struct nlattr *attr; struct svc_serv *serv; LIST_HEAD(permsocks); struct nfsd_net *nn; int err, rem; mutex_lock(&nfsd_mutex); err = nfsd_create_serv(net); if (err) { mutex_unlock(&nfsd_mutex); return err; } nn = net_generic(net, nfsd_net_id); serv = nn->nfsd_serv; spin_lock_bh(&serv->sv_lock); /* Move all of the old listener sockets to a temp list */ list_splice_init(&serv->sv_permsocks, &permsocks); /* * Walk the list of server_socks from userland and move any that match * back to sv_permsocks */ nlmsg_for_each_attr(attr, info->nlhdr, GENL_HDRLEN, rem) { struct nlattr *tb[NFSD_A_SOCK_MAX + 1]; const char *xcl_name; struct sockaddr *sa; if (nla_type(attr) != NFSD_A_SERVER_SOCK_ADDR) continue; if (nla_parse_nested(tb, NFSD_A_SOCK_MAX, attr, nfsd_sock_nl_policy, info->extack) < 0) continue; if (!tb[NFSD_A_SOCK_ADDR] || !tb[NFSD_A_SOCK_TRANSPORT_NAME]) continue; if (nla_len(tb[NFSD_A_SOCK_ADDR]) < sizeof(*sa)) continue; xcl_name = nla_data(tb[NFSD_A_SOCK_TRANSPORT_NAME]); sa = nla_data(tb[NFSD_A_SOCK_ADDR]); /* Put back any matching sockets */ list_for_each_entry_safe(xprt, tmp, &permsocks, xpt_list) { /* This shouldn't be possible */ if (WARN_ON_ONCE(xprt->xpt_net != net)) { list_move(&xprt->xpt_list, &serv->sv_permsocks); continue; } /* If everything matches, put it back */ if (!strcmp(xprt->xpt_class->xcl_name, xcl_name) && rpc_cmp_addr_port(sa, (struct sockaddr *)&xprt->xpt_local)) { list_move(&xprt->xpt_list, &serv->sv_permsocks); break; } } } /* For now, no removing old sockets while server is running */ if (serv->sv_nrthreads && !list_empty(&permsocks)) { list_splice_init(&permsocks, &serv->sv_permsocks); spin_unlock_bh(&serv->sv_lock); err = -EBUSY; goto out_unlock_mtx; } /* Close the remaining sockets on the permsocks list */ while (!list_empty(&permsocks)) { xprt = list_first_entry(&permsocks, struct svc_xprt, xpt_list); list_move(&xprt->xpt_list, &serv->sv_permsocks); /* * Newly-created sockets are born with the BUSY bit set. Clear * it if there are no threads, since nothing can pick it up * in that case. */ if (!serv->sv_nrthreads) clear_bit(XPT_BUSY, &xprt->xpt_flags); set_bit(XPT_CLOSE, &xprt->xpt_flags); spin_unlock_bh(&serv->sv_lock); svc_xprt_close(xprt); spin_lock_bh(&serv->sv_lock); } spin_unlock_bh(&serv->sv_lock); /* walk list of addrs again, open any that still don't exist */ nlmsg_for_each_attr(attr, info->nlhdr, GENL_HDRLEN, rem) { struct nlattr *tb[NFSD_A_SOCK_MAX + 1]; const char *xcl_name; struct sockaddr *sa; int ret; if (nla_type(attr) != NFSD_A_SERVER_SOCK_ADDR) continue; if (nla_parse_nested(tb, NFSD_A_SOCK_MAX, attr, nfsd_sock_nl_policy, info->extack) < 0) continue; if (!tb[NFSD_A_SOCK_ADDR] || !tb[NFSD_A_SOCK_TRANSPORT_NAME]) continue; if (nla_len(tb[NFSD_A_SOCK_ADDR]) < sizeof(*sa)) continue; xcl_name = nla_data(tb[NFSD_A_SOCK_TRANSPORT_NAME]); sa = nla_data(tb[NFSD_A_SOCK_ADDR]); xprt = svc_find_listener(serv, xcl_name, net, sa); if (xprt) { svc_xprt_put(xprt); continue; } ret = svc_xprt_create_from_sa(serv, xcl_name, net, sa, 0, get_current_cred()); /* always save the latest error */ if (ret < 0) err = ret; } if (!serv->sv_nrthreads && list_empty(&nn->nfsd_serv->sv_permsocks)) nfsd_destroy_serv(net); out_unlock_mtx: mutex_unlock(&nfsd_mutex); return err; } /** * nfsd_nl_listener_get_doit - get the nfs running listeners * @skb: reply buffer * @info: netlink metadata and command arguments * * Return 0 on success or a negative errno. */ int nfsd_nl_listener_get_doit(struct sk_buff *skb, struct genl_info *info) { struct svc_xprt *xprt; struct svc_serv *serv; struct nfsd_net *nn; void *hdr; int err; skb = genlmsg_new(GENLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!skb) return -ENOMEM; hdr = genlmsg_iput(skb, info); if (!hdr) { err = -EMSGSIZE; goto err_free_msg; } mutex_lock(&nfsd_mutex); nn = net_generic(genl_info_net(info), nfsd_net_id); /* no nfs server? Just send empty socket list */ if (!nn->nfsd_serv) goto out_unlock_mtx; serv = nn->nfsd_serv; spin_lock_bh(&serv->sv_lock); list_for_each_entry(xprt, &serv->sv_permsocks, xpt_list) { struct nlattr *attr; attr = nla_nest_start(skb, NFSD_A_SERVER_SOCK_ADDR); if (!attr) { err = -EINVAL; goto err_serv_unlock; } if (nla_put_string(skb, NFSD_A_SOCK_TRANSPORT_NAME, xprt->xpt_class->xcl_name) || nla_put(skb, NFSD_A_SOCK_ADDR, sizeof(struct sockaddr_storage), &xprt->xpt_local)) { err = -EINVAL; goto err_serv_unlock; } nla_nest_end(skb, attr); } spin_unlock_bh(&serv->sv_lock); out_unlock_mtx: mutex_unlock(&nfsd_mutex); genlmsg_end(skb, hdr); return genlmsg_reply(skb, info); err_serv_unlock: spin_unlock_bh(&serv->sv_lock); mutex_unlock(&nfsd_mutex); err_free_msg: nlmsg_free(skb); return err; } /** * nfsd_nl_pool_mode_set_doit - set the number of running threads * @skb: reply buffer * @info: netlink metadata and command arguments * * Return 0 on success or a negative errno. */ int nfsd_nl_pool_mode_set_doit(struct sk_buff *skb, struct genl_info *info) { const struct nlattr *attr; if (GENL_REQ_ATTR_CHECK(info, NFSD_A_POOL_MODE_MODE)) return -EINVAL; attr = info->attrs[NFSD_A_POOL_MODE_MODE]; return sunrpc_set_pool_mode(nla_data(attr)); } /** * nfsd_nl_pool_mode_get_doit - get info about pool_mode * @skb: reply buffer * @info: netlink metadata and command arguments * * Return 0 on success or a negative errno. */ int nfsd_nl_pool_mode_get_doit(struct sk_buff *skb, struct genl_info *info) { struct net *net = genl_info_net(info); char buf[16]; void *hdr; int err; if (sunrpc_get_pool_mode(buf, ARRAY_SIZE(buf)) >= ARRAY_SIZE(buf)) return -ERANGE; skb = genlmsg_new(GENLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!skb) return -ENOMEM; err = -EMSGSIZE; hdr = genlmsg_iput(skb, info); if (!hdr) goto err_free_msg; err = nla_put_string(skb, NFSD_A_POOL_MODE_MODE, buf) | nla_put_u32(skb, NFSD_A_POOL_MODE_NPOOLS, nfsd_nrpools(net)); if (err) goto err_free_msg; genlmsg_end(skb, hdr); return genlmsg_reply(skb, info); err_free_msg: nlmsg_free(skb); return err; } /** * nfsd_net_init - Prepare the nfsd_net portion of a new net namespace * @net: a freshly-created network namespace * * This information stays around as long as the network namespace is * alive whether or not there is an NFSD instance running in the * namespace. * * Returns zero on success, or a negative errno otherwise. */ static __net_init int nfsd_net_init(struct net *net) { struct nfsd_net *nn = net_generic(net, nfsd_net_id); int retval; int i; retval = nfsd_export_init(net); if (retval) goto out_export_error; retval = nfsd_idmap_init(net); if (retval) goto out_idmap_error; retval = percpu_counter_init_many(nn->counter, 0, GFP_KERNEL, NFSD_STATS_COUNTERS_NUM); if (retval) goto out_repcache_error; memset(&nn->nfsd_svcstats, 0, sizeof(nn->nfsd_svcstats)); nn->nfsd_svcstats.program = &nfsd_programs[0]; for (i = 0; i < sizeof(nn->nfsd_versions); i++) nn->nfsd_versions[i] = nfsd_support_version(i); for (i = 0; i < sizeof(nn->nfsd4_minorversions); i++) nn->nfsd4_minorversions[i] = nfsd_support_version(4); nn->nfsd_info.mutex = &nfsd_mutex; nn->nfsd_serv = NULL; nfsd4_init_leases_net(nn); get_random_bytes(&nn->siphash_key, sizeof(nn->siphash_key)); seqlock_init(&nn->writeverf_lock); nfsd_proc_stat_init(net); #if IS_ENABLED(CONFIG_NFS_LOCALIO) INIT_LIST_HEAD(&nn->local_clients); #endif return 0; out_repcache_error: nfsd_idmap_shutdown(net); out_idmap_error: nfsd_export_shutdown(net); out_export_error: return retval; } #if IS_ENABLED(CONFIG_NFS_LOCALIO) /** * nfsd_net_pre_exit - Disconnect localio clients from net namespace * @net: a network namespace that is about to be destroyed * * This invalidated ->net pointers held by localio clients * while they can still safely access nn->counter. */ static __net_exit void nfsd_net_pre_exit(struct net *net) { struct nfsd_net *nn = net_generic(net, nfsd_net_id); nfs_uuid_invalidate_clients(&nn->local_clients); } #endif /** * nfsd_net_exit - Release the nfsd_net portion of a net namespace * @net: a network namespace that is about to be destroyed * */ static __net_exit void nfsd_net_exit(struct net *net) { struct nfsd_net *nn = net_generic(net, nfsd_net_id); nfsd_proc_stat_shutdown(net); percpu_counter_destroy_many(nn->counter, NFSD_STATS_COUNTERS_NUM); nfsd_idmap_shutdown(net); nfsd_export_shutdown(net); } static struct pernet_operations nfsd_net_ops = { .init = nfsd_net_init, #if IS_ENABLED(CONFIG_NFS_LOCALIO) .pre_exit = nfsd_net_pre_exit, #endif .exit = nfsd_net_exit, .id = &nfsd_net_id, .size = sizeof(struct nfsd_net), }; static int __init init_nfsd(void) { int retval; retval = nfsd4_init_slabs(); if (retval) return retval; retval = nfsd4_init_pnfs(); if (retval) goto out_free_slabs; retval = nfsd_drc_slab_create(); if (retval) goto out_free_pnfs; nfsd_lockd_init(); /* lockd->nfsd callbacks */ retval = create_proc_exports_entry(); if (retval) goto out_free_lockd; retval = register_pernet_subsys(&nfsd_net_ops); if (retval < 0) goto out_free_exports; retval = register_cld_notifier(); if (retval) goto out_free_subsys; retval = nfsd4_create_laundry_wq(); if (retval) goto out_free_cld; retval = register_filesystem(&nfsd_fs_type); if (retval) goto out_free_all; retval = genl_register_family(&nfsd_nl_family); if (retval) goto out_free_all; nfsd_localio_ops_init(); return 0; out_free_all: nfsd4_destroy_laundry_wq(); out_free_cld: unregister_cld_notifier(); out_free_subsys: unregister_pernet_subsys(&nfsd_net_ops); out_free_exports: remove_proc_entry("fs/nfs/exports", NULL); remove_proc_entry("fs/nfs", NULL); out_free_lockd: nfsd_lockd_shutdown(); nfsd_drc_slab_free(); out_free_pnfs: nfsd4_exit_pnfs(); out_free_slabs: nfsd4_free_slabs(); return retval; } static void __exit exit_nfsd(void) { genl_unregister_family(&nfsd_nl_family); unregister_filesystem(&nfsd_fs_type); nfsd4_destroy_laundry_wq(); unregister_cld_notifier(); unregister_pernet_subsys(&nfsd_net_ops); nfsd_drc_slab_free(); remove_proc_entry("fs/nfs/exports", NULL); remove_proc_entry("fs/nfs", NULL); nfsd_lockd_shutdown(); nfsd4_free_slabs(); nfsd4_exit_pnfs(); } MODULE_AUTHOR("Olaf Kirch <okir@monad.swb.de>"); MODULE_DESCRIPTION("In-kernel NFS server"); MODULE_LICENSE("GPL"); module_init(init_nfsd) module_exit(exit_nfsd)
31 31 31 31 31 31 31 31 7 7 7 7 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 /* * llc_if.c - Defines LLC interface to upper layer * * Copyright (c) 1997 by Procom Technology, Inc. * 2001-2003 by Arnaldo Carvalho de Melo <acme@conectiva.com.br> * * This program can be redistributed or modified under the terms of the * GNU General Public License as published by the Free Software Foundation. * This program is distributed without any warranty or implied warranty * of merchantability or fitness for a particular purpose. * * See the GNU General Public License for more details. */ #include <linux/gfp.h> #include <linux/module.h> #include <linux/kernel.h> #include <linux/netdevice.h> #include <linux/errno.h> #include <net/llc_if.h> #include <net/llc_sap.h> #include <net/llc_s_ev.h> #include <net/llc_conn.h> #include <net/sock.h> #include <net/llc_c_ev.h> #include <net/llc_c_ac.h> #include <net/llc_c_st.h> #include <net/tcp_states.h> /** * llc_build_and_send_pkt - Connection data sending for upper layers. * @sk: connection * @skb: packet to send * * This function is called when upper layer wants to send data using * connection oriented communication mode. During sending data, connection * will be locked and received frames and expired timers will be queued. * Returns 0 for success, -ECONNABORTED when the connection already * closed and -EBUSY when sending data is not permitted in this state or * LLC has send an I pdu with p bit set to 1 and is waiting for it's * response. * * This function always consumes a reference to the skb. */ int llc_build_and_send_pkt(struct sock *sk, struct sk_buff *skb) { struct llc_conn_state_ev *ev; int rc = -ECONNABORTED; struct llc_sock *llc = llc_sk(sk); if (unlikely(llc->state == LLC_CONN_STATE_ADM)) goto out_free; rc = -EBUSY; if (unlikely(llc_data_accept_state(llc->state) || /* data_conn_refuse */ llc->p_flag)) { llc->failed_data_req = 1; goto out_free; } ev = llc_conn_ev(skb); ev->type = LLC_CONN_EV_TYPE_PRIM; ev->prim = LLC_DATA_PRIM; ev->prim_type = LLC_PRIM_TYPE_REQ; skb->dev = llc->dev; return llc_conn_state_process(sk, skb); out_free: kfree_skb(skb); return rc; } /** * llc_establish_connection - Called by upper layer to establish a conn * @sk: connection * @lmac: local mac address * @dmac: destination mac address * @dsap: destination sap * * Upper layer calls this to establish an LLC connection with a remote * machine. This function packages a proper event and sends it connection * component state machine. Success or failure of connection * establishment will inform to upper layer via calling it's confirm * function and passing proper information. */ int llc_establish_connection(struct sock *sk, const u8 *lmac, u8 *dmac, u8 dsap) { int rc = -EISCONN; struct llc_addr laddr, daddr; struct sk_buff *skb; struct llc_sock *llc = llc_sk(sk); struct sock *existing; laddr.lsap = llc->sap->laddr.lsap; daddr.lsap = dsap; memcpy(daddr.mac, dmac, sizeof(daddr.mac)); memcpy(laddr.mac, lmac, sizeof(laddr.mac)); existing = llc_lookup_established(llc->sap, &daddr, &laddr, sock_net(sk)); if (existing) { if (existing->sk_state == TCP_ESTABLISHED) { sk = existing; goto out_put; } else sock_put(existing); } sock_hold(sk); rc = -ENOMEM; skb = alloc_skb(0, GFP_ATOMIC); if (skb) { struct llc_conn_state_ev *ev = llc_conn_ev(skb); ev->type = LLC_CONN_EV_TYPE_PRIM; ev->prim = LLC_CONN_PRIM; ev->prim_type = LLC_PRIM_TYPE_REQ; skb_set_owner_w(skb, sk); rc = llc_conn_state_process(sk, skb); } out_put: sock_put(sk); return rc; } /** * llc_send_disc - Called by upper layer to close a connection * @sk: connection to be closed * * Upper layer calls this when it wants to close an established LLC * connection with a remote machine. This function packages a proper event * and sends it to connection component state machine. Returns 0 for * success, 1 otherwise. */ int llc_send_disc(struct sock *sk) { u16 rc = 1; struct llc_conn_state_ev *ev; struct sk_buff *skb; sock_hold(sk); if (sk->sk_type != SOCK_STREAM || sk->sk_state != TCP_ESTABLISHED || llc_sk(sk)->state == LLC_CONN_STATE_ADM || llc_sk(sk)->state == LLC_CONN_OUT_OF_SVC) goto out; /* * Postpone unassigning the connection from its SAP and returning the * connection until all ACTIONs have been completely executed */ skb = alloc_skb(0, GFP_ATOMIC); if (!skb) goto out; skb_set_owner_w(skb, sk); sk->sk_state = TCP_CLOSING; ev = llc_conn_ev(skb); ev->type = LLC_CONN_EV_TYPE_PRIM; ev->prim = LLC_DISC_PRIM; ev->prim_type = LLC_PRIM_TYPE_REQ; rc = llc_conn_state_process(sk, skb); out: sock_put(sk); return rc; }
314 408 2444 2893 1778 2431 2431 1257 1257 11472 825 1247 2588 173 431 431 431 84 84 84 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 /* SPDX-License-Identifier: GPL-2.0+ */ #ifndef _LINUX_MAPLE_TREE_H #define _LINUX_MAPLE_TREE_H /* * Maple Tree - An RCU-safe adaptive tree for storing ranges * Copyright (c) 2018-2022 Oracle * Authors: Liam R. Howlett <Liam.Howlett@Oracle.com> * Matthew Wilcox <willy@infradead.org> */ #include <linux/kernel.h> #include <linux/rcupdate.h> #include <linux/spinlock.h> /* #define CONFIG_MAPLE_RCU_DISABLED */ /* * Allocated nodes are mutable until they have been inserted into the tree, * at which time they cannot change their type until they have been removed * from the tree and an RCU grace period has passed. * * Removed nodes have their ->parent set to point to themselves. RCU readers * check ->parent before relying on the value that they loaded from the * slots array. This lets us reuse the slots array for the RCU head. * * Nodes in the tree point to their parent unless bit 0 is set. */ #if defined(CONFIG_64BIT) || defined(BUILD_VDSO32_64) /* 64bit sizes */ #define MAPLE_NODE_SLOTS 31 /* 256 bytes including ->parent */ #define MAPLE_RANGE64_SLOTS 16 /* 256 bytes */ #define MAPLE_ARANGE64_SLOTS 10 /* 240 bytes */ #define MAPLE_ALLOC_SLOTS (MAPLE_NODE_SLOTS - 1) #else /* 32bit sizes */ #define MAPLE_NODE_SLOTS 63 /* 256 bytes including ->parent */ #define MAPLE_RANGE64_SLOTS 32 /* 256 bytes */ #define MAPLE_ARANGE64_SLOTS 21 /* 240 bytes */ #define MAPLE_ALLOC_SLOTS (MAPLE_NODE_SLOTS - 2) #endif /* defined(CONFIG_64BIT) || defined(BUILD_VDSO32_64) */ #define MAPLE_NODE_MASK 255UL /* * The node->parent of the root node has bit 0 set and the rest of the pointer * is a pointer to the tree itself. No more bits are available in this pointer * (on m68k, the data structure may only be 2-byte aligned). * * Internal non-root nodes can only have maple_range_* nodes as parents. The * parent pointer is 256B aligned like all other tree nodes. When storing a 32 * or 64 bit values, the offset can fit into 4 bits. The 16 bit values need an * extra bit to store the offset. This extra bit comes from a reuse of the last * bit in the node type. This is possible by using bit 1 to indicate if bit 2 * is part of the type or the slot. * * Once the type is decided, the decision of an allocation range type or a * range type is done by examining the immutable tree flag for the * MT_FLAGS_ALLOC_RANGE flag. * * Node types: * 0x??1 = Root * 0x?00 = 16 bit nodes * 0x010 = 32 bit nodes * 0x110 = 64 bit nodes * * Slot size and location in the parent pointer: * type : slot location * 0x??1 : Root * 0x?00 : 16 bit values, type in 0-1, slot in 2-6 * 0x010 : 32 bit values, type in 0-2, slot in 3-6 * 0x110 : 64 bit values, type in 0-2, slot in 3-6 */ /* * This metadata is used to optimize the gap updating code and in reverse * searching for gaps or any other code that needs to find the end of the data. */ struct maple_metadata { unsigned char end; unsigned char gap; }; /* * Leaf nodes do not store pointers to nodes, they store user data. Users may * store almost any bit pattern. As noted above, the optimisation of storing an * entry at 0 in the root pointer cannot be done for data which have the bottom * two bits set to '10'. We also reserve values with the bottom two bits set to * '10' which are below 4096 (ie 2, 6, 10 .. 4094) for internal use. Some APIs * return errnos as a negative errno shifted right by two bits and the bottom * two bits set to '10', and while choosing to store these values in the array * is not an error, it may lead to confusion if you're testing for an error with * mas_is_err(). * * Non-leaf nodes store the type of the node pointed to (enum maple_type in bits * 3-6), bit 2 is reserved. That leaves bits 0-1 unused for now. * * In regular B-Tree terms, pivots are called keys. The term pivot is used to * indicate that the tree is specifying ranges, Pivots may appear in the * subtree with an entry attached to the value whereas keys are unique to a * specific position of a B-tree. Pivot values are inclusive of the slot with * the same index. */ struct maple_range_64 { struct maple_pnode *parent; unsigned long pivot[MAPLE_RANGE64_SLOTS - 1]; union { void __rcu *slot[MAPLE_RANGE64_SLOTS]; struct { void __rcu *pad[MAPLE_RANGE64_SLOTS - 1]; struct maple_metadata meta; }; }; }; /* * At tree creation time, the user can specify that they're willing to trade off * storing fewer entries in a tree in return for storing more information in * each node. * * The maple tree supports recording the largest range of NULL entries available * in this node, also called gaps. This optimises the tree for allocating a * range. */ struct maple_arange_64 { struct maple_pnode *parent; unsigned long pivot[MAPLE_ARANGE64_SLOTS - 1]; void __rcu *slot[MAPLE_ARANGE64_SLOTS]; unsigned long gap[MAPLE_ARANGE64_SLOTS]; struct maple_metadata meta; }; struct maple_alloc { unsigned long total; unsigned char node_count; unsigned int request_count; struct maple_alloc *slot[MAPLE_ALLOC_SLOTS]; }; struct maple_topiary { struct maple_pnode *parent; struct maple_enode *next; /* Overlaps the pivot */ }; enum maple_type { maple_dense, maple_leaf_64, maple_range_64, maple_arange_64, }; enum store_type { wr_invalid, wr_new_root, wr_store_root, wr_exact_fit, wr_spanning_store, wr_split_store, wr_rebalance, wr_append, wr_node_store, wr_slot_store, }; /** * DOC: Maple tree flags * * * MT_FLAGS_ALLOC_RANGE - Track gaps in this tree * * MT_FLAGS_USE_RCU - Operate in RCU mode * * MT_FLAGS_HEIGHT_OFFSET - The position of the tree height in the flags * * MT_FLAGS_HEIGHT_MASK - The mask for the maple tree height value * * MT_FLAGS_LOCK_MASK - How the mt_lock is used * * MT_FLAGS_LOCK_IRQ - Acquired irq-safe * * MT_FLAGS_LOCK_BH - Acquired bh-safe * * MT_FLAGS_LOCK_EXTERN - mt_lock is not used * * MAPLE_HEIGHT_MAX The largest height that can be stored */ #define MT_FLAGS_ALLOC_RANGE 0x01 #define MT_FLAGS_USE_RCU 0x02 #define MT_FLAGS_HEIGHT_OFFSET 0x02 #define MT_FLAGS_HEIGHT_MASK 0x7C #define MT_FLAGS_LOCK_MASK 0x300 #define MT_FLAGS_LOCK_IRQ 0x100 #define MT_FLAGS_LOCK_BH 0x200 #define MT_FLAGS_LOCK_EXTERN 0x300 #define MT_FLAGS_ALLOC_WRAPPED 0x0800 #define MAPLE_HEIGHT_MAX 31 #define MAPLE_NODE_TYPE_MASK 0x0F #define MAPLE_NODE_TYPE_SHIFT 0x03 #define MAPLE_RESERVED_RANGE 4096 #ifdef CONFIG_LOCKDEP typedef struct lockdep_map *lockdep_map_p; #define mt_lock_is_held(mt) \ (!(mt)->ma_external_lock || lock_is_held((mt)->ma_external_lock)) #define mt_write_lock_is_held(mt) \ (!(mt)->ma_external_lock || \ lock_is_held_type((mt)->ma_external_lock, 0)) #define mt_set_external_lock(mt, lock) \ (mt)->ma_external_lock = &(lock)->dep_map #define mt_on_stack(mt) (mt).ma_external_lock = NULL #else typedef struct { /* nothing */ } lockdep_map_p; #define mt_lock_is_held(mt) 1 #define mt_write_lock_is_held(mt) 1 #define mt_set_external_lock(mt, lock) do { } while (0) #define mt_on_stack(mt) do { } while (0) #endif /* * If the tree contains a single entry at index 0, it is usually stored in * tree->ma_root. To optimise for the page cache, an entry which ends in '00', * '01' or '11' is stored in the root, but an entry which ends in '10' will be * stored in a node. Bits 3-6 are used to store enum maple_type. * * The flags are used both to store some immutable information about this tree * (set at tree creation time) and dynamic information set under the spinlock. * * Another use of flags are to indicate global states of the tree. This is the * case with the MT_FLAGS_USE_RCU flag, which indicates the tree is currently in * RCU mode. This mode was added to allow the tree to reuse nodes instead of * re-allocating and RCU freeing nodes when there is a single user. */ struct maple_tree { union { spinlock_t ma_lock; lockdep_map_p ma_external_lock; }; unsigned int ma_flags; void __rcu *ma_root; }; /** * MTREE_INIT() - Initialize a maple tree * @name: The maple tree name * @__flags: The maple tree flags * */ #define MTREE_INIT(name, __flags) { \ .ma_lock = __SPIN_LOCK_UNLOCKED((name).ma_lock), \ .ma_flags = __flags, \ .ma_root = NULL, \ } /** * MTREE_INIT_EXT() - Initialize a maple tree with an external lock. * @name: The tree name * @__flags: The maple tree flags * @__lock: The external lock */ #ifdef CONFIG_LOCKDEP #define MTREE_INIT_EXT(name, __flags, __lock) { \ .ma_external_lock = &(__lock).dep_map, \ .ma_flags = (__flags), \ .ma_root = NULL, \ } #else #define MTREE_INIT_EXT(name, __flags, __lock) MTREE_INIT(name, __flags) #endif #define DEFINE_MTREE(name) \ struct maple_tree name = MTREE_INIT(name, 0) #define mtree_lock(mt) spin_lock((&(mt)->ma_lock)) #define mtree_lock_nested(mas, subclass) \ spin_lock_nested((&(mt)->ma_lock), subclass) #define mtree_unlock(mt) spin_unlock((&(mt)->ma_lock)) /* * The Maple Tree squeezes various bits in at various points which aren't * necessarily obvious. Usually, this is done by observing that pointers are * N-byte aligned and thus the bottom log_2(N) bits are available for use. We * don't use the high bits of pointers to store additional information because * we don't know what bits are unused on any given architecture. * * Nodes are 256 bytes in size and are also aligned to 256 bytes, giving us 8 * low bits for our own purposes. Nodes are currently of 4 types: * 1. Single pointer (Range is 0-0) * 2. Non-leaf Allocation Range nodes * 3. Non-leaf Range nodes * 4. Leaf Range nodes All nodes consist of a number of node slots, * pivots, and a parent pointer. */ struct maple_node { union { struct { struct maple_pnode *parent; void __rcu *slot[MAPLE_NODE_SLOTS]; }; struct { void *pad; struct rcu_head rcu; struct maple_enode *piv_parent; unsigned char parent_slot; enum maple_type type; unsigned char slot_len; unsigned int ma_flags; }; struct maple_range_64 mr64; struct maple_arange_64 ma64; struct maple_alloc alloc; }; }; /* * More complicated stores can cause two nodes to become one or three and * potentially alter the height of the tree. Either half of the tree may need * to be rebalanced against the other. The ma_topiary struct is used to track * which nodes have been 'cut' from the tree so that the change can be done * safely at a later date. This is done to support RCU. */ struct ma_topiary { struct maple_enode *head; struct maple_enode *tail; struct maple_tree *mtree; }; void *mtree_load(struct maple_tree *mt, unsigned long index); int mtree_insert(struct maple_tree *mt, unsigned long index, void *entry, gfp_t gfp); int mtree_insert_range(struct maple_tree *mt, unsigned long first, unsigned long last, void *entry, gfp_t gfp); int mtree_alloc_range(struct maple_tree *mt, unsigned long *startp, void *entry, unsigned long size, unsigned long min, unsigned long max, gfp_t gfp); int mtree_alloc_cyclic(struct maple_tree *mt, unsigned long *startp, void *entry, unsigned long range_lo, unsigned long range_hi, unsigned long *next, gfp_t gfp); int mtree_alloc_rrange(struct maple_tree *mt, unsigned long *startp, void *entry, unsigned long size, unsigned long min, unsigned long max, gfp_t gfp); int mtree_store_range(struct maple_tree *mt, unsigned long first, unsigned long last, void *entry, gfp_t gfp); int mtree_store(struct maple_tree *mt, unsigned long index, void *entry, gfp_t gfp); void *mtree_erase(struct maple_tree *mt, unsigned long index); int mtree_dup(struct maple_tree *mt, struct maple_tree *new, gfp_t gfp); int __mt_dup(struct maple_tree *mt, struct maple_tree *new, gfp_t gfp); void mtree_destroy(struct maple_tree *mt); void __mt_destroy(struct maple_tree *mt); /** * mtree_empty() - Determine if a tree has any present entries. * @mt: Maple Tree. * * Context: Any context. * Return: %true if the tree contains only NULL pointers. */ static inline bool mtree_empty(const struct maple_tree *mt) { return mt->ma_root == NULL; } /* Advanced API */ /* * Maple State Status * ma_active means the maple state is pointing to a node and offset and can * continue operating on the tree. * ma_start means we have not searched the tree. * ma_root means we have searched the tree and the entry we found lives in * the root of the tree (ie it has index 0, length 1 and is the only entry in * the tree). * ma_none means we have searched the tree and there is no node in the * tree for this entry. For example, we searched for index 1 in an empty * tree. Or we have a tree which points to a full leaf node and we * searched for an entry which is larger than can be contained in that * leaf node. * ma_pause means the data within the maple state may be stale, restart the * operation * ma_overflow means the search has reached the upper limit of the search * ma_underflow means the search has reached the lower limit of the search * ma_error means there was an error, check the node for the error number. */ enum maple_status { ma_active, ma_start, ma_root, ma_none, ma_pause, ma_overflow, ma_underflow, ma_error, }; /* * The maple state is defined in the struct ma_state and is used to keep track * of information during operations, and even between operations when using the * advanced API. * * If state->node has bit 0 set then it references a tree location which is not * a node (eg the root). If bit 1 is set, the rest of the bits are a negative * errno. Bit 2 (the 'unallocated slots' bit) is clear. Bits 3-6 indicate the * node type. * * state->alloc either has a request number of nodes or an allocated node. If * stat->alloc has a requested number of nodes, the first bit will be set (0x1) * and the remaining bits are the value. If state->alloc is a node, then the * node will be of type maple_alloc. maple_alloc has MAPLE_NODE_SLOTS - 1 for * storing more allocated nodes, a total number of nodes allocated, and the * node_count in this node. node_count is the number of allocated nodes in this * node. The scaling beyond MAPLE_NODE_SLOTS - 1 is handled by storing further * nodes into state->alloc->slot[0]'s node. Nodes are taken from state->alloc * by removing a node from the state->alloc node until state->alloc->node_count * is 1, when state->alloc is returned and the state->alloc->slot[0] is promoted * to state->alloc. Nodes are pushed onto state->alloc by putting the current * state->alloc into the pushed node's slot[0]. * * The state also contains the implied min/max of the state->node, the depth of * this search, and the offset. The implied min/max are either from the parent * node or are 0-oo for the root node. The depth is incremented or decremented * every time a node is walked down or up. The offset is the slot/pivot of * interest in the node - either for reading or writing. * * When returning a value the maple state index and last respectively contain * the start and end of the range for the entry. Ranges are inclusive in the * Maple Tree. * * The status of the state is used to determine how the next action should treat * the state. For instance, if the status is ma_start then the next action * should start at the root of the tree and walk down. If the status is * ma_pause then the node may be stale data and should be discarded. If the * status is ma_overflow, then the last action hit the upper limit. * */ struct ma_state { struct maple_tree *tree; /* The tree we're operating in */ unsigned long index; /* The index we're operating on - range start */ unsigned long last; /* The last index we're operating on - range end */ struct maple_enode *node; /* The node containing this entry */ unsigned long min; /* The minimum index of this node - implied pivot min */ unsigned long max; /* The maximum index of this node - implied pivot max */ struct maple_alloc *alloc; /* Allocated nodes for this operation */ enum maple_status status; /* The status of the state (active, start, none, etc) */ unsigned char depth; /* depth of tree descent during write */ unsigned char offset; unsigned char mas_flags; unsigned char end; /* The end of the node */ enum store_type store_type; /* The type of store needed for this operation */ }; struct ma_wr_state { struct ma_state *mas; struct maple_node *node; /* Decoded mas->node */ unsigned long r_min; /* range min */ unsigned long r_max; /* range max */ enum maple_type type; /* mas->node type */ unsigned char offset_end; /* The offset where the write ends */ unsigned long *pivots; /* mas->node->pivots pointer */ unsigned long end_piv; /* The pivot at the offset end */ void __rcu **slots; /* mas->node->slots pointer */ void *entry; /* The entry to write */ void *content; /* The existing entry that is being overwritten */ }; #define mas_lock(mas) spin_lock(&((mas)->tree->ma_lock)) #define mas_lock_nested(mas, subclass) \ spin_lock_nested(&((mas)->tree->ma_lock), subclass) #define mas_unlock(mas) spin_unlock(&((mas)->tree->ma_lock)) /* * Special values for ma_state.node. * MA_ERROR represents an errno. After dropping the lock and attempting * to resolve the error, the walk would have to be restarted from the * top of the tree as the tree may have been modified. */ #define MA_ERROR(err) \ ((struct maple_enode *)(((unsigned long)err << 2) | 2UL)) #define MA_STATE(name, mt, first, end) \ struct ma_state name = { \ .tree = mt, \ .index = first, \ .last = end, \ .node = NULL, \ .status = ma_start, \ .min = 0, \ .max = ULONG_MAX, \ .alloc = NULL, \ .mas_flags = 0, \ .store_type = wr_invalid, \ } #define MA_WR_STATE(name, ma_state, wr_entry) \ struct ma_wr_state name = { \ .mas = ma_state, \ .content = NULL, \ .entry = wr_entry, \ } #define MA_TOPIARY(name, tree) \ struct ma_topiary name = { \ .head = NULL, \ .tail = NULL, \ .mtree = tree, \ } void *mas_walk(struct ma_state *mas); void *mas_store(struct ma_state *mas, void *entry); void *mas_erase(struct ma_state *mas); int mas_store_gfp(struct ma_state *mas, void *entry, gfp_t gfp); void mas_store_prealloc(struct ma_state *mas, void *entry); void *mas_find(struct ma_state *mas, unsigned long max); void *mas_find_range(struct ma_state *mas, unsigned long max); void *mas_find_rev(struct ma_state *mas, unsigned long min); void *mas_find_range_rev(struct ma_state *mas, unsigned long max); int mas_preallocate(struct ma_state *mas, void *entry, gfp_t gfp); int mas_alloc_cyclic(struct ma_state *mas, unsigned long *startp, void *entry, unsigned long range_lo, unsigned long range_hi, unsigned long *next, gfp_t gfp); bool mas_nomem(struct ma_state *mas, gfp_t gfp); void mas_pause(struct ma_state *mas); void maple_tree_init(void); void mas_destroy(struct ma_state *mas); int mas_expected_entries(struct ma_state *mas, unsigned long nr_entries); void *mas_prev(struct ma_state *mas, unsigned long min); void *mas_prev_range(struct ma_state *mas, unsigned long max); void *mas_next(struct ma_state *mas, unsigned long max); void *mas_next_range(struct ma_state *mas, unsigned long max); int mas_empty_area(struct ma_state *mas, unsigned long min, unsigned long max, unsigned long size); /* * This finds an empty area from the highest address to the lowest. * AKA "Topdown" version, */ int mas_empty_area_rev(struct ma_state *mas, unsigned long min, unsigned long max, unsigned long size); static inline void mas_init(struct ma_state *mas, struct maple_tree *tree, unsigned long addr) { memset(mas, 0, sizeof(struct ma_state)); mas->tree = tree; mas->index = mas->last = addr; mas->max = ULONG_MAX; mas->status = ma_start; mas->node = NULL; } static inline bool mas_is_active(struct ma_state *mas) { return mas->status == ma_active; } static inline bool mas_is_err(struct ma_state *mas) { return mas->status == ma_error; } /** * mas_reset() - Reset a Maple Tree operation state. * @mas: Maple Tree operation state. * * Resets the error or walk state of the @mas so future walks of the * array will start from the root. Use this if you have dropped the * lock and want to reuse the ma_state. * * Context: Any context. */ static __always_inline void mas_reset(struct ma_state *mas) { mas->status = ma_start; mas->node = NULL; } /** * mas_for_each() - Iterate over a range of the maple tree. * @__mas: Maple Tree operation state (maple_state) * @__entry: Entry retrieved from the tree * @__max: maximum index to retrieve from the tree * * When returned, mas->index and mas->last will hold the entire range for the * entry. * * Note: may return the zero entry. */ #define mas_for_each(__mas, __entry, __max) \ while (((__entry) = mas_find((__mas), (__max))) != NULL) /** * mas_for_each_rev() - Iterate over a range of the maple tree in reverse order. * @__mas: Maple Tree operation state (maple_state) * @__entry: Entry retrieved from the tree * @__min: minimum index to retrieve from the tree * * When returned, mas->index and mas->last will hold the entire range for the * entry. * * Note: may return the zero entry. */ #define mas_for_each_rev(__mas, __entry, __min) \ while (((__entry) = mas_find_rev((__mas), (__min))) != NULL) #ifdef CONFIG_DEBUG_MAPLE_TREE enum mt_dump_format { mt_dump_dec, mt_dump_hex, }; extern atomic_t maple_tree_tests_run; extern atomic_t maple_tree_tests_passed; void mt_dump(const struct maple_tree *mt, enum mt_dump_format format); void mas_dump(const struct ma_state *mas); void mas_wr_dump(const struct ma_wr_state *wr_mas); void mt_validate(struct maple_tree *mt); void mt_cache_shrink(void); #define MT_BUG_ON(__tree, __x) do { \ atomic_inc(&maple_tree_tests_run); \ if (__x) { \ pr_info("BUG at %s:%d (%u)\n", \ __func__, __LINE__, __x); \ mt_dump(__tree, mt_dump_hex); \ pr_info("Pass: %u Run:%u\n", \ atomic_read(&maple_tree_tests_passed), \ atomic_read(&maple_tree_tests_run)); \ dump_stack(); \ } else { \ atomic_inc(&maple_tree_tests_passed); \ } \ } while (0) #define MAS_BUG_ON(__mas, __x) do { \ atomic_inc(&maple_tree_tests_run); \ if (__x) { \ pr_info("BUG at %s:%d (%u)\n", \ __func__, __LINE__, __x); \ mas_dump(__mas); \ mt_dump((__mas)->tree, mt_dump_hex); \ pr_info("Pass: %u Run:%u\n", \ atomic_read(&maple_tree_tests_passed), \ atomic_read(&maple_tree_tests_run)); \ dump_stack(); \ } else { \ atomic_inc(&maple_tree_tests_passed); \ } \ } while (0) #define MAS_WR_BUG_ON(__wrmas, __x) do { \ atomic_inc(&maple_tree_tests_run); \ if (__x) { \ pr_info("BUG at %s:%d (%u)\n", \ __func__, __LINE__, __x); \ mas_wr_dump(__wrmas); \ mas_dump((__wrmas)->mas); \ mt_dump((__wrmas)->mas->tree, mt_dump_hex); \ pr_info("Pass: %u Run:%u\n", \ atomic_read(&maple_tree_tests_passed), \ atomic_read(&maple_tree_tests_run)); \ dump_stack(); \ } else { \ atomic_inc(&maple_tree_tests_passed); \ } \ } while (0) #define MT_WARN_ON(__tree, __x) ({ \ int ret = !!(__x); \ atomic_inc(&maple_tree_tests_run); \ if (ret) { \ pr_info("WARN at %s:%d (%u)\n", \ __func__, __LINE__, __x); \ mt_dump(__tree, mt_dump_hex); \ pr_info("Pass: %u Run:%u\n", \ atomic_read(&maple_tree_tests_passed), \ atomic_read(&maple_tree_tests_run)); \ dump_stack(); \ } else { \ atomic_inc(&maple_tree_tests_passed); \ } \ unlikely(ret); \ }) #define MAS_WARN_ON(__mas, __x) ({ \ int ret = !!(__x); \ atomic_inc(&maple_tree_tests_run); \ if (ret) { \ pr_info("WARN at %s:%d (%u)\n", \ __func__, __LINE__, __x); \ mas_dump(__mas); \ mt_dump((__mas)->tree, mt_dump_hex); \ pr_info("Pass: %u Run:%u\n", \ atomic_read(&maple_tree_tests_passed), \ atomic_read(&maple_tree_tests_run)); \ dump_stack(); \ } else { \ atomic_inc(&maple_tree_tests_passed); \ } \ unlikely(ret); \ }) #define MAS_WR_WARN_ON(__wrmas, __x) ({ \ int ret = !!(__x); \ atomic_inc(&maple_tree_tests_run); \ if (ret) { \ pr_info("WARN at %s:%d (%u)\n", \ __func__, __LINE__, __x); \ mas_wr_dump(__wrmas); \ mas_dump((__wrmas)->mas); \ mt_dump((__wrmas)->mas->tree, mt_dump_hex); \ pr_info("Pass: %u Run:%u\n", \ atomic_read(&maple_tree_tests_passed), \ atomic_read(&maple_tree_tests_run)); \ dump_stack(); \ } else { \ atomic_inc(&maple_tree_tests_passed); \ } \ unlikely(ret); \ }) #else #define MT_BUG_ON(__tree, __x) BUG_ON(__x) #define MAS_BUG_ON(__mas, __x) BUG_ON(__x) #define MAS_WR_BUG_ON(__mas, __x) BUG_ON(__x) #define MT_WARN_ON(__tree, __x) WARN_ON(__x) #define MAS_WARN_ON(__mas, __x) WARN_ON(__x) #define MAS_WR_WARN_ON(__mas, __x) WARN_ON(__x) #endif /* CONFIG_DEBUG_MAPLE_TREE */ /** * __mas_set_range() - Set up Maple Tree operation state to a sub-range of the * current location. * @mas: Maple Tree operation state. * @start: New start of range in the Maple Tree. * @last: New end of range in the Maple Tree. * * set the internal maple state values to a sub-range. * Please use mas_set_range() if you do not know where you are in the tree. */ static inline void __mas_set_range(struct ma_state *mas, unsigned long start, unsigned long last) { /* Ensure the range starts within the current slot */ MAS_WARN_ON(mas, mas_is_active(mas) && (mas->index > start || mas->last < start)); mas->index = start; mas->last = last; } /** * mas_set_range() - Set up Maple Tree operation state for a different index. * @mas: Maple Tree operation state. * @start: New start of range in the Maple Tree. * @last: New end of range in the Maple Tree. * * Move the operation state to refer to a different range. This will * have the effect of starting a walk from the top; see mas_next() * to move to an adjacent index. */ static inline void mas_set_range(struct ma_state *mas, unsigned long start, unsigned long last) { mas_reset(mas); __mas_set_range(mas, start, last); } /** * mas_set() - Set up Maple Tree operation state for a different index. * @mas: Maple Tree operation state. * @index: New index into the Maple Tree. * * Move the operation state to refer to a different index. This will * have the effect of starting a walk from the top; see mas_next() * to move to an adjacent index. */ static inline void mas_set(struct ma_state *mas, unsigned long index) { mas_set_range(mas, index, index); } static inline bool mt_external_lock(const struct maple_tree *mt) { return (mt->ma_flags & MT_FLAGS_LOCK_MASK) == MT_FLAGS_LOCK_EXTERN; } /** * mt_init_flags() - Initialise an empty maple tree with flags. * @mt: Maple Tree * @flags: maple tree flags. * * If you need to initialise a Maple Tree with special flags (eg, an * allocation tree), use this function. * * Context: Any context. */ static inline void mt_init_flags(struct maple_tree *mt, unsigned int flags) { mt->ma_flags = flags; if (!mt_external_lock(mt)) spin_lock_init(&mt->ma_lock); rcu_assign_pointer(mt->ma_root, NULL); } /** * mt_init() - Initialise an empty maple tree. * @mt: Maple Tree * * An empty Maple Tree. * * Context: Any context. */ static inline void mt_init(struct maple_tree *mt) { mt_init_flags(mt, 0); } static inline bool mt_in_rcu(struct maple_tree *mt) { #ifdef CONFIG_MAPLE_RCU_DISABLED return false; #endif return mt->ma_flags & MT_FLAGS_USE_RCU; } /** * mt_clear_in_rcu() - Switch the tree to non-RCU mode. * @mt: The Maple Tree */ static inline void mt_clear_in_rcu(struct maple_tree *mt) { if (!mt_in_rcu(mt)) return; if (mt_external_lock(mt)) { WARN_ON(!mt_lock_is_held(mt)); mt->ma_flags &= ~MT_FLAGS_USE_RCU; } else { mtree_lock(mt); mt->ma_flags &= ~MT_FLAGS_USE_RCU; mtree_unlock(mt); } } /** * mt_set_in_rcu() - Switch the tree to RCU safe mode. * @mt: The Maple Tree */ static inline void mt_set_in_rcu(struct maple_tree *mt) { if (mt_in_rcu(mt)) return; if (mt_external_lock(mt)) { WARN_ON(!mt_lock_is_held(mt)); mt->ma_flags |= MT_FLAGS_USE_RCU; } else { mtree_lock(mt); mt->ma_flags |= MT_FLAGS_USE_RCU; mtree_unlock(mt); } } static inline unsigned int mt_height(const struct maple_tree *mt) { return (mt->ma_flags & MT_FLAGS_HEIGHT_MASK) >> MT_FLAGS_HEIGHT_OFFSET; } void *mt_find(struct maple_tree *mt, unsigned long *index, unsigned long max); void *mt_find_after(struct maple_tree *mt, unsigned long *index, unsigned long max); void *mt_prev(struct maple_tree *mt, unsigned long index, unsigned long min); void *mt_next(struct maple_tree *mt, unsigned long index, unsigned long max); /** * mt_for_each - Iterate over each entry starting at index until max. * @__tree: The Maple Tree * @__entry: The current entry * @__index: The index to start the search from. Subsequently used as iterator. * @__max: The maximum limit for @index * * This iterator skips all entries, which resolve to a NULL pointer, * e.g. entries which has been reserved with XA_ZERO_ENTRY. */ #define mt_for_each(__tree, __entry, __index, __max) \ for (__entry = mt_find(__tree, &(__index), __max); \ __entry; __entry = mt_find_after(__tree, &(__index), __max)) #endif /*_LINUX_MAPLE_TREE_H */
3201 586 214 282 2 1938 7348 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _ASM_X86_ATOMIC_H #define _ASM_X86_ATOMIC_H #include <linux/compiler.h> #include <linux/types.h> #include <asm/alternative.h> #include <asm/cmpxchg.h> #include <asm/rmwcc.h> #include <asm/barrier.h> /* * Atomic operations that C can't guarantee us. Useful for * resource counting etc.. */ static __always_inline int arch_atomic_read(const atomic_t *v) { /* * Note for KASAN: we deliberately don't use READ_ONCE_NOCHECK() here, * it's non-inlined function that increases binary size and stack usage. */ return __READ_ONCE((v)->counter); } static __always_inline void arch_atomic_set(atomic_t *v, int i) { __WRITE_ONCE(v->counter, i); } static __always_inline void arch_atomic_add(int i, atomic_t *v) { asm volatile(LOCK_PREFIX "addl %1,%0" : "+m" (v->counter) : "ir" (i) : "memory"); } static __always_inline void arch_atomic_sub(int i, atomic_t *v) { asm volatile(LOCK_PREFIX "subl %1,%0" : "+m" (v->counter) : "ir" (i) : "memory"); } static __always_inline bool arch_atomic_sub_and_test(int i, atomic_t *v) { return GEN_BINARY_RMWcc(LOCK_PREFIX "subl", v->counter, e, "er", i); } #define arch_atomic_sub_and_test arch_atomic_sub_and_test static __always_inline void arch_atomic_inc(atomic_t *v) { asm volatile(LOCK_PREFIX "incl %0" : "+m" (v->counter) :: "memory"); } #define arch_atomic_inc arch_atomic_inc static __always_inline void arch_atomic_dec(atomic_t *v) { asm volatile(LOCK_PREFIX "decl %0" : "+m" (v->counter) :: "memory"); } #define arch_atomic_dec arch_atomic_dec static __always_inline bool arch_atomic_dec_and_test(atomic_t *v) { return GEN_UNARY_RMWcc(LOCK_PREFIX "decl", v->counter, e); } #define arch_atomic_dec_and_test arch_atomic_dec_and_test static __always_inline bool arch_atomic_inc_and_test(atomic_t *v) { return GEN_UNARY_RMWcc(LOCK_PREFIX "incl", v->counter, e); } #define arch_atomic_inc_and_test arch_atomic_inc_and_test static __always_inline bool arch_atomic_add_negative(int i, atomic_t *v) { return GEN_BINARY_RMWcc(LOCK_PREFIX "addl", v->counter, s, "er", i); } #define arch_atomic_add_negative arch_atomic_add_negative static __always_inline int arch_atomic_add_return(int i, atomic_t *v) { return i + xadd(&v->counter, i); } #define arch_atomic_add_return arch_atomic_add_return #define arch_atomic_sub_return(i, v) arch_atomic_add_return(-(i), v) static __always_inline int arch_atomic_fetch_add(int i, atomic_t *v) { return xadd(&v->counter, i); } #define arch_atomic_fetch_add arch_atomic_fetch_add #define arch_atomic_fetch_sub(i, v) arch_atomic_fetch_add(-(i), v) static __always_inline int arch_atomic_cmpxchg(atomic_t *v, int old, int new) { return arch_cmpxchg(&v->counter, old, new); } #define arch_atomic_cmpxchg arch_atomic_cmpxchg static __always_inline bool arch_atomic_try_cmpxchg(atomic_t *v, int *old, int new) { return arch_try_cmpxchg(&v->counter, old, new); } #define arch_atomic_try_cmpxchg arch_atomic_try_cmpxchg static __always_inline int arch_atomic_xchg(atomic_t *v, int new) { return arch_xchg(&v->counter, new); } #define arch_atomic_xchg arch_atomic_xchg static __always_inline void arch_atomic_and(int i, atomic_t *v) { asm volatile(LOCK_PREFIX "andl %1,%0" : "+m" (v->counter) : "ir" (i) : "memory"); } static __always_inline int arch_atomic_fetch_and(int i, atomic_t *v) { int val = arch_atomic_read(v); do { } while (!arch_atomic_try_cmpxchg(v, &val, val & i)); return val; } #define arch_atomic_fetch_and arch_atomic_fetch_and static __always_inline void arch_atomic_or(int i, atomic_t *v) { asm volatile(LOCK_PREFIX "orl %1,%0" : "+m" (v->counter) : "ir" (i) : "memory"); } static __always_inline int arch_atomic_fetch_or(int i, atomic_t *v) { int val = arch_atomic_read(v); do { } while (!arch_atomic_try_cmpxchg(v, &val, val | i)); return val; } #define arch_atomic_fetch_or arch_atomic_fetch_or static __always_inline void arch_atomic_xor(int i, atomic_t *v) { asm volatile(LOCK_PREFIX "xorl %1,%0" : "+m" (v->counter) : "ir" (i) : "memory"); } static __always_inline int arch_atomic_fetch_xor(int i, atomic_t *v) { int val = arch_atomic_read(v); do { } while (!arch_atomic_try_cmpxchg(v, &val, val ^ i)); return val; } #define arch_atomic_fetch_xor arch_atomic_fetch_xor #ifdef CONFIG_X86_32 # include <asm/atomic64_32.h> #else # include <asm/atomic64_64.h> #endif #endif /* _ASM_X86_ATOMIC_H */
39 1 1 40 26 41 40 37 37 36 36 1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 /* SPDX-License-Identifier: GPL-2.0-only */ #ifndef _CCID_H #define _CCID_H /* * net/dccp/ccid.h * * An implementation of the DCCP protocol * Arnaldo Carvalho de Melo <acme@conectiva.com.br> * * CCID infrastructure */ #include <net/sock.h> #include <linux/compiler.h> #include <linux/dccp.h> #include <linux/list.h> #include <linux/module.h> /* maximum value for a CCID (RFC 4340, 19.5) */ #define CCID_MAX 255 #define CCID_SLAB_NAME_LENGTH 32 struct tcp_info; /** * struct ccid_operations - Interface to Congestion-Control Infrastructure * * @ccid_id: numerical CCID ID (up to %CCID_MAX, cf. table 5 in RFC 4340, 10.) * @ccid_ccmps: the CCMPS including network/transport headers (0 when disabled) * @ccid_name: alphabetical identifier string for @ccid_id * @ccid_hc_{r,t}x_slab: memory pool for the receiver/sender half-connection * @ccid_hc_{r,t}x_obj_size: size of the receiver/sender half-connection socket * * @ccid_hc_{r,t}x_init: CCID-specific initialisation routine (before startup) * @ccid_hc_{r,t}x_exit: CCID-specific cleanup routine (before destruction) * @ccid_hc_rx_packet_recv: implements the HC-receiver side * @ccid_hc_{r,t}x_parse_options: parsing routine for CCID/HC-specific options * @ccid_hc_{r,t}x_insert_options: insert routine for CCID/HC-specific options * @ccid_hc_tx_packet_recv: implements feedback processing for the HC-sender * @ccid_hc_tx_send_packet: implements the sending part of the HC-sender * @ccid_hc_tx_packet_sent: does accounting for packets in flight by HC-sender * @ccid_hc_{r,t}x_get_info: INET_DIAG information for HC-receiver/sender * @ccid_hc_{r,t}x_getsockopt: socket options specific to HC-receiver/sender */ struct ccid_operations { unsigned char ccid_id; __u32 ccid_ccmps; const char *ccid_name; struct kmem_cache *ccid_hc_rx_slab, *ccid_hc_tx_slab; char ccid_hc_rx_slab_name[CCID_SLAB_NAME_LENGTH]; char ccid_hc_tx_slab_name[CCID_SLAB_NAME_LENGTH]; __u32 ccid_hc_rx_obj_size, ccid_hc_tx_obj_size; /* Interface Routines */ int (*ccid_hc_rx_init)(struct ccid *ccid, struct sock *sk); int (*ccid_hc_tx_init)(struct ccid *ccid, struct sock *sk); void (*ccid_hc_rx_exit)(struct sock *sk); void (*ccid_hc_tx_exit)(struct sock *sk); void (*ccid_hc_rx_packet_recv)(struct sock *sk, struct sk_buff *skb); int (*ccid_hc_rx_parse_options)(struct sock *sk, u8 pkt, u8 opt, u8 *val, u8 len); int (*ccid_hc_rx_insert_options)(struct sock *sk, struct sk_buff *skb); void (*ccid_hc_tx_packet_recv)(struct sock *sk, struct sk_buff *skb); int (*ccid_hc_tx_parse_options)(struct sock *sk, u8 pkt, u8 opt, u8 *val, u8 len); int (*ccid_hc_tx_send_packet)(struct sock *sk, struct sk_buff *skb); void (*ccid_hc_tx_packet_sent)(struct sock *sk, unsigned int len); void (*ccid_hc_rx_get_info)(struct sock *sk, struct tcp_info *info); void (*ccid_hc_tx_get_info)(struct sock *sk, struct tcp_info *info); int (*ccid_hc_rx_getsockopt)(struct sock *sk, const int optname, int len, u32 __user *optval, int __user *optlen); int (*ccid_hc_tx_getsockopt)(struct sock *sk, const int optname, int len, u32 __user *optval, int __user *optlen); }; extern struct ccid_operations ccid2_ops; #ifdef CONFIG_IP_DCCP_CCID3 extern struct ccid_operations ccid3_ops; #endif int ccid_initialize_builtins(void); void ccid_cleanup_builtins(void); struct ccid { struct ccid_operations *ccid_ops; char ccid_priv[]; }; static inline void *ccid_priv(const struct ccid *ccid) { return (void *)ccid->ccid_priv; } bool ccid_support_check(u8 const *ccid_array, u8 array_len); int ccid_get_builtin_ccids(u8 **ccid_array, u8 *array_len); int ccid_getsockopt_builtin_ccids(struct sock *sk, int len, char __user *, int __user *); struct ccid *ccid_new(const u8 id, struct sock *sk, bool rx); static inline int ccid_get_current_rx_ccid(struct dccp_sock *dp) { struct ccid *ccid = dp->dccps_hc_rx_ccid; if (ccid == NULL || ccid->ccid_ops == NULL) return -1; return ccid->ccid_ops->ccid_id; } static inline int ccid_get_current_tx_ccid(struct dccp_sock *dp) { struct ccid *ccid = dp->dccps_hc_tx_ccid; if (ccid == NULL || ccid->ccid_ops == NULL) return -1; return ccid->ccid_ops->ccid_id; } void ccid_hc_rx_delete(struct ccid *ccid, struct sock *sk); void ccid_hc_tx_delete(struct ccid *ccid, struct sock *sk); /* * Congestion control of queued data packets via CCID decision. * * The TX CCID performs its congestion-control by indicating whether and when a * queued packet may be sent, using the return code of ccid_hc_tx_send_packet(). * The following modes are supported via the symbolic constants below: * - timer-based pacing (CCID returns a delay value in milliseconds); * - autonomous dequeueing (CCID internally schedules dccps_xmitlet). */ enum ccid_dequeueing_decision { CCID_PACKET_SEND_AT_ONCE = 0x00000, /* "green light": no delay */ CCID_PACKET_DELAY_MAX = 0x0FFFF, /* maximum delay in msecs */ CCID_PACKET_DELAY = 0x10000, /* CCID msec-delay mode */ CCID_PACKET_WILL_DEQUEUE_LATER = 0x20000, /* CCID autonomous mode */ CCID_PACKET_ERR = 0xF0000, /* error condition */ }; static inline int ccid_packet_dequeue_eval(const int return_code) { if (return_code < 0) return CCID_PACKET_ERR; if (return_code == 0) return CCID_PACKET_SEND_AT_ONCE; if (return_code <= CCID_PACKET_DELAY_MAX) return CCID_PACKET_DELAY; return return_code; } static inline int ccid_hc_tx_send_packet(struct ccid *ccid, struct sock *sk, struct sk_buff *skb) { if (ccid->ccid_ops->ccid_hc_tx_send_packet != NULL) return ccid->ccid_ops->ccid_hc_tx_send_packet(sk, skb); return CCID_PACKET_SEND_AT_ONCE; } static inline void ccid_hc_tx_packet_sent(struct ccid *ccid, struct sock *sk, unsigned int len) { if (ccid->ccid_ops->ccid_hc_tx_packet_sent != NULL) ccid->ccid_ops->ccid_hc_tx_packet_sent(sk, len); } static inline void ccid_hc_rx_packet_recv(struct ccid *ccid, struct sock *sk, struct sk_buff *skb) { if (ccid->ccid_ops->ccid_hc_rx_packet_recv != NULL) ccid->ccid_ops->ccid_hc_rx_packet_recv(sk, skb); } static inline void ccid_hc_tx_packet_recv(struct ccid *ccid, struct sock *sk, struct sk_buff *skb) { if (ccid->ccid_ops->ccid_hc_tx_packet_recv != NULL) ccid->ccid_ops->ccid_hc_tx_packet_recv(sk, skb); } /** * ccid_hc_tx_parse_options - Parse CCID-specific options sent by the receiver * @pkt: type of packet that @opt appears on (RFC 4340, 5.1) * @opt: the CCID-specific option type (RFC 4340, 5.8 and 10.3) * @val: value of @opt * @len: length of @val in bytes */ static inline int ccid_hc_tx_parse_options(struct ccid *ccid, struct sock *sk, u8 pkt, u8 opt, u8 *val, u8 len) { if (!ccid || !ccid->ccid_ops->ccid_hc_tx_parse_options) return 0; return ccid->ccid_ops->ccid_hc_tx_parse_options(sk, pkt, opt, val, len); } /** * ccid_hc_rx_parse_options - Parse CCID-specific options sent by the sender * Arguments are analogous to ccid_hc_tx_parse_options() */ static inline int ccid_hc_rx_parse_options(struct ccid *ccid, struct sock *sk, u8 pkt, u8 opt, u8 *val, u8 len) { if (!ccid || !ccid->ccid_ops->ccid_hc_rx_parse_options) return 0; return ccid->ccid_ops->ccid_hc_rx_parse_options(sk, pkt, opt, val, len); } static inline int ccid_hc_rx_insert_options(struct ccid *ccid, struct sock *sk, struct sk_buff *skb) { if (ccid->ccid_ops->ccid_hc_rx_insert_options != NULL) return ccid->ccid_ops->ccid_hc_rx_insert_options(sk, skb); return 0; } static inline void ccid_hc_rx_get_info(struct ccid *ccid, struct sock *sk, struct tcp_info *info) { if (ccid->ccid_ops->ccid_hc_rx_get_info != NULL) ccid->ccid_ops->ccid_hc_rx_get_info(sk, info); } static inline void ccid_hc_tx_get_info(struct ccid *ccid, struct sock *sk, struct tcp_info *info) { if (ccid->ccid_ops->ccid_hc_tx_get_info != NULL) ccid->ccid_ops->ccid_hc_tx_get_info(sk, info); } static inline int ccid_hc_rx_getsockopt(struct ccid *ccid, struct sock *sk, const int optname, int len, u32 __user *optval, int __user *optlen) { int rc = -ENOPROTOOPT; if (ccid != NULL && ccid->ccid_ops->ccid_hc_rx_getsockopt != NULL) rc = ccid->ccid_ops->ccid_hc_rx_getsockopt(sk, optname, len, optval, optlen); return rc; } static inline int ccid_hc_tx_getsockopt(struct ccid *ccid, struct sock *sk, const int optname, int len, u32 __user *optval, int __user *optlen) { int rc = -ENOPROTOOPT; if (ccid != NULL && ccid->ccid_ops->ccid_hc_tx_getsockopt != NULL) rc = ccid->ccid_ops->ccid_hc_tx_getsockopt(sk, optname, len, optval, optlen); return rc; } #endif /* _CCID_H */
6 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282 2283 2284 2285 2286 2287 2288 2289 2290 2291 2292 2293 2294 2295 2296 2297 2298 2299 2300 2301 2302 2303 2304 2305 2306 2307 2308 2309 2310 2311 2312 2313 2314 2315 2316 2317 2318 2319 2320 2321 2322 2323 2324 2325 2326 2327 2328 2329 2330 2331 2332 2333 2334 2335 2336 2337 2338 2339 2340 2341 2342 2343 2344 2345 2346 2347 2348 2349 2350 2351 2352 2353 2354 2355 2356 2357 2358 2359 2360 2361 2362 2363 2364 2365 2366 2367 2368 2369 2370 2371 2372 2373 2374 2375 2376 2377 2378 2379 2380 2381 2382 2383 2384 2385 2386 2387 2388 2389 2390 2391 2392 2393 2394 2395 2396 2397 2398 2399 2400 2401 2402 2403 2404 2405 2406 2407 2408 2409 2410 2411 2412 2413 2414 2415 2416 2417 2418 2419 2420 2421 2422 2423 2424 2425 2426 2427 2428 2429 2430 2431 2432 2433 2434 2435 2436 2437 2438 2439 2440 2441 2442 2443 2444 2445 2446 2447 2448 2449 2450 2451 2452 2453 2454 2455 2456 2457 2458 2459 2460 2461 2462 2463 2464 2465 2466 2467 2468 2469 2470 2471 2472 2473 2474 2475 2476 2477 2478 2479 2480 2481 2482 2483 2484 2485 2486 2487 2488 2489 2490 2491 2492 2493 2494 2495 2496 2497 2498 2499 2500 2501 2502 2503 2504 2505 2506 2507 2508 2509 2510 2511 2512 2513 2514 2515 2516 2517 2518 2519 2520 2521 2522 2523 2524 2525 2526 2527 2528 2529 2530 2531 2532 2533 2534 2535 2536 2537 2538 2539 2540 2541 2542 2543 2544 2545 2546 2547 2548 2549 2550 2551 2552 2553 2554 2555 2556 2557 2558 2559 2560 2561 2562 2563 2564 2565 2566 2567 2568 2569 2570 2571 2572 2573 2574 2575 2576 2577 2578 2579 2580 2581 2582 2583 2584 2585 2586 2587 2588 2589 2590 2591 2592 2593 2594 2595 2596 2597 2598 2599 2600 2601 2602 2603 2604 2605 2606 2607 2608 2609 2610 2611 2612 2613 2614 2615 2616 2617 2618 2619 2620 2621 2622 2623 2624 2625 2626 2627 2628 2629 2630 2631 2632 2633 2634 2635 2636 2637 2638 2639 2640 2641 2642 2643 2644 2645 2646 2647 2648 2649 2650 2651 2652 2653 2654 2655 2656 2657 2658 2659 2660 2661 2662 2663 2664 2665 2666 2667 2668 2669 2670 2671 2672 2673 2674 2675 2676 2677 2678 2679 2680 2681 2682 2683 2684 2685 2686 2687 2688 2689 2690 2691 2692 2693 2694 2695 2696 2697 2698 2699 2700 2701 2702 2703 2704 2705 2706 2707 // SPDX-License-Identifier: GPL-2.0 /* * * Copyright (C) 2019-2021 Paragon Software GmbH, All rights reserved. * */ #include <linux/blkdev.h> #include <linux/buffer_head.h> #include <linux/fs.h> #include <linux/kernel.h> #include "debug.h" #include "ntfs.h" #include "ntfs_fs.h" static const struct INDEX_NAMES { const __le16 *name; u8 name_len; } s_index_names[INDEX_MUTEX_TOTAL] = { { I30_NAME, ARRAY_SIZE(I30_NAME) }, { SII_NAME, ARRAY_SIZE(SII_NAME) }, { SDH_NAME, ARRAY_SIZE(SDH_NAME) }, { SO_NAME, ARRAY_SIZE(SO_NAME) }, { SQ_NAME, ARRAY_SIZE(SQ_NAME) }, { SR_NAME, ARRAY_SIZE(SR_NAME) }, }; /* * cmp_fnames - Compare two names in index. * * if l1 != 0 * Both names are little endian on-disk ATTR_FILE_NAME structs. * else * key1 - cpu_str, key2 - ATTR_FILE_NAME */ static int cmp_fnames(const void *key1, size_t l1, const void *key2, size_t l2, const void *data) { const struct ATTR_FILE_NAME *f2 = key2; const struct ntfs_sb_info *sbi = data; const struct ATTR_FILE_NAME *f1; u16 fsize2; bool both_case; if (l2 <= offsetof(struct ATTR_FILE_NAME, name)) return -1; fsize2 = fname_full_size(f2); if (l2 < fsize2) return -1; both_case = f2->type != FILE_NAME_DOS && !sbi->options->nocase; if (!l1) { const struct le_str *s2 = (struct le_str *)&f2->name_len; /* * If names are equal (case insensitive) * try to compare it case sensitive. */ return ntfs_cmp_names_cpu(key1, s2, sbi->upcase, both_case); } f1 = key1; return ntfs_cmp_names(f1->name, f1->name_len, f2->name, f2->name_len, sbi->upcase, both_case); } /* * cmp_uint - $SII of $Secure and $Q of Quota */ static int cmp_uint(const void *key1, size_t l1, const void *key2, size_t l2, const void *data) { const u32 *k1 = key1; const u32 *k2 = key2; if (l2 < sizeof(u32)) return -1; if (*k1 < *k2) return -1; if (*k1 > *k2) return 1; return 0; } /* * cmp_sdh - $SDH of $Secure */ static int cmp_sdh(const void *key1, size_t l1, const void *key2, size_t l2, const void *data) { const struct SECURITY_KEY *k1 = key1; const struct SECURITY_KEY *k2 = key2; u32 t1, t2; if (l2 < sizeof(struct SECURITY_KEY)) return -1; t1 = le32_to_cpu(k1->hash); t2 = le32_to_cpu(k2->hash); /* First value is a hash value itself. */ if (t1 < t2) return -1; if (t1 > t2) return 1; /* Second value is security Id. */ if (data) { t1 = le32_to_cpu(k1->sec_id); t2 = le32_to_cpu(k2->sec_id); if (t1 < t2) return -1; if (t1 > t2) return 1; } return 0; } /* * cmp_uints - $O of ObjId and "$R" for Reparse. */ static int cmp_uints(const void *key1, size_t l1, const void *key2, size_t l2, const void *data) { const __le32 *k1 = key1; const __le32 *k2 = key2; size_t count; if ((size_t)data == 1) { /* * ni_delete_all -> ntfs_remove_reparse -> * delete all with this reference. * k1, k2 - pointers to REPARSE_KEY */ k1 += 1; // Skip REPARSE_KEY.ReparseTag k2 += 1; // Skip REPARSE_KEY.ReparseTag if (l2 <= sizeof(int)) return -1; l2 -= sizeof(int); if (l1 <= sizeof(int)) return 1; l1 -= sizeof(int); } if (l2 < sizeof(int)) return -1; for (count = min(l1, l2) >> 2; count > 0; --count, ++k1, ++k2) { u32 t1 = le32_to_cpu(*k1); u32 t2 = le32_to_cpu(*k2); if (t1 > t2) return 1; if (t1 < t2) return -1; } if (l1 > l2) return 1; if (l1 < l2) return -1; return 0; } static inline NTFS_CMP_FUNC get_cmp_func(const struct INDEX_ROOT *root) { switch (root->type) { case ATTR_NAME: if (root->rule == NTFS_COLLATION_TYPE_FILENAME) return &cmp_fnames; break; case ATTR_ZERO: switch (root->rule) { case NTFS_COLLATION_TYPE_UINT: return &cmp_uint; case NTFS_COLLATION_TYPE_SECURITY_HASH: return &cmp_sdh; case NTFS_COLLATION_TYPE_UINTS: return &cmp_uints; default: break; } break; default: break; } return NULL; } struct bmp_buf { struct ATTRIB *b; struct mft_inode *mi; struct buffer_head *bh; ulong *buf; size_t bit; u32 nbits; u64 new_valid; }; static int bmp_buf_get(struct ntfs_index *indx, struct ntfs_inode *ni, size_t bit, struct bmp_buf *bbuf) { struct ATTRIB *b; size_t data_size, valid_size, vbo, off = bit >> 3; struct ntfs_sb_info *sbi = ni->mi.sbi; CLST vcn = off >> sbi->cluster_bits; struct ATTR_LIST_ENTRY *le = NULL; struct buffer_head *bh; struct super_block *sb; u32 blocksize; const struct INDEX_NAMES *in = &s_index_names[indx->type]; bbuf->bh = NULL; b = ni_find_attr(ni, NULL, &le, ATTR_BITMAP, in->name, in->name_len, &vcn, &bbuf->mi); bbuf->b = b; if (!b) return -EINVAL; if (!b->non_res) { data_size = le32_to_cpu(b->res.data_size); if (off >= data_size) return -EINVAL; bbuf->buf = (ulong *)resident_data(b); bbuf->bit = 0; bbuf->nbits = data_size * 8; return 0; } data_size = le64_to_cpu(b->nres.data_size); if (WARN_ON(off >= data_size)) { /* Looks like filesystem error. */ return -EINVAL; } valid_size = le64_to_cpu(b->nres.valid_size); bh = ntfs_bread_run(sbi, &indx->bitmap_run, off); if (!bh) return -EIO; if (IS_ERR(bh)) return PTR_ERR(bh); bbuf->bh = bh; if (buffer_locked(bh)) __wait_on_buffer(bh); lock_buffer(bh); sb = sbi->sb; blocksize = sb->s_blocksize; vbo = off & ~(size_t)sbi->block_mask; bbuf->new_valid = vbo + blocksize; if (bbuf->new_valid <= valid_size) bbuf->new_valid = 0; else if (bbuf->new_valid > data_size) bbuf->new_valid = data_size; if (vbo >= valid_size) { memset(bh->b_data, 0, blocksize); } else if (vbo + blocksize > valid_size) { u32 voff = valid_size & sbi->block_mask; memset(bh->b_data + voff, 0, blocksize - voff); } bbuf->buf = (ulong *)bh->b_data; bbuf->bit = 8 * (off & ~(size_t)sbi->block_mask); bbuf->nbits = 8 * blocksize; return 0; } static void bmp_buf_put(struct bmp_buf *bbuf, bool dirty) { struct buffer_head *bh = bbuf->bh; struct ATTRIB *b = bbuf->b; if (!bh) { if (b && !b->non_res && dirty) bbuf->mi->dirty = true; return; } if (!dirty) goto out; if (bbuf->new_valid) { b->nres.valid_size = cpu_to_le64(bbuf->new_valid); bbuf->mi->dirty = true; } set_buffer_uptodate(bh); mark_buffer_dirty(bh); out: unlock_buffer(bh); put_bh(bh); } /* * indx_mark_used - Mark the bit @bit as used. */ static int indx_mark_used(struct ntfs_index *indx, struct ntfs_inode *ni, size_t bit) { int err; struct bmp_buf bbuf; err = bmp_buf_get(indx, ni, bit, &bbuf); if (err) return err; __set_bit_le(bit - bbuf.bit, bbuf.buf); bmp_buf_put(&bbuf, true); return 0; } /* * indx_mark_free - Mark the bit @bit as free. */ static int indx_mark_free(struct ntfs_index *indx, struct ntfs_inode *ni, size_t bit) { int err; struct bmp_buf bbuf; err = bmp_buf_get(indx, ni, bit, &bbuf); if (err) return err; __clear_bit_le(bit - bbuf.bit, bbuf.buf); bmp_buf_put(&bbuf, true); return 0; } /* * scan_nres_bitmap * * If ntfs_readdir calls this function (indx_used_bit -> scan_nres_bitmap), * inode is shared locked and no ni_lock. * Use rw_semaphore for read/write access to bitmap_run. */ static int scan_nres_bitmap(struct ntfs_inode *ni, struct ATTRIB *bitmap, struct ntfs_index *indx, size_t from, bool (*fn)(const ulong *buf, u32 bit, u32 bits, size_t *ret), size_t *ret) { struct ntfs_sb_info *sbi = ni->mi.sbi; struct super_block *sb = sbi->sb; struct runs_tree *run = &indx->bitmap_run; struct rw_semaphore *lock = &indx->run_lock; u32 nbits = sb->s_blocksize * 8; u32 blocksize = sb->s_blocksize; u64 valid_size = le64_to_cpu(bitmap->nres.valid_size); u64 data_size = le64_to_cpu(bitmap->nres.data_size); sector_t eblock = bytes_to_block(sb, data_size); size_t vbo = from >> 3; sector_t blk = (vbo & sbi->cluster_mask) >> sb->s_blocksize_bits; sector_t vblock = vbo >> sb->s_blocksize_bits; sector_t blen, block; CLST lcn, clen, vcn, vcn_next; size_t idx; struct buffer_head *bh; bool ok; *ret = MINUS_ONE_T; if (vblock >= eblock) return 0; from &= nbits - 1; vcn = vbo >> sbi->cluster_bits; down_read(lock); ok = run_lookup_entry(run, vcn, &lcn, &clen, &idx); up_read(lock); next_run: if (!ok) { int err; const struct INDEX_NAMES *name = &s_index_names[indx->type]; down_write(lock); err = attr_load_runs_vcn(ni, ATTR_BITMAP, name->name, name->name_len, run, vcn); up_write(lock); if (err) return err; down_read(lock); ok = run_lookup_entry(run, vcn, &lcn, &clen, &idx); up_read(lock); if (!ok) return -EINVAL; } blen = (sector_t)clen * sbi->blocks_per_cluster; block = (sector_t)lcn * sbi->blocks_per_cluster; for (; blk < blen; blk++, from = 0) { bh = ntfs_bread(sb, block + blk); if (!bh) return -EIO; vbo = (u64)vblock << sb->s_blocksize_bits; if (vbo >= valid_size) { memset(bh->b_data, 0, blocksize); } else if (vbo + blocksize > valid_size) { u32 voff = valid_size & sbi->block_mask; memset(bh->b_data + voff, 0, blocksize - voff); } if (vbo + blocksize > data_size) nbits = 8 * (data_size - vbo); ok = nbits > from ? (*fn)((ulong *)bh->b_data, from, nbits, ret) : false; put_bh(bh); if (ok) { *ret += 8 * vbo; return 0; } if (++vblock >= eblock) { *ret = MINUS_ONE_T; return 0; } } blk = 0; vcn_next = vcn + clen; down_read(lock); ok = run_get_entry(run, ++idx, &vcn, &lcn, &clen) && vcn == vcn_next; if (!ok) vcn = vcn_next; up_read(lock); goto next_run; } static bool scan_for_free(const ulong *buf, u32 bit, u32 bits, size_t *ret) { size_t pos = find_next_zero_bit_le(buf, bits, bit); if (pos >= bits) return false; *ret = pos; return true; } /* * indx_find_free - Look for free bit. * * Return: -1 if no free bits. */ static int indx_find_free(struct ntfs_index *indx, struct ntfs_inode *ni, size_t *bit, struct ATTRIB **bitmap) { struct ATTRIB *b; struct ATTR_LIST_ENTRY *le = NULL; const struct INDEX_NAMES *in = &s_index_names[indx->type]; int err; b = ni_find_attr(ni, NULL, &le, ATTR_BITMAP, in->name, in->name_len, NULL, NULL); if (!b) return -ENOENT; *bitmap = b; *bit = MINUS_ONE_T; if (!b->non_res) { u32 nbits = 8 * le32_to_cpu(b->res.data_size); size_t pos = find_next_zero_bit_le(resident_data(b), nbits, 0); if (pos < nbits) *bit = pos; } else { err = scan_nres_bitmap(ni, b, indx, 0, &scan_for_free, bit); if (err) return err; } return 0; } static bool scan_for_used(const ulong *buf, u32 bit, u32 bits, size_t *ret) { size_t pos = find_next_bit_le(buf, bits, bit); if (pos >= bits) return false; *ret = pos; return true; } /* * indx_used_bit - Look for used bit. * * Return: MINUS_ONE_T if no used bits. */ int indx_used_bit(struct ntfs_index *indx, struct ntfs_inode *ni, size_t *bit) { struct ATTRIB *b; struct ATTR_LIST_ENTRY *le = NULL; size_t from = *bit; const struct INDEX_NAMES *in = &s_index_names[indx->type]; int err; b = ni_find_attr(ni, NULL, &le, ATTR_BITMAP, in->name, in->name_len, NULL, NULL); if (!b) return -ENOENT; *bit = MINUS_ONE_T; if (!b->non_res) { u32 nbits = le32_to_cpu(b->res.data_size) * 8; size_t pos = find_next_bit_le(resident_data(b), nbits, from); if (pos < nbits) *bit = pos; } else { err = scan_nres_bitmap(ni, b, indx, from, &scan_for_used, bit); if (err) return err; } return 0; } /* * hdr_find_split * * Find a point at which the index allocation buffer would like to be split. * NOTE: This function should never return 'END' entry NULL returns on error. */ static const struct NTFS_DE *hdr_find_split(const struct INDEX_HDR *hdr) { size_t o; const struct NTFS_DE *e = hdr_first_de(hdr); u32 used_2 = le32_to_cpu(hdr->used) >> 1; u16 esize; if (!e || de_is_last(e)) return NULL; esize = le16_to_cpu(e->size); for (o = le32_to_cpu(hdr->de_off) + esize; o < used_2; o += esize) { const struct NTFS_DE *p = e; e = Add2Ptr(hdr, o); /* We must not return END entry. */ if (de_is_last(e)) return p; esize = le16_to_cpu(e->size); } return e; } /* * hdr_insert_head - Insert some entries at the beginning of the buffer. * * It is used to insert entries into a newly-created buffer. */ static const struct NTFS_DE *hdr_insert_head(struct INDEX_HDR *hdr, const void *ins, u32 ins_bytes) { u32 to_move; struct NTFS_DE *e = hdr_first_de(hdr); u32 used = le32_to_cpu(hdr->used); if (!e) return NULL; /* Now we just make room for the inserted entries and jam it in. */ to_move = used - le32_to_cpu(hdr->de_off); memmove(Add2Ptr(e, ins_bytes), e, to_move); memcpy(e, ins, ins_bytes); hdr->used = cpu_to_le32(used + ins_bytes); return e; } /* * index_hdr_check * * return true if INDEX_HDR is valid */ static bool index_hdr_check(const struct INDEX_HDR *hdr, u32 bytes) { u32 end = le32_to_cpu(hdr->used); u32 tot = le32_to_cpu(hdr->total); u32 off = le32_to_cpu(hdr->de_off); if (!IS_ALIGNED(off, 8) || tot > bytes || end > tot || off + sizeof(struct NTFS_DE) > end) { /* incorrect index buffer. */ return false; } return true; } /* * index_buf_check * * return true if INDEX_BUFFER seems is valid */ static bool index_buf_check(const struct INDEX_BUFFER *ib, u32 bytes, const CLST *vbn) { const struct NTFS_RECORD_HEADER *rhdr = &ib->rhdr; u16 fo = le16_to_cpu(rhdr->fix_off); u16 fn = le16_to_cpu(rhdr->fix_num); if (bytes <= offsetof(struct INDEX_BUFFER, ihdr) || rhdr->sign != NTFS_INDX_SIGNATURE || fo < sizeof(struct INDEX_BUFFER) /* Check index buffer vbn. */ || (vbn && *vbn != le64_to_cpu(ib->vbn)) || (fo % sizeof(short)) || fo + fn * sizeof(short) >= bytes || fn != ((bytes >> SECTOR_SHIFT) + 1)) { /* incorrect index buffer. */ return false; } return index_hdr_check(&ib->ihdr, bytes - offsetof(struct INDEX_BUFFER, ihdr)); } void fnd_clear(struct ntfs_fnd *fnd) { int i; for (i = fnd->level - 1; i >= 0; i--) { struct indx_node *n = fnd->nodes[i]; if (!n) continue; put_indx_node(n); fnd->nodes[i] = NULL; } fnd->level = 0; fnd->root_de = NULL; } static int fnd_push(struct ntfs_fnd *fnd, struct indx_node *n, struct NTFS_DE *e) { int i = fnd->level; if (i < 0 || i >= ARRAY_SIZE(fnd->nodes)) return -EINVAL; fnd->nodes[i] = n; fnd->de[i] = e; fnd->level += 1; return 0; } static struct indx_node *fnd_pop(struct ntfs_fnd *fnd) { struct indx_node *n; int i = fnd->level; i -= 1; n = fnd->nodes[i]; fnd->nodes[i] = NULL; fnd->level = i; return n; } static bool fnd_is_empty(struct ntfs_fnd *fnd) { if (!fnd->level) return !fnd->root_de; return !fnd->de[fnd->level - 1]; } /* * hdr_find_e - Locate an entry the index buffer. * * If no matching entry is found, it returns the first entry which is greater * than the desired entry If the search key is greater than all the entries the * buffer, it returns the 'end' entry. This function does a binary search of the * current index buffer, for the first entry that is <= to the search value. * * Return: NULL if error. */ static struct NTFS_DE *hdr_find_e(const struct ntfs_index *indx, const struct INDEX_HDR *hdr, const void *key, size_t key_len, const void *ctx, int *diff) { struct NTFS_DE *e, *found = NULL; NTFS_CMP_FUNC cmp = indx->cmp; int min_idx = 0, mid_idx, max_idx = 0; int diff2; int table_size = 8; u32 e_size, e_key_len; u32 end = le32_to_cpu(hdr->used); u32 off = le32_to_cpu(hdr->de_off); u32 total = le32_to_cpu(hdr->total); u16 offs[128]; if (unlikely(!cmp)) return NULL; fill_table: if (end > total) return NULL; if (off + sizeof(struct NTFS_DE) > end) return NULL; e = Add2Ptr(hdr, off); e_size = le16_to_cpu(e->size); if (e_size < sizeof(struct NTFS_DE) || off + e_size > end) return NULL; if (!de_is_last(e)) { offs[max_idx] = off; off += e_size; max_idx++; if (max_idx < table_size) goto fill_table; max_idx--; } binary_search: e_key_len = le16_to_cpu(e->key_size); diff2 = (*cmp)(key, key_len, e + 1, e_key_len, ctx); if (diff2 > 0) { if (found) { min_idx = mid_idx + 1; } else { if (de_is_last(e)) return NULL; max_idx = 0; table_size = min(table_size * 2, (int)ARRAY_SIZE(offs)); goto fill_table; } } else if (diff2 < 0) { if (found) max_idx = mid_idx - 1; else max_idx--; found = e; } else { *diff = 0; return e; } if (min_idx > max_idx) { *diff = -1; return found; } mid_idx = (min_idx + max_idx) >> 1; e = Add2Ptr(hdr, offs[mid_idx]); goto binary_search; } /* * hdr_insert_de - Insert an index entry into the buffer. * * 'before' should be a pointer previously returned from hdr_find_e. */ static struct NTFS_DE *hdr_insert_de(const struct ntfs_index *indx, struct INDEX_HDR *hdr, const struct NTFS_DE *de, struct NTFS_DE *before, const void *ctx) { int diff; size_t off = PtrOffset(hdr, before); u32 used = le32_to_cpu(hdr->used); u32 total = le32_to_cpu(hdr->total); u16 de_size = le16_to_cpu(de->size); /* First, check to see if there's enough room. */ if (used + de_size > total) return NULL; /* We know there's enough space, so we know we'll succeed. */ if (before) { /* Check that before is inside Index. */ if (off >= used || off < le32_to_cpu(hdr->de_off) || off + le16_to_cpu(before->size) > total) { return NULL; } goto ok; } /* No insert point is applied. Get it manually. */ before = hdr_find_e(indx, hdr, de + 1, le16_to_cpu(de->key_size), ctx, &diff); if (!before) return NULL; off = PtrOffset(hdr, before); ok: /* Now we just make room for the entry and jam it in. */ memmove(Add2Ptr(before, de_size), before, used - off); hdr->used = cpu_to_le32(used + de_size); memcpy(before, de, de_size); return before; } /* * hdr_delete_de - Remove an entry from the index buffer. */ static inline struct NTFS_DE *hdr_delete_de(struct INDEX_HDR *hdr, struct NTFS_DE *re) { u32 used = le32_to_cpu(hdr->used); u16 esize = le16_to_cpu(re->size); u32 off = PtrOffset(hdr, re); int bytes = used - (off + esize); /* check INDEX_HDR valid before using INDEX_HDR */ if (!check_index_header(hdr, le32_to_cpu(hdr->total))) return NULL; if (off >= used || esize < sizeof(struct NTFS_DE) || bytes < sizeof(struct NTFS_DE)) return NULL; hdr->used = cpu_to_le32(used - esize); memmove(re, Add2Ptr(re, esize), bytes); return re; } void indx_clear(struct ntfs_index *indx) { run_close(&indx->alloc_run); run_close(&indx->bitmap_run); } int indx_init(struct ntfs_index *indx, struct ntfs_sb_info *sbi, const struct ATTRIB *attr, enum index_mutex_classed type) { u32 t32; const struct INDEX_ROOT *root = resident_data(attr); t32 = le32_to_cpu(attr->res.data_size); if (t32 <= offsetof(struct INDEX_ROOT, ihdr) || !index_hdr_check(&root->ihdr, t32 - offsetof(struct INDEX_ROOT, ihdr))) { goto out; } /* Check root fields. */ if (!root->index_block_clst) goto out; indx->type = type; indx->idx2vbn_bits = __ffs(root->index_block_clst); t32 = le32_to_cpu(root->index_block_size); indx->index_bits = blksize_bits(t32); /* Check index record size. */ if (t32 < sbi->cluster_size) { /* Index record is smaller than a cluster, use 512 blocks. */ if (t32 != root->index_block_clst * SECTOR_SIZE) goto out; /* Check alignment to a cluster. */ if ((sbi->cluster_size >> SECTOR_SHIFT) & (root->index_block_clst - 1)) { goto out; } indx->vbn2vbo_bits = SECTOR_SHIFT; } else { /* Index record must be a multiple of cluster size. */ if (t32 != root->index_block_clst << sbi->cluster_bits) goto out; indx->vbn2vbo_bits = sbi->cluster_bits; } init_rwsem(&indx->run_lock); indx->cmp = get_cmp_func(root); if (!indx->cmp) goto out; return 0; out: ntfs_set_state(sbi, NTFS_DIRTY_DIRTY); return -EINVAL; } static struct indx_node *indx_new(struct ntfs_index *indx, struct ntfs_inode *ni, CLST vbn, const __le64 *sub_vbn) { int err; struct NTFS_DE *e; struct indx_node *r; struct INDEX_HDR *hdr; struct INDEX_BUFFER *index; u64 vbo = (u64)vbn << indx->vbn2vbo_bits; u32 bytes = 1u << indx->index_bits; u16 fn; u32 eo; r = kzalloc(sizeof(struct indx_node), GFP_NOFS); if (!r) return ERR_PTR(-ENOMEM); index = kzalloc(bytes, GFP_NOFS); if (!index) { kfree(r); return ERR_PTR(-ENOMEM); } err = ntfs_get_bh(ni->mi.sbi, &indx->alloc_run, vbo, bytes, &r->nb); if (err) { kfree(index); kfree(r); return ERR_PTR(err); } /* Create header. */ index->rhdr.sign = NTFS_INDX_SIGNATURE; index->rhdr.fix_off = cpu_to_le16(sizeof(struct INDEX_BUFFER)); // 0x28 fn = (bytes >> SECTOR_SHIFT) + 1; // 9 index->rhdr.fix_num = cpu_to_le16(fn); index->vbn = cpu_to_le64(vbn); hdr = &index->ihdr; eo = ALIGN(sizeof(struct INDEX_BUFFER) + fn * sizeof(short), 8); hdr->de_off = cpu_to_le32(eo); e = Add2Ptr(hdr, eo); if (sub_vbn) { e->flags = NTFS_IE_LAST | NTFS_IE_HAS_SUBNODES; e->size = cpu_to_le16(sizeof(struct NTFS_DE) + sizeof(u64)); hdr->used = cpu_to_le32(eo + sizeof(struct NTFS_DE) + sizeof(u64)); de_set_vbn_le(e, *sub_vbn); hdr->flags = NTFS_INDEX_HDR_HAS_SUBNODES; } else { e->size = cpu_to_le16(sizeof(struct NTFS_DE)); hdr->used = cpu_to_le32(eo + sizeof(struct NTFS_DE)); e->flags = NTFS_IE_LAST; } hdr->total = cpu_to_le32(bytes - offsetof(struct INDEX_BUFFER, ihdr)); r->index = index; return r; } struct INDEX_ROOT *indx_get_root(struct ntfs_index *indx, struct ntfs_inode *ni, struct ATTRIB **attr, struct mft_inode **mi) { struct ATTR_LIST_ENTRY *le = NULL; struct ATTRIB *a; const struct INDEX_NAMES *in = &s_index_names[indx->type]; struct INDEX_ROOT *root; a = ni_find_attr(ni, NULL, &le, ATTR_ROOT, in->name, in->name_len, NULL, mi); if (!a) return NULL; if (attr) *attr = a; root = resident_data_ex(a, sizeof(struct INDEX_ROOT)); /* length check */ if (root && offsetof(struct INDEX_ROOT, ihdr) + le32_to_cpu(root->ihdr.used) > le32_to_cpu(a->res.data_size)) { return NULL; } return root; } static int indx_write(struct ntfs_index *indx, struct ntfs_inode *ni, struct indx_node *node, int sync) { struct INDEX_BUFFER *ib = node->index; return ntfs_write_bh(ni->mi.sbi, &ib->rhdr, &node->nb, sync); } /* * indx_read * * If ntfs_readdir calls this function * inode is shared locked and no ni_lock. * Use rw_semaphore for read/write access to alloc_run. */ int indx_read(struct ntfs_index *indx, struct ntfs_inode *ni, CLST vbn, struct indx_node **node) { int err; struct INDEX_BUFFER *ib; struct runs_tree *run = &indx->alloc_run; struct rw_semaphore *lock = &indx->run_lock; u64 vbo = (u64)vbn << indx->vbn2vbo_bits; u32 bytes = 1u << indx->index_bits; struct indx_node *in = *node; const struct INDEX_NAMES *name; if (!in) { in = kzalloc(sizeof(struct indx_node), GFP_NOFS); if (!in) return -ENOMEM; } else { nb_put(&in->nb); } ib = in->index; if (!ib) { ib = kmalloc(bytes, GFP_NOFS); if (!ib) { err = -ENOMEM; goto out; } } down_read(lock); err = ntfs_read_bh(ni->mi.sbi, run, vbo, &ib->rhdr, bytes, &in->nb); up_read(lock); if (!err) goto ok; if (err == -E_NTFS_FIXUP) goto ok; if (err != -ENOENT) goto out; name = &s_index_names[indx->type]; down_write(lock); err = attr_load_runs_range(ni, ATTR_ALLOC, name->name, name->name_len, run, vbo, vbo + bytes); up_write(lock); if (err) goto out; down_read(lock); err = ntfs_read_bh(ni->mi.sbi, run, vbo, &ib->rhdr, bytes, &in->nb); up_read(lock); if (err == -E_NTFS_FIXUP) goto ok; if (err) goto out; ok: if (!index_buf_check(ib, bytes, &vbn)) { ntfs_inode_err(&ni->vfs_inode, "directory corrupted"); ntfs_set_state(ni->mi.sbi, NTFS_DIRTY_ERROR); err = -EINVAL; goto out; } if (err == -E_NTFS_FIXUP) { ntfs_write_bh(ni->mi.sbi, &ib->rhdr, &in->nb, 0); err = 0; } /* check for index header length */ if (offsetof(struct INDEX_BUFFER, ihdr) + le32_to_cpu(ib->ihdr.used) > bytes) { err = -EINVAL; goto out; } in->index = ib; *node = in; out: if (err == -E_NTFS_CORRUPT) { ntfs_inode_err(&ni->vfs_inode, "directory corrupted"); ntfs_set_state(ni->mi.sbi, NTFS_DIRTY_ERROR); err = -EINVAL; } if (ib != in->index) kfree(ib); if (*node != in) { nb_put(&in->nb); kfree(in); } return err; } /* * indx_find - Scan NTFS directory for given entry. */ int indx_find(struct ntfs_index *indx, struct ntfs_inode *ni, const struct INDEX_ROOT *root, const void *key, size_t key_len, const void *ctx, int *diff, struct NTFS_DE **entry, struct ntfs_fnd *fnd) { int err; struct NTFS_DE *e; struct indx_node *node; if (!root) root = indx_get_root(&ni->dir, ni, NULL, NULL); if (!root) { /* Should not happen. */ return -EINVAL; } /* Check cache. */ e = fnd->level ? fnd->de[fnd->level - 1] : fnd->root_de; if (e && !de_is_last(e) && !(*indx->cmp)(key, key_len, e + 1, le16_to_cpu(e->key_size), ctx)) { *entry = e; *diff = 0; return 0; } /* Soft finder reset. */ fnd_clear(fnd); /* Lookup entry that is <= to the search value. */ e = hdr_find_e(indx, &root->ihdr, key, key_len, ctx, diff); if (!e) return -EINVAL; fnd->root_de = e; for (;;) { node = NULL; if (*diff >= 0 || !de_has_vcn_ex(e)) break; /* Read next level. */ err = indx_read(indx, ni, de_get_vbn(e), &node); if (err) { /* io error? */ return err; } /* Lookup entry that is <= to the search value. */ e = hdr_find_e(indx, &node->index->ihdr, key, key_len, ctx, diff); if (!e) { put_indx_node(node); return -EINVAL; } fnd_push(fnd, node, e); } *entry = e; return 0; } int indx_find_sort(struct ntfs_index *indx, struct ntfs_inode *ni, const struct INDEX_ROOT *root, struct NTFS_DE **entry, struct ntfs_fnd *fnd) { int err; struct indx_node *n = NULL; struct NTFS_DE *e; size_t iter = 0; int level = fnd->level; if (!*entry) { /* Start find. */ e = hdr_first_de(&root->ihdr); if (!e) return 0; fnd_clear(fnd); fnd->root_de = e; } else if (!level) { if (de_is_last(fnd->root_de)) { *entry = NULL; return 0; } e = hdr_next_de(&root->ihdr, fnd->root_de); if (!e) return -EINVAL; fnd->root_de = e; } else { n = fnd->nodes[level - 1]; e = fnd->de[level - 1]; if (de_is_last(e)) goto pop_level; e = hdr_next_de(&n->index->ihdr, e); if (!e) return -EINVAL; fnd->de[level - 1] = e; } /* Just to avoid tree cycle. */ next_iter: if (iter++ >= 1000) return -EINVAL; while (de_has_vcn_ex(e)) { if (le16_to_cpu(e->size) < sizeof(struct NTFS_DE) + sizeof(u64)) { if (n) { fnd_pop(fnd); kfree(n); } return -EINVAL; } /* Read next level. */ err = indx_read(indx, ni, de_get_vbn(e), &n); if (err) return err; /* Try next level. */ e = hdr_first_de(&n->index->ihdr); if (!e) { kfree(n); return -EINVAL; } fnd_push(fnd, n, e); } if (le16_to_cpu(e->size) > sizeof(struct NTFS_DE)) { *entry = e; return 0; } pop_level: for (;;) { if (!de_is_last(e)) goto next_iter; /* Pop one level. */ if (n) { fnd_pop(fnd); kfree(n); } level = fnd->level; if (level) { n = fnd->nodes[level - 1]; e = fnd->de[level - 1]; } else if (fnd->root_de) { n = NULL; e = fnd->root_de; fnd->root_de = NULL; } else { *entry = NULL; return 0; } if (le16_to_cpu(e->size) > sizeof(struct NTFS_DE)) { *entry = e; if (!fnd->root_de) fnd->root_de = e; return 0; } } } int indx_find_raw(struct ntfs_index *indx, struct ntfs_inode *ni, const struct INDEX_ROOT *root, struct NTFS_DE **entry, size_t *off, struct ntfs_fnd *fnd) { int err; struct indx_node *n = NULL; struct NTFS_DE *e = NULL; struct NTFS_DE *e2; size_t bit; CLST next_used_vbn; CLST next_vbn; u32 record_size = ni->mi.sbi->record_size; /* Use non sorted algorithm. */ if (!*entry) { /* This is the first call. */ e = hdr_first_de(&root->ihdr); if (!e) return 0; fnd_clear(fnd); fnd->root_de = e; /* The first call with setup of initial element. */ if (*off >= record_size) { next_vbn = (((*off - record_size) >> indx->index_bits)) << indx->idx2vbn_bits; /* Jump inside cycle 'for'. */ goto next; } /* Start enumeration from root. */ *off = 0; } else if (!fnd->root_de) return -EINVAL; for (;;) { /* Check if current entry can be used. */ if (e && le16_to_cpu(e->size) > sizeof(struct NTFS_DE)) goto ok; if (!fnd->level) { /* Continue to enumerate root. */ if (!de_is_last(fnd->root_de)) { e = hdr_next_de(&root->ihdr, fnd->root_de); if (!e) return -EINVAL; fnd->root_de = e; continue; } /* Start to enumerate indexes from 0. */ next_vbn = 0; } else { /* Continue to enumerate indexes. */ e2 = fnd->de[fnd->level - 1]; n = fnd->nodes[fnd->level - 1]; if (!de_is_last(e2)) { e = hdr_next_de(&n->index->ihdr, e2); if (!e) return -EINVAL; fnd->de[fnd->level - 1] = e; continue; } /* Continue with next index. */ next_vbn = le64_to_cpu(n->index->vbn) + root->index_block_clst; } next: /* Release current index. */ if (n) { fnd_pop(fnd); put_indx_node(n); n = NULL; } /* Skip all free indexes. */ bit = next_vbn >> indx->idx2vbn_bits; err = indx_used_bit(indx, ni, &bit); if (err == -ENOENT || bit == MINUS_ONE_T) { /* No used indexes. */ *entry = NULL; return 0; } next_used_vbn = bit << indx->idx2vbn_bits; /* Read buffer into memory. */ err = indx_read(indx, ni, next_used_vbn, &n); if (err) return err; e = hdr_first_de(&n->index->ihdr); fnd_push(fnd, n, e); if (!e) return -EINVAL; } ok: /* Return offset to restore enumerator if necessary. */ if (!n) { /* 'e' points in root, */ *off = PtrOffset(&root->ihdr, e); } else { /* 'e' points in index, */ *off = (le64_to_cpu(n->index->vbn) << indx->vbn2vbo_bits) + record_size + PtrOffset(&n->index->ihdr, e); } *entry = e; return 0; } /* * indx_create_allocate - Create "Allocation + Bitmap" attributes. */ static int indx_create_allocate(struct ntfs_index *indx, struct ntfs_inode *ni, CLST *vbn) { int err; struct ntfs_sb_info *sbi = ni->mi.sbi; struct ATTRIB *bitmap; struct ATTRIB *alloc; u32 data_size = 1u << indx->index_bits; u32 alloc_size = ntfs_up_cluster(sbi, data_size); CLST len = alloc_size >> sbi->cluster_bits; const struct INDEX_NAMES *in = &s_index_names[indx->type]; CLST alen; struct runs_tree run; run_init(&run); err = attr_allocate_clusters(sbi, &run, 0, 0, len, NULL, ALLOCATE_DEF, &alen, 0, NULL, NULL); if (err) goto out; err = ni_insert_nonresident(ni, ATTR_ALLOC, in->name, in->name_len, &run, 0, len, 0, &alloc, NULL, NULL); if (err) goto out1; alloc->nres.valid_size = alloc->nres.data_size = cpu_to_le64(data_size); err = ni_insert_resident(ni, ntfs3_bitmap_size(1), ATTR_BITMAP, in->name, in->name_len, &bitmap, NULL, NULL); if (err) goto out2; if (in->name == I30_NAME) { i_size_write(&ni->vfs_inode, data_size); inode_set_bytes(&ni->vfs_inode, alloc_size); } memcpy(&indx->alloc_run, &run, sizeof(run)); *vbn = 0; return 0; out2: mi_remove_attr(NULL, &ni->mi, alloc); out1: run_deallocate(sbi, &run, false); out: return err; } /* * indx_add_allocate - Add clusters to index. */ static int indx_add_allocate(struct ntfs_index *indx, struct ntfs_inode *ni, CLST *vbn) { int err; size_t bit; u64 data_size; u64 bmp_size, bmp_size_v; struct ATTRIB *bmp, *alloc; struct mft_inode *mi; const struct INDEX_NAMES *in = &s_index_names[indx->type]; err = indx_find_free(indx, ni, &bit, &bmp); if (err) goto out1; if (bit != MINUS_ONE_T) { bmp = NULL; } else { if (bmp->non_res) { bmp_size = le64_to_cpu(bmp->nres.data_size); bmp_size_v = le64_to_cpu(bmp->nres.valid_size); } else { bmp_size = bmp_size_v = le32_to_cpu(bmp->res.data_size); } bit = bmp_size << 3; } data_size = (u64)(bit + 1) << indx->index_bits; if (bmp) { /* Increase bitmap. */ err = attr_set_size(ni, ATTR_BITMAP, in->name, in->name_len, &indx->bitmap_run, ntfs3_bitmap_size(bit + 1), NULL, true, NULL); if (err) goto out1; } alloc = ni_find_attr(ni, NULL, NULL, ATTR_ALLOC, in->name, in->name_len, NULL, &mi); if (!alloc) { err = -EINVAL; if (bmp) goto out2; goto out1; } if (data_size <= le64_to_cpu(alloc->nres.data_size)) { /* Reuse index. */ goto out; } /* Increase allocation. */ err = attr_set_size(ni, ATTR_ALLOC, in->name, in->name_len, &indx->alloc_run, data_size, &data_size, true, NULL); if (err) { if (bmp) goto out2; goto out1; } if (in->name == I30_NAME) i_size_write(&ni->vfs_inode, data_size); out: *vbn = bit << indx->idx2vbn_bits; return 0; out2: /* Ops. No space? */ attr_set_size(ni, ATTR_BITMAP, in->name, in->name_len, &indx->bitmap_run, bmp_size, &bmp_size_v, false, NULL); out1: return err; } /* * indx_insert_into_root - Attempt to insert an entry into the index root. * * @undo - True if we undoing previous remove. * If necessary, it will twiddle the index b-tree. */ static int indx_insert_into_root(struct ntfs_index *indx, struct ntfs_inode *ni, const struct NTFS_DE *new_de, struct NTFS_DE *root_de, const void *ctx, struct ntfs_fnd *fnd, bool undo) { int err = 0; struct NTFS_DE *e, *e0, *re; struct mft_inode *mi; struct ATTRIB *attr; struct INDEX_HDR *hdr; struct indx_node *n; CLST new_vbn; __le64 *sub_vbn, t_vbn; u16 new_de_size; u32 hdr_used, hdr_total, asize, to_move; u32 root_size, new_root_size; struct ntfs_sb_info *sbi; int ds_root; struct INDEX_ROOT *root, *a_root; /* Get the record this root placed in. */ root = indx_get_root(indx, ni, &attr, &mi); if (!root) return -EINVAL; /* * Try easy case: * hdr_insert_de will succeed if there's * room the root for the new entry. */ hdr = &root->ihdr; sbi = ni->mi.sbi; new_de_size = le16_to_cpu(new_de->size); hdr_used = le32_to_cpu(hdr->used); hdr_total = le32_to_cpu(hdr->total); asize = le32_to_cpu(attr->size); root_size = le32_to_cpu(attr->res.data_size); ds_root = new_de_size + hdr_used - hdr_total; /* If 'undo' is set then reduce requirements. */ if ((undo || asize + ds_root < sbi->max_bytes_per_attr) && mi_resize_attr(mi, attr, ds_root)) { hdr->total = cpu_to_le32(hdr_total + ds_root); e = hdr_insert_de(indx, hdr, new_de, root_de, ctx); WARN_ON(!e); fnd_clear(fnd); fnd->root_de = e; return 0; } /* Make a copy of root attribute to restore if error. */ a_root = kmemdup(attr, asize, GFP_NOFS); if (!a_root) return -ENOMEM; /* * Copy all the non-end entries from * the index root to the new buffer. */ to_move = 0; e0 = hdr_first_de(hdr); /* Calculate the size to copy. */ for (e = e0;; e = hdr_next_de(hdr, e)) { if (!e) { err = -EINVAL; goto out_free_root; } if (de_is_last(e)) break; to_move += le16_to_cpu(e->size); } if (!to_move) { re = NULL; } else { re = kmemdup(e0, to_move, GFP_NOFS); if (!re) { err = -ENOMEM; goto out_free_root; } } sub_vbn = NULL; if (de_has_vcn(e)) { t_vbn = de_get_vbn_le(e); sub_vbn = &t_vbn; } new_root_size = sizeof(struct INDEX_ROOT) + sizeof(struct NTFS_DE) + sizeof(u64); ds_root = new_root_size - root_size; if (ds_root > 0 && asize + ds_root > sbi->max_bytes_per_attr) { /* Make root external. */ err = -EOPNOTSUPP; goto out_free_re; } if (ds_root) mi_resize_attr(mi, attr, ds_root); /* Fill first entry (vcn will be set later). */ e = (struct NTFS_DE *)(root + 1); memset(e, 0, sizeof(struct NTFS_DE)); e->size = cpu_to_le16(sizeof(struct NTFS_DE) + sizeof(u64)); e->flags = NTFS_IE_HAS_SUBNODES | NTFS_IE_LAST; hdr->flags = NTFS_INDEX_HDR_HAS_SUBNODES; hdr->used = hdr->total = cpu_to_le32(new_root_size - offsetof(struct INDEX_ROOT, ihdr)); fnd->root_de = hdr_first_de(hdr); mi->dirty = true; /* Create alloc and bitmap attributes (if not). */ err = run_is_empty(&indx->alloc_run) ? indx_create_allocate(indx, ni, &new_vbn) : indx_add_allocate(indx, ni, &new_vbn); /* Layout of record may be changed, so rescan root. */ root = indx_get_root(indx, ni, &attr, &mi); if (!root) { /* Bug? */ ntfs_set_state(sbi, NTFS_DIRTY_ERROR); err = -EINVAL; goto out_free_re; } if (err) { /* Restore root. */ if (mi_resize_attr(mi, attr, -ds_root)) { memcpy(attr, a_root, asize); } else { /* Bug? */ ntfs_set_state(sbi, NTFS_DIRTY_ERROR); } goto out_free_re; } e = (struct NTFS_DE *)(root + 1); *(__le64 *)(e + 1) = cpu_to_le64(new_vbn); mi->dirty = true; /* Now we can create/format the new buffer and copy the entries into. */ n = indx_new(indx, ni, new_vbn, sub_vbn); if (IS_ERR(n)) { err = PTR_ERR(n); goto out_free_re; } hdr = &n->index->ihdr; hdr_used = le32_to_cpu(hdr->used); hdr_total = le32_to_cpu(hdr->total); /* Copy root entries into new buffer. */ hdr_insert_head(hdr, re, to_move); /* Update bitmap attribute. */ indx_mark_used(indx, ni, new_vbn >> indx->idx2vbn_bits); /* Check if we can insert new entry new index buffer. */ if (hdr_used + new_de_size > hdr_total) { /* * This occurs if MFT record is the same or bigger than index * buffer. Move all root new index and have no space to add * new entry classic case when MFT record is 1K and index * buffer 4K the problem should not occurs. */ kfree(re); indx_write(indx, ni, n, 0); put_indx_node(n); fnd_clear(fnd); err = indx_insert_entry(indx, ni, new_de, ctx, fnd, undo); goto out_free_root; } /* * Now root is a parent for new index buffer. * Insert NewEntry a new buffer. */ e = hdr_insert_de(indx, hdr, new_de, NULL, ctx); if (!e) { err = -EINVAL; goto out_put_n; } fnd_push(fnd, n, e); /* Just write updates index into disk. */ indx_write(indx, ni, n, 0); n = NULL; out_put_n: put_indx_node(n); out_free_re: kfree(re); out_free_root: kfree(a_root); return err; } /* * indx_insert_into_buffer * * Attempt to insert an entry into an Index Allocation Buffer. * If necessary, it will split the buffer. */ static int indx_insert_into_buffer(struct ntfs_index *indx, struct ntfs_inode *ni, struct INDEX_ROOT *root, const struct NTFS_DE *new_de, const void *ctx, int level, struct ntfs_fnd *fnd) { int err; const struct NTFS_DE *sp; struct NTFS_DE *e, *de_t, *up_e; struct indx_node *n2; struct indx_node *n1 = fnd->nodes[level]; struct INDEX_HDR *hdr1 = &n1->index->ihdr; struct INDEX_HDR *hdr2; u32 to_copy, used, used1; CLST new_vbn; __le64 t_vbn, *sub_vbn; u16 sp_size; void *hdr1_saved = NULL; /* Try the most easy case. */ e = fnd->level - 1 == level ? fnd->de[level] : NULL; e = hdr_insert_de(indx, hdr1, new_de, e, ctx); fnd->de[level] = e; if (e) { /* Just write updated index into disk. */ indx_write(indx, ni, n1, 0); return 0; } /* * No space to insert into buffer. Split it. * To split we: * - Save split point ('cause index buffers will be changed) * - Allocate NewBuffer and copy all entries <= sp into new buffer * - Remove all entries (sp including) from TargetBuffer * - Insert NewEntry into left or right buffer (depending on sp <=> * NewEntry) * - Insert sp into parent buffer (or root) * - Make sp a parent for new buffer */ sp = hdr_find_split(hdr1); if (!sp) return -EINVAL; sp_size = le16_to_cpu(sp->size); up_e = kmalloc(sp_size + sizeof(u64), GFP_NOFS); if (!up_e) return -ENOMEM; memcpy(up_e, sp, sp_size); used1 = le32_to_cpu(hdr1->used); hdr1_saved = kmemdup(hdr1, used1, GFP_NOFS); if (!hdr1_saved) { err = -ENOMEM; goto out; } if (!hdr1->flags) { up_e->flags |= NTFS_IE_HAS_SUBNODES; up_e->size = cpu_to_le16(sp_size + sizeof(u64)); sub_vbn = NULL; } else { t_vbn = de_get_vbn_le(up_e); sub_vbn = &t_vbn; } /* Allocate on disk a new index allocation buffer. */ err = indx_add_allocate(indx, ni, &new_vbn); if (err) goto out; /* Allocate and format memory a new index buffer. */ n2 = indx_new(indx, ni, new_vbn, sub_vbn); if (IS_ERR(n2)) { err = PTR_ERR(n2); goto out; } hdr2 = &n2->index->ihdr; /* Make sp a parent for new buffer. */ de_set_vbn(up_e, new_vbn); /* Copy all the entries <= sp into the new buffer. */ de_t = hdr_first_de(hdr1); to_copy = PtrOffset(de_t, sp); hdr_insert_head(hdr2, de_t, to_copy); /* Remove all entries (sp including) from hdr1. */ used = used1 - to_copy - sp_size; memmove(de_t, Add2Ptr(sp, sp_size), used - le32_to_cpu(hdr1->de_off)); hdr1->used = cpu_to_le32(used); /* * Insert new entry into left or right buffer * (depending on sp <=> new_de). */ hdr_insert_de(indx, (*indx->cmp)(new_de + 1, le16_to_cpu(new_de->key_size), up_e + 1, le16_to_cpu(up_e->key_size), ctx) < 0 ? hdr2 : hdr1, new_de, NULL, ctx); indx_mark_used(indx, ni, new_vbn >> indx->idx2vbn_bits); indx_write(indx, ni, n1, 0); indx_write(indx, ni, n2, 0); put_indx_node(n2); /* * We've finished splitting everybody, so we are ready to * insert the promoted entry into the parent. */ if (!level) { /* Insert in root. */ err = indx_insert_into_root(indx, ni, up_e, NULL, ctx, fnd, 0); } else { /* * The target buffer's parent is another index buffer. * TODO: Remove recursion. */ err = indx_insert_into_buffer(indx, ni, root, up_e, ctx, level - 1, fnd); } if (err) { /* * Undo critical operations. */ indx_mark_free(indx, ni, new_vbn >> indx->idx2vbn_bits); memcpy(hdr1, hdr1_saved, used1); indx_write(indx, ni, n1, 0); } out: kfree(up_e); kfree(hdr1_saved); return err; } /* * indx_insert_entry - Insert new entry into index. * * @undo - True if we undoing previous remove. */ int indx_insert_entry(struct ntfs_index *indx, struct ntfs_inode *ni, const struct NTFS_DE *new_de, const void *ctx, struct ntfs_fnd *fnd, bool undo) { int err; int diff; struct NTFS_DE *e; struct ntfs_fnd *fnd_a = NULL; struct INDEX_ROOT *root; if (!fnd) { fnd_a = fnd_get(); if (!fnd_a) { err = -ENOMEM; goto out1; } fnd = fnd_a; } root = indx_get_root(indx, ni, NULL, NULL); if (!root) { err = -EINVAL; goto out; } if (fnd_is_empty(fnd)) { /* * Find the spot the tree where we want to * insert the new entry. */ err = indx_find(indx, ni, root, new_de + 1, le16_to_cpu(new_de->key_size), ctx, &diff, &e, fnd); if (err) goto out; if (!diff) { err = -EEXIST; goto out; } } if (!fnd->level) { /* * The root is also a leaf, so we'll insert the * new entry into it. */ err = indx_insert_into_root(indx, ni, new_de, fnd->root_de, ctx, fnd, undo); } else { /* * Found a leaf buffer, so we'll insert the new entry into it. */ err = indx_insert_into_buffer(indx, ni, root, new_de, ctx, fnd->level - 1, fnd); } out: fnd_put(fnd_a); out1: return err; } /* * indx_find_buffer - Locate a buffer from the tree. */ static struct indx_node *indx_find_buffer(struct ntfs_index *indx, struct ntfs_inode *ni, const struct INDEX_ROOT *root, __le64 vbn, struct indx_node *n) { int err; const struct NTFS_DE *e; struct indx_node *r; const struct INDEX_HDR *hdr = n ? &n->index->ihdr : &root->ihdr; /* Step 1: Scan one level. */ for (e = hdr_first_de(hdr);; e = hdr_next_de(hdr, e)) { if (!e) return ERR_PTR(-EINVAL); if (de_has_vcn(e) && vbn == de_get_vbn_le(e)) return n; if (de_is_last(e)) break; } /* Step2: Do recursion. */ e = Add2Ptr(hdr, le32_to_cpu(hdr->de_off)); for (;;) { if (de_has_vcn_ex(e)) { err = indx_read(indx, ni, de_get_vbn(e), &n); if (err) return ERR_PTR(err); r = indx_find_buffer(indx, ni, root, vbn, n); if (r) return r; } if (de_is_last(e)) break; e = Add2Ptr(e, le16_to_cpu(e->size)); } return NULL; } /* * indx_shrink - Deallocate unused tail indexes. */ static int indx_shrink(struct ntfs_index *indx, struct ntfs_inode *ni, size_t bit) { int err = 0; u64 bpb, new_data; size_t nbits; struct ATTRIB *b; struct ATTR_LIST_ENTRY *le = NULL; const struct INDEX_NAMES *in = &s_index_names[indx->type]; b = ni_find_attr(ni, NULL, &le, ATTR_BITMAP, in->name, in->name_len, NULL, NULL); if (!b) return -ENOENT; if (!b->non_res) { unsigned long pos; const unsigned long *bm = resident_data(b); nbits = (size_t)le32_to_cpu(b->res.data_size) * 8; if (bit >= nbits) return 0; pos = find_next_bit_le(bm, nbits, bit); if (pos < nbits) return 0; } else { size_t used = MINUS_ONE_T; nbits = le64_to_cpu(b->nres.data_size) * 8; if (bit >= nbits) return 0; err = scan_nres_bitmap(ni, b, indx, bit, &scan_for_used, &used); if (err) return err; if (used != MINUS_ONE_T) return 0; } new_data = (u64)bit << indx->index_bits; err = attr_set_size(ni, ATTR_ALLOC, in->name, in->name_len, &indx->alloc_run, new_data, &new_data, false, NULL); if (err) return err; if (in->name == I30_NAME) i_size_write(&ni->vfs_inode, new_data); bpb = ntfs3_bitmap_size(bit); if (bpb * 8 == nbits) return 0; err = attr_set_size(ni, ATTR_BITMAP, in->name, in->name_len, &indx->bitmap_run, bpb, &bpb, false, NULL); return err; } static int indx_free_children(struct ntfs_index *indx, struct ntfs_inode *ni, const struct NTFS_DE *e, bool trim) { int err; struct indx_node *n = NULL; struct INDEX_HDR *hdr; CLST vbn = de_get_vbn(e); size_t i; err = indx_read(indx, ni, vbn, &n); if (err) return err; hdr = &n->index->ihdr; /* First, recurse into the children, if any. */ if (hdr_has_subnode(hdr)) { for (e = hdr_first_de(hdr); e; e = hdr_next_de(hdr, e)) { indx_free_children(indx, ni, e, false); if (de_is_last(e)) break; } } put_indx_node(n); i = vbn >> indx->idx2vbn_bits; /* * We've gotten rid of the children; add this buffer to the free list. */ indx_mark_free(indx, ni, i); if (!trim) return 0; /* * If there are no used indexes after current free index * then we can truncate allocation and bitmap. * Use bitmap to estimate the case. */ indx_shrink(indx, ni, i + 1); return 0; } /* * indx_get_entry_to_replace * * Find a replacement entry for a deleted entry. * Always returns a node entry: * NTFS_IE_HAS_SUBNODES is set the flags and the size includes the sub_vcn. */ static int indx_get_entry_to_replace(struct ntfs_index *indx, struct ntfs_inode *ni, const struct NTFS_DE *de_next, struct NTFS_DE **de_to_replace, struct ntfs_fnd *fnd) { int err; int level = -1; CLST vbn; struct NTFS_DE *e, *te, *re; struct indx_node *n; struct INDEX_BUFFER *ib; *de_to_replace = NULL; /* Find first leaf entry down from de_next. */ vbn = de_get_vbn(de_next); for (;;) { n = NULL; err = indx_read(indx, ni, vbn, &n); if (err) goto out; e = hdr_first_de(&n->index->ihdr); fnd_push(fnd, n, e); if (!de_is_last(e)) { /* * This buffer is non-empty, so its first entry * could be used as the replacement entry. */ level = fnd->level - 1; } if (!de_has_vcn(e)) break; /* This buffer is a node. Continue to go down. */ vbn = de_get_vbn(e); } if (level == -1) goto out; n = fnd->nodes[level]; te = hdr_first_de(&n->index->ihdr); /* Copy the candidate entry into the replacement entry buffer. */ re = kmalloc(le16_to_cpu(te->size) + sizeof(u64), GFP_NOFS); if (!re) { err = -ENOMEM; goto out; } *de_to_replace = re; memcpy(re, te, le16_to_cpu(te->size)); if (!de_has_vcn(re)) { /* * The replacement entry we found doesn't have a sub_vcn. * increase its size to hold one. */ le16_add_cpu(&re->size, sizeof(u64)); re->flags |= NTFS_IE_HAS_SUBNODES; } else { /* * The replacement entry we found was a node entry, which * means that all its child buffers are empty. Return them * to the free pool. */ indx_free_children(indx, ni, te, true); } /* * Expunge the replacement entry from its former location, * and then write that buffer. */ ib = n->index; e = hdr_delete_de(&ib->ihdr, te); fnd->de[level] = e; indx_write(indx, ni, n, 0); if (ib_is_leaf(ib) && ib_is_empty(ib)) { /* An empty leaf. */ return 0; } out: fnd_clear(fnd); return err; } /* * indx_delete_entry - Delete an entry from the index. */ int indx_delete_entry(struct ntfs_index *indx, struct ntfs_inode *ni, const void *key, u32 key_len, const void *ctx) { int err, diff; struct INDEX_ROOT *root; struct INDEX_HDR *hdr; struct ntfs_fnd *fnd, *fnd2; struct INDEX_BUFFER *ib; struct NTFS_DE *e, *re, *next, *prev, *me; struct indx_node *n, *n2d = NULL; __le64 sub_vbn; int level, level2; struct ATTRIB *attr; struct mft_inode *mi; u32 e_size, root_size, new_root_size; size_t trim_bit; const struct INDEX_NAMES *in; fnd = fnd_get(); if (!fnd) { err = -ENOMEM; goto out2; } fnd2 = fnd_get(); if (!fnd2) { err = -ENOMEM; goto out1; } root = indx_get_root(indx, ni, &attr, &mi); if (!root) { err = -EINVAL; goto out; } /* Locate the entry to remove. */ err = indx_find(indx, ni, root, key, key_len, ctx, &diff, &e, fnd); if (err) goto out; if (!e || diff) { err = -ENOENT; goto out; } level = fnd->level; if (level) { n = fnd->nodes[level - 1]; e = fnd->de[level - 1]; ib = n->index; hdr = &ib->ihdr; } else { hdr = &root->ihdr; e = fnd->root_de; n = NULL; } e_size = le16_to_cpu(e->size); if (!de_has_vcn_ex(e)) { /* The entry to delete is a leaf, so we can just rip it out. */ hdr_delete_de(hdr, e); if (!level) { hdr->total = hdr->used; /* Shrink resident root attribute. */ mi_resize_attr(mi, attr, 0 - e_size); goto out; } indx_write(indx, ni, n, 0); /* * Check to see if removing that entry made * the leaf empty. */ if (ib_is_leaf(ib) && ib_is_empty(ib)) { fnd_pop(fnd); fnd_push(fnd2, n, e); } } else { /* * The entry we wish to delete is a node buffer, so we * have to find a replacement for it. */ next = de_get_next(e); err = indx_get_entry_to_replace(indx, ni, next, &re, fnd2); if (err) goto out; if (re) { de_set_vbn_le(re, de_get_vbn_le(e)); hdr_delete_de(hdr, e); err = level ? indx_insert_into_buffer(indx, ni, root, re, ctx, fnd->level - 1, fnd) : indx_insert_into_root(indx, ni, re, e, ctx, fnd, 0); kfree(re); if (err) goto out; } else { /* * There is no replacement for the current entry. * This means that the subtree rooted at its node * is empty, and can be deleted, which turn means * that the node can just inherit the deleted * entry sub_vcn. */ indx_free_children(indx, ni, next, true); de_set_vbn_le(next, de_get_vbn_le(e)); hdr_delete_de(hdr, e); if (level) { indx_write(indx, ni, n, 0); } else { hdr->total = hdr->used; /* Shrink resident root attribute. */ mi_resize_attr(mi, attr, 0 - e_size); } } } /* Delete a branch of tree. */ if (!fnd2 || !fnd2->level) goto out; /* Reinit root 'cause it can be changed. */ root = indx_get_root(indx, ni, &attr, &mi); if (!root) { err = -EINVAL; goto out; } n2d = NULL; sub_vbn = fnd2->nodes[0]->index->vbn; level2 = 0; level = fnd->level; hdr = level ? &fnd->nodes[level - 1]->index->ihdr : &root->ihdr; /* Scan current level. */ for (e = hdr_first_de(hdr);; e = hdr_next_de(hdr, e)) { if (!e) { err = -EINVAL; goto out; } if (de_has_vcn(e) && sub_vbn == de_get_vbn_le(e)) break; if (de_is_last(e)) { e = NULL; break; } } if (!e) { /* Do slow search from root. */ struct indx_node *in; fnd_clear(fnd); in = indx_find_buffer(indx, ni, root, sub_vbn, NULL); if (IS_ERR(in)) { err = PTR_ERR(in); goto out; } if (in) fnd_push(fnd, in, NULL); } /* Merge fnd2 -> fnd. */ for (level = 0; level < fnd2->level; level++) { fnd_push(fnd, fnd2->nodes[level], fnd2->de[level]); fnd2->nodes[level] = NULL; } fnd2->level = 0; hdr = NULL; for (level = fnd->level; level; level--) { struct indx_node *in = fnd->nodes[level - 1]; ib = in->index; if (ib_is_empty(ib)) { sub_vbn = ib->vbn; } else { hdr = &ib->ihdr; n2d = in; level2 = level; break; } } if (!hdr) hdr = &root->ihdr; e = hdr_first_de(hdr); if (!e) { err = -EINVAL; goto out; } if (hdr != &root->ihdr || !de_is_last(e)) { prev = NULL; while (!de_is_last(e)) { if (de_has_vcn(e) && sub_vbn == de_get_vbn_le(e)) break; prev = e; e = hdr_next_de(hdr, e); if (!e) { err = -EINVAL; goto out; } } if (sub_vbn != de_get_vbn_le(e)) { /* * Didn't find the parent entry, although this buffer * is the parent trail. Something is corrupt. */ err = -EINVAL; goto out; } if (de_is_last(e)) { /* * Since we can't remove the end entry, we'll remove * its predecessor instead. This means we have to * transfer the predecessor's sub_vcn to the end entry. * Note: This index block is not empty, so the * predecessor must exist. */ if (!prev) { err = -EINVAL; goto out; } if (de_has_vcn(prev)) { de_set_vbn_le(e, de_get_vbn_le(prev)); } else if (de_has_vcn(e)) { le16_sub_cpu(&e->size, sizeof(u64)); e->flags &= ~NTFS_IE_HAS_SUBNODES; le32_sub_cpu(&hdr->used, sizeof(u64)); } e = prev; } /* * Copy the current entry into a temporary buffer (stripping * off its down-pointer, if any) and delete it from the current * buffer or root, as appropriate. */ e_size = le16_to_cpu(e->size); me = kmemdup(e, e_size, GFP_NOFS); if (!me) { err = -ENOMEM; goto out; } if (de_has_vcn(me)) { me->flags &= ~NTFS_IE_HAS_SUBNODES; le16_sub_cpu(&me->size, sizeof(u64)); } hdr_delete_de(hdr, e); if (hdr == &root->ihdr) { level = 0; hdr->total = hdr->used; /* Shrink resident root attribute. */ mi_resize_attr(mi, attr, 0 - e_size); } else { indx_write(indx, ni, n2d, 0); level = level2; } /* Mark unused buffers as free. */ trim_bit = -1; for (; level < fnd->level; level++) { ib = fnd->nodes[level]->index; if (ib_is_empty(ib)) { size_t k = le64_to_cpu(ib->vbn) >> indx->idx2vbn_bits; indx_mark_free(indx, ni, k); if (k < trim_bit) trim_bit = k; } } fnd_clear(fnd); /*fnd->root_de = NULL;*/ /* * Re-insert the entry into the tree. * Find the spot the tree where we want to insert the new entry. */ err = indx_insert_entry(indx, ni, me, ctx, fnd, 0); kfree(me); if (err) goto out; if (trim_bit != -1) indx_shrink(indx, ni, trim_bit); } else { /* * This tree needs to be collapsed down to an empty root. * Recreate the index root as an empty leaf and free all * the bits the index allocation bitmap. */ fnd_clear(fnd); fnd_clear(fnd2); in = &s_index_names[indx->type]; err = attr_set_size(ni, ATTR_ALLOC, in->name, in->name_len, &indx->alloc_run, 0, NULL, false, NULL); if (in->name == I30_NAME) i_size_write(&ni->vfs_inode, 0); err = ni_remove_attr(ni, ATTR_ALLOC, in->name, in->name_len, false, NULL); run_close(&indx->alloc_run); err = attr_set_size(ni, ATTR_BITMAP, in->name, in->name_len, &indx->bitmap_run, 0, NULL, false, NULL); err = ni_remove_attr(ni, ATTR_BITMAP, in->name, in->name_len, false, NULL); run_close(&indx->bitmap_run); root = indx_get_root(indx, ni, &attr, &mi); if (!root) { err = -EINVAL; goto out; } root_size = le32_to_cpu(attr->res.data_size); new_root_size = sizeof(struct INDEX_ROOT) + sizeof(struct NTFS_DE); if (new_root_size != root_size && !mi_resize_attr(mi, attr, new_root_size - root_size)) { err = -EINVAL; goto out; } /* Fill first entry. */ e = (struct NTFS_DE *)(root + 1); e->ref.low = 0; e->ref.high = 0; e->ref.seq = 0; e->size = cpu_to_le16(sizeof(struct NTFS_DE)); e->flags = NTFS_IE_LAST; // 0x02 e->key_size = 0; e->res = 0; hdr = &root->ihdr; hdr->flags = 0; hdr->used = hdr->total = cpu_to_le32( new_root_size - offsetof(struct INDEX_ROOT, ihdr)); mi->dirty = true; } out: fnd_put(fnd2); out1: fnd_put(fnd); out2: return err; } /* * Update duplicated information in directory entry * 'dup' - info from MFT record */ int indx_update_dup(struct ntfs_inode *ni, struct ntfs_sb_info *sbi, const struct ATTR_FILE_NAME *fname, const struct NTFS_DUP_INFO *dup, int sync) { int err, diff; struct NTFS_DE *e = NULL; struct ATTR_FILE_NAME *e_fname; struct ntfs_fnd *fnd; struct INDEX_ROOT *root; struct mft_inode *mi; struct ntfs_index *indx = &ni->dir; fnd = fnd_get(); if (!fnd) return -ENOMEM; root = indx_get_root(indx, ni, NULL, &mi); if (!root) { err = -EINVAL; goto out; } /* Find entry in directory. */ err = indx_find(indx, ni, root, fname, fname_full_size(fname), sbi, &diff, &e, fnd); if (err) goto out; if (!e) { err = -EINVAL; goto out; } if (diff) { err = -EINVAL; goto out; } e_fname = (struct ATTR_FILE_NAME *)(e + 1); if (!memcmp(&e_fname->dup, dup, sizeof(*dup))) { /* * Nothing to update in index! Try to avoid this call. */ goto out; } memcpy(&e_fname->dup, dup, sizeof(*dup)); if (fnd->level) { /* Directory entry in index. */ err = indx_write(indx, ni, fnd->nodes[fnd->level - 1], sync); } else { /* Directory entry in directory MFT record. */ mi->dirty = true; if (sync) err = mi_write(mi, 1); else mark_inode_dirty(&ni->vfs_inode); } out: fnd_put(fnd); return err; }
18734 18711 18852 18724 18723 18726 18860 18726 18854 18750 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 // SPDX-License-Identifier: GPL-2.0-only /* * AppArmor security module * * This file contains AppArmor network mediation * * Copyright (C) 1998-2008 Novell/SUSE * Copyright 2009-2017 Canonical Ltd. */ #include "include/apparmor.h" #include "include/audit.h" #include "include/cred.h" #include "include/label.h" #include "include/net.h" #include "include/policy.h" #include "include/secid.h" #include "net_names.h" struct aa_sfs_entry aa_sfs_entry_network[] = { AA_SFS_FILE_STRING("af_mask", AA_SFS_AF_MASK), { } }; static const char * const net_mask_names[] = { "unknown", "send", "receive", "unknown", "create", "shutdown", "connect", "unknown", "setattr", "getattr", "setcred", "getcred", "chmod", "chown", "chgrp", "lock", "mmap", "mprot", "unknown", "unknown", "accept", "bind", "listen", "unknown", "setopt", "getopt", "unknown", "unknown", "unknown", "unknown", "unknown", "unknown", }; /* audit callback for net specific fields */ void audit_net_cb(struct audit_buffer *ab, void *va) { struct common_audit_data *sa = va; struct apparmor_audit_data *ad = aad(sa); if (address_family_names[sa->u.net->family]) audit_log_format(ab, " family=\"%s\"", address_family_names[sa->u.net->family]); else audit_log_format(ab, " family=\"unknown(%d)\"", sa->u.net->family); if (sock_type_names[ad->net.type]) audit_log_format(ab, " sock_type=\"%s\"", sock_type_names[ad->net.type]); else audit_log_format(ab, " sock_type=\"unknown(%d)\"", ad->net.type); audit_log_format(ab, " protocol=%d", ad->net.protocol); if (ad->request & NET_PERMS_MASK) { audit_log_format(ab, " requested_mask="); aa_audit_perm_mask(ab, ad->request, NULL, 0, net_mask_names, NET_PERMS_MASK); if (ad->denied & NET_PERMS_MASK) { audit_log_format(ab, " denied_mask="); aa_audit_perm_mask(ab, ad->denied, NULL, 0, net_mask_names, NET_PERMS_MASK); } } if (ad->peer) { audit_log_format(ab, " peer="); aa_label_xaudit(ab, labels_ns(ad->subj_label), ad->peer, FLAGS_NONE, GFP_ATOMIC); } } /* Generic af perm */ int aa_profile_af_perm(struct aa_profile *profile, struct apparmor_audit_data *ad, u32 request, u16 family, int type) { struct aa_ruleset *rules = list_first_entry(&profile->rules, typeof(*rules), list); struct aa_perms perms = { }; aa_state_t state; __be16 buffer[2]; AA_BUG(family >= AF_MAX); AA_BUG(type < 0 || type >= SOCK_MAX); if (profile_unconfined(profile)) return 0; state = RULE_MEDIATES(rules, AA_CLASS_NET); if (!state) return 0; buffer[0] = cpu_to_be16(family); buffer[1] = cpu_to_be16((u16) type); state = aa_dfa_match_len(rules->policy->dfa, state, (char *) &buffer, 4); perms = *aa_lookup_perms(rules->policy, state); aa_apply_modes_to_perms(profile, &perms); return aa_check_perms(profile, &perms, request, ad, audit_net_cb); } int aa_af_perm(const struct cred *subj_cred, struct aa_label *label, const char *op, u32 request, u16 family, int type, int protocol) { struct aa_profile *profile; DEFINE_AUDIT_NET(ad, op, NULL, family, type, protocol); return fn_for_each_confined(label, profile, aa_profile_af_perm(profile, &ad, request, family, type)); } static int aa_label_sk_perm(const struct cred *subj_cred, struct aa_label *label, const char *op, u32 request, struct sock *sk) { struct aa_sk_ctx *ctx = aa_sock(sk); int error = 0; AA_BUG(!label); AA_BUG(!sk); if (ctx->label != kernel_t && !unconfined(label)) { struct aa_profile *profile; DEFINE_AUDIT_SK(ad, op, sk); ad.subj_cred = subj_cred; error = fn_for_each_confined(label, profile, aa_profile_af_sk_perm(profile, &ad, request, sk)); } return error; } int aa_sk_perm(const char *op, u32 request, struct sock *sk) { struct aa_label *label; int error; AA_BUG(!sk); AA_BUG(in_interrupt()); /* TODO: switch to begin_current_label ???? */ label = begin_current_label_crit_section(); error = aa_label_sk_perm(current_cred(), label, op, request, sk); end_current_label_crit_section(label); return error; } int aa_sock_file_perm(const struct cred *subj_cred, struct aa_label *label, const char *op, u32 request, struct socket *sock) { AA_BUG(!label); AA_BUG(!sock); AA_BUG(!sock->sk); return aa_label_sk_perm(subj_cred, label, op, request, sock->sk); } #ifdef CONFIG_NETWORK_SECMARK static int apparmor_secmark_init(struct aa_secmark *secmark) { struct aa_label *label; if (secmark->label[0] == '*') { secmark->secid = AA_SECID_WILDCARD; return 0; } label = aa_label_strn_parse(&root_ns->unconfined->label, secmark->label, strlen(secmark->label), GFP_ATOMIC, false, false); if (IS_ERR(label)) return PTR_ERR(label); secmark->secid = label->secid; return 0; } static int aa_secmark_perm(struct aa_profile *profile, u32 request, u32 secid, struct apparmor_audit_data *ad) { int i, ret; struct aa_perms perms = { }; struct aa_ruleset *rules = list_first_entry(&profile->rules, typeof(*rules), list); if (rules->secmark_count == 0) return 0; for (i = 0; i < rules->secmark_count; i++) { if (!rules->secmark[i].secid) { ret = apparmor_secmark_init(&rules->secmark[i]); if (ret) return ret; } if (rules->secmark[i].secid == secid || rules->secmark[i].secid == AA_SECID_WILDCARD) { if (rules->secmark[i].deny) perms.deny = ALL_PERMS_MASK; else perms.allow = ALL_PERMS_MASK; if (rules->secmark[i].audit) perms.audit = ALL_PERMS_MASK; } } aa_apply_modes_to_perms(profile, &perms); return aa_check_perms(profile, &perms, request, ad, audit_net_cb); } int apparmor_secmark_check(struct aa_label *label, char *op, u32 request, u32 secid, const struct sock *sk) { struct aa_profile *profile; DEFINE_AUDIT_SK(ad, op, sk); return fn_for_each_confined(label, profile, aa_secmark_perm(profile, request, secid, &ad)); } #endif
73 76 73 28 76 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 #ifndef __DRM_GEM_H__ #define __DRM_GEM_H__ /* * GEM Graphics Execution Manager Driver Interfaces * * Copyright 1999 Precision Insight, Inc., Cedar Park, Texas. * Copyright 2000 VA Linux Systems, Inc., Sunnyvale, California. * Copyright (c) 2009-2010, Code Aurora Forum. * All rights reserved. * Copyright © 2014 Intel Corporation * Daniel Vetter <daniel.vetter@ffwll.ch> * * Author: Rickard E. (Rik) Faith <faith@valinux.com> * Author: Gareth Hughes <gareth@valinux.com> * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), * to deal in the Software without restriction, including without limitation * the rights to use, copy, modify, merge, publish, distribute, sublicense, * and/or sell copies of the Software, and to permit persons to whom the * Software is furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice (including the next * paragraph) shall be included in all copies or substantial portions of the * Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL * VA LINUX SYSTEMS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR * OTHER DEALINGS IN THE SOFTWARE. */ #include <linux/kref.h> #include <linux/dma-resv.h> #include <linux/list.h> #include <linux/mutex.h> #include <drm/drm_vma_manager.h> struct iosys_map; struct drm_gem_object; /** * enum drm_gem_object_status - bitmask of object state for fdinfo reporting * @DRM_GEM_OBJECT_RESIDENT: object is resident in memory (ie. not unpinned) * @DRM_GEM_OBJECT_PURGEABLE: object marked as purgeable by userspace * * Bitmask of status used for fdinfo memory stats, see &drm_gem_object_funcs.status * and drm_show_fdinfo(). Note that an object can DRM_GEM_OBJECT_PURGEABLE if * it still active or not resident, in which case drm_show_fdinfo() will not * account for it as purgeable. So drivers do not need to check if the buffer * is idle and resident to return this bit. (Ie. userspace can mark a buffer * as purgeable even while it is still busy on the GPU.. it does not _actually_ * become puregeable until it becomes idle. The status gem object func does * not need to consider this.) */ enum drm_gem_object_status { DRM_GEM_OBJECT_RESIDENT = BIT(0), DRM_GEM_OBJECT_PURGEABLE = BIT(1), }; /** * struct drm_gem_object_funcs - GEM object functions */ struct drm_gem_object_funcs { /** * @free: * * Deconstructor for drm_gem_objects. * * This callback is mandatory. */ void (*free)(struct drm_gem_object *obj); /** * @open: * * Called upon GEM handle creation. * * This callback is optional. */ int (*open)(struct drm_gem_object *obj, struct drm_file *file); /** * @close: * * Called upon GEM handle release. * * This callback is optional. */ void (*close)(struct drm_gem_object *obj, struct drm_file *file); /** * @print_info: * * If driver subclasses struct &drm_gem_object, it can implement this * optional hook for printing additional driver specific info. * * drm_printf_indent() should be used in the callback passing it the * indent argument. * * This callback is called from drm_gem_print_info(). * * This callback is optional. */ void (*print_info)(struct drm_printer *p, unsigned int indent, const struct drm_gem_object *obj); /** * @export: * * Export backing buffer as a &dma_buf. * If this is not set drm_gem_prime_export() is used. * * This callback is optional. */ struct dma_buf *(*export)(struct drm_gem_object *obj, int flags); /** * @pin: * * Pin backing buffer in memory. Used by the drm_gem_map_attach() helper. * * This callback is optional. */ int (*pin)(struct drm_gem_object *obj); /** * @unpin: * * Unpin backing buffer. Used by the drm_gem_map_detach() helper. * * This callback is optional. */ void (*unpin)(struct drm_gem_object *obj); /** * @get_sg_table: * * Returns a Scatter-Gather table representation of the buffer. * Used when exporting a buffer by the drm_gem_map_dma_buf() helper. * Releasing is done by calling dma_unmap_sg_attrs() and sg_free_table() * in drm_gem_unmap_buf(), therefore these helpers and this callback * here cannot be used for sg tables pointing at driver private memory * ranges. * * See also drm_prime_pages_to_sg(). */ struct sg_table *(*get_sg_table)(struct drm_gem_object *obj); /** * @vmap: * * Returns a virtual address for the buffer. Used by the * drm_gem_dmabuf_vmap() helper. * * This callback is optional. */ int (*vmap)(struct drm_gem_object *obj, struct iosys_map *map); /** * @vunmap: * * Releases the address previously returned by @vmap. Used by the * drm_gem_dmabuf_vunmap() helper. * * This callback is optional. */ void (*vunmap)(struct drm_gem_object *obj, struct iosys_map *map); /** * @mmap: * * Handle mmap() of the gem object, setup vma accordingly. * * This callback is optional. * * The callback is used by both drm_gem_mmap_obj() and * drm_gem_prime_mmap(). When @mmap is present @vm_ops is not * used, the @mmap callback must set vma->vm_ops instead. */ int (*mmap)(struct drm_gem_object *obj, struct vm_area_struct *vma); /** * @evict: * * Evicts gem object out from memory. Used by the drm_gem_object_evict() * helper. Returns 0 on success, -errno otherwise. * * This callback is optional. */ int (*evict)(struct drm_gem_object *obj); /** * @status: * * The optional status callback can return additional object state * which determines which stats the object is counted against. The * callback is called under table_lock. Racing against object status * change is "harmless", and the callback can expect to not race * against object destruction. * * Called by drm_show_memory_stats(). */ enum drm_gem_object_status (*status)(struct drm_gem_object *obj); /** * @rss: * * Return resident size of the object in physical memory. * * Called by drm_show_memory_stats(). */ size_t (*rss)(struct drm_gem_object *obj); /** * @vm_ops: * * Virtual memory operations used with mmap. * * This is optional but necessary for mmap support. */ const struct vm_operations_struct *vm_ops; }; /** * struct drm_gem_lru - A simple LRU helper * * A helper for tracking GEM objects in a given state, to aid in * driver's shrinker implementation. Tracks the count of pages * for lockless &shrinker.count_objects, and provides * &drm_gem_lru_scan for driver's &shrinker.scan_objects * implementation. */ struct drm_gem_lru { /** * @lock: * * Lock protecting movement of GEM objects between LRUs. All * LRUs that the object can move between should be protected * by the same lock. */ struct mutex *lock; /** * @count: * * The total number of backing pages of the GEM objects in * this LRU. */ long count; /** * @list: * * The LRU list. */ struct list_head list; }; /** * struct drm_gem_object - GEM buffer object * * This structure defines the generic parts for GEM buffer objects, which are * mostly around handling mmap and userspace handles. * * Buffer objects are often abbreviated to BO. */ struct drm_gem_object { /** * @refcount: * * Reference count of this object * * Please use drm_gem_object_get() to acquire and drm_gem_object_put_locked() * or drm_gem_object_put() to release a reference to a GEM * buffer object. */ struct kref refcount; /** * @handle_count: * * This is the GEM file_priv handle count of this object. * * Each handle also holds a reference. Note that when the handle_count * drops to 0 any global names (e.g. the id in the flink namespace) will * be cleared. * * Protected by &drm_device.object_name_lock. */ unsigned handle_count; /** * @dev: DRM dev this object belongs to. */ struct drm_device *dev; /** * @filp: * * SHMEM file node used as backing storage for swappable buffer objects. * GEM also supports driver private objects with driver-specific backing * storage (contiguous DMA memory, special reserved blocks). In this * case @filp is NULL. */ struct file *filp; /** * @vma_node: * * Mapping info for this object to support mmap. Drivers are supposed to * allocate the mmap offset using drm_gem_create_mmap_offset(). The * offset itself can be retrieved using drm_vma_node_offset_addr(). * * Memory mapping itself is handled by drm_gem_mmap(), which also checks * that userspace is allowed to access the object. */ struct drm_vma_offset_node vma_node; /** * @size: * * Size of the object, in bytes. Immutable over the object's * lifetime. */ size_t size; /** * @name: * * Global name for this object, starts at 1. 0 means unnamed. * Access is covered by &drm_device.object_name_lock. This is used by * the GEM_FLINK and GEM_OPEN ioctls. */ int name; /** * @dma_buf: * * dma-buf associated with this GEM object. * * Pointer to the dma-buf associated with this gem object (either * through importing or exporting). We break the resulting reference * loop when the last gem handle for this object is released. * * Protected by &drm_device.object_name_lock. */ struct dma_buf *dma_buf; /** * @import_attach: * * dma-buf attachment backing this object. * * Any foreign dma_buf imported as a gem object has this set to the * attachment point for the device. This is invariant over the lifetime * of a gem object. * * The &drm_gem_object_funcs.free callback is responsible for * cleaning up the dma_buf attachment and references acquired at import * time. * * Note that the drm gem/prime core does not depend upon drivers setting * this field any more. So for drivers where this doesn't make sense * (e.g. virtual devices or a displaylink behind an usb bus) they can * simply leave it as NULL. */ struct dma_buf_attachment *import_attach; /** * @resv: * * Pointer to reservation object associated with the this GEM object. * * Normally (@resv == &@_resv) except for imported GEM objects. */ struct dma_resv *resv; /** * @_resv: * * A reservation object for this GEM object. * * This is unused for imported GEM objects. */ struct dma_resv _resv; /** * @gpuva: * * Provides the list of GPU VAs attached to this GEM object. * * Drivers should lock list accesses with the GEMs &dma_resv lock * (&drm_gem_object.resv) or a custom lock if one is provided. */ struct { struct list_head list; #ifdef CONFIG_LOCKDEP struct lockdep_map *lock_dep_map; #endif } gpuva; /** * @funcs: * * Optional GEM object functions. If this is set, it will be used instead of the * corresponding &drm_driver GEM callbacks. * * New drivers should use this. * */ const struct drm_gem_object_funcs *funcs; /** * @lru_node: * * List node in a &drm_gem_lru. */ struct list_head lru_node; /** * @lru: * * The current LRU list that the GEM object is on. */ struct drm_gem_lru *lru; }; /** * DRM_GEM_FOPS - Default drm GEM file operations * * This macro provides a shorthand for setting the GEM file ops in the * &file_operations structure. If all you need are the default ops, use * DEFINE_DRM_GEM_FOPS instead. */ #define DRM_GEM_FOPS \ .open = drm_open,\ .release = drm_release,\ .unlocked_ioctl = drm_ioctl,\ .compat_ioctl = drm_compat_ioctl,\ .poll = drm_poll,\ .read = drm_read,\ .llseek = noop_llseek,\ .mmap = drm_gem_mmap, \ .fop_flags = FOP_UNSIGNED_OFFSET /** * DEFINE_DRM_GEM_FOPS() - macro to generate file operations for GEM drivers * @name: name for the generated structure * * This macro autogenerates a suitable &struct file_operations for GEM based * drivers, which can be assigned to &drm_driver.fops. Note that this structure * cannot be shared between drivers, because it contains a reference to the * current module using THIS_MODULE. * * Note that the declaration is already marked as static - if you need a * non-static version of this you're probably doing it wrong and will break the * THIS_MODULE reference by accident. */ #define DEFINE_DRM_GEM_FOPS(name) \ static const struct file_operations name = {\ .owner = THIS_MODULE,\ DRM_GEM_FOPS,\ } void drm_gem_object_release(struct drm_gem_object *obj); void drm_gem_object_free(struct kref *kref); int drm_gem_object_init(struct drm_device *dev, struct drm_gem_object *obj, size_t size); int drm_gem_object_init_with_mnt(struct drm_device *dev, struct drm_gem_object *obj, size_t size, struct vfsmount *gemfs); void drm_gem_private_object_init(struct drm_device *dev, struct drm_gem_object *obj, size_t size); void drm_gem_private_object_fini(struct drm_gem_object *obj); void drm_gem_vm_open(struct vm_area_struct *vma); void drm_gem_vm_close(struct vm_area_struct *vma); int drm_gem_mmap_obj(struct drm_gem_object *obj, unsigned long obj_size, struct vm_area_struct *vma); int drm_gem_mmap(struct file *filp, struct vm_area_struct *vma); /** * drm_gem_object_get - acquire a GEM buffer object reference * @obj: GEM buffer object * * This function acquires an additional reference to @obj. It is illegal to * call this without already holding a reference. No locks required. */ static inline void drm_gem_object_get(struct drm_gem_object *obj) { kref_get(&obj->refcount); } __attribute__((nonnull)) static inline void __drm_gem_object_put(struct drm_gem_object *obj) { kref_put(&obj->refcount, drm_gem_object_free); } /** * drm_gem_object_put - drop a GEM buffer object reference * @obj: GEM buffer object * * This releases a reference to @obj. */ static inline void drm_gem_object_put(struct drm_gem_object *obj) { if (obj) __drm_gem_object_put(obj); } int drm_gem_handle_create(struct drm_file *file_priv, struct drm_gem_object *obj, u32 *handlep); int drm_gem_handle_delete(struct drm_file *filp, u32 handle); void drm_gem_free_mmap_offset(struct drm_gem_object *obj); int drm_gem_create_mmap_offset(struct drm_gem_object *obj); int drm_gem_create_mmap_offset_size(struct drm_gem_object *obj, size_t size); struct page **drm_gem_get_pages(struct drm_gem_object *obj); void drm_gem_put_pages(struct drm_gem_object *obj, struct page **pages, bool dirty, bool accessed); void drm_gem_lock(struct drm_gem_object *obj); void drm_gem_unlock(struct drm_gem_object *obj); int drm_gem_vmap_unlocked(struct drm_gem_object *obj, struct iosys_map *map); void drm_gem_vunmap_unlocked(struct drm_gem_object *obj, struct iosys_map *map); int drm_gem_objects_lookup(struct drm_file *filp, void __user *bo_handles, int count, struct drm_gem_object ***objs_out); struct drm_gem_object *drm_gem_object_lookup(struct drm_file *filp, u32 handle); long drm_gem_dma_resv_wait(struct drm_file *filep, u32 handle, bool wait_all, unsigned long timeout); int drm_gem_lock_reservations(struct drm_gem_object **objs, int count, struct ww_acquire_ctx *acquire_ctx); void drm_gem_unlock_reservations(struct drm_gem_object **objs, int count, struct ww_acquire_ctx *acquire_ctx); int drm_gem_dumb_map_offset(struct drm_file *file, struct drm_device *dev, u32 handle, u64 *offset); void drm_gem_lru_init(struct drm_gem_lru *lru, struct mutex *lock); void drm_gem_lru_remove(struct drm_gem_object *obj); void drm_gem_lru_move_tail_locked(struct drm_gem_lru *lru, struct drm_gem_object *obj); void drm_gem_lru_move_tail(struct drm_gem_lru *lru, struct drm_gem_object *obj); unsigned long drm_gem_lru_scan(struct drm_gem_lru *lru, unsigned int nr_to_scan, unsigned long *remaining, bool (*shrink)(struct drm_gem_object *obj)); int drm_gem_evict(struct drm_gem_object *obj); /** * drm_gem_object_is_shared_for_memory_stats - helper for shared memory stats * * This helper should only be used for fdinfo shared memory stats to determine * if a GEM object is shared. * * @obj: obj in question */ static inline bool drm_gem_object_is_shared_for_memory_stats(struct drm_gem_object *obj) { return (obj->handle_count > 1) || obj->dma_buf; } #ifdef CONFIG_LOCKDEP /** * drm_gem_gpuva_set_lock() - Set the lock protecting accesses to the gpuva list. * @obj: the &drm_gem_object * @lock: the lock used to protect the gpuva list. The locking primitive * must contain a dep_map field. * * Call this if you're not proctecting access to the gpuva list with the * dma-resv lock, but with a custom lock. */ #define drm_gem_gpuva_set_lock(obj, lock) \ if (!WARN((obj)->gpuva.lock_dep_map, \ "GEM GPUVA lock should be set only once.")) \ (obj)->gpuva.lock_dep_map = &(lock)->dep_map #define drm_gem_gpuva_assert_lock_held(obj) \ lockdep_assert((obj)->gpuva.lock_dep_map ? \ lock_is_held((obj)->gpuva.lock_dep_map) : \ dma_resv_held((obj)->resv)) #else #define drm_gem_gpuva_set_lock(obj, lock) do {} while (0) #define drm_gem_gpuva_assert_lock_held(obj) do {} while (0) #endif /** * drm_gem_gpuva_init() - initialize the gpuva list of a GEM object * @obj: the &drm_gem_object * * This initializes the &drm_gem_object's &drm_gpuvm_bo list. * * Calling this function is only necessary for drivers intending to support the * &drm_driver_feature DRIVER_GEM_GPUVA. * * See also drm_gem_gpuva_set_lock(). */ static inline void drm_gem_gpuva_init(struct drm_gem_object *obj) { INIT_LIST_HEAD(&obj->gpuva.list); } /** * drm_gem_for_each_gpuvm_bo() - iterator to walk over a list of &drm_gpuvm_bo * @entry__: &drm_gpuvm_bo structure to assign to in each iteration step * @obj__: the &drm_gem_object the &drm_gpuvm_bo to walk are associated with * * This iterator walks over all &drm_gpuvm_bo structures associated with the * &drm_gem_object. */ #define drm_gem_for_each_gpuvm_bo(entry__, obj__) \ list_for_each_entry(entry__, &(obj__)->gpuva.list, list.entry.gem) /** * drm_gem_for_each_gpuvm_bo_safe() - iterator to safely walk over a list of * &drm_gpuvm_bo * @entry__: &drm_gpuvm_bostructure to assign to in each iteration step * @next__: &next &drm_gpuvm_bo to store the next step * @obj__: the &drm_gem_object the &drm_gpuvm_bo to walk are associated with * * This iterator walks over all &drm_gpuvm_bo structures associated with the * &drm_gem_object. It is implemented with list_for_each_entry_safe(), hence * it is save against removal of elements. */ #define drm_gem_for_each_gpuvm_bo_safe(entry__, next__, obj__) \ list_for_each_entry_safe(entry__, next__, &(obj__)->gpuva.list, list.entry.gem) #endif /* __DRM_GEM_H__ */
2 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _LINUX_RECIPROCAL_DIV_H #define _LINUX_RECIPROCAL_DIV_H #include <linux/types.h> /* * This algorithm is based on the paper "Division by Invariant * Integers Using Multiplication" by Torbjörn Granlund and Peter * L. Montgomery. * * The assembler implementation from Agner Fog, which this code is * based on, can be found here: * http://www.agner.org/optimize/asmlib.zip * * This optimization for A/B is helpful if the divisor B is mostly * runtime invariant. The reciprocal of B is calculated in the * slow-path with reciprocal_value(). The fast-path can then just use * a much faster multiplication operation with a variable dividend A * to calculate the division A/B. */ struct reciprocal_value { u32 m; u8 sh1, sh2; }; /* "reciprocal_value" and "reciprocal_divide" together implement the basic * version of the algorithm described in Figure 4.1 of the paper. */ struct reciprocal_value reciprocal_value(u32 d); static inline u32 reciprocal_divide(u32 a, struct reciprocal_value R) { u32 t = (u32)(((u64)a * R.m) >> 32); return (t + ((a - t) >> R.sh1)) >> R.sh2; } struct reciprocal_value_adv { u32 m; u8 sh, exp; bool is_wide_m; }; /* "reciprocal_value_adv" implements the advanced version of the algorithm * described in Figure 4.2 of the paper except when "divisor > (1U << 31)" whose * ceil(log2(d)) result will be 32 which then requires u128 divide on host. The * exception case could be easily handled before calling "reciprocal_value_adv". * * The advanced version requires more complex calculation to get the reciprocal * multiplier and other control variables, but then could reduce the required * emulation operations. * * It makes no sense to use this advanced version for host divide emulation, * those extra complexities for calculating multiplier etc could completely * waive our saving on emulation operations. * * However, it makes sense to use it for JIT divide code generation for which * we are willing to trade performance of JITed code with that of host. As shown * by the following pseudo code, the required emulation operations could go down * from 6 (the basic version) to 3 or 4. * * To use the result of "reciprocal_value_adv", suppose we want to calculate * n/d, the pseudo C code will be: * * struct reciprocal_value_adv rvalue; * u8 pre_shift, exp; * * // handle exception case. * if (d >= (1U << 31)) { * result = n >= d; * return; * } * * rvalue = reciprocal_value_adv(d, 32) * exp = rvalue.exp; * if (rvalue.is_wide_m && !(d & 1)) { * // floor(log2(d & (2^32 -d))) * pre_shift = fls(d & -d) - 1; * rvalue = reciprocal_value_adv(d >> pre_shift, 32 - pre_shift); * } else { * pre_shift = 0; * } * * // code generation starts. * if (imm == 1U << exp) { * result = n >> exp; * } else if (rvalue.is_wide_m) { * // pre_shift must be zero when reached here. * t = (n * rvalue.m) >> 32; * result = n - t; * result >>= 1; * result += t; * result >>= rvalue.sh - 1; * } else { * if (pre_shift) * result = n >> pre_shift; * result = ((u64)result * rvalue.m) >> 32; * result >>= rvalue.sh; * } */ struct reciprocal_value_adv reciprocal_value_adv(u32 d, u8 prec); #endif /* _LINUX_RECIPROCAL_DIV_H */
12 12 12 12 4 4 12 12 18 12 139 139 139 27 27 27 27 1 1 26 25 25 25 25 2 25 25 25 25 224 112 115 114 114 224 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 // SPDX-License-Identifier: GPL-2.0 /* * Functions related to io context handling */ #include <linux/kernel.h> #include <linux/module.h> #include <linux/init.h> #include <linux/bio.h> #include <linux/blkdev.h> #include <linux/slab.h> #include <linux/security.h> #include <linux/sched/task.h> #include "blk.h" #include "blk-mq-sched.h" /* * For io context allocations */ static struct kmem_cache *iocontext_cachep; #ifdef CONFIG_BLK_ICQ /** * get_io_context - increment reference count to io_context * @ioc: io_context to get * * Increment reference count to @ioc. */ static void get_io_context(struct io_context *ioc) { BUG_ON(atomic_long_read(&ioc->refcount) <= 0); atomic_long_inc(&ioc->refcount); } /* * Exit an icq. Called with ioc locked for blk-mq, and with both ioc * and queue locked for legacy. */ static void ioc_exit_icq(struct io_cq *icq) { struct elevator_type *et = icq->q->elevator->type; if (icq->flags & ICQ_EXITED) return; if (et->ops.exit_icq) et->ops.exit_icq(icq); icq->flags |= ICQ_EXITED; } static void ioc_exit_icqs(struct io_context *ioc) { struct io_cq *icq; spin_lock_irq(&ioc->lock); hlist_for_each_entry(icq, &ioc->icq_list, ioc_node) ioc_exit_icq(icq); spin_unlock_irq(&ioc->lock); } /* * Release an icq. Called with ioc locked for blk-mq, and with both ioc * and queue locked for legacy. */ static void ioc_destroy_icq(struct io_cq *icq) { struct io_context *ioc = icq->ioc; struct request_queue *q = icq->q; struct elevator_type *et = q->elevator->type; lockdep_assert_held(&ioc->lock); lockdep_assert_held(&q->queue_lock); if (icq->flags & ICQ_DESTROYED) return; radix_tree_delete(&ioc->icq_tree, icq->q->id); hlist_del_init(&icq->ioc_node); list_del_init(&icq->q_node); /* * Both setting lookup hint to and clearing it from @icq are done * under queue_lock. If it's not pointing to @icq now, it never * will. Hint assignment itself can race safely. */ if (rcu_access_pointer(ioc->icq_hint) == icq) rcu_assign_pointer(ioc->icq_hint, NULL); ioc_exit_icq(icq); /* * @icq->q might have gone away by the time RCU callback runs * making it impossible to determine icq_cache. Record it in @icq. */ icq->__rcu_icq_cache = et->icq_cache; icq->flags |= ICQ_DESTROYED; kfree_rcu(icq, __rcu_head); } /* * Slow path for ioc release in put_io_context(). Performs double-lock * dancing to unlink all icq's and then frees ioc. */ static void ioc_release_fn(struct work_struct *work) { struct io_context *ioc = container_of(work, struct io_context, release_work); spin_lock_irq(&ioc->lock); while (!hlist_empty(&ioc->icq_list)) { struct io_cq *icq = hlist_entry(ioc->icq_list.first, struct io_cq, ioc_node); struct request_queue *q = icq->q; if (spin_trylock(&q->queue_lock)) { ioc_destroy_icq(icq); spin_unlock(&q->queue_lock); } else { /* Make sure q and icq cannot be freed. */ rcu_read_lock(); /* Re-acquire the locks in the correct order. */ spin_unlock(&ioc->lock); spin_lock(&q->queue_lock); spin_lock(&ioc->lock); ioc_destroy_icq(icq); spin_unlock(&q->queue_lock); rcu_read_unlock(); } } spin_unlock_irq(&ioc->lock); kmem_cache_free(iocontext_cachep, ioc); } /* * Releasing icqs requires reverse order double locking and we may already be * holding a queue_lock. Do it asynchronously from a workqueue. */ static bool ioc_delay_free(struct io_context *ioc) { unsigned long flags; spin_lock_irqsave(&ioc->lock, flags); if (!hlist_empty(&ioc->icq_list)) { queue_work(system_power_efficient_wq, &ioc->release_work); spin_unlock_irqrestore(&ioc->lock, flags); return true; } spin_unlock_irqrestore(&ioc->lock, flags); return false; } /** * ioc_clear_queue - break any ioc association with the specified queue * @q: request_queue being cleared * * Walk @q->icq_list and exit all io_cq's. */ void ioc_clear_queue(struct request_queue *q) { spin_lock_irq(&q->queue_lock); while (!list_empty(&q->icq_list)) { struct io_cq *icq = list_first_entry(&q->icq_list, struct io_cq, q_node); /* * Other context won't hold ioc lock to wait for queue_lock, see * details in ioc_release_fn(). */ spin_lock(&icq->ioc->lock); ioc_destroy_icq(icq); spin_unlock(&icq->ioc->lock); } spin_unlock_irq(&q->queue_lock); } #else /* CONFIG_BLK_ICQ */ static inline void ioc_exit_icqs(struct io_context *ioc) { } static inline bool ioc_delay_free(struct io_context *ioc) { return false; } #endif /* CONFIG_BLK_ICQ */ /** * put_io_context - put a reference of io_context * @ioc: io_context to put * * Decrement reference count of @ioc and release it if the count reaches * zero. */ void put_io_context(struct io_context *ioc) { BUG_ON(atomic_long_read(&ioc->refcount) <= 0); if (atomic_long_dec_and_test(&ioc->refcount) && !ioc_delay_free(ioc)) kmem_cache_free(iocontext_cachep, ioc); } EXPORT_SYMBOL_GPL(put_io_context); /* Called by the exiting task */ void exit_io_context(struct task_struct *task) { struct io_context *ioc; task_lock(task); ioc = task->io_context; task->io_context = NULL; task_unlock(task); if (atomic_dec_and_test(&ioc->active_ref)) { ioc_exit_icqs(ioc); put_io_context(ioc); } } static struct io_context *alloc_io_context(gfp_t gfp_flags, int node) { struct io_context *ioc; ioc = kmem_cache_alloc_node(iocontext_cachep, gfp_flags | __GFP_ZERO, node); if (unlikely(!ioc)) return NULL; atomic_long_set(&ioc->refcount, 1); atomic_set(&ioc->active_ref, 1); #ifdef CONFIG_BLK_ICQ spin_lock_init(&ioc->lock); INIT_RADIX_TREE(&ioc->icq_tree, GFP_ATOMIC); INIT_HLIST_HEAD(&ioc->icq_list); INIT_WORK(&ioc->release_work, ioc_release_fn); #endif ioc->ioprio = IOPRIO_DEFAULT; return ioc; } int set_task_ioprio(struct task_struct *task, int ioprio) { int err; const struct cred *cred = current_cred(), *tcred; rcu_read_lock(); tcred = __task_cred(task); if (!uid_eq(tcred->uid, cred->euid) && !uid_eq(tcred->uid, cred->uid) && !capable(CAP_SYS_NICE)) { rcu_read_unlock(); return -EPERM; } rcu_read_unlock(); err = security_task_setioprio(task, ioprio); if (err) return err; task_lock(task); if (unlikely(!task->io_context)) { struct io_context *ioc; task_unlock(task); ioc = alloc_io_context(GFP_ATOMIC, NUMA_NO_NODE); if (!ioc) return -ENOMEM; task_lock(task); if (task->flags & PF_EXITING) { kmem_cache_free(iocontext_cachep, ioc); goto out; } if (task->io_context) kmem_cache_free(iocontext_cachep, ioc); else task->io_context = ioc; } task->io_context->ioprio = ioprio; out: task_unlock(task); return 0; } EXPORT_SYMBOL_GPL(set_task_ioprio); int __copy_io(unsigned long clone_flags, struct task_struct *tsk) { struct io_context *ioc = current->io_context; /* * Share io context with parent, if CLONE_IO is set */ if (clone_flags & CLONE_IO) { atomic_inc(&ioc->active_ref); tsk->io_context = ioc; } else if (ioprio_valid(ioc->ioprio)) { tsk->io_context = alloc_io_context(GFP_KERNEL, NUMA_NO_NODE); if (!tsk->io_context) return -ENOMEM; tsk->io_context->ioprio = ioc->ioprio; } return 0; } #ifdef CONFIG_BLK_ICQ /** * ioc_lookup_icq - lookup io_cq from ioc * @q: the associated request_queue * * Look up io_cq associated with @ioc - @q pair from @ioc. Must be called * with @q->queue_lock held. */ struct io_cq *ioc_lookup_icq(struct request_queue *q) { struct io_context *ioc = current->io_context; struct io_cq *icq; lockdep_assert_held(&q->queue_lock); /* * icq's are indexed from @ioc using radix tree and hint pointer, * both of which are protected with RCU. All removals are done * holding both q and ioc locks, and we're holding q lock - if we * find a icq which points to us, it's guaranteed to be valid. */ rcu_read_lock(); icq = rcu_dereference(ioc->icq_hint); if (icq && icq->q == q) goto out; icq = radix_tree_lookup(&ioc->icq_tree, q->id); if (icq && icq->q == q) rcu_assign_pointer(ioc->icq_hint, icq); /* allowed to race */ else icq = NULL; out: rcu_read_unlock(); return icq; } EXPORT_SYMBOL(ioc_lookup_icq); /** * ioc_create_icq - create and link io_cq * @q: request_queue of interest * * Make sure io_cq linking @ioc and @q exists. If icq doesn't exist, they * will be created using @gfp_mask. * * The caller is responsible for ensuring @ioc won't go away and @q is * alive and will stay alive until this function returns. */ static struct io_cq *ioc_create_icq(struct request_queue *q) { struct io_context *ioc = current->io_context; struct elevator_type *et = q->elevator->type; struct io_cq *icq; /* allocate stuff */ icq = kmem_cache_alloc_node(et->icq_cache, GFP_ATOMIC | __GFP_ZERO, q->node); if (!icq) return NULL; if (radix_tree_maybe_preload(GFP_ATOMIC) < 0) { kmem_cache_free(et->icq_cache, icq); return NULL; } icq->ioc = ioc; icq->q = q; INIT_LIST_HEAD(&icq->q_node); INIT_HLIST_NODE(&icq->ioc_node); /* lock both q and ioc and try to link @icq */ spin_lock_irq(&q->queue_lock); spin_lock(&ioc->lock); if (likely(!radix_tree_insert(&ioc->icq_tree, q->id, icq))) { hlist_add_head(&icq->ioc_node, &ioc->icq_list); list_add(&icq->q_node, &q->icq_list); if (et->ops.init_icq) et->ops.init_icq(icq); } else { kmem_cache_free(et->icq_cache, icq); icq = ioc_lookup_icq(q); if (!icq) printk(KERN_ERR "cfq: icq link failed!\n"); } spin_unlock(&ioc->lock); spin_unlock_irq(&q->queue_lock); radix_tree_preload_end(); return icq; } struct io_cq *ioc_find_get_icq(struct request_queue *q) { struct io_context *ioc = current->io_context; struct io_cq *icq = NULL; if (unlikely(!ioc)) { ioc = alloc_io_context(GFP_ATOMIC, q->node); if (!ioc) return NULL; task_lock(current); if (current->io_context) { kmem_cache_free(iocontext_cachep, ioc); ioc = current->io_context; } else { current->io_context = ioc; } get_io_context(ioc); task_unlock(current); } else { get_io_context(ioc); spin_lock_irq(&q->queue_lock); icq = ioc_lookup_icq(q); spin_unlock_irq(&q->queue_lock); } if (!icq) { icq = ioc_create_icq(q); if (!icq) { put_io_context(ioc); return NULL; } } return icq; } EXPORT_SYMBOL_GPL(ioc_find_get_icq); #endif /* CONFIG_BLK_ICQ */ static int __init blk_ioc_init(void) { iocontext_cachep = kmem_cache_create("blkdev_ioc", sizeof(struct io_context), 0, SLAB_PANIC, NULL); return 0; } subsys_initcall(blk_ioc_init);
1 1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 // SPDX-License-Identifier: GPL-2.0 /* * Copyright 2024 Linaro Limited * * Author: Daniel Lezcano <daniel.lezcano@linaro.org> * * Thermal thresholds */ #include <linux/list.h> #include <linux/list_sort.h> #include <linux/slab.h> #include "thermal_core.h" #include "thermal_thresholds.h" int thermal_thresholds_init(struct thermal_zone_device *tz) { INIT_LIST_HEAD(&tz->user_thresholds); return 0; } static void __thermal_thresholds_flush(struct thermal_zone_device *tz) { struct list_head *thresholds = &tz->user_thresholds; struct user_threshold *entry, *tmp; list_for_each_entry_safe(entry, tmp, thresholds, list_node) { list_del(&entry->list_node); kfree(entry); } } void thermal_thresholds_flush(struct thermal_zone_device *tz) { lockdep_assert_held(&tz->lock); __thermal_thresholds_flush(tz); thermal_notify_threshold_flush(tz); __thermal_zone_device_update(tz, THERMAL_TZ_FLUSH_THRESHOLDS); } void thermal_thresholds_exit(struct thermal_zone_device *tz) { __thermal_thresholds_flush(tz); } static int __thermal_thresholds_cmp(void *data, const struct list_head *l1, const struct list_head *l2) { struct user_threshold *t1 = container_of(l1, struct user_threshold, list_node); struct user_threshold *t2 = container_of(l2, struct user_threshold, list_node); return t1->temperature - t2->temperature; } static struct user_threshold *__thermal_thresholds_find(const struct list_head *thresholds, int temperature) { struct user_threshold *t; list_for_each_entry(t, thresholds, list_node) if (t->temperature == temperature) return t; return NULL; } static bool thermal_thresholds_handle_raising(struct list_head *thresholds, int temperature, int last_temperature) { struct user_threshold *t; list_for_each_entry(t, thresholds, list_node) { if (!(t->direction & THERMAL_THRESHOLD_WAY_UP)) continue; if (temperature >= t->temperature && last_temperature < t->temperature) return true; } return false; } static bool thermal_thresholds_handle_dropping(struct list_head *thresholds, int temperature, int last_temperature) { struct user_threshold *t; list_for_each_entry_reverse(t, thresholds, list_node) { if (!(t->direction & THERMAL_THRESHOLD_WAY_DOWN)) continue; if (temperature <= t->temperature && last_temperature > t->temperature) return true; } return false; } static void thermal_threshold_find_boundaries(struct list_head *thresholds, int temperature, int *low, int *high) { struct user_threshold *t; list_for_each_entry(t, thresholds, list_node) { if (temperature < t->temperature && (t->direction & THERMAL_THRESHOLD_WAY_UP) && *high > t->temperature) *high = t->temperature; } list_for_each_entry_reverse(t, thresholds, list_node) { if (temperature > t->temperature && (t->direction & THERMAL_THRESHOLD_WAY_DOWN) && *low < t->temperature) *low = t->temperature; } } void thermal_thresholds_handle(struct thermal_zone_device *tz, int *low, int *high) { struct list_head *thresholds = &tz->user_thresholds; int temperature = tz->temperature; int last_temperature = tz->last_temperature; lockdep_assert_held(&tz->lock); thermal_threshold_find_boundaries(thresholds, temperature, low, high); /* * We need a second update in order to detect a threshold being crossed */ if (last_temperature == THERMAL_TEMP_INVALID) return; /* * The temperature is stable, so obviously we can not have * crossed a threshold. */ if (last_temperature == temperature) return; /* * Since last update the temperature: * - increased : thresholds are crossed the way up * - decreased : thresholds are crossed the way down */ if (temperature > last_temperature) { if (thermal_thresholds_handle_raising(thresholds, temperature, last_temperature)) thermal_notify_threshold_up(tz); } else { if (thermal_thresholds_handle_dropping(thresholds, temperature, last_temperature)) thermal_notify_threshold_down(tz); } } int thermal_thresholds_add(struct thermal_zone_device *tz, int temperature, int direction) { struct list_head *thresholds = &tz->user_thresholds; struct user_threshold *t; lockdep_assert_held(&tz->lock); t = __thermal_thresholds_find(thresholds, temperature); if (t) { if (t->direction == direction) return -EEXIST; t->direction |= direction; } else { t = kmalloc(sizeof(*t), GFP_KERNEL); if (!t) return -ENOMEM; INIT_LIST_HEAD(&t->list_node); t->temperature = temperature; t->direction = direction; list_add(&t->list_node, thresholds); list_sort(NULL, thresholds, __thermal_thresholds_cmp); } thermal_notify_threshold_add(tz, temperature, direction); __thermal_zone_device_update(tz, THERMAL_TZ_ADD_THRESHOLD); return 0; } int thermal_thresholds_delete(struct thermal_zone_device *tz, int temperature, int direction) { struct list_head *thresholds = &tz->user_thresholds; struct user_threshold *t; lockdep_assert_held(&tz->lock); t = __thermal_thresholds_find(thresholds, temperature); if (!t) return -ENOENT; if (t->direction == direction) { list_del(&t->list_node); kfree(t); } else { t->direction &= ~direction; } thermal_notify_threshold_delete(tz, temperature, direction); __thermal_zone_device_update(tz, THERMAL_TZ_DEL_THRESHOLD); return 0; } int thermal_thresholds_for_each(struct thermal_zone_device *tz, int (*cb)(struct user_threshold *, void *arg), void *arg) { struct list_head *thresholds = &tz->user_thresholds; struct user_threshold *entry; int ret; guard(thermal_zone)(tz); list_for_each_entry(entry, thresholds, list_node) { ret = cb(entry, arg); if (ret) return ret; } return 0; }
32 32 17 32 32 32 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 // SPDX-License-Identifier: GPL-2.0-or-later /* * net/dccp/minisocks.c * * An implementation of the DCCP protocol * Arnaldo Carvalho de Melo <acme@conectiva.com.br> */ #include <linux/dccp.h> #include <linux/gfp.h> #include <linux/kernel.h> #include <linux/skbuff.h> #include <linux/timer.h> #include <net/sock.h> #include <net/xfrm.h> #include <net/inet_timewait_sock.h> #include <net/rstreason.h> #include "ackvec.h" #include "ccid.h" #include "dccp.h" #include "feat.h" struct inet_timewait_death_row dccp_death_row = { .tw_refcount = REFCOUNT_INIT(1), .sysctl_max_tw_buckets = NR_FILE * 2, .hashinfo = &dccp_hashinfo, }; EXPORT_SYMBOL_GPL(dccp_death_row); void dccp_time_wait(struct sock *sk, int state, int timeo) { struct inet_timewait_sock *tw; tw = inet_twsk_alloc(sk, &dccp_death_row, state); if (tw != NULL) { const struct inet_connection_sock *icsk = inet_csk(sk); const int rto = (icsk->icsk_rto << 2) - (icsk->icsk_rto >> 1); #if IS_ENABLED(CONFIG_IPV6) if (tw->tw_family == PF_INET6) { tw->tw_v6_daddr = sk->sk_v6_daddr; tw->tw_v6_rcv_saddr = sk->sk_v6_rcv_saddr; tw->tw_ipv6only = sk->sk_ipv6only; } #endif /* Get the TIME_WAIT timeout firing. */ if (timeo < rto) timeo = rto; if (state == DCCP_TIME_WAIT) timeo = DCCP_TIMEWAIT_LEN; /* Linkage updates. * Note that access to tw after this point is illegal. */ inet_twsk_hashdance_schedule(tw, sk, &dccp_hashinfo, timeo); } else { /* Sorry, if we're out of memory, just CLOSE this * socket up. We've got bigger problems than * non-graceful socket closings. */ DCCP_WARN("time wait bucket table overflow\n"); } dccp_done(sk); } struct sock *dccp_create_openreq_child(const struct sock *sk, const struct request_sock *req, const struct sk_buff *skb) { /* * Step 3: Process LISTEN state * * (* Generate a new socket and switch to that socket *) * Set S := new socket for this port pair */ struct sock *newsk = inet_csk_clone_lock(sk, req, GFP_ATOMIC); if (newsk != NULL) { struct dccp_request_sock *dreq = dccp_rsk(req); struct inet_connection_sock *newicsk = inet_csk(newsk); struct dccp_sock *newdp = dccp_sk(newsk); newdp->dccps_role = DCCP_ROLE_SERVER; newdp->dccps_hc_rx_ackvec = NULL; newdp->dccps_service_list = NULL; newdp->dccps_hc_rx_ccid = NULL; newdp->dccps_hc_tx_ccid = NULL; newdp->dccps_service = dreq->dreq_service; newdp->dccps_timestamp_echo = dreq->dreq_timestamp_echo; newdp->dccps_timestamp_time = dreq->dreq_timestamp_time; newicsk->icsk_rto = DCCP_TIMEOUT_INIT; INIT_LIST_HEAD(&newdp->dccps_featneg); /* * Step 3: Process LISTEN state * * Choose S.ISS (initial seqno) or set from Init Cookies * Initialize S.GAR := S.ISS * Set S.ISR, S.GSR from packet (or Init Cookies) * * Setting AWL/AWH and SWL/SWH happens as part of the feature * activation below, as these windows all depend on the local * and remote Sequence Window feature values (7.5.2). */ newdp->dccps_iss = dreq->dreq_iss; newdp->dccps_gss = dreq->dreq_gss; newdp->dccps_gar = newdp->dccps_iss; newdp->dccps_isr = dreq->dreq_isr; newdp->dccps_gsr = dreq->dreq_gsr; /* * Activate features: initialise CCIDs, sequence windows etc. */ if (dccp_feat_activate_values(newsk, &dreq->dreq_featneg)) { sk_free_unlock_clone(newsk); return NULL; } dccp_init_xmit_timers(newsk); __DCCP_INC_STATS(DCCP_MIB_PASSIVEOPENS); } return newsk; } EXPORT_SYMBOL_GPL(dccp_create_openreq_child); /* * Process an incoming packet for RESPOND sockets represented * as an request_sock. */ struct sock *dccp_check_req(struct sock *sk, struct sk_buff *skb, struct request_sock *req) { struct sock *child = NULL; struct dccp_request_sock *dreq = dccp_rsk(req); bool own_req; /* TCP/DCCP listeners became lockless. * DCCP stores complex state in its request_sock, so we need * a protection for them, now this code runs without being protected * by the parent (listener) lock. */ spin_lock_bh(&dreq->dreq_lock); /* Check for retransmitted REQUEST */ if (dccp_hdr(skb)->dccph_type == DCCP_PKT_REQUEST) { if (after48(DCCP_SKB_CB(skb)->dccpd_seq, dreq->dreq_gsr)) { dccp_pr_debug("Retransmitted REQUEST\n"); dreq->dreq_gsr = DCCP_SKB_CB(skb)->dccpd_seq; /* * Send another RESPONSE packet * To protect against Request floods, increment retrans * counter (backoff, monitored by dccp_response_timer). */ inet_rtx_syn_ack(sk, req); } /* Network Duplicate, discard packet */ goto out; } DCCP_SKB_CB(skb)->dccpd_reset_code = DCCP_RESET_CODE_PACKET_ERROR; if (dccp_hdr(skb)->dccph_type != DCCP_PKT_ACK && dccp_hdr(skb)->dccph_type != DCCP_PKT_DATAACK) goto drop; /* Invalid ACK */ if (!between48(DCCP_SKB_CB(skb)->dccpd_ack_seq, dreq->dreq_iss, dreq->dreq_gss)) { dccp_pr_debug("Invalid ACK number: ack_seq=%llu, " "dreq_iss=%llu, dreq_gss=%llu\n", (unsigned long long) DCCP_SKB_CB(skb)->dccpd_ack_seq, (unsigned long long) dreq->dreq_iss, (unsigned long long) dreq->dreq_gss); goto drop; } if (dccp_parse_options(sk, dreq, skb)) goto drop; child = inet_csk(sk)->icsk_af_ops->syn_recv_sock(sk, skb, req, NULL, req, &own_req); if (child) { child = inet_csk_complete_hashdance(sk, child, req, own_req); goto out; } DCCP_SKB_CB(skb)->dccpd_reset_code = DCCP_RESET_CODE_TOO_BUSY; drop: if (dccp_hdr(skb)->dccph_type != DCCP_PKT_RESET) req->rsk_ops->send_reset(sk, skb, SK_RST_REASON_NOT_SPECIFIED); inet_csk_reqsk_queue_drop(sk, req); out: spin_unlock_bh(&dreq->dreq_lock); return child; } EXPORT_SYMBOL_GPL(dccp_check_req); /* * Queue segment on the new socket if the new socket is active, * otherwise we just shortcircuit this and continue with * the new socket. */ int dccp_child_process(struct sock *parent, struct sock *child, struct sk_buff *skb) __releases(child) { int ret = 0; const int state = child->sk_state; if (!sock_owned_by_user(child)) { ret = dccp_rcv_state_process(child, skb, dccp_hdr(skb), skb->len); /* Wakeup parent, send SIGIO */ if (state == DCCP_RESPOND && child->sk_state != state) parent->sk_data_ready(parent); } else { /* Alas, it is possible again, because we do lookup * in main socket hash table and lock on listening * socket does not protect us more. */ __sk_add_backlog(child, skb); } bh_unlock_sock(child); sock_put(child); return ret; } EXPORT_SYMBOL_GPL(dccp_child_process); void dccp_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb, struct request_sock *rsk) { DCCP_BUG("DCCP-ACK packets are never sent in LISTEN/RESPOND state"); } EXPORT_SYMBOL_GPL(dccp_reqsk_send_ack); int dccp_reqsk_init(struct request_sock *req, struct dccp_sock const *dp, struct sk_buff const *skb) { struct dccp_request_sock *dreq = dccp_rsk(req); spin_lock_init(&dreq->dreq_lock); inet_rsk(req)->ir_rmt_port = dccp_hdr(skb)->dccph_sport; inet_rsk(req)->ir_num = ntohs(dccp_hdr(skb)->dccph_dport); inet_rsk(req)->acked = 0; dreq->dreq_timestamp_echo = 0; /* inherit feature negotiation options from listening socket */ return dccp_feat_clone_list(&dp->dccps_featneg, &dreq->dreq_featneg); } EXPORT_SYMBOL_GPL(dccp_reqsk_init);
52 40 40 52 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 // SPDX-License-Identifier: GPL-2.0 /* * fs/partitions/sysv68.c * * Copyright (C) 2007 Philippe De Muyter <phdm@macqel.be> */ #include "check.h" /* * Volume ID structure: on first 256-bytes sector of disk */ struct volumeid { u8 vid_unused[248]; u8 vid_mac[8]; /* ASCII string "MOTOROLA" */ }; /* * config block: second 256-bytes sector on disk */ struct dkconfig { u8 ios_unused0[128]; __be32 ios_slcblk; /* Slice table block number */ __be16 ios_slccnt; /* Number of entries in slice table */ u8 ios_unused1[122]; }; /* * combined volumeid and dkconfig block */ struct dkblk0 { struct volumeid dk_vid; struct dkconfig dk_ios; }; /* * Slice Table Structure */ struct slice { __be32 nblocks; /* slice size (in blocks) */ __be32 blkoff; /* block offset of slice */ }; int sysv68_partition(struct parsed_partitions *state) { int i, slices; int slot = 1; Sector sect; unsigned char *data; struct dkblk0 *b; struct slice *slice; char tmp[64]; data = read_part_sector(state, 0, &sect); if (!data) return -1; b = (struct dkblk0 *)data; if (memcmp(b->dk_vid.vid_mac, "MOTOROLA", sizeof(b->dk_vid.vid_mac))) { put_dev_sector(sect); return 0; } slices = be16_to_cpu(b->dk_ios.ios_slccnt); i = be32_to_cpu(b->dk_ios.ios_slcblk); put_dev_sector(sect); data = read_part_sector(state, i, &sect); if (!data) return -1; slices -= 1; /* last slice is the whole disk */ snprintf(tmp, sizeof(tmp), "sysV68: %s(s%u)", state->name, slices); strlcat(state->pp_buf, tmp, PAGE_SIZE); slice = (struct slice *)data; for (i = 0; i < slices; i++, slice++) { if (slot == state->limit) break; if (be32_to_cpu(slice->nblocks)) { put_partition(state, slot, be32_to_cpu(slice->blkoff), be32_to_cpu(slice->nblocks)); snprintf(tmp, sizeof(tmp), "(s%u)", i); strlcat(state->pp_buf, tmp, PAGE_SIZE); } slot++; } strlcat(state->pp_buf, "\n", PAGE_SIZE); put_dev_sector(sect); return 1; }
34 44 49 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _ASM_X86_SHARED_IO_H #define _ASM_X86_SHARED_IO_H #include <linux/types.h> #define BUILDIO(bwl, bw, type) \ static __always_inline void __out##bwl(type value, u16 port) \ { \ asm volatile("out" #bwl " %" #bw "0, %w1" \ : : "a"(value), "Nd"(port)); \ } \ \ static __always_inline type __in##bwl(u16 port) \ { \ type value; \ asm volatile("in" #bwl " %w1, %" #bw "0" \ : "=a"(value) : "Nd"(port)); \ return value; \ } BUILDIO(b, b, u8) BUILDIO(w, w, u16) BUILDIO(l, , u32) #undef BUILDIO #define inb __inb #define inw __inw #define inl __inl #define outb __outb #define outw __outw #define outl __outl #endif
483 426 425 483 403 38 38 38 38 37 38 487 488 309 486 488 487 16279 461 465 463 465 15857 15846 1295 1290 174 1110 1103 1112 1103 42 42 42 41 42 488 487 5 5 5 5 15014 13816 13620 6040 15 1 15015 14960 15026 14941 15015 119 14932 14947 14958 14944 14947 14948 795 10 118 135 16 16 16 16 9 7 7 7 16 16 16 134 135 482 480 481 483 483 481 483 483 482 480 481 4 4 3 3 17 17 44 5 5 5 5 5 5 5 5 3 4 5 4 4 5 27 27 44 44 44 1496 44 116 12648 12648 4869 14942 1496 1464 14132 15707 11622 15005 20 20 14932 428 14919 193 14946 15703 15680 15511 693 447 532 662 15707 15691 11347 11372 15682 15692 6111 15696 15692 15684 6643 5928 5923 1264 5161 6518 5832 88 448 443 15 555 554 554 554 530 534 557 445 7 7 7 7 12631 12932 12631 17 33 33 1 1 33 33 33 11677 11645 11665 11676 11677 11563 11610 5 5 40 5 40 4 4 4 4 4 2 2 2 1 1 40 40 40 2 38 38 40 40 9 9 9 2 7 7 7 4 40 6 6 6 1 26 19 15 19 8 6 19 19 26 26 31 31 26 5 7 5 10 1 9 4 9 10 4 8 10 146 2 146 2 8 8 8 8 8 8 8 8 8 8 8 8 8 8 2 8 8 8 8 8 8 2 2 8 8 8 8 11612 11564 596 592 11365 11360 11356 11611 2458 104 2462 263 2461 406 52 236 237 237 237 236 236 158 159 159 17 159 158 2415 2408 2416 2415 2414 2412 530 532 5 5 529 52 527 528 2275 2414 180 2254 2408 6 1 5 5 6 31 31 31 27 26 27 3 3 1 3 27 27 23 23 23 1 22 22 22 22 9 22 22 22 9 9 9 5 5 15346 15352 15336 2459 2453 10016 298 305 2087 2033 66 5 3 65 45 45 43 64 57 57 65 3 2 2 2 2 2 2 66 45 66 22 55 12 66 165 2 2 165 36 166 169 169 43 169 169 3 3 3 3 3 3 2 169 159 52 52 129 128 7 60 169 54 169 996 791 631 994 66 291 290 256 256 255 256 255 5 255 50 37 32 31 31 32 8 5 2 10 10 10 10 18 3 18 10 18 2 18 18 18 18 8 18 18 18 18 8 18 18 18 18 13 18 13 13 18 18 18 31 31 26 31 1 8 8 8 8 8 8 8 8 1063 1035 1043 1047 50 79 79 54 53 54 54 53 52 52 53 53 54 25 27 7 7 7 7 7 3 1067 265 167 165 11 17 169 17 17 17 17 17 17 17 17 3 11 10 2 11 11 710 662 716 710 717 2356 391 2355 1345 1349 101 101 101 101 383 1 383 5 3 3 1 5 1 15 10334 72 122 122 122 122 121 122 76 122 122 121 122 121 124 124 124 124 121 121 121 121 113 59 58 59 1 120 100 120 120 73 73 66 53 120 112 112 120 53 120 121 44 44 12 44 44 44 44 222 225 223 225 61 61 61 26 26 25 61 61 61 61 33 30 3 2 61 61 61 9 52 61 54 9 9 9 9 9 9 9 52 22 52 52 51 61 60 61 61 6 6 52 6 7 39 38 36 37 37 37 34 37 37 34 39 48 6 6 6 6 61 61 2 2 2 2 1 1 61 33 61 12 11 12 12 7 3 4 4 4 4 7 7 7 7 6 7 2 12 12 12 5 5 5 5 5 5 41 58 59 58 9 59 58 58 20 47 37 37 20 20 37 4 48 42 42 13 9 3 42 19 30 42 41 42 14 42 30 12 6 3 9 30 30 12 42 42 42 14 146 142 2 144 146 27 27 10 10 27 27 17 27 27 27 27 3 1 3 134 3 132 221 168 221 183 182 182 32 25 32 166 166 166 166 47 47 3 3 3 3 119 120 123 123 117 32 116 117 117 123 268 81 268 270 2 85 83 84 1 84 83 85 85 85 85 85 84 85 83 86 3 159 150 150 158 64 2 2 1 1 1 2 9 1 9 9 2 1 1 1 2 2 2 2 1 2 2 1539 1544 1542 1544 1540 177 241 152 241 203 117 241 241 241 1 18 56 41 3 3 18 54 110 111 57 6 6 6 57 57 57 56 6 6 6 56 56 56 56 54 27 27 27 163 64 132 24 131 49 39 33 33 51 39 39 49 39 39 39 39 8 2 43 44 44 44 44 44 44 44 11 44 2 44 44 44 44 9 104 24 112 113 113 9 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282 2283 2284 2285 2286 2287 2288 2289 2290 2291 2292 2293 2294 2295 2296 2297 2298 2299 2300 2301 2302 2303 2304 2305 2306 2307 2308 2309 2310 2311 2312 2313 2314 2315 2316 2317 2318 2319 2320 2321 2322 2323 2324 2325 2326 2327 2328 2329 2330 2331 2332 2333 2334 2335 2336 2337 2338 2339 2340 2341 2342 2343 2344 2345 2346 2347 2348 2349 2350 2351 2352 2353 2354 2355 2356 2357 2358 2359 2360 2361 2362 2363 2364 2365 2366 2367 2368 2369 2370 2371 2372 2373 2374 2375 2376 2377 2378 2379 2380 2381 2382 2383 2384 2385 2386 2387 2388 2389 2390 2391 2392 2393 2394 2395 2396 2397 2398 2399 2400 2401 2402 2403 2404 2405 2406 2407 2408 2409 2410 2411 2412 2413 2414 2415 2416 2417 2418 2419 2420 2421 2422 2423 2424 2425 2426 2427 2428 2429 2430 2431 2432 2433 2434 2435 2436 2437 2438 2439 2440 2441 2442 2443 2444 2445 2446 2447 2448 2449 2450 2451 2452 2453 2454 2455 2456 2457 2458 2459 2460 2461 2462 2463 2464 2465 2466 2467 2468 2469 2470 2471 2472 2473 2474 2475 2476 2477 2478 2479 2480 2481 2482 2483 2484 2485 2486 2487 2488 2489 2490 2491 2492 2493 2494 2495 2496 2497 2498 2499 2500 2501 2502 2503 2504 2505 2506 2507 2508 2509 2510 2511 2512 2513 2514 2515 2516 2517 2518 2519 2520 2521 2522 2523 2524 2525 2526 2527 2528 2529 2530 2531 2532 2533 2534 2535 2536 2537 2538 2539 2540 2541 2542 2543 2544 2545 2546 2547 2548 2549 2550 2551 2552 2553 2554 2555 2556 2557 2558 2559 2560 2561 2562 2563 2564 2565 2566 2567 2568 2569 2570 2571 2572 2573 2574 2575 2576 2577 2578 2579 2580 2581 2582 2583 2584 2585 2586 2587 2588 2589 2590 2591 2592 2593 2594 2595 2596 2597 2598 2599 2600 2601 2602 2603 2604 2605 2606 2607 2608 2609 2610 2611 2612 2613 2614 2615 2616 2617 2618 2619 2620 2621 2622 2623 2624 2625 2626 2627 2628 2629 2630 2631 2632 2633 2634 2635 2636 2637 2638 2639 2640 2641 2642 2643 2644 2645 2646 2647 2648 2649 2650 2651 2652 2653 2654 2655 2656 2657 2658 2659 2660 2661 2662 2663 2664 2665 2666 2667 2668 2669 2670 2671 2672 2673 2674 2675 2676 2677 2678 2679 2680 2681 2682 2683 2684 2685 2686 2687 2688 2689 2690 2691 2692 2693 2694 2695 2696 2697 2698 2699 2700 2701 2702 2703 2704 2705 2706 2707 2708 2709 2710 2711 2712 2713 2714 2715 2716 2717 2718 2719 2720 2721 2722 2723 2724 2725 2726 2727 2728 2729 2730 2731 2732 2733 2734 2735 2736 2737 2738 2739 2740 2741 2742 2743 2744 2745 2746 2747 2748 2749 2750 2751 2752 2753 2754 2755 2756 2757 2758 2759 2760 2761 2762 2763 2764 2765 2766 2767 2768 2769 2770 2771 2772 2773 2774 2775 2776 2777 2778 2779 2780 2781 2782 2783 2784 2785 2786 2787 2788 2789 2790 2791 2792 2793 2794 2795 2796 2797 2798 2799 2800 2801 2802 2803 2804 2805 2806 2807 2808 2809 2810 2811 2812 2813 2814 2815 2816 2817 2818 2819 2820 2821 2822 2823 2824 2825 2826 2827 2828 2829 2830 2831 2832 2833 2834 2835 2836 2837 2838 2839 2840 2841 2842 2843 2844 2845 2846 2847 2848 2849 2850 2851 2852 2853 2854 2855 2856 2857 2858 2859 2860 2861 2862 2863 2864 2865 2866 2867 2868 2869 2870 2871 2872 2873 2874 2875 2876 2877 2878 2879 2880 2881 2882 2883 2884 2885 2886 2887 2888 2889 2890 2891 2892 2893 2894 2895 2896 2897 2898 2899 2900 2901 2902 2903 2904 2905 2906 2907 2908 2909 2910 2911 2912 2913 2914 2915 2916 2917 2918 2919 2920 2921 2922 2923 2924 2925 2926 2927 2928 2929 2930 2931 2932 2933 2934 2935 2936 2937 2938 2939 2940 2941 2942 2943 2944 2945 2946 2947 2948 2949 2950 2951 2952 2953 2954 2955 2956 2957 2958 2959 2960 2961 2962 2963 2964 2965 2966 2967 2968 2969 2970 2971 2972 2973 2974 2975 2976 2977 2978 2979 2980 2981 2982 2983 2984 2985 2986 2987 2988 2989 2990 2991 2992 2993 2994 2995 2996 2997 2998 2999 3000 3001 3002 3003 3004 3005 3006 3007 3008 3009 3010 3011 3012 3013 3014 3015 3016 3017 3018 3019 3020 3021 3022 3023 3024 3025 3026 3027 3028 3029 3030 3031 3032 3033 3034 3035 3036 3037 3038 3039 3040 3041 3042 3043 3044 3045 3046 3047 3048 3049 3050 3051 3052 3053 3054 3055 3056 3057 3058 3059 3060 3061 3062 3063 3064 3065 3066 3067 3068 3069 3070 3071 3072 3073 3074 3075 3076 3077 3078 3079 3080 3081 3082 3083 3084 3085 3086 3087 3088 3089 3090 3091 3092 3093 3094 3095 3096 3097 3098 3099 3100 3101 3102 3103 3104 3105 3106 3107 3108 3109 3110 3111 3112 3113 3114 3115 3116 3117 3118 3119 3120 3121 3122 3123 3124 3125 3126 3127 3128 3129 3130 3131 3132 3133 3134 3135 3136 3137 3138 3139 3140 3141 3142 3143 3144 3145 3146 3147 3148 3149 3150 3151 3152 3153 3154 3155 3156 3157 3158 3159 3160 3161 3162 3163 3164 3165 3166 3167 3168 3169 3170 3171 3172 3173 3174 3175 3176 3177 3178 3179 3180 3181 3182 3183 3184 3185 3186 3187 3188 3189 3190 3191 3192 3193 3194 3195 3196 3197 3198 3199 3200 3201 3202 3203 3204 3205 3206 3207 3208 3209 3210 3211 3212 3213 3214 3215 3216 3217 3218 3219 3220 3221 3222 3223 3224 3225 3226 3227 3228 3229 3230 3231 3232 3233 3234 3235 3236 3237 3238 3239 3240 3241 3242 3243 3244 3245 3246 3247 3248 3249 3250 3251 3252 3253 3254 3255 3256 3257 3258 3259 3260 3261 3262 3263 3264 3265 3266 3267 3268 3269 3270 3271 3272 3273 3274 3275 3276 3277 3278 3279 3280 3281 3282 3283 3284 3285 3286 3287 3288 3289 3290 3291 3292 3293 3294 3295 3296 3297 3298 3299 3300 3301 3302 3303 3304 3305 3306 3307 3308 3309 3310 3311 3312 3313 3314 3315 3316 3317 3318 3319 3320 3321 3322 3323 3324 3325 3326 3327 3328 3329 3330 3331 3332 3333 3334 3335 3336 3337 3338 3339 3340 3341 3342 3343 3344 3345 3346 3347 3348 3349 3350 3351 3352 3353 3354 3355 3356 3357 3358 3359 3360 3361 3362 3363 3364 3365 3366 3367 3368 3369 3370 3371 3372 3373 3374 3375 3376 3377 3378 3379 3380 3381 3382 3383 3384 3385 3386 3387 3388 3389 3390 3391 3392 3393 3394 3395 3396 3397 3398 3399 3400 3401 3402 3403 3404 3405 3406 3407 3408 3409 3410 3411 3412 3413 3414 3415 3416 3417 3418 3419 3420 3421 3422 3423 3424 3425 3426 3427 3428 3429 3430 3431 3432 3433 3434 3435 3436 3437 3438 3439 3440 3441 3442 3443 3444 3445 3446 3447 3448 3449 3450 3451 3452 3453 3454 3455 3456 3457 3458 3459 3460 3461 3462 3463 3464 3465 3466 3467 3468 3469 3470 3471 3472 3473 3474 3475 3476 3477 3478 3479 3480 3481 3482 3483 3484 3485 3486 3487 3488 3489 3490 3491 3492 3493 3494 3495 3496 3497 3498 3499 3500 3501 3502 3503 3504 3505 3506 3507 3508 3509 3510 3511 3512 3513 3514 3515 3516 3517 3518 3519 3520 3521 3522 3523 3524 3525 3526 3527 3528 3529 3530 3531 3532 3533 3534 3535 3536 3537 3538 3539 3540 3541 3542 3543 3544 3545 3546 3547 3548 3549 3550 3551 3552 3553 3554 3555 3556 3557 3558 3559 3560 3561 3562 3563 3564 3565 3566 3567 3568 3569 3570 3571 3572 3573 3574 3575 3576 3577 3578 3579 3580 3581 3582 3583 3584 3585 3586 3587 3588 3589 3590 3591 3592 3593 3594 3595 3596 3597 3598 3599 3600 3601 3602 3603 3604 3605 3606 3607 3608 3609 3610 3611 3612 3613 3614 3615 3616 3617 3618 3619 3620 3621 3622 3623 3624 3625 3626 3627 3628 3629 3630 3631 3632 3633 3634 3635 3636 3637 3638 3639 3640 3641 3642 3643 3644 3645 3646 3647 3648 3649 3650 3651 3652 3653 3654 3655 3656 3657 3658 3659 3660 3661 3662 3663 3664 3665 3666 3667 3668 3669 3670 3671 3672 3673 3674 3675 3676 3677 3678 3679 3680 3681 3682 3683 3684 3685 3686 3687 3688 3689 3690 3691 3692 3693 3694 3695 3696 3697 3698 3699 3700 3701 3702 3703 3704 3705 3706 3707 3708 3709 3710 3711 3712 3713 3714 3715 3716 3717 3718 3719 3720 3721 3722 3723 3724 3725 3726 3727 3728 3729 3730 3731 3732 3733 3734 3735 3736 3737 3738 3739 3740 3741 3742 3743 3744 3745 3746 3747 3748 3749 3750 3751 3752 3753 3754 3755 3756 3757 3758 3759 3760 3761 3762 3763 3764 3765 3766 3767 3768 3769 3770 3771 3772 3773 3774 3775 3776 3777 3778 3779 3780 3781 3782 3783 3784 3785 3786 3787 3788 3789 3790 3791 3792 3793 3794 3795 3796 3797 3798 3799 3800 3801 3802 3803 3804 3805 3806 3807 3808 3809 3810 3811 3812 3813 3814 3815 3816 3817 3818 3819 3820 3821 3822 3823 3824 3825 3826 3827 3828 3829 3830 3831 3832 3833 3834 3835 3836 3837 3838 3839 3840 3841 3842 3843 3844 3845 3846 3847 3848 3849 3850 3851 3852 3853 3854 3855 3856 3857 3858 3859 3860 3861 3862 3863 3864 3865 3866 3867 3868 3869 3870 3871 3872 3873 3874 3875 3876 3877 3878 3879 3880 3881 3882 3883 3884 3885 3886 3887 3888 3889 3890 3891 3892 3893 3894 3895 3896 3897 3898 3899 3900 3901 3902 3903 3904 3905 3906 3907 3908 3909 3910 3911 3912 3913 3914 3915 3916 3917 3918 3919 3920 3921 3922 3923 3924 3925 3926 3927 3928 3929 3930 3931 3932 3933 3934 3935 3936 3937 3938 3939 3940 3941 3942 3943 3944 3945 3946 3947 3948 3949 3950 3951 3952 3953 3954 3955 3956 3957 3958 3959 3960 3961 3962 3963 3964 3965 3966 3967 3968 3969 3970 3971 3972 3973 3974 3975 3976 3977 3978 3979 3980 3981 3982 3983 3984 3985 3986 3987 3988 3989 3990 3991 3992 3993 3994 3995 3996 3997 3998 3999 4000 4001 4002 4003 4004 4005 4006 4007 4008 4009 4010 4011 4012 4013 4014 4015 4016 4017 4018 4019 4020 4021 4022 4023 4024 4025 4026 4027 4028 4029 4030 4031 4032 4033 4034 4035 4036 4037 4038 4039 4040 4041 4042 4043 4044 4045 4046 4047 4048 4049 4050 4051 4052 4053 4054 4055 4056 4057 4058 4059 4060 4061 4062 4063 4064 4065 4066 4067 4068 4069 4070 4071 4072 4073 4074 4075 4076 4077 4078 4079 4080 4081 4082 4083 4084 4085 4086 4087 4088 4089 4090 4091 4092 4093 4094 4095 4096 4097 4098 4099 4100 4101 4102 4103 4104 4105 4106 4107 4108 4109 4110 4111 4112 4113 4114 4115 4116 4117 4118 4119 4120 4121 4122 4123 4124 4125 4126 4127 4128 4129 4130 4131 4132 4133 4134 4135 4136 4137 4138 4139 4140 4141 4142 4143 4144 4145 4146 4147 4148 4149 4150 4151 4152 4153 4154 4155 4156 4157 4158 4159 4160 4161 4162 4163 4164 4165 4166 4167 4168 4169 4170 4171 4172 4173 4174 4175 4176 4177 4178 4179 4180 4181 4182 4183 4184 4185 4186 4187 4188 4189 4190 4191 4192 4193 4194 4195 4196 4197 4198 4199 4200 4201 4202 4203 4204 4205 4206 4207 4208 4209 4210 4211 4212 4213 4214 4215 4216 4217 4218 4219 4220 4221 4222 4223 4224 4225 4226 4227 4228 4229 4230 4231 4232 4233 4234 4235 4236 4237 4238 4239 4240 4241 4242 4243 4244 4245 4246 4247 4248 4249 4250 4251 4252 4253 4254 4255 4256 4257 4258 4259 4260 4261 4262 4263 4264 4265 4266 4267 4268 4269 4270 4271 4272 4273 4274 4275 4276 4277 4278 4279 4280 4281 4282 4283 4284 4285 4286 4287 4288 4289 4290 4291 4292 4293 4294 4295 4296 4297 4298 4299 4300 4301 4302 4303 4304 4305 4306 4307 4308 4309 4310 4311 4312 4313 4314 4315 4316 4317 4318 4319 4320 4321 4322 4323 4324 4325 4326 4327 4328 4329 4330 4331 4332 4333 4334 4335 4336 4337 4338 4339 4340 4341 4342 4343 4344 4345 4346 4347 4348 4349 4350 4351 4352 4353 4354 4355 4356 4357 4358 4359 4360 4361 4362 4363 4364 4365 4366 4367 4368 4369 4370 4371 4372 4373 4374 4375 4376 4377 4378 4379 4380 4381 4382 4383 4384 4385 4386 4387 4388 4389 4390 4391 4392 4393 4394 4395 4396 4397 4398 4399 4400 4401 4402 4403 4404 4405 4406 4407 4408 4409 4410 4411 4412 4413 4414 4415 4416 4417 4418 4419 4420 4421 4422 4423 4424 4425 4426 4427 4428 4429 4430 4431 4432 4433 4434 4435 4436 4437 4438 4439 4440 4441 4442 4443 4444 4445 4446 4447 4448 4449 4450 4451 4452 4453 4454 4455 4456 4457 4458 4459 4460 4461 4462 4463 4464 4465 4466 4467 4468 4469 4470 4471 4472 4473 4474 4475 4476 4477 4478 4479 4480 4481 4482 4483 4484 4485 4486 4487 4488 4489 4490 4491 4492 4493 4494 4495 4496 4497 4498 4499 4500 4501 4502 4503 4504 4505 4506 4507 4508 4509 4510 4511 4512 4513 4514 4515 4516 4517 4518 4519 4520 4521 4522 4523 4524 4525 4526 4527 4528 4529 4530 4531 4532 4533 4534 4535 4536 4537 4538 4539 4540 4541 4542 4543 4544 4545 4546 4547 4548 4549 4550 4551 4552 4553 4554 4555 4556 4557 4558 4559 4560 4561 4562 4563 4564 4565 4566 4567 4568 4569 4570 4571 4572 4573 4574 4575 4576 4577 4578 4579 4580 4581 4582 4583 4584 4585 4586 4587 4588 4589 4590 4591 4592 4593 4594 4595 4596 4597 4598 4599 4600 4601 4602 4603 4604 4605 4606 4607 4608 4609 4610 4611 4612 4613 4614 4615 4616 4617 4618 4619 4620 4621 4622 4623 4624 4625 4626 4627 4628 4629 4630 4631 4632 4633 4634 4635 4636 4637 4638 4639 4640 4641 4642 4643 4644 4645 4646 4647 4648 4649 4650 4651 4652 4653 4654 4655 4656 4657 4658 4659 4660 4661 4662 4663 4664 4665 4666 4667 4668 4669 4670 4671 4672 4673 4674 4675 4676 4677 4678 4679 4680 4681 4682 4683 4684 4685 4686 4687 4688 4689 4690 4691 4692 4693 4694 4695 4696 4697 4698 4699 4700 4701 4702 4703 4704 4705 4706 4707 4708 4709 4710 4711 4712 4713 4714 4715 4716 4717 4718 4719 4720 4721 4722 4723 4724 4725 4726 4727 4728 4729 4730 4731 4732 4733 4734 4735 4736 4737 4738 4739 4740 4741 4742 4743 4744 4745 4746 4747 4748 4749 4750 4751 4752 4753 4754 4755 4756 4757 4758 4759 4760 4761 4762 4763 4764 4765 4766 4767 4768 4769 4770 4771 4772 4773 4774 4775 4776 4777 4778 4779 4780 4781 4782 4783 4784 4785 4786 4787 4788 4789 4790 4791 4792 4793 4794 4795 4796 4797 4798 4799 4800 4801 4802 4803 4804 4805 4806 4807 4808 4809 4810 4811 4812 4813 4814 4815 4816 4817 4818 4819 4820 4821 4822 4823 4824 4825 4826 4827 4828 4829 4830 4831 4832 4833 4834 4835 4836 4837 4838 4839 4840 4841 4842 4843 4844 4845 4846 4847 4848 4849 4850 4851 4852 4853 4854 4855 4856 4857 4858 4859 4860 4861 4862 4863 4864 4865 4866 4867 4868 4869 4870 4871 4872 4873 4874 4875 4876 4877 4878 4879 4880 4881 4882 4883 4884 4885 4886 4887 4888 4889 4890 4891 4892 4893 4894 4895 4896 4897 4898 4899 4900 4901 4902 4903 4904 4905 4906 4907 4908 4909 4910 4911 4912 4913 4914 4915 4916 4917 4918 4919 4920 4921 4922 4923 4924 4925 4926 4927 4928 4929 4930 4931 4932 4933 4934 4935 4936 4937 4938 4939 4940 4941 4942 4943 4944 4945 4946 4947 4948 4949 4950 4951 4952 4953 4954 4955 4956 4957 4958 4959 4960 4961 4962 4963 4964 4965 4966 4967 4968 4969 4970 4971 4972 4973 4974 4975 4976 4977 4978 4979 4980 4981 4982 4983 4984 4985 4986 4987 4988 4989 4990 4991 4992 4993 4994 4995 4996 4997 4998 4999 5000 5001 5002 5003 5004 5005 5006 5007 5008 5009 5010 5011 5012 5013 5014 5015 5016 5017 5018 5019 5020 5021 5022 5023 5024 5025 5026 5027 5028 5029 5030 5031 5032 5033 5034 5035 5036 5037 5038 5039 5040 5041 5042 5043 5044 5045 5046 5047 5048 5049 5050 5051 5052 5053 5054 5055 5056 5057 5058 5059 5060 5061 5062 5063 5064 5065 5066 5067 5068 5069 5070 5071 5072 5073 5074 5075 5076 5077 5078 5079 5080 5081 5082 5083 5084 5085 5086 5087 5088 5089 5090 5091 5092 5093 5094 5095 5096 5097 5098 5099 5100 5101 5102 5103 5104 5105 5106 5107 5108 5109 5110 5111 5112 5113 5114 5115 5116 5117 5118 5119 5120 5121 5122 5123 5124 5125 5126 5127 5128 5129 5130 5131 5132 5133 5134 5135 5136 5137 5138 5139 5140 5141 5142 5143 5144 5145 5146 5147 5148 5149 5150 5151 5152 5153 5154 5155 5156 5157 5158 5159 5160 5161 5162 5163 5164 5165 5166 5167 5168 5169 5170 5171 5172 5173 5174 5175 5176 5177 5178 5179 5180 5181 5182 5183 5184 5185 5186 5187 5188 5189 5190 5191 5192 5193 5194 5195 5196 5197 5198 5199 5200 5201 5202 5203 5204 5205 5206 5207 5208 5209 5210 5211 5212 5213 5214 5215 5216 5217 5218 5219 5220 5221 5222 5223 5224 5225 5226 5227 5228 5229 5230 5231 5232 5233 5234 5235 5236 5237 5238 5239 5240 5241 5242 5243 5244 5245 5246 5247 5248 5249 5250 5251 5252 5253 5254 5255 5256 5257 5258 5259 5260 5261 5262 5263 5264 5265 5266 5267 5268 5269 5270 5271 5272 5273 5274 5275 5276 5277 5278 5279 5280 5281 5282 5283 5284 5285 5286 5287 5288 5289 5290 5291 5292 5293 5294 5295 5296 5297 5298 5299 5300 5301 5302 5303 5304 5305 5306 5307 5308 5309 5310 5311 5312 5313 5314 5315 5316 5317 5318 5319 5320 5321 5322 5323 5324 5325 5326 5327 5328 5329 5330 5331 5332 5333 5334 5335 5336 5337 5338 5339 5340 5341 5342 5343 5344 5345 5346 5347 5348 5349 5350 5351 5352 5353 5354 5355 5356 5357 5358 5359 5360 5361 5362 5363 5364 5365 5366 5367 5368 5369 5370 5371 5372 5373 5374 5375 5376 5377 5378 5379 5380 5381 5382 5383 5384 5385 5386 5387 5388 5389 5390 5391 5392 5393 5394 5395 5396 5397 5398 5399 5400 5401 5402 5403 5404 5405 5406 5407 5408 5409 5410 5411 5412 5413 5414 5415 5416 5417 5418 5419 5420 5421 5422 5423 5424 5425 5426 5427 5428 5429 5430 5431 5432 5433 5434 5435 5436 5437 5438 5439 5440 5441 5442 5443 5444 5445 5446 5447 5448 5449 5450 5451 5452 5453 5454 5455 5456 5457 5458 5459 5460 5461 5462 5463 5464 5465 5466 5467 5468 5469 5470 5471 5472 5473 5474 5475 5476 5477 5478 5479 5480 5481 5482 5483 5484 5485 5486 5487 5488 5489 5490 5491 5492 5493 5494 5495 5496 5497 5498 5499 5500 5501 5502 5503 5504 5505 5506 5507 5508 5509 5510 5511 5512 5513 5514 5515 5516 5517 5518 5519 5520 5521 5522 5523 5524 5525 5526 5527 5528 5529 5530 5531 5532 5533 5534 5535 5536 5537 5538 5539 5540 5541 5542 5543 5544 5545 5546 5547 5548 5549 5550 5551 5552 5553 5554 5555 5556 5557 5558 5559 5560 5561 5562 5563 5564 5565 5566 5567 5568 5569 5570 5571 5572 5573 5574 5575 5576 5577 5578 5579 5580 5581 5582 5583 5584 5585 5586 5587 5588 5589 5590 5591 5592 5593 5594 5595 5596 5597 5598 5599 5600 5601 5602 5603 5604 5605 5606 5607 5608 5609 5610 5611 5612 5613 5614 5615 5616 5617 5618 5619 5620 5621 5622 5623 5624 5625 5626 5627 5628 5629 5630 5631 5632 5633 5634 5635 5636 5637 5638 5639 5640 5641 5642 5643 5644 5645 5646 5647 5648 5649 5650 5651 5652 5653 5654 5655 5656 5657 5658 5659 5660 5661 5662 5663 5664 5665 5666 5667 5668 5669 5670 5671 5672 5673 5674 5675 5676 5677 5678 5679 5680 5681 5682 5683 5684 5685 5686 5687 5688 5689 5690 5691 5692 5693 5694 5695 5696 5697 5698 5699 5700 5701 5702 5703 5704 5705 5706 5707 5708 5709 5710 5711 5712 5713 5714 5715 5716 5717 5718 5719 5720 5721 5722 5723 5724 5725 5726 5727 5728 5729 5730 5731 5732 5733 5734 5735 5736 5737 5738 5739 5740 5741 5742 5743 5744 5745 5746 5747 5748 5749 5750 5751 5752 5753 5754 5755 5756 5757 5758 5759 5760 5761 5762 5763 5764 5765 5766 5767 5768 5769 5770 5771 5772 5773 5774 5775 5776 5777 5778 5779 5780 5781 5782 5783 5784 5785 5786 5787 5788 5789 5790 5791 5792 5793 5794 5795 5796 5797 5798 5799 5800 5801 5802 5803 5804 5805 5806 5807 5808 5809 5810 5811 5812 5813 5814 5815 5816 5817 5818 5819 5820 5821 5822 5823 5824 5825 5826 5827 5828 5829 5830 5831 5832 5833 5834 5835 5836 5837 5838 5839 5840 5841 5842 5843 5844 5845 5846 5847 5848 5849 5850 5851 5852 5853 5854 5855 5856 5857 5858 5859 5860 5861 5862 5863 5864 5865 5866 5867 5868 5869 5870 5871 5872 5873 5874 5875 5876 5877 5878 5879 5880 5881 5882 5883 5884 5885 5886 5887 5888 5889 5890 5891 5892 5893 5894 5895 5896 5897 5898 5899 5900 5901 5902 5903 5904 5905 5906 5907 5908 5909 5910 5911 5912 5913 5914 5915 5916 5917 5918 5919 5920 5921 5922 5923 5924 5925 5926 5927 5928 5929 5930 5931 5932 5933 5934 5935 5936 5937 5938 5939 5940 5941 5942 5943 5944 5945 5946 5947 5948 5949 5950 5951 5952 5953 5954 5955 5956 5957 5958 5959 5960 5961 5962 5963 5964 5965 5966 5967 5968 5969 5970 5971 5972 5973 5974 5975 5976 5977 5978 5979 5980 5981 5982 5983 5984 5985 5986 5987 5988 5989 5990 5991 5992 5993 5994 5995 5996 5997 5998 5999 6000 6001 6002 6003 6004 6005 6006 6007 6008 6009 6010 6011 6012 6013 6014 6015 6016 6017 6018 6019 6020 6021 6022 6023 6024 6025 6026 6027 6028 6029 6030 6031 6032 6033 6034 6035 6036 6037 6038 6039 6040 6041 6042 6043 6044 6045 6046 6047 6048 6049 6050 6051 6052 6053 6054 6055 6056 6057 6058 6059 6060 6061 6062 6063 6064 6065 6066 6067 6068 6069 6070 6071 6072 6073 6074 6075 6076 6077 6078 6079 6080 6081 6082 6083 6084 6085 6086 6087 6088 6089 6090 6091 6092 6093 6094 6095 6096 6097 6098 6099 6100 6101 6102 6103 6104 6105 6106 6107 6108 6109 6110 6111 6112 6113 6114 6115 6116 6117 6118 6119 6120 6121 6122 6123 6124 6125 6126 6127 6128 6129 6130 6131 6132 6133 6134 6135 6136 6137 6138 6139 6140 6141 6142 6143 6144 6145 6146 6147 6148 6149 6150 6151 6152 6153 6154 6155 6156 6157 6158 6159 6160 6161 6162 6163 6164 6165 6166 6167 6168 6169 6170 6171 6172 6173 6174 6175 6176 6177 6178 6179 6180 6181 6182 6183 6184 6185 6186 6187 6188 6189 6190 6191 6192 6193 6194 6195 6196 6197 6198 6199 6200 6201 6202 6203 6204 6205 6206 6207 6208 6209 6210 6211 6212 6213 6214 6215 6216 6217 6218 6219 6220 6221 6222 6223 6224 6225 6226 6227 6228 6229 6230 6231 6232 6233 6234 6235 6236 6237 6238 6239 6240 6241 6242 6243 6244 6245 6246 6247 6248 6249 6250 6251 6252 6253 6254 6255 6256 6257 6258 6259 6260 6261 6262 6263 6264 6265 6266 6267 6268 6269 6270 6271 6272 6273 6274 6275 6276 6277 6278 6279 6280 6281 6282 6283 6284 6285 6286 6287 6288 6289 6290 6291 6292 6293 6294 6295 6296 6297 6298 6299 6300 6301 6302 6303 6304 6305 6306 6307 6308 6309 6310 6311 6312 6313 6314 6315 6316 6317 6318 6319 6320 6321 6322 6323 6324 6325 6326 6327 6328 6329 6330 6331 6332 6333 6334 6335 6336 6337 6338 6339 6340 6341 6342 6343 6344 6345 6346 6347 6348 6349 6350 6351 6352 6353 6354 6355 6356 6357 6358 6359 6360 6361 6362 6363 6364 6365 6366 6367 6368 6369 6370 6371 6372 6373 6374 6375 6376 6377 6378 6379 6380 6381 6382 6383 6384 6385 6386 6387 6388 6389 6390 6391 6392 6393 6394 6395 6396 6397 6398 6399 6400 6401 6402 6403 6404 6405 6406 6407 6408 6409 6410 6411 6412 6413 6414 6415 6416 6417 6418 6419 6420 6421 6422 6423 6424 6425 6426 6427 6428 6429 6430 6431 6432 6433 6434 6435 6436 6437 6438 6439 6440 6441 6442 6443 6444 6445 6446 6447 6448 6449 6450 6451 6452 6453 6454 6455 6456 6457 6458 6459 6460 6461 6462 6463 6464 6465 6466 6467 6468 6469 6470 6471 6472 6473 6474 6475 6476 6477 6478 6479 6480 6481 6482 6483 6484 6485 6486 6487 6488 6489 6490 6491 6492 6493 6494 6495 6496 6497 6498 6499 6500 6501 6502 6503 6504 6505 6506 6507 6508 6509 6510 6511 6512 6513 6514 6515 6516 6517 6518 6519 6520 6521 6522 6523 6524 6525 6526 6527 6528 6529 6530 6531 6532 6533 6534 6535 6536 6537 6538 6539 6540 6541 6542 6543 6544 6545 6546 6547 6548 6549 6550 6551 6552 6553 6554 6555 6556 6557 6558 6559 6560 6561 6562 6563 6564 6565 6566 6567 6568 6569 6570 6571 6572 6573 6574 6575 6576 6577 6578 6579 6580 6581 6582 6583 6584 6585 6586 6587 6588 6589 6590 6591 6592 6593 6594 6595 6596 6597 6598 6599 6600 6601 6602 6603 6604 6605 6606 6607 6608 6609 6610 6611 6612 6613 6614 6615 6616 6617 6618 6619 6620 6621 6622 6623 6624 6625 6626 6627 6628 6629 6630 6631 6632 6633 6634 6635 6636 6637 6638 6639 6640 6641 6642 6643 6644 6645 6646 6647 6648 6649 6650 6651 6652 6653 6654 6655 6656 6657 6658 6659 6660 6661 6662 6663 6664 6665 6666 6667 6668 6669 6670 6671 6672 6673 6674 6675 6676 6677 6678 6679 6680 6681 6682 6683 6684 6685 6686 6687 6688 6689 6690 6691 6692 6693 6694 6695 6696 6697 6698 6699 6700 6701 6702 6703 6704 6705 6706 6707 6708 6709 6710 6711 6712 6713 6714 6715 6716 6717 6718 6719 6720 6721 6722 6723 6724 6725 6726 6727 6728 6729 6730 6731 6732 6733 6734 6735 6736 6737 6738 6739 6740 6741 6742 6743 6744 6745 6746 6747 6748 6749 6750 6751 6752 6753 6754 6755 6756 6757 6758 6759 6760 6761 6762 6763 6764 6765 6766 6767 6768 6769 6770 6771 6772 6773 6774 6775 6776 6777 6778 6779 6780 6781 6782 6783 6784 6785 6786 6787 6788 6789 6790 6791 6792 6793 6794 6795 6796 6797 6798 6799 6800 6801 6802 6803 6804 6805 6806 6807 6808 6809 6810 6811 6812 6813 6814 6815 6816 6817 6818 6819 6820 6821 6822 6823 6824 6825 6826 6827 6828 6829 6830 6831 6832 6833 6834 6835 6836 6837 6838 6839 6840 6841 6842 6843 6844 6845 6846 6847 6848 6849 6850 6851 6852 6853 6854 6855 6856 6857 6858 6859 6860 6861 6862 6863 6864 6865 6866 6867 6868 6869 6870 6871 6872 6873 6874 6875 6876 6877 6878 6879 6880 6881 6882 6883 6884 6885 6886 6887 6888 6889 6890 6891 6892 6893 6894 6895 6896 6897 6898 6899 6900 6901 6902 6903 6904 6905 6906 6907 6908 6909 6910 6911 6912 6913 6914 6915 6916 6917 6918 6919 6920 6921 6922 6923 6924 6925 6926 6927 6928 6929 6930 6931 6932 6933 6934 6935 6936 6937 6938 6939 6940 6941 6942 6943 6944 6945 6946 6947 6948 6949 6950 6951 6952 6953 6954 6955 6956 6957 6958 6959 6960 6961 6962 6963 6964 6965 6966 6967 6968 6969 6970 6971 6972 6973 6974 6975 6976 6977 6978 6979 6980 6981 6982 6983 6984 6985 6986 6987 6988 6989 6990 6991 6992 6993 6994 6995 6996 6997 6998 6999 7000 7001 7002 7003 7004 7005 7006 7007 7008 7009 7010 7011 7012 7013 7014 7015 7016 7017 7018 7019 7020 7021 7022 7023 7024 7025 7026 7027 7028 7029 7030 7031 7032 7033 7034 7035 7036 7037 7038 7039 7040 7041 7042 7043 7044 7045 7046 7047 7048 7049 7050 7051 7052 7053 7054 7055 7056 7057 7058 7059 7060 7061 7062 7063 7064 7065 7066 7067 7068 7069 7070 7071 7072 7073 7074 7075 7076 7077 7078 7079 7080 7081 7082 7083 7084 7085 7086 7087 7088 7089 7090 7091 7092 7093 7094 7095 7096 7097 7098 7099 7100 7101 7102 7103 7104 7105 7106 7107 7108 7109 7110 7111 7112 7113 7114 7115 7116 7117 7118 7119 7120 7121 7122 7123 7124 7125 7126 7127 7128 7129 7130 7131 7132 7133 7134 7135 7136 7137 7138 7139 7140 7141 7142 7143 7144 7145 7146 7147 7148 7149 7150 7151 7152 7153 7154 7155 7156 7157 7158 7159 7160 7161 7162 7163 7164 7165 7166 7167 7168 7169 7170 7171 7172 7173 7174 7175 7176 7177 7178 7179 7180 7181 7182 7183 7184 7185 7186 7187 7188 7189 7190 7191 7192 7193 7194 7195 7196 7197 7198 7199 7200 7201 7202 7203 7204 7205 7206 7207 7208 7209 7210 7211 7212 7213 7214 7215 7216 7217 7218 7219 7220 7221 7222 7223 7224 7225 7226 7227 7228 7229 7230 7231 7232 7233 7234 7235 7236 7237 7238 7239 7240 7241 7242 7243 7244 7245 7246 7247 7248 7249 7250 7251 7252 7253 7254 7255 7256 7257 7258 7259 7260 7261 7262 7263 7264 7265 7266 7267 7268 7269 7270 7271 7272 7273 7274 7275 7276 7277 7278 7279 7280 7281 7282 7283 7284 7285 7286 7287 7288 7289 7290 7291 7292 // SPDX-License-Identifier: GPL-2.0-or-later /* * Routines having to do with the 'struct sk_buff' memory handlers. * * Authors: Alan Cox <alan@lxorguk.ukuu.org.uk> * Florian La Roche <rzsfl@rz.uni-sb.de> * * Fixes: * Alan Cox : Fixed the worst of the load * balancer bugs. * Dave Platt : Interrupt stacking fix. * Richard Kooijman : Timestamp fixes. * Alan Cox : Changed buffer format. * Alan Cox : destructor hook for AF_UNIX etc. * Linus Torvalds : Better skb_clone. * Alan Cox : Added skb_copy. * Alan Cox : Added all the changed routines Linus * only put in the headers * Ray VanTassle : Fixed --skb->lock in free * Alan Cox : skb_copy copy arp field * Andi Kleen : slabified it. * Robert Olsson : Removed skb_head_pool * * NOTE: * The __skb_ routines should be called with interrupts * disabled, or you better be *real* sure that the operation is atomic * with respect to whatever list is being frobbed (e.g. via lock_sock() * or via disabling bottom half handlers, etc). */ /* * The functions in this file will not compile correctly with gcc 2.4.x */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/module.h> #include <linux/types.h> #include <linux/kernel.h> #include <linux/mm.h> #include <linux/interrupt.h> #include <linux/in.h> #include <linux/inet.h> #include <linux/slab.h> #include <linux/tcp.h> #include <linux/udp.h> #include <linux/sctp.h> #include <linux/netdevice.h> #ifdef CONFIG_NET_CLS_ACT #include <net/pkt_sched.h> #endif #include <linux/string.h> #include <linux/skbuff.h> #include <linux/skbuff_ref.h> #include <linux/splice.h> #include <linux/cache.h> #include <linux/rtnetlink.h> #include <linux/init.h> #include <linux/scatterlist.h> #include <linux/errqueue.h> #include <linux/prefetch.h> #include <linux/bitfield.h> #include <linux/if_vlan.h> #include <linux/mpls.h> #include <linux/kcov.h> #include <linux/iov_iter.h> #include <net/protocol.h> #include <net/dst.h> #include <net/sock.h> #include <net/checksum.h> #include <net/gso.h> #include <net/hotdata.h> #include <net/ip6_checksum.h> #include <net/xfrm.h> #include <net/mpls.h> #include <net/mptcp.h> #include <net/mctp.h> #include <net/page_pool/helpers.h> #include <net/dropreason.h> #include <linux/uaccess.h> #include <trace/events/skb.h> #include <linux/highmem.h> #include <linux/capability.h> #include <linux/user_namespace.h> #include <linux/indirect_call_wrapper.h> #include <linux/textsearch.h> #include "dev.h" #include "netmem_priv.h" #include "sock_destructor.h" #ifdef CONFIG_SKB_EXTENSIONS static struct kmem_cache *skbuff_ext_cache __ro_after_init; #endif #define SKB_SMALL_HEAD_SIZE SKB_HEAD_ALIGN(MAX_TCP_HEADER) /* We want SKB_SMALL_HEAD_CACHE_SIZE to not be a power of two. * This should ensure that SKB_SMALL_HEAD_HEADROOM is a unique * size, and we can differentiate heads from skb_small_head_cache * vs system slabs by looking at their size (skb_end_offset()). */ #define SKB_SMALL_HEAD_CACHE_SIZE \ (is_power_of_2(SKB_SMALL_HEAD_SIZE) ? \ (SKB_SMALL_HEAD_SIZE + L1_CACHE_BYTES) : \ SKB_SMALL_HEAD_SIZE) #define SKB_SMALL_HEAD_HEADROOM \ SKB_WITH_OVERHEAD(SKB_SMALL_HEAD_CACHE_SIZE) /* kcm_write_msgs() relies on casting paged frags to bio_vec to use * iov_iter_bvec(). These static asserts ensure the cast is valid is long as the * netmem is a page. */ static_assert(offsetof(struct bio_vec, bv_page) == offsetof(skb_frag_t, netmem)); static_assert(sizeof_field(struct bio_vec, bv_page) == sizeof_field(skb_frag_t, netmem)); static_assert(offsetof(struct bio_vec, bv_len) == offsetof(skb_frag_t, len)); static_assert(sizeof_field(struct bio_vec, bv_len) == sizeof_field(skb_frag_t, len)); static_assert(offsetof(struct bio_vec, bv_offset) == offsetof(skb_frag_t, offset)); static_assert(sizeof_field(struct bio_vec, bv_offset) == sizeof_field(skb_frag_t, offset)); #undef FN #define FN(reason) [SKB_DROP_REASON_##reason] = #reason, static const char * const drop_reasons[] = { [SKB_CONSUMED] = "CONSUMED", DEFINE_DROP_REASON(FN, FN) }; static const struct drop_reason_list drop_reasons_core = { .reasons = drop_reasons, .n_reasons = ARRAY_SIZE(drop_reasons), }; const struct drop_reason_list __rcu * drop_reasons_by_subsys[SKB_DROP_REASON_SUBSYS_NUM] = { [SKB_DROP_REASON_SUBSYS_CORE] = RCU_INITIALIZER(&drop_reasons_core), }; EXPORT_SYMBOL(drop_reasons_by_subsys); /** * drop_reasons_register_subsys - register another drop reason subsystem * @subsys: the subsystem to register, must not be the core * @list: the list of drop reasons within the subsystem, must point to * a statically initialized list */ void drop_reasons_register_subsys(enum skb_drop_reason_subsys subsys, const struct drop_reason_list *list) { if (WARN(subsys <= SKB_DROP_REASON_SUBSYS_CORE || subsys >= ARRAY_SIZE(drop_reasons_by_subsys), "invalid subsystem %d\n", subsys)) return; /* must point to statically allocated memory, so INIT is OK */ RCU_INIT_POINTER(drop_reasons_by_subsys[subsys], list); } EXPORT_SYMBOL_GPL(drop_reasons_register_subsys); /** * drop_reasons_unregister_subsys - unregister a drop reason subsystem * @subsys: the subsystem to remove, must not be the core * * Note: This will synchronize_rcu() to ensure no users when it returns. */ void drop_reasons_unregister_subsys(enum skb_drop_reason_subsys subsys) { if (WARN(subsys <= SKB_DROP_REASON_SUBSYS_CORE || subsys >= ARRAY_SIZE(drop_reasons_by_subsys), "invalid subsystem %d\n", subsys)) return; RCU_INIT_POINTER(drop_reasons_by_subsys[subsys], NULL); synchronize_rcu(); } EXPORT_SYMBOL_GPL(drop_reasons_unregister_subsys); /** * skb_panic - private function for out-of-line support * @skb: buffer * @sz: size * @addr: address * @msg: skb_over_panic or skb_under_panic * * Out-of-line support for skb_put() and skb_push(). * Called via the wrapper skb_over_panic() or skb_under_panic(). * Keep out of line to prevent kernel bloat. * __builtin_return_address is not used because it is not always reliable. */ static void skb_panic(struct sk_buff *skb, unsigned int sz, void *addr, const char msg[]) { pr_emerg("%s: text:%px len:%d put:%d head:%px data:%px tail:%#lx end:%#lx dev:%s\n", msg, addr, skb->len, sz, skb->head, skb->data, (unsigned long)skb->tail, (unsigned long)skb->end, skb->dev ? skb->dev->name : "<NULL>"); BUG(); } static void skb_over_panic(struct sk_buff *skb, unsigned int sz, void *addr) { skb_panic(skb, sz, addr, __func__); } static void skb_under_panic(struct sk_buff *skb, unsigned int sz, void *addr) { skb_panic(skb, sz, addr, __func__); } #define NAPI_SKB_CACHE_SIZE 64 #define NAPI_SKB_CACHE_BULK 16 #define NAPI_SKB_CACHE_HALF (NAPI_SKB_CACHE_SIZE / 2) #if PAGE_SIZE == SZ_4K #define NAPI_HAS_SMALL_PAGE_FRAG 1 #define NAPI_SMALL_PAGE_PFMEMALLOC(nc) ((nc).pfmemalloc) /* specialized page frag allocator using a single order 0 page * and slicing it into 1K sized fragment. Constrained to systems * with a very limited amount of 1K fragments fitting a single * page - to avoid excessive truesize underestimation */ struct page_frag_1k { void *va; u16 offset; bool pfmemalloc; }; static void *page_frag_alloc_1k(struct page_frag_1k *nc, gfp_t gfp) { struct page *page; int offset; offset = nc->offset - SZ_1K; if (likely(offset >= 0)) goto use_frag; page = alloc_pages_node(NUMA_NO_NODE, gfp, 0); if (!page) return NULL; nc->va = page_address(page); nc->pfmemalloc = page_is_pfmemalloc(page); offset = PAGE_SIZE - SZ_1K; page_ref_add(page, offset / SZ_1K); use_frag: nc->offset = offset; return nc->va + offset; } #else /* the small page is actually unused in this build; add dummy helpers * to please the compiler and avoid later preprocessor's conditionals */ #define NAPI_HAS_SMALL_PAGE_FRAG 0 #define NAPI_SMALL_PAGE_PFMEMALLOC(nc) false struct page_frag_1k { }; static void *page_frag_alloc_1k(struct page_frag_1k *nc, gfp_t gfp_mask) { return NULL; } #endif struct napi_alloc_cache { local_lock_t bh_lock; struct page_frag_cache page; struct page_frag_1k page_small; unsigned int skb_count; void *skb_cache[NAPI_SKB_CACHE_SIZE]; }; static DEFINE_PER_CPU(struct page_frag_cache, netdev_alloc_cache); static DEFINE_PER_CPU(struct napi_alloc_cache, napi_alloc_cache) = { .bh_lock = INIT_LOCAL_LOCK(bh_lock), }; /* Double check that napi_get_frags() allocates skbs with * skb->head being backed by slab, not a page fragment. * This is to make sure bug fixed in 3226b158e67c * ("net: avoid 32 x truesize under-estimation for tiny skbs") * does not accidentally come back. */ void napi_get_frags_check(struct napi_struct *napi) { struct sk_buff *skb; local_bh_disable(); skb = napi_get_frags(napi); WARN_ON_ONCE(!NAPI_HAS_SMALL_PAGE_FRAG && skb && skb->head_frag); napi_free_frags(napi); local_bh_enable(); } void *__napi_alloc_frag_align(unsigned int fragsz, unsigned int align_mask) { struct napi_alloc_cache *nc = this_cpu_ptr(&napi_alloc_cache); void *data; fragsz = SKB_DATA_ALIGN(fragsz); local_lock_nested_bh(&napi_alloc_cache.bh_lock); data = __page_frag_alloc_align(&nc->page, fragsz, GFP_ATOMIC | __GFP_NOWARN, align_mask); local_unlock_nested_bh(&napi_alloc_cache.bh_lock); return data; } EXPORT_SYMBOL(__napi_alloc_frag_align); void *__netdev_alloc_frag_align(unsigned int fragsz, unsigned int align_mask) { void *data; if (in_hardirq() || irqs_disabled()) { struct page_frag_cache *nc = this_cpu_ptr(&netdev_alloc_cache); fragsz = SKB_DATA_ALIGN(fragsz); data = __page_frag_alloc_align(nc, fragsz, GFP_ATOMIC | __GFP_NOWARN, align_mask); } else { local_bh_disable(); data = __napi_alloc_frag_align(fragsz, align_mask); local_bh_enable(); } return data; } EXPORT_SYMBOL(__netdev_alloc_frag_align); static struct sk_buff *napi_skb_cache_get(void) { struct napi_alloc_cache *nc = this_cpu_ptr(&napi_alloc_cache); struct sk_buff *skb; local_lock_nested_bh(&napi_alloc_cache.bh_lock); if (unlikely(!nc->skb_count)) { nc->skb_count = kmem_cache_alloc_bulk(net_hotdata.skbuff_cache, GFP_ATOMIC | __GFP_NOWARN, NAPI_SKB_CACHE_BULK, nc->skb_cache); if (unlikely(!nc->skb_count)) { local_unlock_nested_bh(&napi_alloc_cache.bh_lock); return NULL; } } skb = nc->skb_cache[--nc->skb_count]; local_unlock_nested_bh(&napi_alloc_cache.bh_lock); kasan_mempool_unpoison_object(skb, kmem_cache_size(net_hotdata.skbuff_cache)); return skb; } static inline void __finalize_skb_around(struct sk_buff *skb, void *data, unsigned int size) { struct skb_shared_info *shinfo; size -= SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); /* Assumes caller memset cleared SKB */ skb->truesize = SKB_TRUESIZE(size); refcount_set(&skb->users, 1); skb->head = data; skb->data = data; skb_reset_tail_pointer(skb); skb_set_end_offset(skb, size); skb->mac_header = (typeof(skb->mac_header))~0U; skb->transport_header = (typeof(skb->transport_header))~0U; skb->alloc_cpu = raw_smp_processor_id(); /* make sure we initialize shinfo sequentially */ shinfo = skb_shinfo(skb); memset(shinfo, 0, offsetof(struct skb_shared_info, dataref)); atomic_set(&shinfo->dataref, 1); skb_set_kcov_handle(skb, kcov_common_handle()); } static inline void *__slab_build_skb(struct sk_buff *skb, void *data, unsigned int *size) { void *resized; /* Must find the allocation size (and grow it to match). */ *size = ksize(data); /* krealloc() will immediately return "data" when * "ksize(data)" is requested: it is the existing upper * bounds. As a result, GFP_ATOMIC will be ignored. Note * that this "new" pointer needs to be passed back to the * caller for use so the __alloc_size hinting will be * tracked correctly. */ resized = krealloc(data, *size, GFP_ATOMIC); WARN_ON_ONCE(resized != data); return resized; } /* build_skb() variant which can operate on slab buffers. * Note that this should be used sparingly as slab buffers * cannot be combined efficiently by GRO! */ struct sk_buff *slab_build_skb(void *data) { struct sk_buff *skb; unsigned int size; skb = kmem_cache_alloc(net_hotdata.skbuff_cache, GFP_ATOMIC | __GFP_NOWARN); if (unlikely(!skb)) return NULL; memset(skb, 0, offsetof(struct sk_buff, tail)); data = __slab_build_skb(skb, data, &size); __finalize_skb_around(skb, data, size); return skb; } EXPORT_SYMBOL(slab_build_skb); /* Caller must provide SKB that is memset cleared */ static void __build_skb_around(struct sk_buff *skb, void *data, unsigned int frag_size) { unsigned int size = frag_size; /* frag_size == 0 is considered deprecated now. Callers * using slab buffer should use slab_build_skb() instead. */ if (WARN_ONCE(size == 0, "Use slab_build_skb() instead")) data = __slab_build_skb(skb, data, &size); __finalize_skb_around(skb, data, size); } /** * __build_skb - build a network buffer * @data: data buffer provided by caller * @frag_size: size of data (must not be 0) * * Allocate a new &sk_buff. Caller provides space holding head and * skb_shared_info. @data must have been allocated from the page * allocator or vmalloc(). (A @frag_size of 0 to indicate a kmalloc() * allocation is deprecated, and callers should use slab_build_skb() * instead.) * The return is the new skb buffer. * On a failure the return is %NULL, and @data is not freed. * Notes : * Before IO, driver allocates only data buffer where NIC put incoming frame * Driver should add room at head (NET_SKB_PAD) and * MUST add room at tail (SKB_DATA_ALIGN(skb_shared_info)) * After IO, driver calls build_skb(), to allocate sk_buff and populate it * before giving packet to stack. * RX rings only contains data buffers, not full skbs. */ struct sk_buff *__build_skb(void *data, unsigned int frag_size) { struct sk_buff *skb; skb = kmem_cache_alloc(net_hotdata.skbuff_cache, GFP_ATOMIC | __GFP_NOWARN); if (unlikely(!skb)) return NULL; memset(skb, 0, offsetof(struct sk_buff, tail)); __build_skb_around(skb, data, frag_size); return skb; } /* build_skb() is wrapper over __build_skb(), that specifically * takes care of skb->head and skb->pfmemalloc */ struct sk_buff *build_skb(void *data, unsigned int frag_size) { struct sk_buff *skb = __build_skb(data, frag_size); if (likely(skb && frag_size)) { skb->head_frag = 1; skb_propagate_pfmemalloc(virt_to_head_page(data), skb); } return skb; } EXPORT_SYMBOL(build_skb); /** * build_skb_around - build a network buffer around provided skb * @skb: sk_buff provide by caller, must be memset cleared * @data: data buffer provided by caller * @frag_size: size of data */ struct sk_buff *build_skb_around(struct sk_buff *skb, void *data, unsigned int frag_size) { if (unlikely(!skb)) return NULL; __build_skb_around(skb, data, frag_size); if (frag_size) { skb->head_frag = 1; skb_propagate_pfmemalloc(virt_to_head_page(data), skb); } return skb; } EXPORT_SYMBOL(build_skb_around); /** * __napi_build_skb - build a network buffer * @data: data buffer provided by caller * @frag_size: size of data * * Version of __build_skb() that uses NAPI percpu caches to obtain * skbuff_head instead of inplace allocation. * * Returns a new &sk_buff on success, %NULL on allocation failure. */ static struct sk_buff *__napi_build_skb(void *data, unsigned int frag_size) { struct sk_buff *skb; skb = napi_skb_cache_get(); if (unlikely(!skb)) return NULL; memset(skb, 0, offsetof(struct sk_buff, tail)); __build_skb_around(skb, data, frag_size); return skb; } /** * napi_build_skb - build a network buffer * @data: data buffer provided by caller * @frag_size: size of data * * Version of __napi_build_skb() that takes care of skb->head_frag * and skb->pfmemalloc when the data is a page or page fragment. * * Returns a new &sk_buff on success, %NULL on allocation failure. */ struct sk_buff *napi_build_skb(void *data, unsigned int frag_size) { struct sk_buff *skb = __napi_build_skb(data, frag_size); if (likely(skb) && frag_size) { skb->head_frag = 1; skb_propagate_pfmemalloc(virt_to_head_page(data), skb); } return skb; } EXPORT_SYMBOL(napi_build_skb); /* * kmalloc_reserve is a wrapper around kmalloc_node_track_caller that tells * the caller if emergency pfmemalloc reserves are being used. If it is and * the socket is later found to be SOCK_MEMALLOC then PFMEMALLOC reserves * may be used. Otherwise, the packet data may be discarded until enough * memory is free */ static void *kmalloc_reserve(unsigned int *size, gfp_t flags, int node, bool *pfmemalloc) { bool ret_pfmemalloc = false; size_t obj_size; void *obj; obj_size = SKB_HEAD_ALIGN(*size); if (obj_size <= SKB_SMALL_HEAD_CACHE_SIZE && !(flags & KMALLOC_NOT_NORMAL_BITS)) { obj = kmem_cache_alloc_node(net_hotdata.skb_small_head_cache, flags | __GFP_NOMEMALLOC | __GFP_NOWARN, node); *size = SKB_SMALL_HEAD_CACHE_SIZE; if (obj || !(gfp_pfmemalloc_allowed(flags))) goto out; /* Try again but now we are using pfmemalloc reserves */ ret_pfmemalloc = true; obj = kmem_cache_alloc_node(net_hotdata.skb_small_head_cache, flags, node); goto out; } obj_size = kmalloc_size_roundup(obj_size); /* The following cast might truncate high-order bits of obj_size, this * is harmless because kmalloc(obj_size >= 2^32) will fail anyway. */ *size = (unsigned int)obj_size; /* * Try a regular allocation, when that fails and we're not entitled * to the reserves, fail. */ obj = kmalloc_node_track_caller(obj_size, flags | __GFP_NOMEMALLOC | __GFP_NOWARN, node); if (obj || !(gfp_pfmemalloc_allowed(flags))) goto out; /* Try again but now we are using pfmemalloc reserves */ ret_pfmemalloc = true; obj = kmalloc_node_track_caller(obj_size, flags, node); out: if (pfmemalloc) *pfmemalloc = ret_pfmemalloc; return obj; } /* Allocate a new skbuff. We do this ourselves so we can fill in a few * 'private' fields and also do memory statistics to find all the * [BEEP] leaks. * */ /** * __alloc_skb - allocate a network buffer * @size: size to allocate * @gfp_mask: allocation mask * @flags: If SKB_ALLOC_FCLONE is set, allocate from fclone cache * instead of head cache and allocate a cloned (child) skb. * If SKB_ALLOC_RX is set, __GFP_MEMALLOC will be used for * allocations in case the data is required for writeback * @node: numa node to allocate memory on * * Allocate a new &sk_buff. The returned buffer has no headroom and a * tail room of at least size bytes. The object has a reference count * of one. The return is the buffer. On a failure the return is %NULL. * * Buffers may only be allocated from interrupts using a @gfp_mask of * %GFP_ATOMIC. */ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask, int flags, int node) { struct kmem_cache *cache; struct sk_buff *skb; bool pfmemalloc; u8 *data; cache = (flags & SKB_ALLOC_FCLONE) ? net_hotdata.skbuff_fclone_cache : net_hotdata.skbuff_cache; if (sk_memalloc_socks() && (flags & SKB_ALLOC_RX)) gfp_mask |= __GFP_MEMALLOC; /* Get the HEAD */ if ((flags & (SKB_ALLOC_FCLONE | SKB_ALLOC_NAPI)) == SKB_ALLOC_NAPI && likely(node == NUMA_NO_NODE || node == numa_mem_id())) skb = napi_skb_cache_get(); else skb = kmem_cache_alloc_node(cache, gfp_mask & ~GFP_DMA, node); if (unlikely(!skb)) return NULL; prefetchw(skb); /* We do our best to align skb_shared_info on a separate cache * line. It usually works because kmalloc(X > SMP_CACHE_BYTES) gives * aligned memory blocks, unless SLUB/SLAB debug is enabled. * Both skb->head and skb_shared_info are cache line aligned. */ data = kmalloc_reserve(&size, gfp_mask, node, &pfmemalloc); if (unlikely(!data)) goto nodata; /* kmalloc_size_roundup() might give us more room than requested. * Put skb_shared_info exactly at the end of allocated zone, * to allow max possible filling before reallocation. */ prefetchw(data + SKB_WITH_OVERHEAD(size)); /* * Only clear those fields we need to clear, not those that we will * actually initialise below. Hence, don't put any more fields after * the tail pointer in struct sk_buff! */ memset(skb, 0, offsetof(struct sk_buff, tail)); __build_skb_around(skb, data, size); skb->pfmemalloc = pfmemalloc; if (flags & SKB_ALLOC_FCLONE) { struct sk_buff_fclones *fclones; fclones = container_of(skb, struct sk_buff_fclones, skb1); skb->fclone = SKB_FCLONE_ORIG; refcount_set(&fclones->fclone_ref, 1); } return skb; nodata: kmem_cache_free(cache, skb); return NULL; } EXPORT_SYMBOL(__alloc_skb); /** * __netdev_alloc_skb - allocate an skbuff for rx on a specific device * @dev: network device to receive on * @len: length to allocate * @gfp_mask: get_free_pages mask, passed to alloc_skb * * Allocate a new &sk_buff and assign it a usage count of one. The * buffer has NET_SKB_PAD headroom built in. Users should allocate * the headroom they think they need without accounting for the * built in space. The built in space is used for optimisations. * * %NULL is returned if there is no free memory. */ struct sk_buff *__netdev_alloc_skb(struct net_device *dev, unsigned int len, gfp_t gfp_mask) { struct page_frag_cache *nc; struct sk_buff *skb; bool pfmemalloc; void *data; len += NET_SKB_PAD; /* If requested length is either too small or too big, * we use kmalloc() for skb->head allocation. */ if (len <= SKB_WITH_OVERHEAD(1024) || len > SKB_WITH_OVERHEAD(PAGE_SIZE) || (gfp_mask & (__GFP_DIRECT_RECLAIM | GFP_DMA))) { skb = __alloc_skb(len, gfp_mask, SKB_ALLOC_RX, NUMA_NO_NODE); if (!skb) goto skb_fail; goto skb_success; } len = SKB_HEAD_ALIGN(len); if (sk_memalloc_socks()) gfp_mask |= __GFP_MEMALLOC; if (in_hardirq() || irqs_disabled()) { nc = this_cpu_ptr(&netdev_alloc_cache); data = page_frag_alloc(nc, len, gfp_mask); pfmemalloc = page_frag_cache_is_pfmemalloc(nc); } else { local_bh_disable(); local_lock_nested_bh(&napi_alloc_cache.bh_lock); nc = this_cpu_ptr(&napi_alloc_cache.page); data = page_frag_alloc(nc, len, gfp_mask); pfmemalloc = page_frag_cache_is_pfmemalloc(nc); local_unlock_nested_bh(&napi_alloc_cache.bh_lock); local_bh_enable(); } if (unlikely(!data)) return NULL; skb = __build_skb(data, len); if (unlikely(!skb)) { skb_free_frag(data); return NULL; } if (pfmemalloc) skb->pfmemalloc = 1; skb->head_frag = 1; skb_success: skb_reserve(skb, NET_SKB_PAD); skb->dev = dev; skb_fail: return skb; } EXPORT_SYMBOL(__netdev_alloc_skb); /** * napi_alloc_skb - allocate skbuff for rx in a specific NAPI instance * @napi: napi instance this buffer was allocated for * @len: length to allocate * * Allocate a new sk_buff for use in NAPI receive. This buffer will * attempt to allocate the head from a special reserved region used * only for NAPI Rx allocation. By doing this we can save several * CPU cycles by avoiding having to disable and re-enable IRQs. * * %NULL is returned if there is no free memory. */ struct sk_buff *napi_alloc_skb(struct napi_struct *napi, unsigned int len) { gfp_t gfp_mask = GFP_ATOMIC | __GFP_NOWARN; struct napi_alloc_cache *nc; struct sk_buff *skb; bool pfmemalloc; void *data; DEBUG_NET_WARN_ON_ONCE(!in_softirq()); len += NET_SKB_PAD + NET_IP_ALIGN; /* If requested length is either too small or too big, * we use kmalloc() for skb->head allocation. * When the small frag allocator is available, prefer it over kmalloc * for small fragments */ if ((!NAPI_HAS_SMALL_PAGE_FRAG && len <= SKB_WITH_OVERHEAD(1024)) || len > SKB_WITH_OVERHEAD(PAGE_SIZE) || (gfp_mask & (__GFP_DIRECT_RECLAIM | GFP_DMA))) { skb = __alloc_skb(len, gfp_mask, SKB_ALLOC_RX | SKB_ALLOC_NAPI, NUMA_NO_NODE); if (!skb) goto skb_fail; goto skb_success; } if (sk_memalloc_socks()) gfp_mask |= __GFP_MEMALLOC; local_lock_nested_bh(&napi_alloc_cache.bh_lock); nc = this_cpu_ptr(&napi_alloc_cache); if (NAPI_HAS_SMALL_PAGE_FRAG && len <= SKB_WITH_OVERHEAD(1024)) { /* we are artificially inflating the allocation size, but * that is not as bad as it may look like, as: * - 'len' less than GRO_MAX_HEAD makes little sense * - On most systems, larger 'len' values lead to fragment * size above 512 bytes * - kmalloc would use the kmalloc-1k slab for such values * - Builds with smaller GRO_MAX_HEAD will very likely do * little networking, as that implies no WiFi and no * tunnels support, and 32 bits arches. */ len = SZ_1K; data = page_frag_alloc_1k(&nc->page_small, gfp_mask); pfmemalloc = NAPI_SMALL_PAGE_PFMEMALLOC(nc->page_small); } else { len = SKB_HEAD_ALIGN(len); data = page_frag_alloc(&nc->page, len, gfp_mask); pfmemalloc = page_frag_cache_is_pfmemalloc(&nc->page); } local_unlock_nested_bh(&napi_alloc_cache.bh_lock); if (unlikely(!data)) return NULL; skb = __napi_build_skb(data, len); if (unlikely(!skb)) { skb_free_frag(data); return NULL; } if (pfmemalloc) skb->pfmemalloc = 1; skb->head_frag = 1; skb_success: skb_reserve(skb, NET_SKB_PAD + NET_IP_ALIGN); skb->dev = napi->dev; skb_fail: return skb; } EXPORT_SYMBOL(napi_alloc_skb); void skb_add_rx_frag_netmem(struct sk_buff *skb, int i, netmem_ref netmem, int off, int size, unsigned int truesize) { DEBUG_NET_WARN_ON_ONCE(size > truesize); skb_fill_netmem_desc(skb, i, netmem, off, size); skb->len += size; skb->data_len += size; skb->truesize += truesize; } EXPORT_SYMBOL(skb_add_rx_frag_netmem); void skb_coalesce_rx_frag(struct sk_buff *skb, int i, int size, unsigned int truesize) { skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; DEBUG_NET_WARN_ON_ONCE(size > truesize); skb_frag_size_add(frag, size); skb->len += size; skb->data_len += size; skb->truesize += truesize; } EXPORT_SYMBOL(skb_coalesce_rx_frag); static void skb_drop_list(struct sk_buff **listp) { kfree_skb_list(*listp); *listp = NULL; } static inline void skb_drop_fraglist(struct sk_buff *skb) { skb_drop_list(&skb_shinfo(skb)->frag_list); } static void skb_clone_fraglist(struct sk_buff *skb) { struct sk_buff *list; skb_walk_frags(skb, list) skb_get(list); } static bool is_pp_netmem(netmem_ref netmem) { return (netmem_get_pp_magic(netmem) & ~0x3UL) == PP_SIGNATURE; } int skb_pp_cow_data(struct page_pool *pool, struct sk_buff **pskb, unsigned int headroom) { #if IS_ENABLED(CONFIG_PAGE_POOL) u32 size, truesize, len, max_head_size, off; struct sk_buff *skb = *pskb, *nskb; int err, i, head_off; void *data; /* XDP does not support fraglist so we need to linearize * the skb. */ if (skb_has_frag_list(skb)) return -EOPNOTSUPP; max_head_size = SKB_WITH_OVERHEAD(PAGE_SIZE - headroom); if (skb->len > max_head_size + MAX_SKB_FRAGS * PAGE_SIZE) return -ENOMEM; size = min_t(u32, skb->len, max_head_size); truesize = SKB_HEAD_ALIGN(size) + headroom; data = page_pool_dev_alloc_va(pool, &truesize); if (!data) return -ENOMEM; nskb = napi_build_skb(data, truesize); if (!nskb) { page_pool_free_va(pool, data, true); return -ENOMEM; } skb_reserve(nskb, headroom); skb_copy_header(nskb, skb); skb_mark_for_recycle(nskb); err = skb_copy_bits(skb, 0, nskb->data, size); if (err) { consume_skb(nskb); return err; } skb_put(nskb, size); head_off = skb_headroom(nskb) - skb_headroom(skb); skb_headers_offset_update(nskb, head_off); off = size; len = skb->len - off; for (i = 0; i < MAX_SKB_FRAGS && off < skb->len; i++) { struct page *page; u32 page_off; size = min_t(u32, len, PAGE_SIZE); truesize = size; page = page_pool_dev_alloc(pool, &page_off, &truesize); if (!page) { consume_skb(nskb); return -ENOMEM; } skb_add_rx_frag(nskb, i, page, page_off, size, truesize); err = skb_copy_bits(skb, off, page_address(page) + page_off, size); if (err) { consume_skb(nskb); return err; } len -= size; off += size; } consume_skb(skb); *pskb = nskb; return 0; #else return -EOPNOTSUPP; #endif } EXPORT_SYMBOL(skb_pp_cow_data); int skb_cow_data_for_xdp(struct page_pool *pool, struct sk_buff **pskb, struct bpf_prog *prog) { if (!prog->aux->xdp_has_frags) return -EINVAL; return skb_pp_cow_data(pool, pskb, XDP_PACKET_HEADROOM); } EXPORT_SYMBOL(skb_cow_data_for_xdp); #if IS_ENABLED(CONFIG_PAGE_POOL) bool napi_pp_put_page(netmem_ref netmem) { netmem = netmem_compound_head(netmem); /* page->pp_magic is OR'ed with PP_SIGNATURE after the allocation * in order to preserve any existing bits, such as bit 0 for the * head page of compound page and bit 1 for pfmemalloc page, so * mask those bits for freeing side when doing below checking, * and page_is_pfmemalloc() is checked in __page_pool_put_page() * to avoid recycling the pfmemalloc page. */ if (unlikely(!is_pp_netmem(netmem))) return false; page_pool_put_full_netmem(netmem_get_pp(netmem), netmem, false); return true; } EXPORT_SYMBOL(napi_pp_put_page); #endif static bool skb_pp_recycle(struct sk_buff *skb, void *data) { if (!IS_ENABLED(CONFIG_PAGE_POOL) || !skb->pp_recycle) return false; return napi_pp_put_page(page_to_netmem(virt_to_page(data))); } /** * skb_pp_frag_ref() - Increase fragment references of a page pool aware skb * @skb: page pool aware skb * * Increase the fragment reference count (pp_ref_count) of a skb. This is * intended to gain fragment references only for page pool aware skbs, * i.e. when skb->pp_recycle is true, and not for fragments in a * non-pp-recycling skb. It has a fallback to increase references on normal * pages, as page pool aware skbs may also have normal page fragments. */ static int skb_pp_frag_ref(struct sk_buff *skb) { struct skb_shared_info *shinfo; netmem_ref head_netmem; int i; if (!skb->pp_recycle) return -EINVAL; shinfo = skb_shinfo(skb); for (i = 0; i < shinfo->nr_frags; i++) { head_netmem = netmem_compound_head(shinfo->frags[i].netmem); if (likely(is_pp_netmem(head_netmem))) page_pool_ref_netmem(head_netmem); else page_ref_inc(netmem_to_page(head_netmem)); } return 0; } static void skb_kfree_head(void *head, unsigned int end_offset) { if (end_offset == SKB_SMALL_HEAD_HEADROOM) kmem_cache_free(net_hotdata.skb_small_head_cache, head); else kfree(head); } static void skb_free_head(struct sk_buff *skb) { unsigned char *head = skb->head; if (skb->head_frag) { if (skb_pp_recycle(skb, head)) return; skb_free_frag(head); } else { skb_kfree_head(head, skb_end_offset(skb)); } } static void skb_release_data(struct sk_buff *skb, enum skb_drop_reason reason) { struct skb_shared_info *shinfo = skb_shinfo(skb); int i; if (!skb_data_unref(skb, shinfo)) goto exit; if (skb_zcopy(skb)) { bool skip_unref = shinfo->flags & SKBFL_MANAGED_FRAG_REFS; skb_zcopy_clear(skb, true); if (skip_unref) goto free_head; } for (i = 0; i < shinfo->nr_frags; i++) __skb_frag_unref(&shinfo->frags[i], skb->pp_recycle); free_head: if (shinfo->frag_list) kfree_skb_list_reason(shinfo->frag_list, reason); skb_free_head(skb); exit: /* When we clone an SKB we copy the reycling bit. The pp_recycle * bit is only set on the head though, so in order to avoid races * while trying to recycle fragments on __skb_frag_unref() we need * to make one SKB responsible for triggering the recycle path. * So disable the recycling bit if an SKB is cloned and we have * additional references to the fragmented part of the SKB. * Eventually the last SKB will have the recycling bit set and it's * dataref set to 0, which will trigger the recycling */ skb->pp_recycle = 0; } /* * Free an skbuff by memory without cleaning the state. */ static void kfree_skbmem(struct sk_buff *skb) { struct sk_buff_fclones *fclones; switch (skb->fclone) { case SKB_FCLONE_UNAVAILABLE: kmem_cache_free(net_hotdata.skbuff_cache, skb); return; case SKB_FCLONE_ORIG: fclones = container_of(skb, struct sk_buff_fclones, skb1); /* We usually free the clone (TX completion) before original skb * This test would have no chance to be true for the clone, * while here, branch prediction will be good. */ if (refcount_read(&fclones->fclone_ref) == 1) goto fastpath; break; default: /* SKB_FCLONE_CLONE */ fclones = container_of(skb, struct sk_buff_fclones, skb2); break; } if (!refcount_dec_and_test(&fclones->fclone_ref)) return; fastpath: kmem_cache_free(net_hotdata.skbuff_fclone_cache, fclones); } void skb_release_head_state(struct sk_buff *skb) { skb_dst_drop(skb); if (skb->destructor) { DEBUG_NET_WARN_ON_ONCE(in_hardirq()); skb->destructor(skb); } #if IS_ENABLED(CONFIG_NF_CONNTRACK) nf_conntrack_put(skb_nfct(skb)); #endif skb_ext_put(skb); } /* Free everything but the sk_buff shell. */ static void skb_release_all(struct sk_buff *skb, enum skb_drop_reason reason) { skb_release_head_state(skb); if (likely(skb->head)) skb_release_data(skb, reason); } /** * __kfree_skb - private function * @skb: buffer * * Free an sk_buff. Release anything attached to the buffer. * Clean the state. This is an internal helper function. Users should * always call kfree_skb */ void __kfree_skb(struct sk_buff *skb) { skb_release_all(skb, SKB_DROP_REASON_NOT_SPECIFIED); kfree_skbmem(skb); } EXPORT_SYMBOL(__kfree_skb); static __always_inline bool __sk_skb_reason_drop(struct sock *sk, struct sk_buff *skb, enum skb_drop_reason reason) { if (unlikely(!skb_unref(skb))) return false; DEBUG_NET_WARN_ON_ONCE(reason == SKB_NOT_DROPPED_YET || u32_get_bits(reason, SKB_DROP_REASON_SUBSYS_MASK) >= SKB_DROP_REASON_SUBSYS_NUM); if (reason == SKB_CONSUMED) trace_consume_skb(skb, __builtin_return_address(0)); else trace_kfree_skb(skb, __builtin_return_address(0), reason, sk); return true; } /** * sk_skb_reason_drop - free an sk_buff with special reason * @sk: the socket to receive @skb, or NULL if not applicable * @skb: buffer to free * @reason: reason why this skb is dropped * * Drop a reference to the buffer and free it if the usage count has hit * zero. Meanwhile, pass the receiving socket and drop reason to * 'kfree_skb' tracepoint. */ void __fix_address sk_skb_reason_drop(struct sock *sk, struct sk_buff *skb, enum skb_drop_reason reason) { if (__sk_skb_reason_drop(sk, skb, reason)) __kfree_skb(skb); } EXPORT_SYMBOL(sk_skb_reason_drop); #define KFREE_SKB_BULK_SIZE 16 struct skb_free_array { unsigned int skb_count; void *skb_array[KFREE_SKB_BULK_SIZE]; }; static void kfree_skb_add_bulk(struct sk_buff *skb, struct skb_free_array *sa, enum skb_drop_reason reason) { /* if SKB is a clone, don't handle this case */ if (unlikely(skb->fclone != SKB_FCLONE_UNAVAILABLE)) { __kfree_skb(skb); return; } skb_release_all(skb, reason); sa->skb_array[sa->skb_count++] = skb; if (unlikely(sa->skb_count == KFREE_SKB_BULK_SIZE)) { kmem_cache_free_bulk(net_hotdata.skbuff_cache, KFREE_SKB_BULK_SIZE, sa->skb_array); sa->skb_count = 0; } } void __fix_address kfree_skb_list_reason(struct sk_buff *segs, enum skb_drop_reason reason) { struct skb_free_array sa; sa.skb_count = 0; while (segs) { struct sk_buff *next = segs->next; if (__sk_skb_reason_drop(NULL, segs, reason)) { skb_poison_list(segs); kfree_skb_add_bulk(segs, &sa, reason); } segs = next; } if (sa.skb_count) kmem_cache_free_bulk(net_hotdata.skbuff_cache, sa.skb_count, sa.skb_array); } EXPORT_SYMBOL(kfree_skb_list_reason); /* Dump skb information and contents. * * Must only be called from net_ratelimit()-ed paths. * * Dumps whole packets if full_pkt, only headers otherwise. */ void skb_dump(const char *level, const struct sk_buff *skb, bool full_pkt) { struct skb_shared_info *sh = skb_shinfo(skb); struct net_device *dev = skb->dev; struct sock *sk = skb->sk; struct sk_buff *list_skb; bool has_mac, has_trans; int headroom, tailroom; int i, len, seg_len; if (full_pkt) len = skb->len; else len = min_t(int, skb->len, MAX_HEADER + 128); headroom = skb_headroom(skb); tailroom = skb_tailroom(skb); has_mac = skb_mac_header_was_set(skb); has_trans = skb_transport_header_was_set(skb); printk("%sskb len=%u headroom=%u headlen=%u tailroom=%u\n" "mac=(%d,%d) mac_len=%u net=(%d,%d) trans=%d\n" "shinfo(txflags=%u nr_frags=%u gso(size=%hu type=%u segs=%hu))\n" "csum(0x%x start=%u offset=%u ip_summed=%u complete_sw=%u valid=%u level=%u)\n" "hash(0x%x sw=%u l4=%u) proto=0x%04x pkttype=%u iif=%d\n" "priority=0x%x mark=0x%x alloc_cpu=%u vlan_all=0x%x\n" "encapsulation=%d inner(proto=0x%04x, mac=%u, net=%u, trans=%u)\n", level, skb->len, headroom, skb_headlen(skb), tailroom, has_mac ? skb->mac_header : -1, has_mac ? skb_mac_header_len(skb) : -1, skb->mac_len, skb->network_header, has_trans ? skb_network_header_len(skb) : -1, has_trans ? skb->transport_header : -1, sh->tx_flags, sh->nr_frags, sh->gso_size, sh->gso_type, sh->gso_segs, skb->csum, skb->csum_start, skb->csum_offset, skb->ip_summed, skb->csum_complete_sw, skb->csum_valid, skb->csum_level, skb->hash, skb->sw_hash, skb->l4_hash, ntohs(skb->protocol), skb->pkt_type, skb->skb_iif, skb->priority, skb->mark, skb->alloc_cpu, skb->vlan_all, skb->encapsulation, skb->inner_protocol, skb->inner_mac_header, skb->inner_network_header, skb->inner_transport_header); if (dev) printk("%sdev name=%s feat=%pNF\n", level, dev->name, &dev->features); if (sk) printk("%ssk family=%hu type=%u proto=%u\n", level, sk->sk_family, sk->sk_type, sk->sk_protocol); if (full_pkt && headroom) print_hex_dump(level, "skb headroom: ", DUMP_PREFIX_OFFSET, 16, 1, skb->head, headroom, false); seg_len = min_t(int, skb_headlen(skb), len); if (seg_len) print_hex_dump(level, "skb linear: ", DUMP_PREFIX_OFFSET, 16, 1, skb->data, seg_len, false); len -= seg_len; if (full_pkt && tailroom) print_hex_dump(level, "skb tailroom: ", DUMP_PREFIX_OFFSET, 16, 1, skb_tail_pointer(skb), tailroom, false); for (i = 0; len && i < skb_shinfo(skb)->nr_frags; i++) { skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; u32 p_off, p_len, copied; struct page *p; u8 *vaddr; if (skb_frag_is_net_iov(frag)) { printk("%sskb frag %d: not readable\n", level, i); len -= skb_frag_size(frag); if (!len) break; continue; } skb_frag_foreach_page(frag, skb_frag_off(frag), skb_frag_size(frag), p, p_off, p_len, copied) { seg_len = min_t(int, p_len, len); vaddr = kmap_atomic(p); print_hex_dump(level, "skb frag: ", DUMP_PREFIX_OFFSET, 16, 1, vaddr + p_off, seg_len, false); kunmap_atomic(vaddr); len -= seg_len; if (!len) break; } } if (full_pkt && skb_has_frag_list(skb)) { printk("skb fraglist:\n"); skb_walk_frags(skb, list_skb) skb_dump(level, list_skb, true); } } EXPORT_SYMBOL(skb_dump); /** * skb_tx_error - report an sk_buff xmit error * @skb: buffer that triggered an error * * Report xmit error if a device callback is tracking this skb. * skb must be freed afterwards. */ void skb_tx_error(struct sk_buff *skb) { if (skb) { skb_zcopy_downgrade_managed(skb); skb_zcopy_clear(skb, true); } } EXPORT_SYMBOL(skb_tx_error); #ifdef CONFIG_TRACEPOINTS /** * consume_skb - free an skbuff * @skb: buffer to free * * Drop a ref to the buffer and free it if the usage count has hit zero * Functions identically to kfree_skb, but kfree_skb assumes that the frame * is being dropped after a failure and notes that */ void consume_skb(struct sk_buff *skb) { if (!skb_unref(skb)) return; trace_consume_skb(skb, __builtin_return_address(0)); __kfree_skb(skb); } EXPORT_SYMBOL(consume_skb); #endif /** * __consume_stateless_skb - free an skbuff, assuming it is stateless * @skb: buffer to free * * Alike consume_skb(), but this variant assumes that this is the last * skb reference and all the head states have been already dropped */ void __consume_stateless_skb(struct sk_buff *skb) { trace_consume_skb(skb, __builtin_return_address(0)); skb_release_data(skb, SKB_CONSUMED); kfree_skbmem(skb); } static void napi_skb_cache_put(struct sk_buff *skb) { struct napi_alloc_cache *nc = this_cpu_ptr(&napi_alloc_cache); u32 i; if (!kasan_mempool_poison_object(skb)) return; local_lock_nested_bh(&napi_alloc_cache.bh_lock); nc->skb_cache[nc->skb_count++] = skb; if (unlikely(nc->skb_count == NAPI_SKB_CACHE_SIZE)) { for (i = NAPI_SKB_CACHE_HALF; i < NAPI_SKB_CACHE_SIZE; i++) kasan_mempool_unpoison_object(nc->skb_cache[i], kmem_cache_size(net_hotdata.skbuff_cache)); kmem_cache_free_bulk(net_hotdata.skbuff_cache, NAPI_SKB_CACHE_HALF, nc->skb_cache + NAPI_SKB_CACHE_HALF); nc->skb_count = NAPI_SKB_CACHE_HALF; } local_unlock_nested_bh(&napi_alloc_cache.bh_lock); } void __napi_kfree_skb(struct sk_buff *skb, enum skb_drop_reason reason) { skb_release_all(skb, reason); napi_skb_cache_put(skb); } void napi_skb_free_stolen_head(struct sk_buff *skb) { if (unlikely(skb->slow_gro)) { nf_reset_ct(skb); skb_dst_drop(skb); skb_ext_put(skb); skb_orphan(skb); skb->slow_gro = 0; } napi_skb_cache_put(skb); } void napi_consume_skb(struct sk_buff *skb, int budget) { /* Zero budget indicate non-NAPI context called us, like netpoll */ if (unlikely(!budget)) { dev_consume_skb_any(skb); return; } DEBUG_NET_WARN_ON_ONCE(!in_softirq()); if (!skb_unref(skb)) return; /* if reaching here SKB is ready to free */ trace_consume_skb(skb, __builtin_return_address(0)); /* if SKB is a clone, don't handle this case */ if (skb->fclone != SKB_FCLONE_UNAVAILABLE) { __kfree_skb(skb); return; } skb_release_all(skb, SKB_CONSUMED); napi_skb_cache_put(skb); } EXPORT_SYMBOL(napi_consume_skb); /* Make sure a field is contained by headers group */ #define CHECK_SKB_FIELD(field) \ BUILD_BUG_ON(offsetof(struct sk_buff, field) != \ offsetof(struct sk_buff, headers.field)); \ static void __copy_skb_header(struct sk_buff *new, const struct sk_buff *old) { new->tstamp = old->tstamp; /* We do not copy old->sk */ new->dev = old->dev; memcpy(new->cb, old->cb, sizeof(old->cb)); skb_dst_copy(new, old); __skb_ext_copy(new, old); __nf_copy(new, old, false); /* Note : this field could be in the headers group. * It is not yet because we do not want to have a 16 bit hole */ new->queue_mapping = old->queue_mapping; memcpy(&new->headers, &old->headers, sizeof(new->headers)); CHECK_SKB_FIELD(protocol); CHECK_SKB_FIELD(csum); CHECK_SKB_FIELD(hash); CHECK_SKB_FIELD(priority); CHECK_SKB_FIELD(skb_iif); CHECK_SKB_FIELD(vlan_proto); CHECK_SKB_FIELD(vlan_tci); CHECK_SKB_FIELD(transport_header); CHECK_SKB_FIELD(network_header); CHECK_SKB_FIELD(mac_header); CHECK_SKB_FIELD(inner_protocol); CHECK_SKB_FIELD(inner_transport_header); CHECK_SKB_FIELD(inner_network_header); CHECK_SKB_FIELD(inner_mac_header); CHECK_SKB_FIELD(mark); #ifdef CONFIG_NETWORK_SECMARK CHECK_SKB_FIELD(secmark); #endif #ifdef CONFIG_NET_RX_BUSY_POLL CHECK_SKB_FIELD(napi_id); #endif CHECK_SKB_FIELD(alloc_cpu); #ifdef CONFIG_XPS CHECK_SKB_FIELD(sender_cpu); #endif #ifdef CONFIG_NET_SCHED CHECK_SKB_FIELD(tc_index); #endif } /* * You should not add any new code to this function. Add it to * __copy_skb_header above instead. */ static struct sk_buff *__skb_clone(struct sk_buff *n, struct sk_buff *skb) { #define C(x) n->x = skb->x n->next = n->prev = NULL; n->sk = NULL; __copy_skb_header(n, skb); C(len); C(data_len); C(mac_len); n->hdr_len = skb->nohdr ? skb_headroom(skb) : skb->hdr_len; n->cloned = 1; n->nohdr = 0; n->peeked = 0; C(pfmemalloc); C(pp_recycle); n->destructor = NULL; C(tail); C(end); C(head); C(head_frag); C(data); C(truesize); refcount_set(&n->users, 1); atomic_inc(&(skb_shinfo(skb)->dataref)); skb->cloned = 1; return n; #undef C } /** * alloc_skb_for_msg() - allocate sk_buff to wrap frag list forming a msg * @first: first sk_buff of the msg */ struct sk_buff *alloc_skb_for_msg(struct sk_buff *first) { struct sk_buff *n; n = alloc_skb(0, GFP_ATOMIC); if (!n) return NULL; n->len = first->len; n->data_len = first->len; n->truesize = first->truesize; skb_shinfo(n)->frag_list = first; __copy_skb_header(n, first); n->destructor = NULL; return n; } EXPORT_SYMBOL_GPL(alloc_skb_for_msg); /** * skb_morph - morph one skb into another * @dst: the skb to receive the contents * @src: the skb to supply the contents * * This is identical to skb_clone except that the target skb is * supplied by the user. * * The target skb is returned upon exit. */ struct sk_buff *skb_morph(struct sk_buff *dst, struct sk_buff *src) { skb_release_all(dst, SKB_CONSUMED); return __skb_clone(dst, src); } EXPORT_SYMBOL_GPL(skb_morph); int mm_account_pinned_pages(struct mmpin *mmp, size_t size) { unsigned long max_pg, num_pg, new_pg, old_pg, rlim; struct user_struct *user; if (capable(CAP_IPC_LOCK) || !size) return 0; rlim = rlimit(RLIMIT_MEMLOCK); if (rlim == RLIM_INFINITY) return 0; num_pg = (size >> PAGE_SHIFT) + 2; /* worst case */ max_pg = rlim >> PAGE_SHIFT; user = mmp->user ? : current_user(); old_pg = atomic_long_read(&user->locked_vm); do { new_pg = old_pg + num_pg; if (new_pg > max_pg) return -ENOBUFS; } while (!atomic_long_try_cmpxchg(&user->locked_vm, &old_pg, new_pg)); if (!mmp->user) { mmp->user = get_uid(user); mmp->num_pg = num_pg; } else { mmp->num_pg += num_pg; } return 0; } EXPORT_SYMBOL_GPL(mm_account_pinned_pages); void mm_unaccount_pinned_pages(struct mmpin *mmp) { if (mmp->user) { atomic_long_sub(mmp->num_pg, &mmp->user->locked_vm); free_uid(mmp->user); } } EXPORT_SYMBOL_GPL(mm_unaccount_pinned_pages); static struct ubuf_info *msg_zerocopy_alloc(struct sock *sk, size_t size) { struct ubuf_info_msgzc *uarg; struct sk_buff *skb; WARN_ON_ONCE(!in_task()); skb = sock_omalloc(sk, 0, GFP_KERNEL); if (!skb) return NULL; BUILD_BUG_ON(sizeof(*uarg) > sizeof(skb->cb)); uarg = (void *)skb->cb; uarg->mmp.user = NULL; if (mm_account_pinned_pages(&uarg->mmp, size)) { kfree_skb(skb); return NULL; } uarg->ubuf.ops = &msg_zerocopy_ubuf_ops; uarg->id = ((u32)atomic_inc_return(&sk->sk_zckey)) - 1; uarg->len = 1; uarg->bytelen = size; uarg->zerocopy = 1; uarg->ubuf.flags = SKBFL_ZEROCOPY_FRAG | SKBFL_DONT_ORPHAN; refcount_set(&uarg->ubuf.refcnt, 1); sock_hold(sk); return &uarg->ubuf; } static inline struct sk_buff *skb_from_uarg(struct ubuf_info_msgzc *uarg) { return container_of((void *)uarg, struct sk_buff, cb); } struct ubuf_info *msg_zerocopy_realloc(struct sock *sk, size_t size, struct ubuf_info *uarg) { if (uarg) { struct ubuf_info_msgzc *uarg_zc; const u32 byte_limit = 1 << 19; /* limit to a few TSO */ u32 bytelen, next; /* there might be non MSG_ZEROCOPY users */ if (uarg->ops != &msg_zerocopy_ubuf_ops) return NULL; /* realloc only when socket is locked (TCP, UDP cork), * so uarg->len and sk_zckey access is serialized */ if (!sock_owned_by_user(sk)) { WARN_ON_ONCE(1); return NULL; } uarg_zc = uarg_to_msgzc(uarg); bytelen = uarg_zc->bytelen + size; if (uarg_zc->len == USHRT_MAX - 1 || bytelen > byte_limit) { /* TCP can create new skb to attach new uarg */ if (sk->sk_type == SOCK_STREAM) goto new_alloc; return NULL; } next = (u32)atomic_read(&sk->sk_zckey); if ((u32)(uarg_zc->id + uarg_zc->len) == next) { if (mm_account_pinned_pages(&uarg_zc->mmp, size)) return NULL; uarg_zc->len++; uarg_zc->bytelen = bytelen; atomic_set(&sk->sk_zckey, ++next); /* no extra ref when appending to datagram (MSG_MORE) */ if (sk->sk_type == SOCK_STREAM) net_zcopy_get(uarg); return uarg; } } new_alloc: return msg_zerocopy_alloc(sk, size); } EXPORT_SYMBOL_GPL(msg_zerocopy_realloc); static bool skb_zerocopy_notify_extend(struct sk_buff *skb, u32 lo, u16 len) { struct sock_exterr_skb *serr = SKB_EXT_ERR(skb); u32 old_lo, old_hi; u64 sum_len; old_lo = serr->ee.ee_info; old_hi = serr->ee.ee_data; sum_len = old_hi - old_lo + 1ULL + len; if (sum_len >= (1ULL << 32)) return false; if (lo != old_hi + 1) return false; serr->ee.ee_data += len; return true; } static void __msg_zerocopy_callback(struct ubuf_info_msgzc *uarg) { struct sk_buff *tail, *skb = skb_from_uarg(uarg); struct sock_exterr_skb *serr; struct sock *sk = skb->sk; struct sk_buff_head *q; unsigned long flags; bool is_zerocopy; u32 lo, hi; u16 len; mm_unaccount_pinned_pages(&uarg->mmp); /* if !len, there was only 1 call, and it was aborted * so do not queue a completion notification */ if (!uarg->len || sock_flag(sk, SOCK_DEAD)) goto release; len = uarg->len; lo = uarg->id; hi = uarg->id + len - 1; is_zerocopy = uarg->zerocopy; serr = SKB_EXT_ERR(skb); memset(serr, 0, sizeof(*serr)); serr->ee.ee_errno = 0; serr->ee.ee_origin = SO_EE_ORIGIN_ZEROCOPY; serr->ee.ee_data = hi; serr->ee.ee_info = lo; if (!is_zerocopy) serr->ee.ee_code |= SO_EE_CODE_ZEROCOPY_COPIED; q = &sk->sk_error_queue; spin_lock_irqsave(&q->lock, flags); tail = skb_peek_tail(q); if (!tail || SKB_EXT_ERR(tail)->ee.ee_origin != SO_EE_ORIGIN_ZEROCOPY || !skb_zerocopy_notify_extend(tail, lo, len)) { __skb_queue_tail(q, skb); skb = NULL; } spin_unlock_irqrestore(&q->lock, flags); sk_error_report(sk); release: consume_skb(skb); sock_put(sk); } static void msg_zerocopy_complete(struct sk_buff *skb, struct ubuf_info *uarg, bool success) { struct ubuf_info_msgzc *uarg_zc = uarg_to_msgzc(uarg); uarg_zc->zerocopy = uarg_zc->zerocopy & success; if (refcount_dec_and_test(&uarg->refcnt)) __msg_zerocopy_callback(uarg_zc); } void msg_zerocopy_put_abort(struct ubuf_info *uarg, bool have_uref) { struct sock *sk = skb_from_uarg(uarg_to_msgzc(uarg))->sk; atomic_dec(&sk->sk_zckey); uarg_to_msgzc(uarg)->len--; if (have_uref) msg_zerocopy_complete(NULL, uarg, true); } EXPORT_SYMBOL_GPL(msg_zerocopy_put_abort); const struct ubuf_info_ops msg_zerocopy_ubuf_ops = { .complete = msg_zerocopy_complete, }; EXPORT_SYMBOL_GPL(msg_zerocopy_ubuf_ops); int skb_zerocopy_iter_stream(struct sock *sk, struct sk_buff *skb, struct msghdr *msg, int len, struct ubuf_info *uarg) { int err, orig_len = skb->len; if (uarg->ops->link_skb) { err = uarg->ops->link_skb(skb, uarg); if (err) return err; } else { struct ubuf_info *orig_uarg = skb_zcopy(skb); /* An skb can only point to one uarg. This edge case happens * when TCP appends to an skb, but zerocopy_realloc triggered * a new alloc. */ if (orig_uarg && uarg != orig_uarg) return -EEXIST; } err = __zerocopy_sg_from_iter(msg, sk, skb, &msg->msg_iter, len); if (err == -EFAULT || (err == -EMSGSIZE && skb->len == orig_len)) { struct sock *save_sk = skb->sk; /* Streams do not free skb on error. Reset to prev state. */ iov_iter_revert(&msg->msg_iter, skb->len - orig_len); skb->sk = sk; ___pskb_trim(skb, orig_len); skb->sk = save_sk; return err; } skb_zcopy_set(skb, uarg, NULL); return skb->len - orig_len; } EXPORT_SYMBOL_GPL(skb_zerocopy_iter_stream); void __skb_zcopy_downgrade_managed(struct sk_buff *skb) { int i; skb_shinfo(skb)->flags &= ~SKBFL_MANAGED_FRAG_REFS; for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) skb_frag_ref(skb, i); } EXPORT_SYMBOL_GPL(__skb_zcopy_downgrade_managed); static int skb_zerocopy_clone(struct sk_buff *nskb, struct sk_buff *orig, gfp_t gfp_mask) { if (skb_zcopy(orig)) { if (skb_zcopy(nskb)) { /* !gfp_mask callers are verified to !skb_zcopy(nskb) */ if (!gfp_mask) { WARN_ON_ONCE(1); return -ENOMEM; } if (skb_uarg(nskb) == skb_uarg(orig)) return 0; if (skb_copy_ubufs(nskb, GFP_ATOMIC)) return -EIO; } skb_zcopy_set(nskb, skb_uarg(orig), NULL); } return 0; } /** * skb_copy_ubufs - copy userspace skb frags buffers to kernel * @skb: the skb to modify * @gfp_mask: allocation priority * * This must be called on skb with SKBFL_ZEROCOPY_ENABLE. * It will copy all frags into kernel and drop the reference * to userspace pages. * * If this function is called from an interrupt gfp_mask() must be * %GFP_ATOMIC. * * Returns 0 on success or a negative error code on failure * to allocate kernel memory to copy to. */ int skb_copy_ubufs(struct sk_buff *skb, gfp_t gfp_mask) { int num_frags = skb_shinfo(skb)->nr_frags; struct page *page, *head = NULL; int i, order, psize, new_frags; u32 d_off; if (skb_shared(skb) || skb_unclone(skb, gfp_mask)) return -EINVAL; if (!skb_frags_readable(skb)) return -EFAULT; if (!num_frags) goto release; /* We might have to allocate high order pages, so compute what minimum * page order is needed. */ order = 0; while ((PAGE_SIZE << order) * MAX_SKB_FRAGS < __skb_pagelen(skb)) order++; psize = (PAGE_SIZE << order); new_frags = (__skb_pagelen(skb) + psize - 1) >> (PAGE_SHIFT + order); for (i = 0; i < new_frags; i++) { page = alloc_pages(gfp_mask | __GFP_COMP, order); if (!page) { while (head) { struct page *next = (struct page *)page_private(head); put_page(head); head = next; } return -ENOMEM; } set_page_private(page, (unsigned long)head); head = page; } page = head; d_off = 0; for (i = 0; i < num_frags; i++) { skb_frag_t *f = &skb_shinfo(skb)->frags[i]; u32 p_off, p_len, copied; struct page *p; u8 *vaddr; skb_frag_foreach_page(f, skb_frag_off(f), skb_frag_size(f), p, p_off, p_len, copied) { u32 copy, done = 0; vaddr = kmap_atomic(p); while (done < p_len) { if (d_off == psize) { d_off = 0; page = (struct page *)page_private(page); } copy = min_t(u32, psize - d_off, p_len - done); memcpy(page_address(page) + d_off, vaddr + p_off + done, copy); done += copy; d_off += copy; } kunmap_atomic(vaddr); } } /* skb frags release userspace buffers */ for (i = 0; i < num_frags; i++) skb_frag_unref(skb, i); /* skb frags point to kernel buffers */ for (i = 0; i < new_frags - 1; i++) { __skb_fill_netmem_desc(skb, i, page_to_netmem(head), 0, psize); head = (struct page *)page_private(head); } __skb_fill_netmem_desc(skb, new_frags - 1, page_to_netmem(head), 0, d_off); skb_shinfo(skb)->nr_frags = new_frags; release: skb_zcopy_clear(skb, false); return 0; } EXPORT_SYMBOL_GPL(skb_copy_ubufs); /** * skb_clone - duplicate an sk_buff * @skb: buffer to clone * @gfp_mask: allocation priority * * Duplicate an &sk_buff. The new one is not owned by a socket. Both * copies share the same packet data but not structure. The new * buffer has a reference count of 1. If the allocation fails the * function returns %NULL otherwise the new buffer is returned. * * If this function is called from an interrupt gfp_mask() must be * %GFP_ATOMIC. */ struct sk_buff *skb_clone(struct sk_buff *skb, gfp_t gfp_mask) { struct sk_buff_fclones *fclones = container_of(skb, struct sk_buff_fclones, skb1); struct sk_buff *n; if (skb_orphan_frags(skb, gfp_mask)) return NULL; if (skb->fclone == SKB_FCLONE_ORIG && refcount_read(&fclones->fclone_ref) == 1) { n = &fclones->skb2; refcount_set(&fclones->fclone_ref, 2); n->fclone = SKB_FCLONE_CLONE; } else { if (skb_pfmemalloc(skb)) gfp_mask |= __GFP_MEMALLOC; n = kmem_cache_alloc(net_hotdata.skbuff_cache, gfp_mask); if (!n) return NULL; n->fclone = SKB_FCLONE_UNAVAILABLE; } return __skb_clone(n, skb); } EXPORT_SYMBOL(skb_clone); void skb_headers_offset_update(struct sk_buff *skb, int off) { /* Only adjust this if it actually is csum_start rather than csum */ if (skb->ip_summed == CHECKSUM_PARTIAL) skb->csum_start += off; /* {transport,network,mac}_header and tail are relative to skb->head */ skb->transport_header += off; skb->network_header += off; if (skb_mac_header_was_set(skb)) skb->mac_header += off; skb->inner_transport_header += off; skb->inner_network_header += off; skb->inner_mac_header += off; } EXPORT_SYMBOL(skb_headers_offset_update); void skb_copy_header(struct sk_buff *new, const struct sk_buff *old) { __copy_skb_header(new, old); skb_shinfo(new)->gso_size = skb_shinfo(old)->gso_size; skb_shinfo(new)->gso_segs = skb_shinfo(old)->gso_segs; skb_shinfo(new)->gso_type = skb_shinfo(old)->gso_type; } EXPORT_SYMBOL(skb_copy_header); static inline int skb_alloc_rx_flag(const struct sk_buff *skb) { if (skb_pfmemalloc(skb)) return SKB_ALLOC_RX; return 0; } /** * skb_copy - create private copy of an sk_buff * @skb: buffer to copy * @gfp_mask: allocation priority * * Make a copy of both an &sk_buff and its data. This is used when the * caller wishes to modify the data and needs a private copy of the * data to alter. Returns %NULL on failure or the pointer to the buffer * on success. The returned buffer has a reference count of 1. * * As by-product this function converts non-linear &sk_buff to linear * one, so that &sk_buff becomes completely private and caller is allowed * to modify all the data of returned buffer. This means that this * function is not recommended for use in circumstances when only * header is going to be modified. Use pskb_copy() instead. */ struct sk_buff *skb_copy(const struct sk_buff *skb, gfp_t gfp_mask) { struct sk_buff *n; unsigned int size; int headerlen; if (!skb_frags_readable(skb)) return NULL; if (WARN_ON_ONCE(skb_shinfo(skb)->gso_type & SKB_GSO_FRAGLIST)) return NULL; headerlen = skb_headroom(skb); size = skb_end_offset(skb) + skb->data_len; n = __alloc_skb(size, gfp_mask, skb_alloc_rx_flag(skb), NUMA_NO_NODE); if (!n) return NULL; /* Set the data pointer */ skb_reserve(n, headerlen); /* Set the tail pointer and length */ skb_put(n, skb->len); BUG_ON(skb_copy_bits(skb, -headerlen, n->head, headerlen + skb->len)); skb_copy_header(n, skb); return n; } EXPORT_SYMBOL(skb_copy); /** * __pskb_copy_fclone - create copy of an sk_buff with private head. * @skb: buffer to copy * @headroom: headroom of new skb * @gfp_mask: allocation priority * @fclone: if true allocate the copy of the skb from the fclone * cache instead of the head cache; it is recommended to set this * to true for the cases where the copy will likely be cloned * * Make a copy of both an &sk_buff and part of its data, located * in header. Fragmented data remain shared. This is used when * the caller wishes to modify only header of &sk_buff and needs * private copy of the header to alter. Returns %NULL on failure * or the pointer to the buffer on success. * The returned buffer has a reference count of 1. */ struct sk_buff *__pskb_copy_fclone(struct sk_buff *skb, int headroom, gfp_t gfp_mask, bool fclone) { unsigned int size = skb_headlen(skb) + headroom; int flags = skb_alloc_rx_flag(skb) | (fclone ? SKB_ALLOC_FCLONE : 0); struct sk_buff *n = __alloc_skb(size, gfp_mask, flags, NUMA_NO_NODE); if (!n) goto out; /* Set the data pointer */ skb_reserve(n, headroom); /* Set the tail pointer and length */ skb_put(n, skb_headlen(skb)); /* Copy the bytes */ skb_copy_from_linear_data(skb, n->data, n->len); n->truesize += skb->data_len; n->data_len = skb->data_len; n->len = skb->len; if (skb_shinfo(skb)->nr_frags) { int i; if (skb_orphan_frags(skb, gfp_mask) || skb_zerocopy_clone(n, skb, gfp_mask)) { kfree_skb(n); n = NULL; goto out; } for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { skb_shinfo(n)->frags[i] = skb_shinfo(skb)->frags[i]; skb_frag_ref(skb, i); } skb_shinfo(n)->nr_frags = i; } if (skb_has_frag_list(skb)) { skb_shinfo(n)->frag_list = skb_shinfo(skb)->frag_list; skb_clone_fraglist(n); } skb_copy_header(n, skb); out: return n; } EXPORT_SYMBOL(__pskb_copy_fclone); /** * pskb_expand_head - reallocate header of &sk_buff * @skb: buffer to reallocate * @nhead: room to add at head * @ntail: room to add at tail * @gfp_mask: allocation priority * * Expands (or creates identical copy, if @nhead and @ntail are zero) * header of @skb. &sk_buff itself is not changed. &sk_buff MUST have * reference count of 1. Returns zero in the case of success or error, * if expansion failed. In the last case, &sk_buff is not changed. * * All the pointers pointing into skb header may change and must be * reloaded after call to this function. */ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail, gfp_t gfp_mask) { unsigned int osize = skb_end_offset(skb); unsigned int size = osize + nhead + ntail; long off; u8 *data; int i; BUG_ON(nhead < 0); BUG_ON(skb_shared(skb)); skb_zcopy_downgrade_managed(skb); if (skb_pfmemalloc(skb)) gfp_mask |= __GFP_MEMALLOC; data = kmalloc_reserve(&size, gfp_mask, NUMA_NO_NODE, NULL); if (!data) goto nodata; size = SKB_WITH_OVERHEAD(size); /* Copy only real data... and, alas, header. This should be * optimized for the cases when header is void. */ memcpy(data + nhead, skb->head, skb_tail_pointer(skb) - skb->head); memcpy((struct skb_shared_info *)(data + size), skb_shinfo(skb), offsetof(struct skb_shared_info, frags[skb_shinfo(skb)->nr_frags])); /* * if shinfo is shared we must drop the old head gracefully, but if it * is not we can just drop the old head and let the existing refcount * be since all we did is relocate the values */ if (skb_cloned(skb)) { if (skb_orphan_frags(skb, gfp_mask)) goto nofrags; if (skb_zcopy(skb)) refcount_inc(&skb_uarg(skb)->refcnt); for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) skb_frag_ref(skb, i); if (skb_has_frag_list(skb)) skb_clone_fraglist(skb); skb_release_data(skb, SKB_CONSUMED); } else { skb_free_head(skb); } off = (data + nhead) - skb->head; skb->head = data; skb->head_frag = 0; skb->data += off; skb_set_end_offset(skb, size); #ifdef NET_SKBUFF_DATA_USES_OFFSET off = nhead; #endif skb->tail += off; skb_headers_offset_update(skb, nhead); skb->cloned = 0; skb->hdr_len = 0; skb->nohdr = 0; atomic_set(&skb_shinfo(skb)->dataref, 1); skb_metadata_clear(skb); /* It is not generally safe to change skb->truesize. * For the moment, we really care of rx path, or * when skb is orphaned (not attached to a socket). */ if (!skb->sk || skb->destructor == sock_edemux) skb->truesize += size - osize; return 0; nofrags: skb_kfree_head(data, size); nodata: return -ENOMEM; } EXPORT_SYMBOL(pskb_expand_head); /* Make private copy of skb with writable head and some headroom */ struct sk_buff *skb_realloc_headroom(struct sk_buff *skb, unsigned int headroom) { struct sk_buff *skb2; int delta = headroom - skb_headroom(skb); if (delta <= 0) skb2 = pskb_copy(skb, GFP_ATOMIC); else { skb2 = skb_clone(skb, GFP_ATOMIC); if (skb2 && pskb_expand_head(skb2, SKB_DATA_ALIGN(delta), 0, GFP_ATOMIC)) { kfree_skb(skb2); skb2 = NULL; } } return skb2; } EXPORT_SYMBOL(skb_realloc_headroom); /* Note: We plan to rework this in linux-6.4 */ int __skb_unclone_keeptruesize(struct sk_buff *skb, gfp_t pri) { unsigned int saved_end_offset, saved_truesize; struct skb_shared_info *shinfo; int res; saved_end_offset = skb_end_offset(skb); saved_truesize = skb->truesize; res = pskb_expand_head(skb, 0, 0, pri); if (res) return res; skb->truesize = saved_truesize; if (likely(skb_end_offset(skb) == saved_end_offset)) return 0; /* We can not change skb->end if the original or new value * is SKB_SMALL_HEAD_HEADROOM, as it might break skb_kfree_head(). */ if (saved_end_offset == SKB_SMALL_HEAD_HEADROOM || skb_end_offset(skb) == SKB_SMALL_HEAD_HEADROOM) { /* We think this path should not be taken. * Add a temporary trace to warn us just in case. */ pr_err_once("__skb_unclone_keeptruesize() skb_end_offset() %u -> %u\n", saved_end_offset, skb_end_offset(skb)); WARN_ON_ONCE(1); return 0; } shinfo = skb_shinfo(skb); /* We are about to change back skb->end, * we need to move skb_shinfo() to its new location. */ memmove(skb->head + saved_end_offset, shinfo, offsetof(struct skb_shared_info, frags[shinfo->nr_frags])); skb_set_end_offset(skb, saved_end_offset); return 0; } /** * skb_expand_head - reallocate header of &sk_buff * @skb: buffer to reallocate * @headroom: needed headroom * * Unlike skb_realloc_headroom, this one does not allocate a new skb * if possible; copies skb->sk to new skb as needed * and frees original skb in case of failures. * * It expect increased headroom and generates warning otherwise. */ struct sk_buff *skb_expand_head(struct sk_buff *skb, unsigned int headroom) { int delta = headroom - skb_headroom(skb); int osize = skb_end_offset(skb); struct sock *sk = skb->sk; if (WARN_ONCE(delta <= 0, "%s is expecting an increase in the headroom", __func__)) return skb; delta = SKB_DATA_ALIGN(delta); /* pskb_expand_head() might crash, if skb is shared. */ if (skb_shared(skb) || !is_skb_wmem(skb)) { struct sk_buff *nskb = skb_clone(skb, GFP_ATOMIC); if (unlikely(!nskb)) goto fail; if (sk) skb_set_owner_w(nskb, sk); consume_skb(skb); skb = nskb; } if (pskb_expand_head(skb, delta, 0, GFP_ATOMIC)) goto fail; if (sk && is_skb_wmem(skb)) { delta = skb_end_offset(skb) - osize; refcount_add(delta, &sk->sk_wmem_alloc); skb->truesize += delta; } return skb; fail: kfree_skb(skb); return NULL; } EXPORT_SYMBOL(skb_expand_head); /** * skb_copy_expand - copy and expand sk_buff * @skb: buffer to copy * @newheadroom: new free bytes at head * @newtailroom: new free bytes at tail * @gfp_mask: allocation priority * * Make a copy of both an &sk_buff and its data and while doing so * allocate additional space. * * This is used when the caller wishes to modify the data and needs a * private copy of the data to alter as well as more space for new fields. * Returns %NULL on failure or the pointer to the buffer * on success. The returned buffer has a reference count of 1. * * You must pass %GFP_ATOMIC as the allocation priority if this function * is called from an interrupt. */ struct sk_buff *skb_copy_expand(const struct sk_buff *skb, int newheadroom, int newtailroom, gfp_t gfp_mask) { /* * Allocate the copy buffer */ int head_copy_len, head_copy_off; struct sk_buff *n; int oldheadroom; if (!skb_frags_readable(skb)) return NULL; if (WARN_ON_ONCE(skb_shinfo(skb)->gso_type & SKB_GSO_FRAGLIST)) return NULL; oldheadroom = skb_headroom(skb); n = __alloc_skb(newheadroom + skb->len + newtailroom, gfp_mask, skb_alloc_rx_flag(skb), NUMA_NO_NODE); if (!n) return NULL; skb_reserve(n, newheadroom); /* Set the tail pointer and length */ skb_put(n, skb->len); head_copy_len = oldheadroom; head_copy_off = 0; if (newheadroom <= head_copy_len) head_copy_len = newheadroom; else head_copy_off = newheadroom - head_copy_len; /* Copy the linear header and data. */ BUG_ON(skb_copy_bits(skb, -head_copy_len, n->head + head_copy_off, skb->len + head_copy_len)); skb_copy_header(n, skb); skb_headers_offset_update(n, newheadroom - oldheadroom); return n; } EXPORT_SYMBOL(skb_copy_expand); /** * __skb_pad - zero pad the tail of an skb * @skb: buffer to pad * @pad: space to pad * @free_on_error: free buffer on error * * Ensure that a buffer is followed by a padding area that is zero * filled. Used by network drivers which may DMA or transfer data * beyond the buffer end onto the wire. * * May return error in out of memory cases. The skb is freed on error * if @free_on_error is true. */ int __skb_pad(struct sk_buff *skb, int pad, bool free_on_error) { int err; int ntail; /* If the skbuff is non linear tailroom is always zero.. */ if (!skb_cloned(skb) && skb_tailroom(skb) >= pad) { memset(skb->data+skb->len, 0, pad); return 0; } ntail = skb->data_len + pad - (skb->end - skb->tail); if (likely(skb_cloned(skb) || ntail > 0)) { err = pskb_expand_head(skb, 0, ntail, GFP_ATOMIC); if (unlikely(err)) goto free_skb; } /* FIXME: The use of this function with non-linear skb's really needs * to be audited. */ err = skb_linearize(skb); if (unlikely(err)) goto free_skb; memset(skb->data + skb->len, 0, pad); return 0; free_skb: if (free_on_error) kfree_skb(skb); return err; } EXPORT_SYMBOL(__skb_pad); /** * pskb_put - add data to the tail of a potentially fragmented buffer * @skb: start of the buffer to use * @tail: tail fragment of the buffer to use * @len: amount of data to add * * This function extends the used data area of the potentially * fragmented buffer. @tail must be the last fragment of @skb -- or * @skb itself. If this would exceed the total buffer size the kernel * will panic. A pointer to the first byte of the extra data is * returned. */ void *pskb_put(struct sk_buff *skb, struct sk_buff *tail, int len) { if (tail != skb) { skb->data_len += len; skb->len += len; } return skb_put(tail, len); } EXPORT_SYMBOL_GPL(pskb_put); /** * skb_put - add data to a buffer * @skb: buffer to use * @len: amount of data to add * * This function extends the used data area of the buffer. If this would * exceed the total buffer size the kernel will panic. A pointer to the * first byte of the extra data is returned. */ void *skb_put(struct sk_buff *skb, unsigned int len) { void *tmp = skb_tail_pointer(skb); SKB_LINEAR_ASSERT(skb); skb->tail += len; skb->len += len; if (unlikely(skb->tail > skb->end)) skb_over_panic(skb, len, __builtin_return_address(0)); return tmp; } EXPORT_SYMBOL(skb_put); /** * skb_push - add data to the start of a buffer * @skb: buffer to use * @len: amount of data to add * * This function extends the used data area of the buffer at the buffer * start. If this would exceed the total buffer headroom the kernel will * panic. A pointer to the first byte of the extra data is returned. */ void *skb_push(struct sk_buff *skb, unsigned int len) { skb->data -= len; skb->len += len; if (unlikely(skb->data < skb->head)) skb_under_panic(skb, len, __builtin_return_address(0)); return skb->data; } EXPORT_SYMBOL(skb_push); /** * skb_pull - remove data from the start of a buffer * @skb: buffer to use * @len: amount of data to remove * * This function removes data from the start of a buffer, returning * the memory to the headroom. A pointer to the next data in the buffer * is returned. Once the data has been pulled future pushes will overwrite * the old data. */ void *skb_pull(struct sk_buff *skb, unsigned int len) { return skb_pull_inline(skb, len); } EXPORT_SYMBOL(skb_pull); /** * skb_pull_data - remove data from the start of a buffer returning its * original position. * @skb: buffer to use * @len: amount of data to remove * * This function removes data from the start of a buffer, returning * the memory to the headroom. A pointer to the original data in the buffer * is returned after checking if there is enough data to pull. Once the * data has been pulled future pushes will overwrite the old data. */ void *skb_pull_data(struct sk_buff *skb, size_t len) { void *data = skb->data; if (skb->len < len) return NULL; skb_pull(skb, len); return data; } EXPORT_SYMBOL(skb_pull_data); /** * skb_trim - remove end from a buffer * @skb: buffer to alter * @len: new length * * Cut the length of a buffer down by removing data from the tail. If * the buffer is already under the length specified it is not modified. * The skb must be linear. */ void skb_trim(struct sk_buff *skb, unsigned int len) { if (skb->len > len) __skb_trim(skb, len); } EXPORT_SYMBOL(skb_trim); /* Trims skb to length len. It can change skb pointers. */ int ___pskb_trim(struct sk_buff *skb, unsigned int len) { struct sk_buff **fragp; struct sk_buff *frag; int offset = skb_headlen(skb); int nfrags = skb_shinfo(skb)->nr_frags; int i; int err; if (skb_cloned(skb) && unlikely((err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC)))) return err; i = 0; if (offset >= len) goto drop_pages; for (; i < nfrags; i++) { int end = offset + skb_frag_size(&skb_shinfo(skb)->frags[i]); if (end < len) { offset = end; continue; } skb_frag_size_set(&skb_shinfo(skb)->frags[i++], len - offset); drop_pages: skb_shinfo(skb)->nr_frags = i; for (; i < nfrags; i++) skb_frag_unref(skb, i); if (skb_has_frag_list(skb)) skb_drop_fraglist(skb); goto done; } for (fragp = &skb_shinfo(skb)->frag_list; (frag = *fragp); fragp = &frag->next) { int end = offset + frag->len; if (skb_shared(frag)) { struct sk_buff *nfrag; nfrag = skb_clone(frag, GFP_ATOMIC); if (unlikely(!nfrag)) return -ENOMEM; nfrag->next = frag->next; consume_skb(frag); frag = nfrag; *fragp = frag; } if (end < len) { offset = end; continue; } if (end > len && unlikely((err = pskb_trim(frag, len - offset)))) return err; if (frag->next) skb_drop_list(&frag->next); break; } done: if (len > skb_headlen(skb)) { skb->data_len -= skb->len - len; skb->len = len; } else { skb->len = len; skb->data_len = 0; skb_set_tail_pointer(skb, len); } if (!skb->sk || skb->destructor == sock_edemux) skb_condense(skb); return 0; } EXPORT_SYMBOL(___pskb_trim); /* Note : use pskb_trim_rcsum() instead of calling this directly */ int pskb_trim_rcsum_slow(struct sk_buff *skb, unsigned int len) { if (skb->ip_summed == CHECKSUM_COMPLETE) { int delta = skb->len - len; skb->csum = csum_block_sub(skb->csum, skb_checksum(skb, len, delta, 0), len); } else if (skb->ip_summed == CHECKSUM_PARTIAL) { int hdlen = (len > skb_headlen(skb)) ? skb_headlen(skb) : len; int offset = skb_checksum_start_offset(skb) + skb->csum_offset; if (offset + sizeof(__sum16) > hdlen) return -EINVAL; } return __pskb_trim(skb, len); } EXPORT_SYMBOL(pskb_trim_rcsum_slow); /** * __pskb_pull_tail - advance tail of skb header * @skb: buffer to reallocate * @delta: number of bytes to advance tail * * The function makes a sense only on a fragmented &sk_buff, * it expands header moving its tail forward and copying necessary * data from fragmented part. * * &sk_buff MUST have reference count of 1. * * Returns %NULL (and &sk_buff does not change) if pull failed * or value of new tail of skb in the case of success. * * All the pointers pointing into skb header may change and must be * reloaded after call to this function. */ /* Moves tail of skb head forward, copying data from fragmented part, * when it is necessary. * 1. It may fail due to malloc failure. * 2. It may change skb pointers. * * It is pretty complicated. Luckily, it is called only in exceptional cases. */ void *__pskb_pull_tail(struct sk_buff *skb, int delta) { /* If skb has not enough free space at tail, get new one * plus 128 bytes for future expansions. If we have enough * room at tail, reallocate without expansion only if skb is cloned. */ int i, k, eat = (skb->tail + delta) - skb->end; if (!skb_frags_readable(skb)) return NULL; if (eat > 0 || skb_cloned(skb)) { if (pskb_expand_head(skb, 0, eat > 0 ? eat + 128 : 0, GFP_ATOMIC)) return NULL; } BUG_ON(skb_copy_bits(skb, skb_headlen(skb), skb_tail_pointer(skb), delta)); /* Optimization: no fragments, no reasons to preestimate * size of pulled pages. Superb. */ if (!skb_has_frag_list(skb)) goto pull_pages; /* Estimate size of pulled pages. */ eat = delta; for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { int size = skb_frag_size(&skb_shinfo(skb)->frags[i]); if (size >= eat) goto pull_pages; eat -= size; } /* If we need update frag list, we are in troubles. * Certainly, it is possible to add an offset to skb data, * but taking into account that pulling is expected to * be very rare operation, it is worth to fight against * further bloating skb head and crucify ourselves here instead. * Pure masohism, indeed. 8)8) */ if (eat) { struct sk_buff *list = skb_shinfo(skb)->frag_list; struct sk_buff *clone = NULL; struct sk_buff *insp = NULL; do { if (list->len <= eat) { /* Eaten as whole. */ eat -= list->len; list = list->next; insp = list; } else { /* Eaten partially. */ if (skb_is_gso(skb) && !list->head_frag && skb_headlen(list)) skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY; if (skb_shared(list)) { /* Sucks! We need to fork list. :-( */ clone = skb_clone(list, GFP_ATOMIC); if (!clone) return NULL; insp = list->next; list = clone; } else { /* This may be pulled without * problems. */ insp = list; } if (!pskb_pull(list, eat)) { kfree_skb(clone); return NULL; } break; } } while (eat); /* Free pulled out fragments. */ while ((list = skb_shinfo(skb)->frag_list) != insp) { skb_shinfo(skb)->frag_list = list->next; consume_skb(list); } /* And insert new clone at head. */ if (clone) { clone->next = list; skb_shinfo(skb)->frag_list = clone; } } /* Success! Now we may commit changes to skb data. */ pull_pages: eat = delta; k = 0; for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { int size = skb_frag_size(&skb_shinfo(skb)->frags[i]); if (size <= eat) { skb_frag_unref(skb, i); eat -= size; } else { skb_frag_t *frag = &skb_shinfo(skb)->frags[k]; *frag = skb_shinfo(skb)->frags[i]; if (eat) { skb_frag_off_add(frag, eat); skb_frag_size_sub(frag, eat); if (!i) goto end; eat = 0; } k++; } } skb_shinfo(skb)->nr_frags = k; end: skb->tail += delta; skb->data_len -= delta; if (!skb->data_len) skb_zcopy_clear(skb, false); return skb_tail_pointer(skb); } EXPORT_SYMBOL(__pskb_pull_tail); /** * skb_copy_bits - copy bits from skb to kernel buffer * @skb: source skb * @offset: offset in source * @to: destination buffer * @len: number of bytes to copy * * Copy the specified number of bytes from the source skb to the * destination buffer. * * CAUTION ! : * If its prototype is ever changed, * check arch/{*}/net/{*}.S files, * since it is called from BPF assembly code. */ int skb_copy_bits(const struct sk_buff *skb, int offset, void *to, int len) { int start = skb_headlen(skb); struct sk_buff *frag_iter; int i, copy; if (offset > (int)skb->len - len) goto fault; /* Copy header. */ if ((copy = start - offset) > 0) { if (copy > len) copy = len; skb_copy_from_linear_data_offset(skb, offset, to, copy); if ((len -= copy) == 0) return 0; offset += copy; to += copy; } if (!skb_frags_readable(skb)) goto fault; for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { int end; skb_frag_t *f = &skb_shinfo(skb)->frags[i]; WARN_ON(start > offset + len); end = start + skb_frag_size(f); if ((copy = end - offset) > 0) { u32 p_off, p_len, copied; struct page *p; u8 *vaddr; if (copy > len) copy = len; skb_frag_foreach_page(f, skb_frag_off(f) + offset - start, copy, p, p_off, p_len, copied) { vaddr = kmap_atomic(p); memcpy(to + copied, vaddr + p_off, p_len); kunmap_atomic(vaddr); } if ((len -= copy) == 0) return 0; offset += copy; to += copy; } start = end; } skb_walk_frags(skb, frag_iter) { int end; WARN_ON(start > offset + len); end = start + frag_iter->len; if ((copy = end - offset) > 0) { if (copy > len) copy = len; if (skb_copy_bits(frag_iter, offset - start, to, copy)) goto fault; if ((len -= copy) == 0) return 0; offset += copy; to += copy; } start = end; } if (!len) return 0; fault: return -EFAULT; } EXPORT_SYMBOL(skb_copy_bits); /* * Callback from splice_to_pipe(), if we need to release some pages * at the end of the spd in case we error'ed out in filling the pipe. */ static void sock_spd_release(struct splice_pipe_desc *spd, unsigned int i) { put_page(spd->pages[i]); } static struct page *linear_to_page(struct page *page, unsigned int *len, unsigned int *offset, struct sock *sk) { struct page_frag *pfrag = sk_page_frag(sk); if (!sk_page_frag_refill(sk, pfrag)) return NULL; *len = min_t(unsigned int, *len, pfrag->size - pfrag->offset); memcpy(page_address(pfrag->page) + pfrag->offset, page_address(page) + *offset, *len); *offset = pfrag->offset; pfrag->offset += *len; return pfrag->page; } static bool spd_can_coalesce(const struct splice_pipe_desc *spd, struct page *page, unsigned int offset) { return spd->nr_pages && spd->pages[spd->nr_pages - 1] == page && (spd->partial[spd->nr_pages - 1].offset + spd->partial[spd->nr_pages - 1].len == offset); } /* * Fill page/offset/length into spd, if it can hold more pages. */ static bool spd_fill_page(struct splice_pipe_desc *spd, struct pipe_inode_info *pipe, struct page *page, unsigned int *len, unsigned int offset, bool linear, struct sock *sk) { if (unlikely(spd->nr_pages == MAX_SKB_FRAGS)) return true; if (linear) { page = linear_to_page(page, len, &offset, sk); if (!page) return true; } if (spd_can_coalesce(spd, page, offset)) { spd->partial[spd->nr_pages - 1].len += *len; return false; } get_page(page); spd->pages[spd->nr_pages] = page; spd->partial[spd->nr_pages].len = *len; spd->partial[spd->nr_pages].offset = offset; spd->nr_pages++; return false; } static bool __splice_segment(struct page *page, unsigned int poff, unsigned int plen, unsigned int *off, unsigned int *len, struct splice_pipe_desc *spd, bool linear, struct sock *sk, struct pipe_inode_info *pipe) { if (!*len) return true; /* skip this segment if already processed */ if (*off >= plen) { *off -= plen; return false; } /* ignore any bits we already processed */ poff += *off; plen -= *off; *off = 0; do { unsigned int flen = min(*len, plen); if (spd_fill_page(spd, pipe, page, &flen, poff, linear, sk)) return true; poff += flen; plen -= flen; *len -= flen; } while (*len && plen); return false; } /* * Map linear and fragment data from the skb to spd. It reports true if the * pipe is full or if we already spliced the requested length. */ static bool __skb_splice_bits(struct sk_buff *skb, struct pipe_inode_info *pipe, unsigned int *offset, unsigned int *len, struct splice_pipe_desc *spd, struct sock *sk) { int seg; struct sk_buff *iter; /* map the linear part : * If skb->head_frag is set, this 'linear' part is backed by a * fragment, and if the head is not shared with any clones then * we can avoid a copy since we own the head portion of this page. */ if (__splice_segment(virt_to_page(skb->data), (unsigned long) skb->data & (PAGE_SIZE - 1), skb_headlen(skb), offset, len, spd, skb_head_is_locked(skb), sk, pipe)) return true; /* * then map the fragments */ if (!skb_frags_readable(skb)) return false; for (seg = 0; seg < skb_shinfo(skb)->nr_frags; seg++) { const skb_frag_t *f = &skb_shinfo(skb)->frags[seg]; if (WARN_ON_ONCE(!skb_frag_page(f))) return false; if (__splice_segment(skb_frag_page(f), skb_frag_off(f), skb_frag_size(f), offset, len, spd, false, sk, pipe)) return true; } skb_walk_frags(skb, iter) { if (*offset >= iter->len) { *offset -= iter->len; continue; } /* __skb_splice_bits() only fails if the output has no room * left, so no point in going over the frag_list for the error * case. */ if (__skb_splice_bits(iter, pipe, offset, len, spd, sk)) return true; } return false; } /* * Map data from the skb to a pipe. Should handle both the linear part, * the fragments, and the frag list. */ int skb_splice_bits(struct sk_buff *skb, struct sock *sk, unsigned int offset, struct pipe_inode_info *pipe, unsigned int tlen, unsigned int flags) { struct partial_page partial[MAX_SKB_FRAGS]; struct page *pages[MAX_SKB_FRAGS]; struct splice_pipe_desc spd = { .pages = pages, .partial = partial, .nr_pages_max = MAX_SKB_FRAGS, .ops = &nosteal_pipe_buf_ops, .spd_release = sock_spd_release, }; int ret = 0; __skb_splice_bits(skb, pipe, &offset, &tlen, &spd, sk); if (spd.nr_pages) ret = splice_to_pipe(pipe, &spd); return ret; } EXPORT_SYMBOL_GPL(skb_splice_bits); static int sendmsg_locked(struct sock *sk, struct msghdr *msg) { struct socket *sock = sk->sk_socket; size_t size = msg_data_left(msg); if (!sock) return -EINVAL; if (!sock->ops->sendmsg_locked) return sock_no_sendmsg_locked(sk, msg, size); return sock->ops->sendmsg_locked(sk, msg, size); } static int sendmsg_unlocked(struct sock *sk, struct msghdr *msg) { struct socket *sock = sk->sk_socket; if (!sock) return -EINVAL; return sock_sendmsg(sock, msg); } typedef int (*sendmsg_func)(struct sock *sk, struct msghdr *msg); static int __skb_send_sock(struct sock *sk, struct sk_buff *skb, int offset, int len, sendmsg_func sendmsg) { unsigned int orig_len = len; struct sk_buff *head = skb; unsigned short fragidx; int slen, ret; do_frag_list: /* Deal with head data */ while (offset < skb_headlen(skb) && len) { struct kvec kv; struct msghdr msg; slen = min_t(int, len, skb_headlen(skb) - offset); kv.iov_base = skb->data + offset; kv.iov_len = slen; memset(&msg, 0, sizeof(msg)); msg.msg_flags = MSG_DONTWAIT; iov_iter_kvec(&msg.msg_iter, ITER_SOURCE, &kv, 1, slen); ret = INDIRECT_CALL_2(sendmsg, sendmsg_locked, sendmsg_unlocked, sk, &msg); if (ret <= 0) goto error; offset += ret; len -= ret; } /* All the data was skb head? */ if (!len) goto out; /* Make offset relative to start of frags */ offset -= skb_headlen(skb); /* Find where we are in frag list */ for (fragidx = 0; fragidx < skb_shinfo(skb)->nr_frags; fragidx++) { skb_frag_t *frag = &skb_shinfo(skb)->frags[fragidx]; if (offset < skb_frag_size(frag)) break; offset -= skb_frag_size(frag); } for (; len && fragidx < skb_shinfo(skb)->nr_frags; fragidx++) { skb_frag_t *frag = &skb_shinfo(skb)->frags[fragidx]; slen = min_t(size_t, len, skb_frag_size(frag) - offset); while (slen) { struct bio_vec bvec; struct msghdr msg = { .msg_flags = MSG_SPLICE_PAGES | MSG_DONTWAIT, }; bvec_set_page(&bvec, skb_frag_page(frag), slen, skb_frag_off(frag) + offset); iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, &bvec, 1, slen); ret = INDIRECT_CALL_2(sendmsg, sendmsg_locked, sendmsg_unlocked, sk, &msg); if (ret <= 0) goto error; len -= ret; offset += ret; slen -= ret; } offset = 0; } if (len) { /* Process any frag lists */ if (skb == head) { if (skb_has_frag_list(skb)) { skb = skb_shinfo(skb)->frag_list; goto do_frag_list; } } else if (skb->next) { skb = skb->next; goto do_frag_list; } } out: return orig_len - len; error: return orig_len == len ? ret : orig_len - len; } /* Send skb data on a socket. Socket must be locked. */ int skb_send_sock_locked(struct sock *sk, struct sk_buff *skb, int offset, int len) { return __skb_send_sock(sk, skb, offset, len, sendmsg_locked); } EXPORT_SYMBOL_GPL(skb_send_sock_locked); /* Send skb data on a socket. Socket must be unlocked. */ int skb_send_sock(struct sock *sk, struct sk_buff *skb, int offset, int len) { return __skb_send_sock(sk, skb, offset, len, sendmsg_unlocked); } /** * skb_store_bits - store bits from kernel buffer to skb * @skb: destination buffer * @offset: offset in destination * @from: source buffer * @len: number of bytes to copy * * Copy the specified number of bytes from the source buffer to the * destination skb. This function handles all the messy bits of * traversing fragment lists and such. */ int skb_store_bits(struct sk_buff *skb, int offset, const void *from, int len) { int start = skb_headlen(skb); struct sk_buff *frag_iter; int i, copy; if (offset > (int)skb->len - len) goto fault; if ((copy = start - offset) > 0) { if (copy > len) copy = len; skb_copy_to_linear_data_offset(skb, offset, from, copy); if ((len -= copy) == 0) return 0; offset += copy; from += copy; } if (!skb_frags_readable(skb)) goto fault; for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; int end; WARN_ON(start > offset + len); end = start + skb_frag_size(frag); if ((copy = end - offset) > 0) { u32 p_off, p_len, copied; struct page *p; u8 *vaddr; if (copy > len) copy = len; skb_frag_foreach_page(frag, skb_frag_off(frag) + offset - start, copy, p, p_off, p_len, copied) { vaddr = kmap_atomic(p); memcpy(vaddr + p_off, from + copied, p_len); kunmap_atomic(vaddr); } if ((len -= copy) == 0) return 0; offset += copy; from += copy; } start = end; } skb_walk_frags(skb, frag_iter) { int end; WARN_ON(start > offset + len); end = start + frag_iter->len; if ((copy = end - offset) > 0) { if (copy > len) copy = len; if (skb_store_bits(frag_iter, offset - start, from, copy)) goto fault; if ((len -= copy) == 0) return 0; offset += copy; from += copy; } start = end; } if (!len) return 0; fault: return -EFAULT; } EXPORT_SYMBOL(skb_store_bits); /* Checksum skb data. */ __wsum __skb_checksum(const struct sk_buff *skb, int offset, int len, __wsum csum, const struct skb_checksum_ops *ops) { int start = skb_headlen(skb); int i, copy = start - offset; struct sk_buff *frag_iter; int pos = 0; /* Checksum header. */ if (copy > 0) { if (copy > len) copy = len; csum = INDIRECT_CALL_1(ops->update, csum_partial_ext, skb->data + offset, copy, csum); if ((len -= copy) == 0) return csum; offset += copy; pos = copy; } if (WARN_ON_ONCE(!skb_frags_readable(skb))) return 0; for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { int end; skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; WARN_ON(start > offset + len); end = start + skb_frag_size(frag); if ((copy = end - offset) > 0) { u32 p_off, p_len, copied; struct page *p; __wsum csum2; u8 *vaddr; if (copy > len) copy = len; skb_frag_foreach_page(frag, skb_frag_off(frag) + offset - start, copy, p, p_off, p_len, copied) { vaddr = kmap_atomic(p); csum2 = INDIRECT_CALL_1(ops->update, csum_partial_ext, vaddr + p_off, p_len, 0); kunmap_atomic(vaddr); csum = INDIRECT_CALL_1(ops->combine, csum_block_add_ext, csum, csum2, pos, p_len); pos += p_len; } if (!(len -= copy)) return csum; offset += copy; } start = end; } skb_walk_frags(skb, frag_iter) { int end; WARN_ON(start > offset + len); end = start + frag_iter->len; if ((copy = end - offset) > 0) { __wsum csum2; if (copy > len) copy = len; csum2 = __skb_checksum(frag_iter, offset - start, copy, 0, ops); csum = INDIRECT_CALL_1(ops->combine, csum_block_add_ext, csum, csum2, pos, copy); if ((len -= copy) == 0) return csum; offset += copy; pos += copy; } start = end; } BUG_ON(len); return csum; } EXPORT_SYMBOL(__skb_checksum); __wsum skb_checksum(const struct sk_buff *skb, int offset, int len, __wsum csum) { const struct skb_checksum_ops ops = { .update = csum_partial_ext, .combine = csum_block_add_ext, }; return __skb_checksum(skb, offset, len, csum, &ops); } EXPORT_SYMBOL(skb_checksum); /* Both of above in one bottle. */ __wsum skb_copy_and_csum_bits(const struct sk_buff *skb, int offset, u8 *to, int len) { int start = skb_headlen(skb); int i, copy = start - offset; struct sk_buff *frag_iter; int pos = 0; __wsum csum = 0; /* Copy header. */ if (copy > 0) { if (copy > len) copy = len; csum = csum_partial_copy_nocheck(skb->data + offset, to, copy); if ((len -= copy) == 0) return csum; offset += copy; to += copy; pos = copy; } if (!skb_frags_readable(skb)) return 0; for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { int end; WARN_ON(start > offset + len); end = start + skb_frag_size(&skb_shinfo(skb)->frags[i]); if ((copy = end - offset) > 0) { skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; u32 p_off, p_len, copied; struct page *p; __wsum csum2; u8 *vaddr; if (copy > len) copy = len; skb_frag_foreach_page(frag, skb_frag_off(frag) + offset - start, copy, p, p_off, p_len, copied) { vaddr = kmap_atomic(p); csum2 = csum_partial_copy_nocheck(vaddr + p_off, to + copied, p_len); kunmap_atomic(vaddr); csum = csum_block_add(csum, csum2, pos); pos += p_len; } if (!(len -= copy)) return csum; offset += copy; to += copy; } start = end; } skb_walk_frags(skb, frag_iter) { __wsum csum2; int end; WARN_ON(start > offset + len); end = start + frag_iter->len; if ((copy = end - offset) > 0) { if (copy > len) copy = len; csum2 = skb_copy_and_csum_bits(frag_iter, offset - start, to, copy); csum = csum_block_add(csum, csum2, pos); if ((len -= copy) == 0) return csum; offset += copy; to += copy; pos += copy; } start = end; } BUG_ON(len); return csum; } EXPORT_SYMBOL(skb_copy_and_csum_bits); __sum16 __skb_checksum_complete_head(struct sk_buff *skb, int len) { __sum16 sum; sum = csum_fold(skb_checksum(skb, 0, len, skb->csum)); /* See comments in __skb_checksum_complete(). */ if (likely(!sum)) { if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE) && !skb->csum_complete_sw) netdev_rx_csum_fault(skb->dev, skb); } if (!skb_shared(skb)) skb->csum_valid = !sum; return sum; } EXPORT_SYMBOL(__skb_checksum_complete_head); /* This function assumes skb->csum already holds pseudo header's checksum, * which has been changed from the hardware checksum, for example, by * __skb_checksum_validate_complete(). And, the original skb->csum must * have been validated unsuccessfully for CHECKSUM_COMPLETE case. * * It returns non-zero if the recomputed checksum is still invalid, otherwise * zero. The new checksum is stored back into skb->csum unless the skb is * shared. */ __sum16 __skb_checksum_complete(struct sk_buff *skb) { __wsum csum; __sum16 sum; csum = skb_checksum(skb, 0, skb->len, 0); sum = csum_fold(csum_add(skb->csum, csum)); /* This check is inverted, because we already knew the hardware * checksum is invalid before calling this function. So, if the * re-computed checksum is valid instead, then we have a mismatch * between the original skb->csum and skb_checksum(). This means either * the original hardware checksum is incorrect or we screw up skb->csum * when moving skb->data around. */ if (likely(!sum)) { if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE) && !skb->csum_complete_sw) netdev_rx_csum_fault(skb->dev, skb); } if (!skb_shared(skb)) { /* Save full packet checksum */ skb->csum = csum; skb->ip_summed = CHECKSUM_COMPLETE; skb->csum_complete_sw = 1; skb->csum_valid = !sum; } return sum; } EXPORT_SYMBOL(__skb_checksum_complete); static __wsum warn_crc32c_csum_update(const void *buff, int len, __wsum sum) { net_warn_ratelimited( "%s: attempt to compute crc32c without libcrc32c.ko\n", __func__); return 0; } static __wsum warn_crc32c_csum_combine(__wsum csum, __wsum csum2, int offset, int len) { net_warn_ratelimited( "%s: attempt to compute crc32c without libcrc32c.ko\n", __func__); return 0; } static const struct skb_checksum_ops default_crc32c_ops = { .update = warn_crc32c_csum_update, .combine = warn_crc32c_csum_combine, }; const struct skb_checksum_ops *crc32c_csum_stub __read_mostly = &default_crc32c_ops; EXPORT_SYMBOL(crc32c_csum_stub); /** * skb_zerocopy_headlen - Calculate headroom needed for skb_zerocopy() * @from: source buffer * * Calculates the amount of linear headroom needed in the 'to' skb passed * into skb_zerocopy(). */ unsigned int skb_zerocopy_headlen(const struct sk_buff *from) { unsigned int hlen = 0; if (!from->head_frag || skb_headlen(from) < L1_CACHE_BYTES || skb_shinfo(from)->nr_frags >= MAX_SKB_FRAGS) { hlen = skb_headlen(from); if (!hlen) hlen = from->len; } if (skb_has_frag_list(from)) hlen = from->len; return hlen; } EXPORT_SYMBOL_GPL(skb_zerocopy_headlen); /** * skb_zerocopy - Zero copy skb to skb * @to: destination buffer * @from: source buffer * @len: number of bytes to copy from source buffer * @hlen: size of linear headroom in destination buffer * * Copies up to `len` bytes from `from` to `to` by creating references * to the frags in the source buffer. * * The `hlen` as calculated by skb_zerocopy_headlen() specifies the * headroom in the `to` buffer. * * Return value: * 0: everything is OK * -ENOMEM: couldn't orphan frags of @from due to lack of memory * -EFAULT: skb_copy_bits() found some problem with skb geometry */ int skb_zerocopy(struct sk_buff *to, struct sk_buff *from, int len, int hlen) { int i, j = 0; int plen = 0; /* length of skb->head fragment */ int ret; struct page *page; unsigned int offset; BUG_ON(!from->head_frag && !hlen); /* dont bother with small payloads */ if (len <= skb_tailroom(to)) return skb_copy_bits(from, 0, skb_put(to, len), len); if (hlen) { ret = skb_copy_bits(from, 0, skb_put(to, hlen), hlen); if (unlikely(ret)) return ret; len -= hlen; } else { plen = min_t(int, skb_headlen(from), len); if (plen) { page = virt_to_head_page(from->head); offset = from->data - (unsigned char *)page_address(page); __skb_fill_netmem_desc(to, 0, page_to_netmem(page), offset, plen); get_page(page); j = 1; len -= plen; } } skb_len_add(to, len + plen); if (unlikely(skb_orphan_frags(from, GFP_ATOMIC))) { skb_tx_error(from); return -ENOMEM; } skb_zerocopy_clone(to, from, GFP_ATOMIC); for (i = 0; i < skb_shinfo(from)->nr_frags; i++) { int size; if (!len) break; skb_shinfo(to)->frags[j] = skb_shinfo(from)->frags[i]; size = min_t(int, skb_frag_size(&skb_shinfo(to)->frags[j]), len); skb_frag_size_set(&skb_shinfo(to)->frags[j], size); len -= size; skb_frag_ref(to, j); j++; } skb_shinfo(to)->nr_frags = j; return 0; } EXPORT_SYMBOL_GPL(skb_zerocopy); void skb_copy_and_csum_dev(const struct sk_buff *skb, u8 *to) { __wsum csum; long csstart; if (skb->ip_summed == CHECKSUM_PARTIAL) csstart = skb_checksum_start_offset(skb); else csstart = skb_headlen(skb); BUG_ON(csstart > skb_headlen(skb)); skb_copy_from_linear_data(skb, to, csstart); csum = 0; if (csstart != skb->len) csum = skb_copy_and_csum_bits(skb, csstart, to + csstart, skb->len - csstart); if (skb->ip_summed == CHECKSUM_PARTIAL) { long csstuff = csstart + skb->csum_offset; *((__sum16 *)(to + csstuff)) = csum_fold(csum); } } EXPORT_SYMBOL(skb_copy_and_csum_dev); /** * skb_dequeue - remove from the head of the queue * @list: list to dequeue from * * Remove the head of the list. The list lock is taken so the function * may be used safely with other locking list functions. The head item is * returned or %NULL if the list is empty. */ struct sk_buff *skb_dequeue(struct sk_buff_head *list) { unsigned long flags; struct sk_buff *result; spin_lock_irqsave(&list->lock, flags); result = __skb_dequeue(list); spin_unlock_irqrestore(&list->lock, flags); return result; } EXPORT_SYMBOL(skb_dequeue); /** * skb_dequeue_tail - remove from the tail of the queue * @list: list to dequeue from * * Remove the tail of the list. The list lock is taken so the function * may be used safely with other locking list functions. The tail item is * returned or %NULL if the list is empty. */ struct sk_buff *skb_dequeue_tail(struct sk_buff_head *list) { unsigned long flags; struct sk_buff *result; spin_lock_irqsave(&list->lock, flags); result = __skb_dequeue_tail(list); spin_unlock_irqrestore(&list->lock, flags); return result; } EXPORT_SYMBOL(skb_dequeue_tail); /** * skb_queue_purge_reason - empty a list * @list: list to empty * @reason: drop reason * * Delete all buffers on an &sk_buff list. Each buffer is removed from * the list and one reference dropped. This function takes the list * lock and is atomic with respect to other list locking functions. */ void skb_queue_purge_reason(struct sk_buff_head *list, enum skb_drop_reason reason) { struct sk_buff_head tmp; unsigned long flags; if (skb_queue_empty_lockless(list)) return; __skb_queue_head_init(&tmp); spin_lock_irqsave(&list->lock, flags); skb_queue_splice_init(list, &tmp); spin_unlock_irqrestore(&list->lock, flags); __skb_queue_purge_reason(&tmp, reason); } EXPORT_SYMBOL(skb_queue_purge_reason); /** * skb_rbtree_purge - empty a skb rbtree * @root: root of the rbtree to empty * Return value: the sum of truesizes of all purged skbs. * * Delete all buffers on an &sk_buff rbtree. Each buffer is removed from * the list and one reference dropped. This function does not take * any lock. Synchronization should be handled by the caller (e.g., TCP * out-of-order queue is protected by the socket lock). */ unsigned int skb_rbtree_purge(struct rb_root *root) { struct rb_node *p = rb_first(root); unsigned int sum = 0; while (p) { struct sk_buff *skb = rb_entry(p, struct sk_buff, rbnode); p = rb_next(p); rb_erase(&skb->rbnode, root); sum += skb->truesize; kfree_skb(skb); } return sum; } void skb_errqueue_purge(struct sk_buff_head *list) { struct sk_buff *skb, *next; struct sk_buff_head kill; unsigned long flags; __skb_queue_head_init(&kill); spin_lock_irqsave(&list->lock, flags); skb_queue_walk_safe(list, skb, next) { if (SKB_EXT_ERR(skb)->ee.ee_origin == SO_EE_ORIGIN_ZEROCOPY || SKB_EXT_ERR(skb)->ee.ee_origin == SO_EE_ORIGIN_TIMESTAMPING) continue; __skb_unlink(skb, list); __skb_queue_tail(&kill, skb); } spin_unlock_irqrestore(&list->lock, flags); __skb_queue_purge(&kill); } EXPORT_SYMBOL(skb_errqueue_purge); /** * skb_queue_head - queue a buffer at the list head * @list: list to use * @newsk: buffer to queue * * Queue a buffer at the start of the list. This function takes the * list lock and can be used safely with other locking &sk_buff functions * safely. * * A buffer cannot be placed on two lists at the same time. */ void skb_queue_head(struct sk_buff_head *list, struct sk_buff *newsk) { unsigned long flags; spin_lock_irqsave(&list->lock, flags); __skb_queue_head(list, newsk); spin_unlock_irqrestore(&list->lock, flags); } EXPORT_SYMBOL(skb_queue_head); /** * skb_queue_tail - queue a buffer at the list tail * @list: list to use * @newsk: buffer to queue * * Queue a buffer at the tail of the list. This function takes the * list lock and can be used safely with other locking &sk_buff functions * safely. * * A buffer cannot be placed on two lists at the same time. */ void skb_queue_tail(struct sk_buff_head *list, struct sk_buff *newsk) { unsigned long flags; spin_lock_irqsave(&list->lock, flags); __skb_queue_tail(list, newsk); spin_unlock_irqrestore(&list->lock, flags); } EXPORT_SYMBOL(skb_queue_tail); /** * skb_unlink - remove a buffer from a list * @skb: buffer to remove * @list: list to use * * Remove a packet from a list. The list locks are taken and this * function is atomic with respect to other list locked calls * * You must know what list the SKB is on. */ void skb_unlink(struct sk_buff *skb, struct sk_buff_head *list) { unsigned long flags; spin_lock_irqsave(&list->lock, flags); __skb_unlink(skb, list); spin_unlock_irqrestore(&list->lock, flags); } EXPORT_SYMBOL(skb_unlink); /** * skb_append - append a buffer * @old: buffer to insert after * @newsk: buffer to insert * @list: list to use * * Place a packet after a given packet in a list. The list locks are taken * and this function is atomic with respect to other list locked calls. * A buffer cannot be placed on two lists at the same time. */ void skb_append(struct sk_buff *old, struct sk_buff *newsk, struct sk_buff_head *list) { unsigned long flags; spin_lock_irqsave(&list->lock, flags); __skb_queue_after(list, old, newsk); spin_unlock_irqrestore(&list->lock, flags); } EXPORT_SYMBOL(skb_append); static inline void skb_split_inside_header(struct sk_buff *skb, struct sk_buff* skb1, const u32 len, const int pos) { int i; skb_copy_from_linear_data_offset(skb, len, skb_put(skb1, pos - len), pos - len); /* And move data appendix as is. */ for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) skb_shinfo(skb1)->frags[i] = skb_shinfo(skb)->frags[i]; skb_shinfo(skb1)->nr_frags = skb_shinfo(skb)->nr_frags; skb1->unreadable = skb->unreadable; skb_shinfo(skb)->nr_frags = 0; skb1->data_len = skb->data_len; skb1->len += skb1->data_len; skb->data_len = 0; skb->len = len; skb_set_tail_pointer(skb, len); } static inline void skb_split_no_header(struct sk_buff *skb, struct sk_buff* skb1, const u32 len, int pos) { int i, k = 0; const int nfrags = skb_shinfo(skb)->nr_frags; skb_shinfo(skb)->nr_frags = 0; skb1->len = skb1->data_len = skb->len - len; skb->len = len; skb->data_len = len - pos; for (i = 0; i < nfrags; i++) { int size = skb_frag_size(&skb_shinfo(skb)->frags[i]); if (pos + size > len) { skb_shinfo(skb1)->frags[k] = skb_shinfo(skb)->frags[i]; if (pos < len) { /* Split frag. * We have two variants in this case: * 1. Move all the frag to the second * part, if it is possible. F.e. * this approach is mandatory for TUX, * where splitting is expensive. * 2. Split is accurately. We make this. */ skb_frag_ref(skb, i); skb_frag_off_add(&skb_shinfo(skb1)->frags[0], len - pos); skb_frag_size_sub(&skb_shinfo(skb1)->frags[0], len - pos); skb_frag_size_set(&skb_shinfo(skb)->frags[i], len - pos); skb_shinfo(skb)->nr_frags++; } k++; } else skb_shinfo(skb)->nr_frags++; pos += size; } skb_shinfo(skb1)->nr_frags = k; skb1->unreadable = skb->unreadable; } /** * skb_split - Split fragmented skb to two parts at length len. * @skb: the buffer to split * @skb1: the buffer to receive the second part * @len: new length for skb */ void skb_split(struct sk_buff *skb, struct sk_buff *skb1, const u32 len) { int pos = skb_headlen(skb); const int zc_flags = SKBFL_SHARED_FRAG | SKBFL_PURE_ZEROCOPY; skb_zcopy_downgrade_managed(skb); skb_shinfo(skb1)->flags |= skb_shinfo(skb)->flags & zc_flags; skb_zerocopy_clone(skb1, skb, 0); if (len < pos) /* Split line is inside header. */ skb_split_inside_header(skb, skb1, len, pos); else /* Second chunk has no header, nothing to copy. */ skb_split_no_header(skb, skb1, len, pos); } EXPORT_SYMBOL(skb_split); /* Shifting from/to a cloned skb is a no-go. * * Caller cannot keep skb_shinfo related pointers past calling here! */ static int skb_prepare_for_shift(struct sk_buff *skb) { return skb_unclone_keeptruesize(skb, GFP_ATOMIC); } /** * skb_shift - Shifts paged data partially from skb to another * @tgt: buffer into which tail data gets added * @skb: buffer from which the paged data comes from * @shiftlen: shift up to this many bytes * * Attempts to shift up to shiftlen worth of bytes, which may be less than * the length of the skb, from skb to tgt. Returns number bytes shifted. * It's up to caller to free skb if everything was shifted. * * If @tgt runs out of frags, the whole operation is aborted. * * Skb cannot include anything else but paged data while tgt is allowed * to have non-paged data as well. * * TODO: full sized shift could be optimized but that would need * specialized skb free'er to handle frags without up-to-date nr_frags. */ int skb_shift(struct sk_buff *tgt, struct sk_buff *skb, int shiftlen) { int from, to, merge, todo; skb_frag_t *fragfrom, *fragto; BUG_ON(shiftlen > skb->len); if (skb_headlen(skb)) return 0; if (skb_zcopy(tgt) || skb_zcopy(skb)) return 0; DEBUG_NET_WARN_ON_ONCE(tgt->pp_recycle != skb->pp_recycle); DEBUG_NET_WARN_ON_ONCE(skb_cmp_decrypted(tgt, skb)); todo = shiftlen; from = 0; to = skb_shinfo(tgt)->nr_frags; fragfrom = &skb_shinfo(skb)->frags[from]; /* Actual merge is delayed until the point when we know we can * commit all, so that we don't have to undo partial changes */ if (!skb_can_coalesce(tgt, to, skb_frag_page(fragfrom), skb_frag_off(fragfrom))) { merge = -1; } else { merge = to - 1; todo -= skb_frag_size(fragfrom); if (todo < 0) { if (skb_prepare_for_shift(skb) || skb_prepare_for_shift(tgt)) return 0; /* All previous frag pointers might be stale! */ fragfrom = &skb_shinfo(skb)->frags[from]; fragto = &skb_shinfo(tgt)->frags[merge]; skb_frag_size_add(fragto, shiftlen); skb_frag_size_sub(fragfrom, shiftlen); skb_frag_off_add(fragfrom, shiftlen); goto onlymerged; } from++; } /* Skip full, not-fitting skb to avoid expensive operations */ if ((shiftlen == skb->len) && (skb_shinfo(skb)->nr_frags - from) > (MAX_SKB_FRAGS - to)) return 0; if (skb_prepare_for_shift(skb) || skb_prepare_for_shift(tgt)) return 0; while ((todo > 0) && (from < skb_shinfo(skb)->nr_frags)) { if (to == MAX_SKB_FRAGS) return 0; fragfrom = &skb_shinfo(skb)->frags[from]; fragto = &skb_shinfo(tgt)->frags[to]; if (todo >= skb_frag_size(fragfrom)) { *fragto = *fragfrom; todo -= skb_frag_size(fragfrom); from++; to++; } else { __skb_frag_ref(fragfrom); skb_frag_page_copy(fragto, fragfrom); skb_frag_off_copy(fragto, fragfrom); skb_frag_size_set(fragto, todo); skb_frag_off_add(fragfrom, todo); skb_frag_size_sub(fragfrom, todo); todo = 0; to++; break; } } /* Ready to "commit" this state change to tgt */ skb_shinfo(tgt)->nr_frags = to; if (merge >= 0) { fragfrom = &skb_shinfo(skb)->frags[0]; fragto = &skb_shinfo(tgt)->frags[merge]; skb_frag_size_add(fragto, skb_frag_size(fragfrom)); __skb_frag_unref(fragfrom, skb->pp_recycle); } /* Reposition in the original skb */ to = 0; while (from < skb_shinfo(skb)->nr_frags) skb_shinfo(skb)->frags[to++] = skb_shinfo(skb)->frags[from++]; skb_shinfo(skb)->nr_frags = to; BUG_ON(todo > 0 && !skb_shinfo(skb)->nr_frags); onlymerged: /* Most likely the tgt won't ever need its checksum anymore, skb on * the other hand might need it if it needs to be resent */ tgt->ip_summed = CHECKSUM_PARTIAL; skb->ip_summed = CHECKSUM_PARTIAL; skb_len_add(skb, -shiftlen); skb_len_add(tgt, shiftlen); return shiftlen; } /** * skb_prepare_seq_read - Prepare a sequential read of skb data * @skb: the buffer to read * @from: lower offset of data to be read * @to: upper offset of data to be read * @st: state variable * * Initializes the specified state variable. Must be called before * invoking skb_seq_read() for the first time. */ void skb_prepare_seq_read(struct sk_buff *skb, unsigned int from, unsigned int to, struct skb_seq_state *st) { st->lower_offset = from; st->upper_offset = to; st->root_skb = st->cur_skb = skb; st->frag_idx = st->stepped_offset = 0; st->frag_data = NULL; st->frag_off = 0; } EXPORT_SYMBOL(skb_prepare_seq_read); /** * skb_seq_read - Sequentially read skb data * @consumed: number of bytes consumed by the caller so far * @data: destination pointer for data to be returned * @st: state variable * * Reads a block of skb data at @consumed relative to the * lower offset specified to skb_prepare_seq_read(). Assigns * the head of the data block to @data and returns the length * of the block or 0 if the end of the skb data or the upper * offset has been reached. * * The caller is not required to consume all of the data * returned, i.e. @consumed is typically set to the number * of bytes already consumed and the next call to * skb_seq_read() will return the remaining part of the block. * * Note 1: The size of each block of data returned can be arbitrary, * this limitation is the cost for zerocopy sequential * reads of potentially non linear data. * * Note 2: Fragment lists within fragments are not implemented * at the moment, state->root_skb could be replaced with * a stack for this purpose. */ unsigned int skb_seq_read(unsigned int consumed, const u8 **data, struct skb_seq_state *st) { unsigned int block_limit, abs_offset = consumed + st->lower_offset; skb_frag_t *frag; if (unlikely(abs_offset >= st->upper_offset)) { if (st->frag_data) { kunmap_atomic(st->frag_data); st->frag_data = NULL; } return 0; } next_skb: block_limit = skb_headlen(st->cur_skb) + st->stepped_offset; if (abs_offset < block_limit && !st->frag_data) { *data = st->cur_skb->data + (abs_offset - st->stepped_offset); return block_limit - abs_offset; } if (!skb_frags_readable(st->cur_skb)) return 0; if (st->frag_idx == 0 && !st->frag_data) st->stepped_offset += skb_headlen(st->cur_skb); while (st->frag_idx < skb_shinfo(st->cur_skb)->nr_frags) { unsigned int pg_idx, pg_off, pg_sz; frag = &skb_shinfo(st->cur_skb)->frags[st->frag_idx]; pg_idx = 0; pg_off = skb_frag_off(frag); pg_sz = skb_frag_size(frag); if (skb_frag_must_loop(skb_frag_page(frag))) { pg_idx = (pg_off + st->frag_off) >> PAGE_SHIFT; pg_off = offset_in_page(pg_off + st->frag_off); pg_sz = min_t(unsigned int, pg_sz - st->frag_off, PAGE_SIZE - pg_off); } block_limit = pg_sz + st->stepped_offset; if (abs_offset < block_limit) { if (!st->frag_data) st->frag_data = kmap_atomic(skb_frag_page(frag) + pg_idx); *data = (u8 *)st->frag_data + pg_off + (abs_offset - st->stepped_offset); return block_limit - abs_offset; } if (st->frag_data) { kunmap_atomic(st->frag_data); st->frag_data = NULL; } st->stepped_offset += pg_sz; st->frag_off += pg_sz; if (st->frag_off == skb_frag_size(frag)) { st->frag_off = 0; st->frag_idx++; } } if (st->frag_data) { kunmap_atomic(st->frag_data); st->frag_data = NULL; } if (st->root_skb == st->cur_skb && skb_has_frag_list(st->root_skb)) { st->cur_skb = skb_shinfo(st->root_skb)->frag_list; st->frag_idx = 0; goto next_skb; } else if (st->cur_skb->next) { st->cur_skb = st->cur_skb->next; st->frag_idx = 0; goto next_skb; } return 0; } EXPORT_SYMBOL(skb_seq_read); /** * skb_abort_seq_read - Abort a sequential read of skb data * @st: state variable * * Must be called if skb_seq_read() was not called until it * returned 0. */ void skb_abort_seq_read(struct skb_seq_state *st) { if (st->frag_data) kunmap_atomic(st->frag_data); } EXPORT_SYMBOL(skb_abort_seq_read); /** * skb_copy_seq_read() - copy from a skb_seq_state to a buffer * @st: source skb_seq_state * @offset: offset in source * @to: destination buffer * @len: number of bytes to copy * * Copy @len bytes from @offset bytes into the source @st to the destination * buffer @to. `offset` should increase (or be unchanged) with each subsequent * call to this function. If offset needs to decrease from the previous use `st` * should be reset first. * * Return: 0 on success or -EINVAL if the copy ended early */ int skb_copy_seq_read(struct skb_seq_state *st, int offset, void *to, int len) { const u8 *data; u32 sqlen; for (;;) { sqlen = skb_seq_read(offset, &data, st); if (sqlen == 0) return -EINVAL; if (sqlen >= len) { memcpy(to, data, len); return 0; } memcpy(to, data, sqlen); to += sqlen; offset += sqlen; len -= sqlen; } } EXPORT_SYMBOL(skb_copy_seq_read); #define TS_SKB_CB(state) ((struct skb_seq_state *) &((state)->cb)) static unsigned int skb_ts_get_next_block(unsigned int offset, const u8 **text, struct ts_config *conf, struct ts_state *state) { return skb_seq_read(offset, text, TS_SKB_CB(state)); } static void skb_ts_finish(struct ts_config *conf, struct ts_state *state) { skb_abort_seq_read(TS_SKB_CB(state)); } /** * skb_find_text - Find a text pattern in skb data * @skb: the buffer to look in * @from: search offset * @to: search limit * @config: textsearch configuration * * Finds a pattern in the skb data according to the specified * textsearch configuration. Use textsearch_next() to retrieve * subsequent occurrences of the pattern. Returns the offset * to the first occurrence or UINT_MAX if no match was found. */ unsigned int skb_find_text(struct sk_buff *skb, unsigned int from, unsigned int to, struct ts_config *config) { unsigned int patlen = config->ops->get_pattern_len(config); struct ts_state state; unsigned int ret; BUILD_BUG_ON(sizeof(struct skb_seq_state) > sizeof(state.cb)); config->get_next_block = skb_ts_get_next_block; config->finish = skb_ts_finish; skb_prepare_seq_read(skb, from, to, TS_SKB_CB(&state)); ret = textsearch_find(config, &state); return (ret + patlen <= to - from ? ret : UINT_MAX); } EXPORT_SYMBOL(skb_find_text); int skb_append_pagefrags(struct sk_buff *skb, struct page *page, int offset, size_t size, size_t max_frags) { int i = skb_shinfo(skb)->nr_frags; if (skb_can_coalesce(skb, i, page, offset)) { skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], size); } else if (i < max_frags) { skb_zcopy_downgrade_managed(skb); get_page(page); skb_fill_page_desc_noacc(skb, i, page, offset, size); } else { return -EMSGSIZE; } return 0; } EXPORT_SYMBOL_GPL(skb_append_pagefrags); /** * skb_pull_rcsum - pull skb and update receive checksum * @skb: buffer to update * @len: length of data pulled * * This function performs an skb_pull on the packet and updates * the CHECKSUM_COMPLETE checksum. It should be used on * receive path processing instead of skb_pull unless you know * that the checksum difference is zero (e.g., a valid IP header) * or you are setting ip_summed to CHECKSUM_NONE. */ void *skb_pull_rcsum(struct sk_buff *skb, unsigned int len) { unsigned char *data = skb->data; BUG_ON(len > skb->len); __skb_pull(skb, len); skb_postpull_rcsum(skb, data, len); return skb->data; } EXPORT_SYMBOL_GPL(skb_pull_rcsum); static inline skb_frag_t skb_head_frag_to_page_desc(struct sk_buff *frag_skb) { skb_frag_t head_frag; struct page *page; page = virt_to_head_page(frag_skb->head); skb_frag_fill_page_desc(&head_frag, page, frag_skb->data - (unsigned char *)page_address(page), skb_headlen(frag_skb)); return head_frag; } struct sk_buff *skb_segment_list(struct sk_buff *skb, netdev_features_t features, unsigned int offset) { struct sk_buff *list_skb = skb_shinfo(skb)->frag_list; unsigned int tnl_hlen = skb_tnl_header_len(skb); unsigned int delta_truesize = 0; unsigned int delta_len = 0; struct sk_buff *tail = NULL; struct sk_buff *nskb, *tmp; int len_diff, err; skb_push(skb, -skb_network_offset(skb) + offset); /* Ensure the head is writeable before touching the shared info */ err = skb_unclone(skb, GFP_ATOMIC); if (err) goto err_linearize; skb_shinfo(skb)->frag_list = NULL; while (list_skb) { nskb = list_skb; list_skb = list_skb->next; err = 0; delta_truesize += nskb->truesize; if (skb_shared(nskb)) { tmp = skb_clone(nskb, GFP_ATOMIC); if (tmp) { consume_skb(nskb); nskb = tmp; err = skb_unclone(nskb, GFP_ATOMIC); } else { err = -ENOMEM; } } if (!tail) skb->next = nskb; else tail->next = nskb; if (unlikely(err)) { nskb->next = list_skb; goto err_linearize; } tail = nskb; delta_len += nskb->len; skb_push(nskb, -skb_network_offset(nskb) + offset); skb_release_head_state(nskb); len_diff = skb_network_header_len(nskb) - skb_network_header_len(skb); __copy_skb_header(nskb, skb); skb_headers_offset_update(nskb, skb_headroom(nskb) - skb_headroom(skb)); nskb->transport_header += len_diff; skb_copy_from_linear_data_offset(skb, -tnl_hlen, nskb->data - tnl_hlen, offset + tnl_hlen); if (skb_needs_linearize(nskb, features) && __skb_linearize(nskb)) goto err_linearize; } skb->truesize = skb->truesize - delta_truesize; skb->data_len = skb->data_len - delta_len; skb->len = skb->len - delta_len; skb_gso_reset(skb); skb->prev = tail; if (skb_needs_linearize(skb, features) && __skb_linearize(skb)) goto err_linearize; skb_get(skb); return skb; err_linearize: kfree_skb_list(skb->next); skb->next = NULL; return ERR_PTR(-ENOMEM); } EXPORT_SYMBOL_GPL(skb_segment_list); /** * skb_segment - Perform protocol segmentation on skb. * @head_skb: buffer to segment * @features: features for the output path (see dev->features) * * This function performs segmentation on the given skb. It returns * a pointer to the first in a list of new skbs for the segments. * In case of error it returns ERR_PTR(err). */ struct sk_buff *skb_segment(struct sk_buff *head_skb, netdev_features_t features) { struct sk_buff *segs = NULL; struct sk_buff *tail = NULL; struct sk_buff *list_skb = skb_shinfo(head_skb)->frag_list; unsigned int mss = skb_shinfo(head_skb)->gso_size; unsigned int doffset = head_skb->data - skb_mac_header(head_skb); unsigned int offset = doffset; unsigned int tnl_hlen = skb_tnl_header_len(head_skb); unsigned int partial_segs = 0; unsigned int headroom; unsigned int len = head_skb->len; struct sk_buff *frag_skb; skb_frag_t *frag; __be16 proto; bool csum, sg; int err = -ENOMEM; int i = 0; int nfrags, pos; if ((skb_shinfo(head_skb)->gso_type & SKB_GSO_DODGY) && mss != GSO_BY_FRAGS && mss != skb_headlen(head_skb)) { struct sk_buff *check_skb; for (check_skb = list_skb; check_skb; check_skb = check_skb->next) { if (skb_headlen(check_skb) && !check_skb->head_frag) { /* gso_size is untrusted, and we have a frag_list with * a linear non head_frag item. * * If head_skb's headlen does not fit requested gso_size, * it means that the frag_list members do NOT terminate * on exact gso_size boundaries. Hence we cannot perform * skb_frag_t page sharing. Therefore we must fallback to * copying the frag_list skbs; we do so by disabling SG. */ features &= ~NETIF_F_SG; break; } } } __skb_push(head_skb, doffset); proto = skb_network_protocol(head_skb, NULL); if (unlikely(!proto)) return ERR_PTR(-EINVAL); sg = !!(features & NETIF_F_SG); csum = !!can_checksum_protocol(features, proto); if (sg && csum && (mss != GSO_BY_FRAGS)) { if (!(features & NETIF_F_GSO_PARTIAL)) { struct sk_buff *iter; unsigned int frag_len; if (!list_skb || !net_gso_ok(features, skb_shinfo(head_skb)->gso_type)) goto normal; /* If we get here then all the required * GSO features except frag_list are supported. * Try to split the SKB to multiple GSO SKBs * with no frag_list. * Currently we can do that only when the buffers don't * have a linear part and all the buffers except * the last are of the same length. */ frag_len = list_skb->len; skb_walk_frags(head_skb, iter) { if (frag_len != iter->len && iter->next) goto normal; if (skb_headlen(iter) && !iter->head_frag) goto normal; len -= iter->len; } if (len != frag_len) goto normal; } /* GSO partial only requires that we trim off any excess that * doesn't fit into an MSS sized block, so take care of that * now. * Cap len to not accidentally hit GSO_BY_FRAGS. */ partial_segs = min(len, GSO_BY_FRAGS - 1) / mss; if (partial_segs > 1) mss *= partial_segs; else partial_segs = 0; } normal: headroom = skb_headroom(head_skb); pos = skb_headlen(head_skb); if (skb_orphan_frags(head_skb, GFP_ATOMIC)) return ERR_PTR(-ENOMEM); nfrags = skb_shinfo(head_skb)->nr_frags; frag = skb_shinfo(head_skb)->frags; frag_skb = head_skb; do { struct sk_buff *nskb; skb_frag_t *nskb_frag; int hsize; int size; if (unlikely(mss == GSO_BY_FRAGS)) { len = list_skb->len; } else { len = head_skb->len - offset; if (len > mss) len = mss; } hsize = skb_headlen(head_skb) - offset; if (hsize <= 0 && i >= nfrags && skb_headlen(list_skb) && (skb_headlen(list_skb) == len || sg)) { BUG_ON(skb_headlen(list_skb) > len); nskb = skb_clone(list_skb, GFP_ATOMIC); if (unlikely(!nskb)) goto err; i = 0; nfrags = skb_shinfo(list_skb)->nr_frags; frag = skb_shinfo(list_skb)->frags; frag_skb = list_skb; pos += skb_headlen(list_skb); while (pos < offset + len) { BUG_ON(i >= nfrags); size = skb_frag_size(frag); if (pos + size > offset + len) break; i++; pos += size; frag++; } list_skb = list_skb->next; if (unlikely(pskb_trim(nskb, len))) { kfree_skb(nskb); goto err; } hsize = skb_end_offset(nskb); if (skb_cow_head(nskb, doffset + headroom)) { kfree_skb(nskb); goto err; } nskb->truesize += skb_end_offset(nskb) - hsize; skb_release_head_state(nskb); __skb_push(nskb, doffset); } else { if (hsize < 0) hsize = 0; if (hsize > len || !sg) hsize = len; nskb = __alloc_skb(hsize + doffset + headroom, GFP_ATOMIC, skb_alloc_rx_flag(head_skb), NUMA_NO_NODE); if (unlikely(!nskb)) goto err; skb_reserve(nskb, headroom); __skb_put(nskb, doffset); } if (segs) tail->next = nskb; else segs = nskb; tail = nskb; __copy_skb_header(nskb, head_skb); skb_headers_offset_update(nskb, skb_headroom(nskb) - headroom); skb_reset_mac_len(nskb); skb_copy_from_linear_data_offset(head_skb, -tnl_hlen, nskb->data - tnl_hlen, doffset + tnl_hlen); if (nskb->len == len + doffset) goto perform_csum_check; if (!sg) { if (!csum) { if (!nskb->remcsum_offload) nskb->ip_summed = CHECKSUM_NONE; SKB_GSO_CB(nskb)->csum = skb_copy_and_csum_bits(head_skb, offset, skb_put(nskb, len), len); SKB_GSO_CB(nskb)->csum_start = skb_headroom(nskb) + doffset; } else { if (skb_copy_bits(head_skb, offset, skb_put(nskb, len), len)) goto err; } continue; } nskb_frag = skb_shinfo(nskb)->frags; skb_copy_from_linear_data_offset(head_skb, offset, skb_put(nskb, hsize), hsize); skb_shinfo(nskb)->flags |= skb_shinfo(head_skb)->flags & SKBFL_SHARED_FRAG; if (skb_zerocopy_clone(nskb, frag_skb, GFP_ATOMIC)) goto err; while (pos < offset + len) { if (i >= nfrags) { if (skb_orphan_frags(list_skb, GFP_ATOMIC) || skb_zerocopy_clone(nskb, list_skb, GFP_ATOMIC)) goto err; i = 0; nfrags = skb_shinfo(list_skb)->nr_frags; frag = skb_shinfo(list_skb)->frags; frag_skb = list_skb; if (!skb_headlen(list_skb)) { BUG_ON(!nfrags); } else { BUG_ON(!list_skb->head_frag); /* to make room for head_frag. */ i--; frag--; } list_skb = list_skb->next; } if (unlikely(skb_shinfo(nskb)->nr_frags >= MAX_SKB_FRAGS)) { net_warn_ratelimited( "skb_segment: too many frags: %u %u\n", pos, mss); err = -EINVAL; goto err; } *nskb_frag = (i < 0) ? skb_head_frag_to_page_desc(frag_skb) : *frag; __skb_frag_ref(nskb_frag); size = skb_frag_size(nskb_frag); if (pos < offset) { skb_frag_off_add(nskb_frag, offset - pos); skb_frag_size_sub(nskb_frag, offset - pos); } skb_shinfo(nskb)->nr_frags++; if (pos + size <= offset + len) { i++; frag++; pos += size; } else { skb_frag_size_sub(nskb_frag, pos + size - (offset + len)); goto skip_fraglist; } nskb_frag++; } skip_fraglist: nskb->data_len = len - hsize; nskb->len += nskb->data_len; nskb->truesize += nskb->data_len; perform_csum_check: if (!csum) { if (skb_has_shared_frag(nskb) && __skb_linearize(nskb)) goto err; if (!nskb->remcsum_offload) nskb->ip_summed = CHECKSUM_NONE; SKB_GSO_CB(nskb)->csum = skb_checksum(nskb, doffset, nskb->len - doffset, 0); SKB_GSO_CB(nskb)->csum_start = skb_headroom(nskb) + doffset; } } while ((offset += len) < head_skb->len); /* Some callers want to get the end of the list. * Put it in segs->prev to avoid walking the list. * (see validate_xmit_skb_list() for example) */ segs->prev = tail; if (partial_segs) { struct sk_buff *iter; int type = skb_shinfo(head_skb)->gso_type; unsigned short gso_size = skb_shinfo(head_skb)->gso_size; /* Update type to add partial and then remove dodgy if set */ type |= (features & NETIF_F_GSO_PARTIAL) / NETIF_F_GSO_PARTIAL * SKB_GSO_PARTIAL; type &= ~SKB_GSO_DODGY; /* Update GSO info and prepare to start updating headers on * our way back down the stack of protocols. */ for (iter = segs; iter; iter = iter->next) { skb_shinfo(iter)->gso_size = gso_size; skb_shinfo(iter)->gso_segs = partial_segs; skb_shinfo(iter)->gso_type = type; SKB_GSO_CB(iter)->data_offset = skb_headroom(iter) + doffset; } if (tail->len - doffset <= gso_size) skb_shinfo(tail)->gso_size = 0; else if (tail != segs) skb_shinfo(tail)->gso_segs = DIV_ROUND_UP(tail->len - doffset, gso_size); } /* Following permits correct backpressure, for protocols * using skb_set_owner_w(). * Idea is to tranfert ownership from head_skb to last segment. */ if (head_skb->destructor == sock_wfree) { swap(tail->truesize, head_skb->truesize); swap(tail->destructor, head_skb->destructor); swap(tail->sk, head_skb->sk); } return segs; err: kfree_skb_list(segs); return ERR_PTR(err); } EXPORT_SYMBOL_GPL(skb_segment); #ifdef CONFIG_SKB_EXTENSIONS #define SKB_EXT_ALIGN_VALUE 8 #define SKB_EXT_CHUNKSIZEOF(x) (ALIGN((sizeof(x)), SKB_EXT_ALIGN_VALUE) / SKB_EXT_ALIGN_VALUE) static const u8 skb_ext_type_len[] = { #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) [SKB_EXT_BRIDGE_NF] = SKB_EXT_CHUNKSIZEOF(struct nf_bridge_info), #endif #ifdef CONFIG_XFRM [SKB_EXT_SEC_PATH] = SKB_EXT_CHUNKSIZEOF(struct sec_path), #endif #if IS_ENABLED(CONFIG_NET_TC_SKB_EXT) [TC_SKB_EXT] = SKB_EXT_CHUNKSIZEOF(struct tc_skb_ext), #endif #if IS_ENABLED(CONFIG_MPTCP) [SKB_EXT_MPTCP] = SKB_EXT_CHUNKSIZEOF(struct mptcp_ext), #endif #if IS_ENABLED(CONFIG_MCTP_FLOWS) [SKB_EXT_MCTP] = SKB_EXT_CHUNKSIZEOF(struct mctp_flow), #endif }; static __always_inline unsigned int skb_ext_total_length(void) { unsigned int l = SKB_EXT_CHUNKSIZEOF(struct skb_ext); int i; for (i = 0; i < ARRAY_SIZE(skb_ext_type_len); i++) l += skb_ext_type_len[i]; return l; } static void skb_extensions_init(void) { BUILD_BUG_ON(SKB_EXT_NUM >= 8); #if !IS_ENABLED(CONFIG_KCOV_INSTRUMENT_ALL) BUILD_BUG_ON(skb_ext_total_length() > 255); #endif skbuff_ext_cache = kmem_cache_create("skbuff_ext_cache", SKB_EXT_ALIGN_VALUE * skb_ext_total_length(), 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL); } #else static void skb_extensions_init(void) {} #endif /* The SKB kmem_cache slab is critical for network performance. Never * merge/alias the slab with similar sized objects. This avoids fragmentation * that hurts performance of kmem_cache_{alloc,free}_bulk APIs. */ #ifndef CONFIG_SLUB_TINY #define FLAG_SKB_NO_MERGE SLAB_NO_MERGE #else /* CONFIG_SLUB_TINY - simple loop in kmem_cache_alloc_bulk */ #define FLAG_SKB_NO_MERGE 0 #endif void __init skb_init(void) { net_hotdata.skbuff_cache = kmem_cache_create_usercopy("skbuff_head_cache", sizeof(struct sk_buff), 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC| FLAG_SKB_NO_MERGE, offsetof(struct sk_buff, cb), sizeof_field(struct sk_buff, cb), NULL); net_hotdata.skbuff_fclone_cache = kmem_cache_create("skbuff_fclone_cache", sizeof(struct sk_buff_fclones), 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL); /* usercopy should only access first SKB_SMALL_HEAD_HEADROOM bytes. * struct skb_shared_info is located at the end of skb->head, * and should not be copied to/from user. */ net_hotdata.skb_small_head_cache = kmem_cache_create_usercopy("skbuff_small_head", SKB_SMALL_HEAD_CACHE_SIZE, 0, SLAB_HWCACHE_ALIGN | SLAB_PANIC, 0, SKB_SMALL_HEAD_HEADROOM, NULL); skb_extensions_init(); } static int __skb_to_sgvec(struct sk_buff *skb, struct scatterlist *sg, int offset, int len, unsigned int recursion_level) { int start = skb_headlen(skb); int i, copy = start - offset; struct sk_buff *frag_iter; int elt = 0; if (unlikely(recursion_level >= 24)) return -EMSGSIZE; if (copy > 0) { if (copy > len) copy = len; sg_set_buf(sg, skb->data + offset, copy); elt++; if ((len -= copy) == 0) return elt; offset += copy; } for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { int end; WARN_ON(start > offset + len); end = start + skb_frag_size(&skb_shinfo(skb)->frags[i]); if ((copy = end - offset) > 0) { skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; if (unlikely(elt && sg_is_last(&sg[elt - 1]))) return -EMSGSIZE; if (copy > len) copy = len; sg_set_page(&sg[elt], skb_frag_page(frag), copy, skb_frag_off(frag) + offset - start); elt++; if (!(len -= copy)) return elt; offset += copy; } start = end; } skb_walk_frags(skb, frag_iter) { int end, ret; WARN_ON(start > offset + len); end = start + frag_iter->len; if ((copy = end - offset) > 0) { if (unlikely(elt && sg_is_last(&sg[elt - 1]))) return -EMSGSIZE; if (copy > len) copy = len; ret = __skb_to_sgvec(frag_iter, sg+elt, offset - start, copy, recursion_level + 1); if (unlikely(ret < 0)) return ret; elt += ret; if ((len -= copy) == 0) return elt; offset += copy; } start = end; } BUG_ON(len); return elt; } /** * skb_to_sgvec - Fill a scatter-gather list from a socket buffer * @skb: Socket buffer containing the buffers to be mapped * @sg: The scatter-gather list to map into * @offset: The offset into the buffer's contents to start mapping * @len: Length of buffer space to be mapped * * Fill the specified scatter-gather list with mappings/pointers into a * region of the buffer space attached to a socket buffer. Returns either * the number of scatterlist items used, or -EMSGSIZE if the contents * could not fit. */ int skb_to_sgvec(struct sk_buff *skb, struct scatterlist *sg, int offset, int len) { int nsg = __skb_to_sgvec(skb, sg, offset, len, 0); if (nsg <= 0) return nsg; sg_mark_end(&sg[nsg - 1]); return nsg; } EXPORT_SYMBOL_GPL(skb_to_sgvec); /* As compared with skb_to_sgvec, skb_to_sgvec_nomark only map skb to given * sglist without mark the sg which contain last skb data as the end. * So the caller can mannipulate sg list as will when padding new data after * the first call without calling sg_unmark_end to expend sg list. * * Scenario to use skb_to_sgvec_nomark: * 1. sg_init_table * 2. skb_to_sgvec_nomark(payload1) * 3. skb_to_sgvec_nomark(payload2) * * This is equivalent to: * 1. sg_init_table * 2. skb_to_sgvec(payload1) * 3. sg_unmark_end * 4. skb_to_sgvec(payload2) * * When mapping multiple payload conditionally, skb_to_sgvec_nomark * is more preferable. */ int skb_to_sgvec_nomark(struct sk_buff *skb, struct scatterlist *sg, int offset, int len) { return __skb_to_sgvec(skb, sg, offset, len, 0); } EXPORT_SYMBOL_GPL(skb_to_sgvec_nomark); /** * skb_cow_data - Check that a socket buffer's data buffers are writable * @skb: The socket buffer to check. * @tailbits: Amount of trailing space to be added * @trailer: Returned pointer to the skb where the @tailbits space begins * * Make sure that the data buffers attached to a socket buffer are * writable. If they are not, private copies are made of the data buffers * and the socket buffer is set to use these instead. * * If @tailbits is given, make sure that there is space to write @tailbits * bytes of data beyond current end of socket buffer. @trailer will be * set to point to the skb in which this space begins. * * The number of scatterlist elements required to completely map the * COW'd and extended socket buffer will be returned. */ int skb_cow_data(struct sk_buff *skb, int tailbits, struct sk_buff **trailer) { int copyflag; int elt; struct sk_buff *skb1, **skb_p; /* If skb is cloned or its head is paged, reallocate * head pulling out all the pages (pages are considered not writable * at the moment even if they are anonymous). */ if ((skb_cloned(skb) || skb_shinfo(skb)->nr_frags) && !__pskb_pull_tail(skb, __skb_pagelen(skb))) return -ENOMEM; /* Easy case. Most of packets will go this way. */ if (!skb_has_frag_list(skb)) { /* A little of trouble, not enough of space for trailer. * This should not happen, when stack is tuned to generate * good frames. OK, on miss we reallocate and reserve even more * space, 128 bytes is fair. */ if (skb_tailroom(skb) < tailbits && pskb_expand_head(skb, 0, tailbits-skb_tailroom(skb)+128, GFP_ATOMIC)) return -ENOMEM; /* Voila! */ *trailer = skb; return 1; } /* Misery. We are in troubles, going to mincer fragments... */ elt = 1; skb_p = &skb_shinfo(skb)->frag_list; copyflag = 0; while ((skb1 = *skb_p) != NULL) { int ntail = 0; /* The fragment is partially pulled by someone, * this can happen on input. Copy it and everything * after it. */ if (skb_shared(skb1)) copyflag = 1; /* If the skb is the last, worry about trailer. */ if (skb1->next == NULL && tailbits) { if (skb_shinfo(skb1)->nr_frags || skb_has_frag_list(skb1) || skb_tailroom(skb1) < tailbits) ntail = tailbits + 128; } if (copyflag || skb_cloned(skb1) || ntail || skb_shinfo(skb1)->nr_frags || skb_has_frag_list(skb1)) { struct sk_buff *skb2; /* Fuck, we are miserable poor guys... */ if (ntail == 0) skb2 = skb_copy(skb1, GFP_ATOMIC); else skb2 = skb_copy_expand(skb1, skb_headroom(skb1), ntail, GFP_ATOMIC); if (unlikely(skb2 == NULL)) return -ENOMEM; if (skb1->sk) skb_set_owner_w(skb2, skb1->sk); /* Looking around. Are we still alive? * OK, link new skb, drop old one */ skb2->next = skb1->next; *skb_p = skb2; kfree_skb(skb1); skb1 = skb2; } elt++; *trailer = skb1; skb_p = &skb1->next; } return elt; } EXPORT_SYMBOL_GPL(skb_cow_data); static void sock_rmem_free(struct sk_buff *skb) { struct sock *sk = skb->sk; atomic_sub(skb->truesize, &sk->sk_rmem_alloc); } static void skb_set_err_queue(struct sk_buff *skb) { /* pkt_type of skbs received on local sockets is never PACKET_OUTGOING. * So, it is safe to (mis)use it to mark skbs on the error queue. */ skb->pkt_type = PACKET_OUTGOING; BUILD_BUG_ON(PACKET_OUTGOING == 0); } /* * Note: We dont mem charge error packets (no sk_forward_alloc changes) */ int sock_queue_err_skb(struct sock *sk, struct sk_buff *skb) { if (atomic_read(&sk->sk_rmem_alloc) + skb->truesize >= (unsigned int)READ_ONCE(sk->sk_rcvbuf)) return -ENOMEM; skb_orphan(skb); skb->sk = sk; skb->destructor = sock_rmem_free; atomic_add(skb->truesize, &sk->sk_rmem_alloc); skb_set_err_queue(skb); /* before exiting rcu section, make sure dst is refcounted */ skb_dst_force(skb); skb_queue_tail(&sk->sk_error_queue, skb); if (!sock_flag(sk, SOCK_DEAD)) sk_error_report(sk); return 0; } EXPORT_SYMBOL(sock_queue_err_skb); static bool is_icmp_err_skb(const struct sk_buff *skb) { return skb && (SKB_EXT_ERR(skb)->ee.ee_origin == SO_EE_ORIGIN_ICMP || SKB_EXT_ERR(skb)->ee.ee_origin == SO_EE_ORIGIN_ICMP6); } struct sk_buff *sock_dequeue_err_skb(struct sock *sk) { struct sk_buff_head *q = &sk->sk_error_queue; struct sk_buff *skb, *skb_next = NULL; bool icmp_next = false; unsigned long flags; if (skb_queue_empty_lockless(q)) return NULL; spin_lock_irqsave(&q->lock, flags); skb = __skb_dequeue(q); if (skb && (skb_next = skb_peek(q))) { icmp_next = is_icmp_err_skb(skb_next); if (icmp_next) sk->sk_err = SKB_EXT_ERR(skb_next)->ee.ee_errno; } spin_unlock_irqrestore(&q->lock, flags); if (is_icmp_err_skb(skb) && !icmp_next) sk->sk_err = 0; if (skb_next) sk_error_report(sk); return skb; } EXPORT_SYMBOL(sock_dequeue_err_skb); /** * skb_clone_sk - create clone of skb, and take reference to socket * @skb: the skb to clone * * This function creates a clone of a buffer that holds a reference on * sk_refcnt. Buffers created via this function are meant to be * returned using sock_queue_err_skb, or free via kfree_skb. * * When passing buffers allocated with this function to sock_queue_err_skb * it is necessary to wrap the call with sock_hold/sock_put in order to * prevent the socket from being released prior to being enqueued on * the sk_error_queue. */ struct sk_buff *skb_clone_sk(struct sk_buff *skb) { struct sock *sk = skb->sk; struct sk_buff *clone; if (!sk || !refcount_inc_not_zero(&sk->sk_refcnt)) return NULL; clone = skb_clone(skb, GFP_ATOMIC); if (!clone) { sock_put(sk); return NULL; } clone->sk = sk; clone->destructor = sock_efree; return clone; } EXPORT_SYMBOL(skb_clone_sk); static void __skb_complete_tx_timestamp(struct sk_buff *skb, struct sock *sk, int tstype, bool opt_stats) { struct sock_exterr_skb *serr; int err; BUILD_BUG_ON(sizeof(struct sock_exterr_skb) > sizeof(skb->cb)); serr = SKB_EXT_ERR(skb); memset(serr, 0, sizeof(*serr)); serr->ee.ee_errno = ENOMSG; serr->ee.ee_origin = SO_EE_ORIGIN_TIMESTAMPING; serr->ee.ee_info = tstype; serr->opt_stats = opt_stats; serr->header.h4.iif = skb->dev ? skb->dev->ifindex : 0; if (READ_ONCE(sk->sk_tsflags) & SOF_TIMESTAMPING_OPT_ID) { serr->ee.ee_data = skb_shinfo(skb)->tskey; if (sk_is_tcp(sk)) serr->ee.ee_data -= atomic_read(&sk->sk_tskey); } err = sock_queue_err_skb(sk, skb); if (err) kfree_skb(skb); } static bool skb_may_tx_timestamp(struct sock *sk, bool tsonly) { bool ret; if (likely(tsonly || READ_ONCE(sock_net(sk)->core.sysctl_tstamp_allow_data))) return true; read_lock_bh(&sk->sk_callback_lock); ret = sk->sk_socket && sk->sk_socket->file && file_ns_capable(sk->sk_socket->file, &init_user_ns, CAP_NET_RAW); read_unlock_bh(&sk->sk_callback_lock); return ret; } void skb_complete_tx_timestamp(struct sk_buff *skb, struct skb_shared_hwtstamps *hwtstamps) { struct sock *sk = skb->sk; if (!skb_may_tx_timestamp(sk, false)) goto err; /* Take a reference to prevent skb_orphan() from freeing the socket, * but only if the socket refcount is not zero. */ if (likely(refcount_inc_not_zero(&sk->sk_refcnt))) { *skb_hwtstamps(skb) = *hwtstamps; __skb_complete_tx_timestamp(skb, sk, SCM_TSTAMP_SND, false); sock_put(sk); return; } err: kfree_skb(skb); } EXPORT_SYMBOL_GPL(skb_complete_tx_timestamp); void __skb_tstamp_tx(struct sk_buff *orig_skb, const struct sk_buff *ack_skb, struct skb_shared_hwtstamps *hwtstamps, struct sock *sk, int tstype) { struct sk_buff *skb; bool tsonly, opt_stats = false; u32 tsflags; if (!sk) return; tsflags = READ_ONCE(sk->sk_tsflags); if (!hwtstamps && !(tsflags & SOF_TIMESTAMPING_OPT_TX_SWHW) && skb_shinfo(orig_skb)->tx_flags & SKBTX_IN_PROGRESS) return; tsonly = tsflags & SOF_TIMESTAMPING_OPT_TSONLY; if (!skb_may_tx_timestamp(sk, tsonly)) return; if (tsonly) { #ifdef CONFIG_INET if ((tsflags & SOF_TIMESTAMPING_OPT_STATS) && sk_is_tcp(sk)) { skb = tcp_get_timestamping_opt_stats(sk, orig_skb, ack_skb); opt_stats = true; } else #endif skb = alloc_skb(0, GFP_ATOMIC); } else { skb = skb_clone(orig_skb, GFP_ATOMIC); if (skb_orphan_frags_rx(skb, GFP_ATOMIC)) { kfree_skb(skb); return; } } if (!skb) return; if (tsonly) { skb_shinfo(skb)->tx_flags |= skb_shinfo(orig_skb)->tx_flags & SKBTX_ANY_TSTAMP; skb_shinfo(skb)->tskey = skb_shinfo(orig_skb)->tskey; } if (hwtstamps) *skb_hwtstamps(skb) = *hwtstamps; else __net_timestamp(skb); __skb_complete_tx_timestamp(skb, sk, tstype, opt_stats); } EXPORT_SYMBOL_GPL(__skb_tstamp_tx); void skb_tstamp_tx(struct sk_buff *orig_skb, struct skb_shared_hwtstamps *hwtstamps) { return __skb_tstamp_tx(orig_skb, NULL, hwtstamps, orig_skb->sk, SCM_TSTAMP_SND); } EXPORT_SYMBOL_GPL(skb_tstamp_tx); #ifdef CONFIG_WIRELESS void skb_complete_wifi_ack(struct sk_buff *skb, bool acked) { struct sock *sk = skb->sk; struct sock_exterr_skb *serr; int err = 1; skb->wifi_acked_valid = 1; skb->wifi_acked = acked; serr = SKB_EXT_ERR(skb); memset(serr, 0, sizeof(*serr)); serr->ee.ee_errno = ENOMSG; serr->ee.ee_origin = SO_EE_ORIGIN_TXSTATUS; /* Take a reference to prevent skb_orphan() from freeing the socket, * but only if the socket refcount is not zero. */ if (likely(refcount_inc_not_zero(&sk->sk_refcnt))) { err = sock_queue_err_skb(sk, skb); sock_put(sk); } if (err) kfree_skb(skb); } EXPORT_SYMBOL_GPL(skb_complete_wifi_ack); #endif /* CONFIG_WIRELESS */ /** * skb_partial_csum_set - set up and verify partial csum values for packet * @skb: the skb to set * @start: the number of bytes after skb->data to start checksumming. * @off: the offset from start to place the checksum. * * For untrusted partially-checksummed packets, we need to make sure the values * for skb->csum_start and skb->csum_offset are valid so we don't oops. * * This function checks and sets those values and skb->ip_summed: if this * returns false you should drop the packet. */ bool skb_partial_csum_set(struct sk_buff *skb, u16 start, u16 off) { u32 csum_end = (u32)start + (u32)off + sizeof(__sum16); u32 csum_start = skb_headroom(skb) + (u32)start; if (unlikely(csum_start >= U16_MAX || csum_end > skb_headlen(skb))) { net_warn_ratelimited("bad partial csum: csum=%u/%u headroom=%u headlen=%u\n", start, off, skb_headroom(skb), skb_headlen(skb)); return false; } skb->ip_summed = CHECKSUM_PARTIAL; skb->csum_start = csum_start; skb->csum_offset = off; skb->transport_header = csum_start; return true; } EXPORT_SYMBOL_GPL(skb_partial_csum_set); static int skb_maybe_pull_tail(struct sk_buff *skb, unsigned int len, unsigned int max) { if (skb_headlen(skb) >= len) return 0; /* If we need to pullup then pullup to the max, so we * won't need to do it again. */ if (max > skb->len) max = skb->len; if (__pskb_pull_tail(skb, max - skb_headlen(skb)) == NULL) return -ENOMEM; if (skb_headlen(skb) < len) return -EPROTO; return 0; } #define MAX_TCP_HDR_LEN (15 * 4) static __sum16 *skb_checksum_setup_ip(struct sk_buff *skb, typeof(IPPROTO_IP) proto, unsigned int off) { int err; switch (proto) { case IPPROTO_TCP: err = skb_maybe_pull_tail(skb, off + sizeof(struct tcphdr), off + MAX_TCP_HDR_LEN); if (!err && !skb_partial_csum_set(skb, off, offsetof(struct tcphdr, check))) err = -EPROTO; return err ? ERR_PTR(err) : &tcp_hdr(skb)->check; case IPPROTO_UDP: err = skb_maybe_pull_tail(skb, off + sizeof(struct udphdr), off + sizeof(struct udphdr)); if (!err && !skb_partial_csum_set(skb, off, offsetof(struct udphdr, check))) err = -EPROTO; return err ? ERR_PTR(err) : &udp_hdr(skb)->check; } return ERR_PTR(-EPROTO); } /* This value should be large enough to cover a tagged ethernet header plus * maximally sized IP and TCP or UDP headers. */ #define MAX_IP_HDR_LEN 128 static int skb_checksum_setup_ipv4(struct sk_buff *skb, bool recalculate) { unsigned int off; bool fragment; __sum16 *csum; int err; fragment = false; err = skb_maybe_pull_tail(skb, sizeof(struct iphdr), MAX_IP_HDR_LEN); if (err < 0) goto out; if (ip_is_fragment(ip_hdr(skb))) fragment = true; off = ip_hdrlen(skb); err = -EPROTO; if (fragment) goto out; csum = skb_checksum_setup_ip(skb, ip_hdr(skb)->protocol, off); if (IS_ERR(csum)) return PTR_ERR(csum); if (recalculate) *csum = ~csum_tcpudp_magic(ip_hdr(skb)->saddr, ip_hdr(skb)->daddr, skb->len - off, ip_hdr(skb)->protocol, 0); err = 0; out: return err; } /* This value should be large enough to cover a tagged ethernet header plus * an IPv6 header, all options, and a maximal TCP or UDP header. */ #define MAX_IPV6_HDR_LEN 256 #define OPT_HDR(type, skb, off) \ (type *)(skb_network_header(skb) + (off)) static int skb_checksum_setup_ipv6(struct sk_buff *skb, bool recalculate) { int err; u8 nexthdr; unsigned int off; unsigned int len; bool fragment; bool done; __sum16 *csum; fragment = false; done = false; off = sizeof(struct ipv6hdr); err = skb_maybe_pull_tail(skb, off, MAX_IPV6_HDR_LEN); if (err < 0) goto out; nexthdr = ipv6_hdr(skb)->nexthdr; len = sizeof(struct ipv6hdr) + ntohs(ipv6_hdr(skb)->payload_len); while (off <= len && !done) { switch (nexthdr) { case IPPROTO_DSTOPTS: case IPPROTO_HOPOPTS: case IPPROTO_ROUTING: { struct ipv6_opt_hdr *hp; err = skb_maybe_pull_tail(skb, off + sizeof(struct ipv6_opt_hdr), MAX_IPV6_HDR_LEN); if (err < 0) goto out; hp = OPT_HDR(struct ipv6_opt_hdr, skb, off); nexthdr = hp->nexthdr; off += ipv6_optlen(hp); break; } case IPPROTO_AH: { struct ip_auth_hdr *hp; err = skb_maybe_pull_tail(skb, off + sizeof(struct ip_auth_hdr), MAX_IPV6_HDR_LEN); if (err < 0) goto out; hp = OPT_HDR(struct ip_auth_hdr, skb, off); nexthdr = hp->nexthdr; off += ipv6_authlen(hp); break; } case IPPROTO_FRAGMENT: { struct frag_hdr *hp; err = skb_maybe_pull_tail(skb, off + sizeof(struct frag_hdr), MAX_IPV6_HDR_LEN); if (err < 0) goto out; hp = OPT_HDR(struct frag_hdr, skb, off); if (hp->frag_off & htons(IP6_OFFSET | IP6_MF)) fragment = true; nexthdr = hp->nexthdr; off += sizeof(struct frag_hdr); break; } default: done = true; break; } } err = -EPROTO; if (!done || fragment) goto out; csum = skb_checksum_setup_ip(skb, nexthdr, off); if (IS_ERR(csum)) return PTR_ERR(csum); if (recalculate) *csum = ~csum_ipv6_magic(&ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr, skb->len - off, nexthdr, 0); err = 0; out: return err; } /** * skb_checksum_setup - set up partial checksum offset * @skb: the skb to set up * @recalculate: if true the pseudo-header checksum will be recalculated */ int skb_checksum_setup(struct sk_buff *skb, bool recalculate) { int err; switch (skb->protocol) { case htons(ETH_P_IP): err = skb_checksum_setup_ipv4(skb, recalculate); break; case htons(ETH_P_IPV6): err = skb_checksum_setup_ipv6(skb, recalculate); break; default: err = -EPROTO; break; } return err; } EXPORT_SYMBOL(skb_checksum_setup); /** * skb_checksum_maybe_trim - maybe trims the given skb * @skb: the skb to check * @transport_len: the data length beyond the network header * * Checks whether the given skb has data beyond the given transport length. * If so, returns a cloned skb trimmed to this transport length. * Otherwise returns the provided skb. Returns NULL in error cases * (e.g. transport_len exceeds skb length or out-of-memory). * * Caller needs to set the skb transport header and free any returned skb if it * differs from the provided skb. */ static struct sk_buff *skb_checksum_maybe_trim(struct sk_buff *skb, unsigned int transport_len) { struct sk_buff *skb_chk; unsigned int len = skb_transport_offset(skb) + transport_len; int ret; if (skb->len < len) return NULL; else if (skb->len == len) return skb; skb_chk = skb_clone(skb, GFP_ATOMIC); if (!skb_chk) return NULL; ret = pskb_trim_rcsum(skb_chk, len); if (ret) { kfree_skb(skb_chk); return NULL; } return skb_chk; } /** * skb_checksum_trimmed - validate checksum of an skb * @skb: the skb to check * @transport_len: the data length beyond the network header * @skb_chkf: checksum function to use * * Applies the given checksum function skb_chkf to the provided skb. * Returns a checked and maybe trimmed skb. Returns NULL on error. * * If the skb has data beyond the given transport length, then a * trimmed & cloned skb is checked and returned. * * Caller needs to set the skb transport header and free any returned skb if it * differs from the provided skb. */ struct sk_buff *skb_checksum_trimmed(struct sk_buff *skb, unsigned int transport_len, __sum16(*skb_chkf)(struct sk_buff *skb)) { struct sk_buff *skb_chk; unsigned int offset = skb_transport_offset(skb); __sum16 ret; skb_chk = skb_checksum_maybe_trim(skb, transport_len); if (!skb_chk) goto err; if (!pskb_may_pull(skb_chk, offset)) goto err; skb_pull_rcsum(skb_chk, offset); ret = skb_chkf(skb_chk); skb_push_rcsum(skb_chk, offset); if (ret) goto err; return skb_chk; err: if (skb_chk && skb_chk != skb) kfree_skb(skb_chk); return NULL; } EXPORT_SYMBOL(skb_checksum_trimmed); void __skb_warn_lro_forwarding(const struct sk_buff *skb) { net_warn_ratelimited("%s: received packets cannot be forwarded while LRO is enabled\n", skb->dev->name); } EXPORT_SYMBOL(__skb_warn_lro_forwarding); void kfree_skb_partial(struct sk_buff *skb, bool head_stolen) { if (head_stolen) { skb_release_head_state(skb); kmem_cache_free(net_hotdata.skbuff_cache, skb); } else { __kfree_skb(skb); } } EXPORT_SYMBOL(kfree_skb_partial); /** * skb_try_coalesce - try to merge skb to prior one * @to: prior buffer * @from: buffer to add * @fragstolen: pointer to boolean * @delta_truesize: how much more was allocated than was requested */ bool skb_try_coalesce(struct sk_buff *to, struct sk_buff *from, bool *fragstolen, int *delta_truesize) { struct skb_shared_info *to_shinfo, *from_shinfo; int i, delta, len = from->len; *fragstolen = false; if (skb_cloned(to)) return false; /* In general, avoid mixing page_pool and non-page_pool allocated * pages within the same SKB. In theory we could take full * references if @from is cloned and !@to->pp_recycle but its * tricky (due to potential race with the clone disappearing) and * rare, so not worth dealing with. */ if (to->pp_recycle != from->pp_recycle) return false; if (skb_frags_readable(from) != skb_frags_readable(to)) return false; if (len <= skb_tailroom(to) && skb_frags_readable(from)) { if (len) BUG_ON(skb_copy_bits(from, 0, skb_put(to, len), len)); *delta_truesize = 0; return true; } to_shinfo = skb_shinfo(to); from_shinfo = skb_shinfo(from); if (to_shinfo->frag_list || from_shinfo->frag_list) return false; if (skb_zcopy(to) || skb_zcopy(from)) return false; if (skb_headlen(from) != 0) { struct page *page; unsigned int offset; if (to_shinfo->nr_frags + from_shinfo->nr_frags >= MAX_SKB_FRAGS) return false; if (skb_head_is_locked(from)) return false; delta = from->truesize - SKB_DATA_ALIGN(sizeof(struct sk_buff)); page = virt_to_head_page(from->head); offset = from->data - (unsigned char *)page_address(page); skb_fill_page_desc(to, to_shinfo->nr_frags, page, offset, skb_headlen(from)); *fragstolen = true; } else { if (to_shinfo->nr_frags + from_shinfo->nr_frags > MAX_SKB_FRAGS) return false; delta = from->truesize - SKB_TRUESIZE(skb_end_offset(from)); } WARN_ON_ONCE(delta < len); memcpy(to_shinfo->frags + to_shinfo->nr_frags, from_shinfo->frags, from_shinfo->nr_frags * sizeof(skb_frag_t)); to_shinfo->nr_frags += from_shinfo->nr_frags; if (!skb_cloned(from)) from_shinfo->nr_frags = 0; /* if the skb is not cloned this does nothing * since we set nr_frags to 0. */ if (skb_pp_frag_ref(from)) { for (i = 0; i < from_shinfo->nr_frags; i++) __skb_frag_ref(&from_shinfo->frags[i]); } to->truesize += delta; to->len += len; to->data_len += len; *delta_truesize = delta; return true; } EXPORT_SYMBOL(skb_try_coalesce); /** * skb_scrub_packet - scrub an skb * * @skb: buffer to clean * @xnet: packet is crossing netns * * skb_scrub_packet can be used after encapsulating or decapsulating a packet * into/from a tunnel. Some information have to be cleared during these * operations. * skb_scrub_packet can also be used to clean a skb before injecting it in * another namespace (@xnet == true). We have to clear all information in the * skb that could impact namespace isolation. */ void skb_scrub_packet(struct sk_buff *skb, bool xnet) { skb->pkt_type = PACKET_HOST; skb->skb_iif = 0; skb->ignore_df = 0; skb_dst_drop(skb); skb_ext_reset(skb); nf_reset_ct(skb); nf_reset_trace(skb); #ifdef CONFIG_NET_SWITCHDEV skb->offload_fwd_mark = 0; skb->offload_l3_fwd_mark = 0; #endif if (!xnet) return; ipvs_reset(skb); skb->mark = 0; skb_clear_tstamp(skb); } EXPORT_SYMBOL_GPL(skb_scrub_packet); static struct sk_buff *skb_reorder_vlan_header(struct sk_buff *skb) { int mac_len, meta_len; void *meta; if (skb_cow(skb, skb_headroom(skb)) < 0) { kfree_skb(skb); return NULL; } mac_len = skb->data - skb_mac_header(skb); if (likely(mac_len > VLAN_HLEN + ETH_TLEN)) { memmove(skb_mac_header(skb) + VLAN_HLEN, skb_mac_header(skb), mac_len - VLAN_HLEN - ETH_TLEN); } meta_len = skb_metadata_len(skb); if (meta_len) { meta = skb_metadata_end(skb) - meta_len; memmove(meta + VLAN_HLEN, meta, meta_len); } skb->mac_header += VLAN_HLEN; return skb; } struct sk_buff *skb_vlan_untag(struct sk_buff *skb) { struct vlan_hdr *vhdr; u16 vlan_tci; if (unlikely(skb_vlan_tag_present(skb))) { /* vlan_tci is already set-up so leave this for another time */ return skb; } skb = skb_share_check(skb, GFP_ATOMIC); if (unlikely(!skb)) goto err_free; /* We may access the two bytes after vlan_hdr in vlan_set_encap_proto(). */ if (unlikely(!pskb_may_pull(skb, VLAN_HLEN + sizeof(unsigned short)))) goto err_free; vhdr = (struct vlan_hdr *)skb->data; vlan_tci = ntohs(vhdr->h_vlan_TCI); __vlan_hwaccel_put_tag(skb, skb->protocol, vlan_tci); skb_pull_rcsum(skb, VLAN_HLEN); vlan_set_encap_proto(skb, vhdr); skb = skb_reorder_vlan_header(skb); if (unlikely(!skb)) goto err_free; skb_reset_network_header(skb); if (!skb_transport_header_was_set(skb)) skb_reset_transport_header(skb); skb_reset_mac_len(skb); return skb; err_free: kfree_skb(skb); return NULL; } EXPORT_SYMBOL(skb_vlan_untag); int skb_ensure_writable(struct sk_buff *skb, unsigned int write_len) { if (!pskb_may_pull(skb, write_len)) return -ENOMEM; if (!skb_frags_readable(skb)) return -EFAULT; if (!skb_cloned(skb) || skb_clone_writable(skb, write_len)) return 0; return pskb_expand_head(skb, 0, 0, GFP_ATOMIC); } EXPORT_SYMBOL(skb_ensure_writable); int skb_ensure_writable_head_tail(struct sk_buff *skb, struct net_device *dev) { int needed_headroom = dev->needed_headroom; int needed_tailroom = dev->needed_tailroom; /* For tail taggers, we need to pad short frames ourselves, to ensure * that the tail tag does not fail at its role of being at the end of * the packet, once the conduit interface pads the frame. Account for * that pad length here, and pad later. */ if (unlikely(needed_tailroom && skb->len < ETH_ZLEN)) needed_tailroom += ETH_ZLEN - skb->len; /* skb_headroom() returns unsigned int... */ needed_headroom = max_t(int, needed_headroom - skb_headroom(skb), 0); needed_tailroom = max_t(int, needed_tailroom - skb_tailroom(skb), 0); if (likely(!needed_headroom && !needed_tailroom && !skb_cloned(skb))) /* No reallocation needed, yay! */ return 0; return pskb_expand_head(skb, needed_headroom, needed_tailroom, GFP_ATOMIC); } EXPORT_SYMBOL(skb_ensure_writable_head_tail); /* remove VLAN header from packet and update csum accordingly. * expects a non skb_vlan_tag_present skb with a vlan tag payload */ int __skb_vlan_pop(struct sk_buff *skb, u16 *vlan_tci) { int offset = skb->data - skb_mac_header(skb); int err; if (WARN_ONCE(offset, "__skb_vlan_pop got skb with skb->data not at mac header (offset %d)\n", offset)) { return -EINVAL; } err = skb_ensure_writable(skb, VLAN_ETH_HLEN); if (unlikely(err)) return err; skb_postpull_rcsum(skb, skb->data + (2 * ETH_ALEN), VLAN_HLEN); vlan_remove_tag(skb, vlan_tci); skb->mac_header += VLAN_HLEN; if (skb_network_offset(skb) < ETH_HLEN) skb_set_network_header(skb, ETH_HLEN); skb_reset_mac_len(skb); return err; } EXPORT_SYMBOL(__skb_vlan_pop); /* Pop a vlan tag either from hwaccel or from payload. * Expects skb->data at mac header. */ int skb_vlan_pop(struct sk_buff *skb) { u16 vlan_tci; __be16 vlan_proto; int err; if (likely(skb_vlan_tag_present(skb))) { __vlan_hwaccel_clear_tag(skb); } else { if (unlikely(!eth_type_vlan(skb->protocol))) return 0; err = __skb_vlan_pop(skb, &vlan_tci); if (err) return err; } /* move next vlan tag to hw accel tag */ if (likely(!eth_type_vlan(skb->protocol))) return 0; vlan_proto = skb->protocol; err = __skb_vlan_pop(skb, &vlan_tci); if (unlikely(err)) return err; __vlan_hwaccel_put_tag(skb, vlan_proto, vlan_tci); return 0; } EXPORT_SYMBOL(skb_vlan_pop); /* Push a vlan tag either into hwaccel or into payload (if hwaccel tag present). * Expects skb->data at mac header. */ int skb_vlan_push(struct sk_buff *skb, __be16 vlan_proto, u16 vlan_tci) { if (skb_vlan_tag_present(skb)) { int offset = skb->data - skb_mac_header(skb); int err; if (WARN_ONCE(offset, "skb_vlan_push got skb with skb->data not at mac header (offset %d)\n", offset)) { return -EINVAL; } err = __vlan_insert_tag(skb, skb->vlan_proto, skb_vlan_tag_get(skb)); if (err) return err; skb->protocol = skb->vlan_proto; skb->network_header -= VLAN_HLEN; skb_postpush_rcsum(skb, skb->data + (2 * ETH_ALEN), VLAN_HLEN); } __vlan_hwaccel_put_tag(skb, vlan_proto, vlan_tci); return 0; } EXPORT_SYMBOL(skb_vlan_push); /** * skb_eth_pop() - Drop the Ethernet header at the head of a packet * * @skb: Socket buffer to modify * * Drop the Ethernet header of @skb. * * Expects that skb->data points to the mac header and that no VLAN tags are * present. * * Returns 0 on success, -errno otherwise. */ int skb_eth_pop(struct sk_buff *skb) { if (!pskb_may_pull(skb, ETH_HLEN) || skb_vlan_tagged(skb) || skb_network_offset(skb) < ETH_HLEN) return -EPROTO; skb_pull_rcsum(skb, ETH_HLEN); skb_reset_mac_header(skb); skb_reset_mac_len(skb); return 0; } EXPORT_SYMBOL(skb_eth_pop); /** * skb_eth_push() - Add a new Ethernet header at the head of a packet * * @skb: Socket buffer to modify * @dst: Destination MAC address of the new header * @src: Source MAC address of the new header * * Prepend @skb with a new Ethernet header. * * Expects that skb->data points to the mac header, which must be empty. * * Returns 0 on success, -errno otherwise. */ int skb_eth_push(struct sk_buff *skb, const unsigned char *dst, const unsigned char *src) { struct ethhdr *eth; int err; if (skb_network_offset(skb) || skb_vlan_tag_present(skb)) return -EPROTO; err = skb_cow_head(skb, sizeof(*eth)); if (err < 0) return err; skb_push(skb, sizeof(*eth)); skb_reset_mac_header(skb); skb_reset_mac_len(skb); eth = eth_hdr(skb); ether_addr_copy(eth->h_dest, dst); ether_addr_copy(eth->h_source, src); eth->h_proto = skb->protocol; skb_postpush_rcsum(skb, eth, sizeof(*eth)); return 0; } EXPORT_SYMBOL(skb_eth_push); /* Update the ethertype of hdr and the skb csum value if required. */ static void skb_mod_eth_type(struct sk_buff *skb, struct ethhdr *hdr, __be16 ethertype) { if (skb->ip_summed == CHECKSUM_COMPLETE) { __be16 diff[] = { ~hdr->h_proto, ethertype }; skb->csum = csum_partial((char *)diff, sizeof(diff), skb->csum); } hdr->h_proto = ethertype; } /** * skb_mpls_push() - push a new MPLS header after mac_len bytes from start of * the packet * * @skb: buffer * @mpls_lse: MPLS label stack entry to push * @mpls_proto: ethertype of the new MPLS header (expects 0x8847 or 0x8848) * @mac_len: length of the MAC header * @ethernet: flag to indicate if the resulting packet after skb_mpls_push is * ethernet * * Expects skb->data at mac header. * * Returns 0 on success, -errno otherwise. */ int skb_mpls_push(struct sk_buff *skb, __be32 mpls_lse, __be16 mpls_proto, int mac_len, bool ethernet) { struct mpls_shim_hdr *lse; int err; if (unlikely(!eth_p_mpls(mpls_proto))) return -EINVAL; /* Networking stack does not allow simultaneous Tunnel and MPLS GSO. */ if (skb->encapsulation) return -EINVAL; err = skb_cow_head(skb, MPLS_HLEN); if (unlikely(err)) return err; if (!skb->inner_protocol) { skb_set_inner_network_header(skb, skb_network_offset(skb)); skb_set_inner_protocol(skb, skb->protocol); } skb_push(skb, MPLS_HLEN); memmove(skb_mac_header(skb) - MPLS_HLEN, skb_mac_header(skb), mac_len); skb_reset_mac_header(skb); skb_set_network_header(skb, mac_len); skb_reset_mac_len(skb); lse = mpls_hdr(skb); lse->label_stack_entry = mpls_lse; skb_postpush_rcsum(skb, lse, MPLS_HLEN); if (ethernet && mac_len >= ETH_HLEN) skb_mod_eth_type(skb, eth_hdr(skb), mpls_proto); skb->protocol = mpls_proto; return 0; } EXPORT_SYMBOL_GPL(skb_mpls_push); /** * skb_mpls_pop() - pop the outermost MPLS header * * @skb: buffer * @next_proto: ethertype of header after popped MPLS header * @mac_len: length of the MAC header * @ethernet: flag to indicate if the packet is ethernet * * Expects skb->data at mac header. * * Returns 0 on success, -errno otherwise. */ int skb_mpls_pop(struct sk_buff *skb, __be16 next_proto, int mac_len, bool ethernet) { int err; if (unlikely(!eth_p_mpls(skb->protocol))) return 0; err = skb_ensure_writable(skb, mac_len + MPLS_HLEN); if (unlikely(err)) return err; skb_postpull_rcsum(skb, mpls_hdr(skb), MPLS_HLEN); memmove(skb_mac_header(skb) + MPLS_HLEN, skb_mac_header(skb), mac_len); __skb_pull(skb, MPLS_HLEN); skb_reset_mac_header(skb); skb_set_network_header(skb, mac_len); if (ethernet && mac_len >= ETH_HLEN) { struct ethhdr *hdr; /* use mpls_hdr() to get ethertype to account for VLANs. */ hdr = (struct ethhdr *)((void *)mpls_hdr(skb) - ETH_HLEN); skb_mod_eth_type(skb, hdr, next_proto); } skb->protocol = next_proto; return 0; } EXPORT_SYMBOL_GPL(skb_mpls_pop); /** * skb_mpls_update_lse() - modify outermost MPLS header and update csum * * @skb: buffer * @mpls_lse: new MPLS label stack entry to update to * * Expects skb->data at mac header. * * Returns 0 on success, -errno otherwise. */ int skb_mpls_update_lse(struct sk_buff *skb, __be32 mpls_lse) { int err; if (unlikely(!eth_p_mpls(skb->protocol))) return -EINVAL; err = skb_ensure_writable(skb, skb->mac_len + MPLS_HLEN); if (unlikely(err)) return err; if (skb->ip_summed == CHECKSUM_COMPLETE) { __be32 diff[] = { ~mpls_hdr(skb)->label_stack_entry, mpls_lse }; skb->csum = csum_partial((char *)diff, sizeof(diff), skb->csum); } mpls_hdr(skb)->label_stack_entry = mpls_lse; return 0; } EXPORT_SYMBOL_GPL(skb_mpls_update_lse); /** * skb_mpls_dec_ttl() - decrement the TTL of the outermost MPLS header * * @skb: buffer * * Expects skb->data at mac header. * * Returns 0 on success, -errno otherwise. */ int skb_mpls_dec_ttl(struct sk_buff *skb) { u32 lse; u8 ttl; if (unlikely(!eth_p_mpls(skb->protocol))) return -EINVAL; if (!pskb_may_pull(skb, skb_network_offset(skb) + MPLS_HLEN)) return -ENOMEM; lse = be32_to_cpu(mpls_hdr(skb)->label_stack_entry); ttl = (lse & MPLS_LS_TTL_MASK) >> MPLS_LS_TTL_SHIFT; if (!--ttl) return -EINVAL; lse &= ~MPLS_LS_TTL_MASK; lse |= ttl << MPLS_LS_TTL_SHIFT; return skb_mpls_update_lse(skb, cpu_to_be32(lse)); } EXPORT_SYMBOL_GPL(skb_mpls_dec_ttl); /** * alloc_skb_with_frags - allocate skb with page frags * * @header_len: size of linear part * @data_len: needed length in frags * @order: max page order desired. * @errcode: pointer to error code if any * @gfp_mask: allocation mask * * This can be used to allocate a paged skb, given a maximal order for frags. */ struct sk_buff *alloc_skb_with_frags(unsigned long header_len, unsigned long data_len, int order, int *errcode, gfp_t gfp_mask) { unsigned long chunk; struct sk_buff *skb; struct page *page; int nr_frags = 0; *errcode = -EMSGSIZE; if (unlikely(data_len > MAX_SKB_FRAGS * (PAGE_SIZE << order))) return NULL; *errcode = -ENOBUFS; skb = alloc_skb(header_len, gfp_mask); if (!skb) return NULL; while (data_len) { if (nr_frags == MAX_SKB_FRAGS - 1) goto failure; while (order && PAGE_ALIGN(data_len) < (PAGE_SIZE << order)) order--; if (order) { page = alloc_pages((gfp_mask & ~__GFP_DIRECT_RECLAIM) | __GFP_COMP | __GFP_NOWARN, order); if (!page) { order--; continue; } } else { page = alloc_page(gfp_mask); if (!page) goto failure; } chunk = min_t(unsigned long, data_len, PAGE_SIZE << order); skb_fill_page_desc(skb, nr_frags, page, 0, chunk); nr_frags++; skb->truesize += (PAGE_SIZE << order); data_len -= chunk; } return skb; failure: kfree_skb(skb); return NULL; } EXPORT_SYMBOL(alloc_skb_with_frags); /* carve out the first off bytes from skb when off < headlen */ static int pskb_carve_inside_header(struct sk_buff *skb, const u32 off, const int headlen, gfp_t gfp_mask) { int i; unsigned int size = skb_end_offset(skb); int new_hlen = headlen - off; u8 *data; if (skb_pfmemalloc(skb)) gfp_mask |= __GFP_MEMALLOC; data = kmalloc_reserve(&size, gfp_mask, NUMA_NO_NODE, NULL); if (!data) return -ENOMEM; size = SKB_WITH_OVERHEAD(size); /* Copy real data, and all frags */ skb_copy_from_linear_data_offset(skb, off, data, new_hlen); skb->len -= off; memcpy((struct skb_shared_info *)(data + size), skb_shinfo(skb), offsetof(struct skb_shared_info, frags[skb_shinfo(skb)->nr_frags])); if (skb_cloned(skb)) { /* drop the old head gracefully */ if (skb_orphan_frags(skb, gfp_mask)) { skb_kfree_head(data, size); return -ENOMEM; } for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) skb_frag_ref(skb, i); if (skb_has_frag_list(skb)) skb_clone_fraglist(skb); skb_release_data(skb, SKB_CONSUMED); } else { /* we can reuse existing recount- all we did was * relocate values */ skb_free_head(skb); } skb->head = data; skb->data = data; skb->head_frag = 0; skb_set_end_offset(skb, size); skb_set_tail_pointer(skb, skb_headlen(skb)); skb_headers_offset_update(skb, 0); skb->cloned = 0; skb->hdr_len = 0; skb->nohdr = 0; atomic_set(&skb_shinfo(skb)->dataref, 1); return 0; } static int pskb_carve(struct sk_buff *skb, const u32 off, gfp_t gfp); /* carve out the first eat bytes from skb's frag_list. May recurse into * pskb_carve() */ static int pskb_carve_frag_list(struct sk_buff *skb, struct skb_shared_info *shinfo, int eat, gfp_t gfp_mask) { struct sk_buff *list = shinfo->frag_list; struct sk_buff *clone = NULL; struct sk_buff *insp = NULL; do { if (!list) { pr_err("Not enough bytes to eat. Want %d\n", eat); return -EFAULT; } if (list->len <= eat) { /* Eaten as whole. */ eat -= list->len; list = list->next; insp = list; } else { /* Eaten partially. */ if (skb_shared(list)) { clone = skb_clone(list, gfp_mask); if (!clone) return -ENOMEM; insp = list->next; list = clone; } else { /* This may be pulled without problems. */ insp = list; } if (pskb_carve(list, eat, gfp_mask) < 0) { kfree_skb(clone); return -ENOMEM; } break; } } while (eat); /* Free pulled out fragments. */ while ((list = shinfo->frag_list) != insp) { shinfo->frag_list = list->next; consume_skb(list); } /* And insert new clone at head. */ if (clone) { clone->next = list; shinfo->frag_list = clone; } return 0; } /* carve off first len bytes from skb. Split line (off) is in the * non-linear part of skb */ static int pskb_carve_inside_nonlinear(struct sk_buff *skb, const u32 off, int pos, gfp_t gfp_mask) { int i, k = 0; unsigned int size = skb_end_offset(skb); u8 *data; const int nfrags = skb_shinfo(skb)->nr_frags; struct skb_shared_info *shinfo; if (skb_pfmemalloc(skb)) gfp_mask |= __GFP_MEMALLOC; data = kmalloc_reserve(&size, gfp_mask, NUMA_NO_NODE, NULL); if (!data) return -ENOMEM; size = SKB_WITH_OVERHEAD(size); memcpy((struct skb_shared_info *)(data + size), skb_shinfo(skb), offsetof(struct skb_shared_info, frags[0])); if (skb_orphan_frags(skb, gfp_mask)) { skb_kfree_head(data, size); return -ENOMEM; } shinfo = (struct skb_shared_info *)(data + size); for (i = 0; i < nfrags; i++) { int fsize = skb_frag_size(&skb_shinfo(skb)->frags[i]); if (pos + fsize > off) { shinfo->frags[k] = skb_shinfo(skb)->frags[i]; if (pos < off) { /* Split frag. * We have two variants in this case: * 1. Move all the frag to the second * part, if it is possible. F.e. * this approach is mandatory for TUX, * where splitting is expensive. * 2. Split is accurately. We make this. */ skb_frag_off_add(&shinfo->frags[0], off - pos); skb_frag_size_sub(&shinfo->frags[0], off - pos); } skb_frag_ref(skb, i); k++; } pos += fsize; } shinfo->nr_frags = k; if (skb_has_frag_list(skb)) skb_clone_fraglist(skb); /* split line is in frag list */ if (k == 0 && pskb_carve_frag_list(skb, shinfo, off - pos, gfp_mask)) { /* skb_frag_unref() is not needed here as shinfo->nr_frags = 0. */ if (skb_has_frag_list(skb)) kfree_skb_list(skb_shinfo(skb)->frag_list); skb_kfree_head(data, size); return -ENOMEM; } skb_release_data(skb, SKB_CONSUMED); skb->head = data; skb->head_frag = 0; skb->data = data; skb_set_end_offset(skb, size); skb_reset_tail_pointer(skb); skb_headers_offset_update(skb, 0); skb->cloned = 0; skb->hdr_len = 0; skb->nohdr = 0; skb->len -= off; skb->data_len = skb->len; atomic_set(&skb_shinfo(skb)->dataref, 1); return 0; } /* remove len bytes from the beginning of the skb */ static int pskb_carve(struct sk_buff *skb, const u32 len, gfp_t gfp) { int headlen = skb_headlen(skb); if (len < headlen) return pskb_carve_inside_header(skb, len, headlen, gfp); else return pskb_carve_inside_nonlinear(skb, len, headlen, gfp); } /* Extract to_copy bytes starting at off from skb, and return this in * a new skb */ struct sk_buff *pskb_extract(struct sk_buff *skb, int off, int to_copy, gfp_t gfp) { struct sk_buff *clone = skb_clone(skb, gfp); if (!clone) return NULL; if (pskb_carve(clone, off, gfp) < 0 || pskb_trim(clone, to_copy)) { kfree_skb(clone); return NULL; } return clone; } EXPORT_SYMBOL(pskb_extract); /** * skb_condense - try to get rid of fragments/frag_list if possible * @skb: buffer * * Can be used to save memory before skb is added to a busy queue. * If packet has bytes in frags and enough tail room in skb->head, * pull all of them, so that we can free the frags right now and adjust * truesize. * Notes: * We do not reallocate skb->head thus can not fail. * Caller must re-evaluate skb->truesize if needed. */ void skb_condense(struct sk_buff *skb) { if (skb->data_len) { if (skb->data_len > skb->end - skb->tail || skb_cloned(skb) || !skb_frags_readable(skb)) return; /* Nice, we can free page frag(s) right now */ __pskb_pull_tail(skb, skb->data_len); } /* At this point, skb->truesize might be over estimated, * because skb had a fragment, and fragments do not tell * their truesize. * When we pulled its content into skb->head, fragment * was freed, but __pskb_pull_tail() could not possibly * adjust skb->truesize, not knowing the frag truesize. */ skb->truesize = SKB_TRUESIZE(skb_end_offset(skb)); } EXPORT_SYMBOL(skb_condense); #ifdef CONFIG_SKB_EXTENSIONS static void *skb_ext_get_ptr(struct skb_ext *ext, enum skb_ext_id id) { return (void *)ext + (ext->offset[id] * SKB_EXT_ALIGN_VALUE); } /** * __skb_ext_alloc - allocate a new skb extensions storage * * @flags: See kmalloc(). * * Returns the newly allocated pointer. The pointer can later attached to a * skb via __skb_ext_set(). * Note: caller must handle the skb_ext as an opaque data. */ struct skb_ext *__skb_ext_alloc(gfp_t flags) { struct skb_ext *new = kmem_cache_alloc(skbuff_ext_cache, flags); if (new) { memset(new->offset, 0, sizeof(new->offset)); refcount_set(&new->refcnt, 1); } return new; } static struct skb_ext *skb_ext_maybe_cow(struct skb_ext *old, unsigned int old_active) { struct skb_ext *new; if (refcount_read(&old->refcnt) == 1) return old; new = kmem_cache_alloc(skbuff_ext_cache, GFP_ATOMIC); if (!new) return NULL; memcpy(new, old, old->chunks * SKB_EXT_ALIGN_VALUE); refcount_set(&new->refcnt, 1); #ifdef CONFIG_XFRM if (old_active & (1 << SKB_EXT_SEC_PATH)) { struct sec_path *sp = skb_ext_get_ptr(old, SKB_EXT_SEC_PATH); unsigned int i; for (i = 0; i < sp->len; i++) xfrm_state_hold(sp->xvec[i]); } #endif #ifdef CONFIG_MCTP_FLOWS if (old_active & (1 << SKB_EXT_MCTP)) { struct mctp_flow *flow = skb_ext_get_ptr(old, SKB_EXT_MCTP); if (flow->key) refcount_inc(&flow->key->refs); } #endif __skb_ext_put(old); return new; } /** * __skb_ext_set - attach the specified extension storage to this skb * @skb: buffer * @id: extension id * @ext: extension storage previously allocated via __skb_ext_alloc() * * Existing extensions, if any, are cleared. * * Returns the pointer to the extension. */ void *__skb_ext_set(struct sk_buff *skb, enum skb_ext_id id, struct skb_ext *ext) { unsigned int newlen, newoff = SKB_EXT_CHUNKSIZEOF(*ext); skb_ext_put(skb); newlen = newoff + skb_ext_type_len[id]; ext->chunks = newlen; ext->offset[id] = newoff; skb->extensions = ext; skb->active_extensions = 1 << id; return skb_ext_get_ptr(ext, id); } /** * skb_ext_add - allocate space for given extension, COW if needed * @skb: buffer * @id: extension to allocate space for * * Allocates enough space for the given extension. * If the extension is already present, a pointer to that extension * is returned. * * If the skb was cloned, COW applies and the returned memory can be * modified without changing the extension space of clones buffers. * * Returns pointer to the extension or NULL on allocation failure. */ void *skb_ext_add(struct sk_buff *skb, enum skb_ext_id id) { struct skb_ext *new, *old = NULL; unsigned int newlen, newoff; if (skb->active_extensions) { old = skb->extensions; new = skb_ext_maybe_cow(old, skb->active_extensions); if (!new) return NULL; if (__skb_ext_exist(new, id)) goto set_active; newoff = new->chunks; } else { newoff = SKB_EXT_CHUNKSIZEOF(*new); new = __skb_ext_alloc(GFP_ATOMIC); if (!new) return NULL; } newlen = newoff + skb_ext_type_len[id]; new->chunks = newlen; new->offset[id] = newoff; set_active: skb->slow_gro = 1; skb->extensions = new; skb->active_extensions |= 1 << id; return skb_ext_get_ptr(new, id); } EXPORT_SYMBOL(skb_ext_add); #ifdef CONFIG_XFRM static void skb_ext_put_sp(struct sec_path *sp) { unsigned int i; for (i = 0; i < sp->len; i++) xfrm_state_put(sp->xvec[i]); } #endif #ifdef CONFIG_MCTP_FLOWS static void skb_ext_put_mctp(struct mctp_flow *flow) { if (flow->key) mctp_key_unref(flow->key); } #endif void __skb_ext_del(struct sk_buff *skb, enum skb_ext_id id) { struct skb_ext *ext = skb->extensions; skb->active_extensions &= ~(1 << id); if (skb->active_extensions == 0) { skb->extensions = NULL; __skb_ext_put(ext); #ifdef CONFIG_XFRM } else if (id == SKB_EXT_SEC_PATH && refcount_read(&ext->refcnt) == 1) { struct sec_path *sp = skb_ext_get_ptr(ext, SKB_EXT_SEC_PATH); skb_ext_put_sp(sp); sp->len = 0; #endif } } EXPORT_SYMBOL(__skb_ext_del); void __skb_ext_put(struct skb_ext *ext) { /* If this is last clone, nothing can increment * it after check passes. Avoids one atomic op. */ if (refcount_read(&ext->refcnt) == 1) goto free_now; if (!refcount_dec_and_test(&ext->refcnt)) return; free_now: #ifdef CONFIG_XFRM if (__skb_ext_exist(ext, SKB_EXT_SEC_PATH)) skb_ext_put_sp(skb_ext_get_ptr(ext, SKB_EXT_SEC_PATH)); #endif #ifdef CONFIG_MCTP_FLOWS if (__skb_ext_exist(ext, SKB_EXT_MCTP)) skb_ext_put_mctp(skb_ext_get_ptr(ext, SKB_EXT_MCTP)); #endif kmem_cache_free(skbuff_ext_cache, ext); } EXPORT_SYMBOL(__skb_ext_put); #endif /* CONFIG_SKB_EXTENSIONS */ static void kfree_skb_napi_cache(struct sk_buff *skb) { /* if SKB is a clone, don't handle this case */ if (skb->fclone != SKB_FCLONE_UNAVAILABLE) { __kfree_skb(skb); return; } local_bh_disable(); __napi_kfree_skb(skb, SKB_CONSUMED); local_bh_enable(); } /** * skb_attempt_defer_free - queue skb for remote freeing * @skb: buffer * * Put @skb in a per-cpu list, using the cpu which * allocated the skb/pages to reduce false sharing * and memory zone spinlock contention. */ void skb_attempt_defer_free(struct sk_buff *skb) { int cpu = skb->alloc_cpu; struct softnet_data *sd; unsigned int defer_max; bool kick; if (cpu == raw_smp_processor_id() || WARN_ON_ONCE(cpu >= nr_cpu_ids) || !cpu_online(cpu)) { nodefer: kfree_skb_napi_cache(skb); return; } DEBUG_NET_WARN_ON_ONCE(skb_dst(skb)); DEBUG_NET_WARN_ON_ONCE(skb->destructor); sd = &per_cpu(softnet_data, cpu); defer_max = READ_ONCE(net_hotdata.sysctl_skb_defer_max); if (READ_ONCE(sd->defer_count) >= defer_max) goto nodefer; spin_lock_bh(&sd->defer_lock); /* Send an IPI every time queue reaches half capacity. */ kick = sd->defer_count == (defer_max >> 1); /* Paired with the READ_ONCE() few lines above */ WRITE_ONCE(sd->defer_count, sd->defer_count + 1); skb->next = sd->defer_list; /* Paired with READ_ONCE() in skb_defer_free_flush() */ WRITE_ONCE(sd->defer_list, skb); spin_unlock_bh(&sd->defer_lock); /* Make sure to trigger NET_RX_SOFTIRQ on the remote CPU * if we are unlucky enough (this seems very unlikely). */ if (unlikely(kick)) kick_defer_list_purge(sd, cpu); } static void skb_splice_csum_page(struct sk_buff *skb, struct page *page, size_t offset, size_t len) { const char *kaddr; __wsum csum; kaddr = kmap_local_page(page); csum = csum_partial(kaddr + offset, len, 0); kunmap_local(kaddr); skb->csum = csum_block_add(skb->csum, csum, skb->len); } /** * skb_splice_from_iter - Splice (or copy) pages to skbuff * @skb: The buffer to add pages to * @iter: Iterator representing the pages to be added * @maxsize: Maximum amount of pages to be added * @gfp: Allocation flags * * This is a common helper function for supporting MSG_SPLICE_PAGES. It * extracts pages from an iterator and adds them to the socket buffer if * possible, copying them to fragments if not possible (such as if they're slab * pages). * * Returns the amount of data spliced/copied or -EMSGSIZE if there's * insufficient space in the buffer to transfer anything. */ ssize_t skb_splice_from_iter(struct sk_buff *skb, struct iov_iter *iter, ssize_t maxsize, gfp_t gfp) { size_t frag_limit = READ_ONCE(net_hotdata.sysctl_max_skb_frags); struct page *pages[8], **ppages = pages; ssize_t spliced = 0, ret = 0; unsigned int i; while (iter->count > 0) { ssize_t space, nr, len; size_t off; ret = -EMSGSIZE; space = frag_limit - skb_shinfo(skb)->nr_frags; if (space < 0) break; /* We might be able to coalesce without increasing nr_frags */ nr = clamp_t(size_t, space, 1, ARRAY_SIZE(pages)); len = iov_iter_extract_pages(iter, &ppages, maxsize, nr, 0, &off); if (len <= 0) { ret = len ?: -EIO; break; } i = 0; do { struct page *page = pages[i++]; size_t part = min_t(size_t, PAGE_SIZE - off, len); ret = -EIO; if (WARN_ON_ONCE(!sendpage_ok(page))) goto out; ret = skb_append_pagefrags(skb, page, off, part, frag_limit); if (ret < 0) { iov_iter_revert(iter, len); goto out; } if (skb->ip_summed == CHECKSUM_NONE) skb_splice_csum_page(skb, page, off, part); off = 0; spliced += part; maxsize -= part; len -= part; } while (len > 0); if (maxsize <= 0) break; } out: skb_len_add(skb, spliced); return spliced ?: ret; } EXPORT_SYMBOL(skb_splice_from_iter); static __always_inline size_t memcpy_from_iter_csum(void *iter_from, size_t progress, size_t len, void *to, void *priv2) { __wsum *csum = priv2; __wsum next = csum_partial_copy_nocheck(iter_from, to + progress, len); *csum = csum_block_add(*csum, next, progress); return 0; } static __always_inline size_t copy_from_user_iter_csum(void __user *iter_from, size_t progress, size_t len, void *to, void *priv2) { __wsum next, *csum = priv2; next = csum_and_copy_from_user(iter_from, to + progress, len); *csum = csum_block_add(*csum, next, progress); return next ? 0 : len; } bool csum_and_copy_from_iter_full(void *addr, size_t bytes, __wsum *csum, struct iov_iter *i) { size_t copied; if (WARN_ON_ONCE(!i->data_source)) return false; copied = iterate_and_advance2(i, bytes, addr, csum, copy_from_user_iter_csum, memcpy_from_iter_csum); if (likely(copied == bytes)) return true; iov_iter_revert(i, copied); return false; } EXPORT_SYMBOL(csum_and_copy_from_iter_full);
23 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 // SPDX-License-Identifier: GPL-2.0 /* * The class-specific portions of the driver model * * Copyright (c) 2001-2003 Patrick Mochel <mochel@osdl.org> * Copyright (c) 2004-2009 Greg Kroah-Hartman <gregkh@suse.de> * Copyright (c) 2008-2009 Novell Inc. * Copyright (c) 2012-2019 Greg Kroah-Hartman <gregkh@linuxfoundation.org> * Copyright (c) 2012-2019 Linux Foundation * * See Documentation/driver-api/driver-model/ for more information. */ #ifndef _DEVICE_CLASS_H_ #define _DEVICE_CLASS_H_ #include <linux/kobject.h> #include <linux/klist.h> #include <linux/pm.h> #include <linux/device/bus.h> struct device; struct fwnode_handle; /** * struct class - device classes * @name: Name of the class. * @class_groups: Default attributes of this class. * @dev_groups: Default attributes of the devices that belong to the class. * @dev_uevent: Called when a device is added, removed from this class, or a * few other things that generate uevents to add the environment * variables. * @devnode: Callback to provide the devtmpfs. * @class_release: Called to release this class. * @dev_release: Called to release the device. * @shutdown_pre: Called at shut-down time before driver shutdown. * @ns_type: Callbacks so sysfs can detemine namespaces. * @namespace: Namespace of the device belongs to this class. * @get_ownership: Allows class to specify uid/gid of the sysfs directories * for the devices belonging to the class. Usually tied to * device's namespace. * @pm: The default device power management operations of this class. * * A class is a higher-level view of a device that abstracts out low-level * implementation details. Drivers may see a SCSI disk or an ATA disk, but, * at the class level, they are all simply disks. Classes allow user space * to work with devices based on what they do, rather than how they are * connected or how they work. */ struct class { const char *name; const struct attribute_group **class_groups; const struct attribute_group **dev_groups; int (*dev_uevent)(const struct device *dev, struct kobj_uevent_env *env); char *(*devnode)(const struct device *dev, umode_t *mode); void (*class_release)(const struct class *class); void (*dev_release)(struct device *dev); int (*shutdown_pre)(struct device *dev); const struct kobj_ns_type_operations *ns_type; const void *(*namespace)(const struct device *dev); void (*get_ownership)(const struct device *dev, kuid_t *uid, kgid_t *gid); const struct dev_pm_ops *pm; }; struct class_dev_iter { struct klist_iter ki; const struct device_type *type; struct subsys_private *sp; }; int __must_check class_register(const struct class *class); void class_unregister(const struct class *class); bool class_is_registered(const struct class *class); struct class_compat; struct class_compat *class_compat_register(const char *name); void class_compat_unregister(struct class_compat *cls); int class_compat_create_link(struct class_compat *cls, struct device *dev, struct device *device_link); void class_compat_remove_link(struct class_compat *cls, struct device *dev, struct device *device_link); void class_dev_iter_init(struct class_dev_iter *iter, const struct class *class, const struct device *start, const struct device_type *type); struct device *class_dev_iter_next(struct class_dev_iter *iter); void class_dev_iter_exit(struct class_dev_iter *iter); int class_for_each_device(const struct class *class, const struct device *start, void *data, int (*fn)(struct device *dev, void *data)); struct device *class_find_device(const struct class *class, const struct device *start, const void *data, device_match_t match); /** * class_find_device_by_name - device iterator for locating a particular device * of a specific name. * @class: class type * @name: name of the device to match */ static inline struct device *class_find_device_by_name(const struct class *class, const char *name) { return class_find_device(class, NULL, name, device_match_name); } /** * class_find_device_by_of_node : device iterator for locating a particular device * matching the of_node. * @class: class type * @np: of_node of the device to match. */ static inline struct device *class_find_device_by_of_node(const struct class *class, const struct device_node *np) { return class_find_device(class, NULL, np, device_match_of_node); } /** * class_find_device_by_fwnode : device iterator for locating a particular device * matching the fwnode. * @class: class type * @fwnode: fwnode of the device to match. */ static inline struct device *class_find_device_by_fwnode(const struct class *class, const struct fwnode_handle *fwnode) { return class_find_device(class, NULL, fwnode, device_match_fwnode); } /** * class_find_device_by_devt : device iterator for locating a particular device * matching the device type. * @class: class type * @devt: device type of the device to match. */ static inline struct device *class_find_device_by_devt(const struct class *class, dev_t devt) { return class_find_device(class, NULL, &devt, device_match_devt); } #ifdef CONFIG_ACPI struct acpi_device; /** * class_find_device_by_acpi_dev : device iterator for locating a particular * device matching the ACPI_COMPANION device. * @class: class type * @adev: ACPI_COMPANION device to match. */ static inline struct device *class_find_device_by_acpi_dev(const struct class *class, const struct acpi_device *adev) { return class_find_device(class, NULL, adev, device_match_acpi_dev); } #else static inline struct device *class_find_device_by_acpi_dev(const struct class *class, const void *adev) { return NULL; } #endif struct class_attribute { struct attribute attr; ssize_t (*show)(const struct class *class, const struct class_attribute *attr, char *buf); ssize_t (*store)(const struct class *class, const struct class_attribute *attr, const char *buf, size_t count); }; #define CLASS_ATTR_RW(_name) \ struct class_attribute class_attr_##_name = __ATTR_RW(_name) #define CLASS_ATTR_RO(_name) \ struct class_attribute class_attr_##_name = __ATTR_RO(_name) #define CLASS_ATTR_WO(_name) \ struct class_attribute class_attr_##_name = __ATTR_WO(_name) int __must_check class_create_file_ns(const struct class *class, const struct class_attribute *attr, const void *ns); void class_remove_file_ns(const struct class *class, const struct class_attribute *attr, const void *ns); static inline int __must_check class_create_file(const struct class *class, const struct class_attribute *attr) { return class_create_file_ns(class, attr, NULL); } static inline void class_remove_file(const struct class *class, const struct class_attribute *attr) { return class_remove_file_ns(class, attr, NULL); } /* Simple class attribute that is just a static string */ struct class_attribute_string { struct class_attribute attr; char *str; }; /* Currently read-only only */ #define _CLASS_ATTR_STRING(_name, _mode, _str) \ { __ATTR(_name, _mode, show_class_attr_string, NULL), _str } #define CLASS_ATTR_STRING(_name, _mode, _str) \ struct class_attribute_string class_attr_##_name = \ _CLASS_ATTR_STRING(_name, _mode, _str) ssize_t show_class_attr_string(const struct class *class, const struct class_attribute *attr, char *buf); struct class_interface { struct list_head node; const struct class *class; int (*add_dev) (struct device *dev); void (*remove_dev) (struct device *dev); }; int __must_check class_interface_register(struct class_interface *); void class_interface_unregister(struct class_interface *); struct class * __must_check class_create(const char *name); void class_destroy(const struct class *cls); #endif /* _DEVICE_CLASS_H_ */
6 5 2 2 5 5 5 2 3 2 2 2 2 2 2 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 /* * Cryptographic API. * * T10 Data Integrity Field CRC16 Crypto Transform using PCLMULQDQ Instructions * * Copyright (C) 2013 Intel Corporation * Author: Tim Chen <tim.c.chen@linux.intel.com> * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the Free * Software Foundation; either version 2 of the License, or (at your option) * any later version. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * */ #include <linux/types.h> #include <linux/module.h> #include <linux/crc-t10dif.h> #include <crypto/internal/hash.h> #include <crypto/internal/simd.h> #include <linux/init.h> #include <linux/string.h> #include <linux/kernel.h> #include <asm/cpufeatures.h> #include <asm/cpu_device_id.h> #include <asm/simd.h> asmlinkage u16 crc_t10dif_pcl(u16 init_crc, const u8 *buf, size_t len); struct chksum_desc_ctx { __u16 crc; }; static int chksum_init(struct shash_desc *desc) { struct chksum_desc_ctx *ctx = shash_desc_ctx(desc); ctx->crc = 0; return 0; } static int chksum_update(struct shash_desc *desc, const u8 *data, unsigned int length) { struct chksum_desc_ctx *ctx = shash_desc_ctx(desc); if (length >= 16 && crypto_simd_usable()) { kernel_fpu_begin(); ctx->crc = crc_t10dif_pcl(ctx->crc, data, length); kernel_fpu_end(); } else ctx->crc = crc_t10dif_generic(ctx->crc, data, length); return 0; } static int chksum_final(struct shash_desc *desc, u8 *out) { struct chksum_desc_ctx *ctx = shash_desc_ctx(desc); *(__u16 *)out = ctx->crc; return 0; } static int __chksum_finup(__u16 crc, const u8 *data, unsigned int len, u8 *out) { if (len >= 16 && crypto_simd_usable()) { kernel_fpu_begin(); *(__u16 *)out = crc_t10dif_pcl(crc, data, len); kernel_fpu_end(); } else *(__u16 *)out = crc_t10dif_generic(crc, data, len); return 0; } static int chksum_finup(struct shash_desc *desc, const u8 *data, unsigned int len, u8 *out) { struct chksum_desc_ctx *ctx = shash_desc_ctx(desc); return __chksum_finup(ctx->crc, data, len, out); } static int chksum_digest(struct shash_desc *desc, const u8 *data, unsigned int length, u8 *out) { return __chksum_finup(0, data, length, out); } static struct shash_alg alg = { .digestsize = CRC_T10DIF_DIGEST_SIZE, .init = chksum_init, .update = chksum_update, .final = chksum_final, .finup = chksum_finup, .digest = chksum_digest, .descsize = sizeof(struct chksum_desc_ctx), .base = { .cra_name = "crct10dif", .cra_driver_name = "crct10dif-pclmul", .cra_priority = 200, .cra_blocksize = CRC_T10DIF_BLOCK_SIZE, .cra_module = THIS_MODULE, } }; static const struct x86_cpu_id crct10dif_cpu_id[] = { X86_MATCH_FEATURE(X86_FEATURE_PCLMULQDQ, NULL), {} }; MODULE_DEVICE_TABLE(x86cpu, crct10dif_cpu_id); static int __init crct10dif_intel_mod_init(void) { if (!x86_match_cpu(crct10dif_cpu_id)) return -ENODEV; return crypto_register_shash(&alg); } static void __exit crct10dif_intel_mod_fini(void) { crypto_unregister_shash(&alg); } module_init(crct10dif_intel_mod_init); module_exit(crct10dif_intel_mod_fini); MODULE_AUTHOR("Tim Chen <tim.c.chen@linux.intel.com>"); MODULE_DESCRIPTION("T10 DIF CRC calculation accelerated with PCLMULQDQ."); MODULE_LICENSE("GPL"); MODULE_ALIAS_CRYPTO("crct10dif"); MODULE_ALIAS_CRYPTO("crct10dif-pclmul");
1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 // SPDX-License-Identifier: GPL-2.0-or-later /* * CDC Ethernet based networking peripherals * Copyright (C) 2003-2005 by David Brownell * Copyright (C) 2006 by Ole Andre Vadla Ravnas (ActiveSync) */ // #define DEBUG // error path messages, extra info // #define VERBOSE // more; success messages #include <linux/module.h> #include <linux/netdevice.h> #include <linux/etherdevice.h> #include <linux/ethtool.h> #include <linux/workqueue.h> #include <linux/mii.h> #include <linux/usb.h> #include <linux/usb/cdc.h> #include <linux/usb/usbnet.h> #if IS_ENABLED(CONFIG_USB_NET_RNDIS_HOST) static int is_rndis(struct usb_interface_descriptor *desc) { return (desc->bInterfaceClass == USB_CLASS_COMM && desc->bInterfaceSubClass == 2 && desc->bInterfaceProtocol == 0xff); } static int is_activesync(struct usb_interface_descriptor *desc) { return (desc->bInterfaceClass == USB_CLASS_MISC && desc->bInterfaceSubClass == 1 && desc->bInterfaceProtocol == 1); } static int is_wireless_rndis(struct usb_interface_descriptor *desc) { return (desc->bInterfaceClass == USB_CLASS_WIRELESS_CONTROLLER && desc->bInterfaceSubClass == 1 && desc->bInterfaceProtocol == 3); } static int is_novatel_rndis(struct usb_interface_descriptor *desc) { return (desc->bInterfaceClass == USB_CLASS_MISC && desc->bInterfaceSubClass == 4 && desc->bInterfaceProtocol == 1); } #else #define is_rndis(desc) 0 #define is_activesync(desc) 0 #define is_wireless_rndis(desc) 0 #define is_novatel_rndis(desc) 0 #endif static const u8 mbm_guid[16] = { 0xa3, 0x17, 0xa8, 0x8b, 0x04, 0x5e, 0x4f, 0x01, 0xa6, 0x07, 0xc0, 0xff, 0xcb, 0x7e, 0x39, 0x2a, }; void usbnet_cdc_update_filter(struct usbnet *dev) { struct net_device *net = dev->net; u16 cdc_filter = USB_CDC_PACKET_TYPE_DIRECTED | USB_CDC_PACKET_TYPE_BROADCAST; /* filtering on the device is an optional feature and not worth * the hassle so we just roughly care about snooping and if any * multicast is requested, we take every multicast */ if (net->flags & IFF_PROMISC) cdc_filter |= USB_CDC_PACKET_TYPE_PROMISCUOUS; if (!netdev_mc_empty(net) || (net->flags & IFF_ALLMULTI)) cdc_filter |= USB_CDC_PACKET_TYPE_ALL_MULTICAST; usb_control_msg(dev->udev, usb_sndctrlpipe(dev->udev, 0), USB_CDC_SET_ETHERNET_PACKET_FILTER, USB_TYPE_CLASS | USB_RECIP_INTERFACE, cdc_filter, dev->intf->cur_altsetting->desc.bInterfaceNumber, NULL, 0, USB_CTRL_SET_TIMEOUT ); } EXPORT_SYMBOL_GPL(usbnet_cdc_update_filter); /* We need to override usbnet_*_link_ksettings in bind() */ static const struct ethtool_ops cdc_ether_ethtool_ops = { .get_link = usbnet_get_link, .nway_reset = usbnet_nway_reset, .get_drvinfo = usbnet_get_drvinfo, .get_msglevel = usbnet_get_msglevel, .set_msglevel = usbnet_set_msglevel, .get_ts_info = ethtool_op_get_ts_info, .get_link_ksettings = usbnet_get_link_ksettings_internal, .set_link_ksettings = NULL, }; /* probes control interface, claims data interface, collects the bulk * endpoints, activates data interface (if needed), maybe sets MTU. * all pure cdc, except for certain firmware workarounds, and knowing * that rndis uses one different rule. */ int usbnet_generic_cdc_bind(struct usbnet *dev, struct usb_interface *intf) { u8 *buf = intf->cur_altsetting->extra; int len = intf->cur_altsetting->extralen; struct usb_interface_descriptor *d; struct cdc_state *info = (void *) &dev->data; int status; int rndis; bool android_rndis_quirk = false; struct usb_driver *driver = driver_of(intf); struct usb_cdc_parsed_header header; if (sizeof(dev->data) < sizeof(*info)) return -EDOM; /* expect strict spec conformance for the descriptors, but * cope with firmware which stores them in the wrong place */ if (len == 0 && dev->udev->actconfig->extralen) { /* Motorola SB4100 (and others: Brad Hards says it's * from a Broadcom design) put CDC descriptors here */ buf = dev->udev->actconfig->extra; len = dev->udev->actconfig->extralen; dev_dbg(&intf->dev, "CDC descriptors on config\n"); } /* Maybe CDC descriptors are after the endpoint? This bug has * been seen on some 2Wire Inc RNDIS-ish products. */ if (len == 0) { struct usb_host_endpoint *hep; hep = intf->cur_altsetting->endpoint; if (hep) { buf = hep->extra; len = hep->extralen; } if (len) dev_dbg(&intf->dev, "CDC descriptors on endpoint\n"); } /* this assumes that if there's a non-RNDIS vendor variant * of cdc-acm, it'll fail RNDIS requests cleanly. */ rndis = (is_rndis(&intf->cur_altsetting->desc) || is_activesync(&intf->cur_altsetting->desc) || is_wireless_rndis(&intf->cur_altsetting->desc) || is_novatel_rndis(&intf->cur_altsetting->desc)); memset(info, 0, sizeof(*info)); info->control = intf; cdc_parse_cdc_header(&header, intf, buf, len); info->u = header.usb_cdc_union_desc; info->header = header.usb_cdc_header_desc; info->ether = header.usb_cdc_ether_desc; if (!info->u) { if (rndis) goto skip; else /* in that case a quirk is mandatory */ goto bad_desc; } /* we need a master/control interface (what we're * probed with) and a slave/data interface; union * descriptors sort this all out. */ info->control = usb_ifnum_to_if(dev->udev, info->u->bMasterInterface0); info->data = usb_ifnum_to_if(dev->udev, info->u->bSlaveInterface0); if (!info->control || !info->data) { dev_dbg(&intf->dev, "master #%u/%p slave #%u/%p\n", info->u->bMasterInterface0, info->control, info->u->bSlaveInterface0, info->data); /* fall back to hard-wiring for RNDIS */ if (rndis) { android_rndis_quirk = true; goto skip; } goto bad_desc; } if (info->control != intf) { dev_dbg(&intf->dev, "bogus CDC Union\n"); /* Ambit USB Cable Modem (and maybe others) * interchanges master and slave interface. */ if (info->data == intf) { info->data = info->control; info->control = intf; } else goto bad_desc; } /* some devices merge these - skip class check */ if (info->control == info->data) goto skip; /* a data interface altsetting does the real i/o */ d = &info->data->cur_altsetting->desc; if (d->bInterfaceClass != USB_CLASS_CDC_DATA) { dev_dbg(&intf->dev, "slave class %u\n", d->bInterfaceClass); goto bad_desc; } skip: /* Communication class functions with bmCapabilities are not * RNDIS. But some Wireless class RNDIS functions use * bmCapabilities for their own purpose. The failsafe is * therefore applied only to Communication class RNDIS * functions. The rndis test is redundant, but a cheap * optimization. */ if (rndis && is_rndis(&intf->cur_altsetting->desc) && header.usb_cdc_acm_descriptor && header.usb_cdc_acm_descriptor->bmCapabilities) { dev_dbg(&intf->dev, "ACM capabilities %02x, not really RNDIS?\n", header.usb_cdc_acm_descriptor->bmCapabilities); goto bad_desc; } if (header.usb_cdc_ether_desc && info->ether->wMaxSegmentSize) { dev->hard_mtu = le16_to_cpu(info->ether->wMaxSegmentSize); /* because of Zaurus, we may be ignoring the host * side link address we were given. */ } if (header.usb_cdc_mdlm_desc && memcmp(header.usb_cdc_mdlm_desc->bGUID, mbm_guid, 16)) { dev_dbg(&intf->dev, "GUID doesn't match\n"); goto bad_desc; } if (header.usb_cdc_mdlm_detail_desc && header.usb_cdc_mdlm_detail_desc->bLength < (sizeof(struct usb_cdc_mdlm_detail_desc) + 1)) { dev_dbg(&intf->dev, "Descriptor too short\n"); goto bad_desc; } /* Microsoft ActiveSync based and some regular RNDIS devices lack the * CDC descriptors, so we'll hard-wire the interfaces and not check * for descriptors. * * Some Android RNDIS devices have a CDC Union descriptor pointing * to non-existing interfaces. Ignore that and attempt the same * hard-wired 0 and 1 interfaces. */ if (rndis && (!info->u || android_rndis_quirk)) { info->control = usb_ifnum_to_if(dev->udev, 0); info->data = usb_ifnum_to_if(dev->udev, 1); if (!info->control || !info->data || info->control != intf) { dev_dbg(&intf->dev, "rndis: master #0/%p slave #1/%p\n", info->control, info->data); goto bad_desc; } } else if (!info->header || (!rndis && !info->ether)) { dev_dbg(&intf->dev, "missing cdc %s%s%sdescriptor\n", info->header ? "" : "header ", info->u ? "" : "union ", info->ether ? "" : "ether "); goto bad_desc; } /* claim data interface and set it up ... with side effects. * network traffic can't flow until an altsetting is enabled. */ if (info->data != info->control) { status = usb_driver_claim_interface(driver, info->data, dev); if (status < 0) return status; } status = usbnet_get_endpoints(dev, info->data); if (status < 0) { /* ensure immediate exit from usbnet_disconnect */ usb_set_intfdata(info->data, NULL); if (info->data != info->control) usb_driver_release_interface(driver, info->data); return status; } /* status endpoint: optional for CDC Ethernet, not RNDIS (or ACM) */ if (info->data != info->control) dev->status = NULL; if (info->control->cur_altsetting->desc.bNumEndpoints == 1) { struct usb_endpoint_descriptor *desc; dev->status = &info->control->cur_altsetting->endpoint[0]; desc = &dev->status->desc; if (!usb_endpoint_is_int_in(desc) || (le16_to_cpu(desc->wMaxPacketSize) < sizeof(struct usb_cdc_notification)) || !desc->bInterval) { dev_dbg(&intf->dev, "bad notification endpoint\n"); dev->status = NULL; } } if (rndis && !dev->status) { dev_dbg(&intf->dev, "missing RNDIS status endpoint\n"); usb_set_intfdata(info->data, NULL); usb_driver_release_interface(driver, info->data); return -ENODEV; } /* override ethtool_ops */ dev->net->ethtool_ops = &cdc_ether_ethtool_ops; return 0; bad_desc: dev_info(&dev->udev->dev, "bad CDC descriptors\n"); return -ENODEV; } EXPORT_SYMBOL_GPL(usbnet_generic_cdc_bind); /* like usbnet_generic_cdc_bind() but handles filter initialization * correctly */ int usbnet_ether_cdc_bind(struct usbnet *dev, struct usb_interface *intf) { int rv; rv = usbnet_generic_cdc_bind(dev, intf); if (rv < 0) goto bail_out; /* Some devices don't initialise properly. In particular * the packet filter is not reset. There are devices that * don't do reset all the way. So the packet filter should * be set to a sane initial value. */ usbnet_cdc_update_filter(dev); bail_out: return rv; } EXPORT_SYMBOL_GPL(usbnet_ether_cdc_bind); void usbnet_cdc_unbind(struct usbnet *dev, struct usb_interface *intf) { struct cdc_state *info = (void *) &dev->data; struct usb_driver *driver = driver_of(intf); /* combined interface - nothing to do */ if (info->data == info->control) return; /* disconnect master --> disconnect slave */ if (intf == info->control && info->data) { /* ensure immediate exit from usbnet_disconnect */ usb_set_intfdata(info->data, NULL); usb_driver_release_interface(driver, info->data); info->data = NULL; } /* and vice versa (just in case) */ else if (intf == info->data && info->control) { /* ensure immediate exit from usbnet_disconnect */ usb_set_intfdata(info->control, NULL); usb_driver_release_interface(driver, info->control); info->control = NULL; } } EXPORT_SYMBOL_GPL(usbnet_cdc_unbind); /* Communications Device Class, Ethernet Control model * * Takes two interfaces. The DATA interface is inactive till an altsetting * is selected. Configuration data includes class descriptors. There's * an optional status endpoint on the control interface. * * This should interop with whatever the 2.4 "CDCEther.c" driver * (by Brad Hards) talked with, with more functionality. */ static void speed_change(struct usbnet *dev, __le32 *speeds) { dev->tx_speed = __le32_to_cpu(speeds[0]); dev->rx_speed = __le32_to_cpu(speeds[1]); } void usbnet_cdc_status(struct usbnet *dev, struct urb *urb) { struct usb_cdc_notification *event; if (urb->actual_length < sizeof(*event)) return; /* SPEED_CHANGE can get split into two 8-byte packets */ if (test_and_clear_bit(EVENT_STS_SPLIT, &dev->flags)) { speed_change(dev, (__le32 *) urb->transfer_buffer); return; } event = urb->transfer_buffer; switch (event->bNotificationType) { case USB_CDC_NOTIFY_NETWORK_CONNECTION: netif_dbg(dev, timer, dev->net, "CDC: carrier %s\n", event->wValue ? "on" : "off"); if (netif_carrier_ok(dev->net) != !!event->wValue) usbnet_link_change(dev, !!event->wValue, 0); break; case USB_CDC_NOTIFY_SPEED_CHANGE: /* tx/rx rates */ netif_dbg(dev, timer, dev->net, "CDC: speed change (len %d)\n", urb->actual_length); if (urb->actual_length != (sizeof(*event) + 8)) set_bit(EVENT_STS_SPLIT, &dev->flags); else speed_change(dev, (__le32 *) &event[1]); break; /* USB_CDC_NOTIFY_RESPONSE_AVAILABLE can happen too (e.g. RNDIS), * but there are no standard formats for the response data. */ default: netdev_err(dev->net, "CDC: unexpected notification %02x!\n", event->bNotificationType); break; } } EXPORT_SYMBOL_GPL(usbnet_cdc_status); int usbnet_cdc_bind(struct usbnet *dev, struct usb_interface *intf) { int status; struct cdc_state *info = (void *) &dev->data; BUILD_BUG_ON((sizeof(((struct usbnet *)0)->data) < sizeof(struct cdc_state))); status = usbnet_ether_cdc_bind(dev, intf); if (status < 0) return status; status = usbnet_get_ethernet_addr(dev, info->ether->iMACAddress); if (status < 0) { usb_set_intfdata(info->data, NULL); usb_driver_release_interface(driver_of(intf), info->data); return status; } return 0; } EXPORT_SYMBOL_GPL(usbnet_cdc_bind); static int usbnet_cdc_zte_bind(struct usbnet *dev, struct usb_interface *intf) { int status = usbnet_cdc_bind(dev, intf); if (!status && (dev->net->dev_addr[0] & 0x02)) eth_hw_addr_random(dev->net); return status; } /* Make sure packets have correct destination MAC address * * A firmware bug observed on some devices (ZTE MF823/831/910) is that the * device sends packets with a static, bogus, random MAC address (event if * device MAC address has been updated). Always set MAC address to that of the * device. */ int usbnet_cdc_zte_rx_fixup(struct usbnet *dev, struct sk_buff *skb) { if (skb->len < ETH_HLEN || !(skb->data[0] & 0x02)) return 1; skb_reset_mac_header(skb); ether_addr_copy(eth_hdr(skb)->h_dest, dev->net->dev_addr); return 1; } EXPORT_SYMBOL_GPL(usbnet_cdc_zte_rx_fixup); /* Ensure correct link state * * Some devices (ZTE MF823/831/910) export two carrier on notifications when * connected. This causes the link state to be incorrect. Work around this by * always setting the state to off, then on. */ static void usbnet_cdc_zte_status(struct usbnet *dev, struct urb *urb) { struct usb_cdc_notification *event; if (urb->actual_length < sizeof(*event)) return; event = urb->transfer_buffer; if (event->bNotificationType != USB_CDC_NOTIFY_NETWORK_CONNECTION) { usbnet_cdc_status(dev, urb); return; } netif_dbg(dev, timer, dev->net, "CDC: carrier %s\n", event->wValue ? "on" : "off"); if (event->wValue && netif_carrier_ok(dev->net)) netif_carrier_off(dev->net); usbnet_link_change(dev, !!event->wValue, 0); } static const struct driver_info cdc_info = { .description = "CDC Ethernet Device", .flags = FLAG_ETHER | FLAG_POINTTOPOINT, .bind = usbnet_cdc_bind, .unbind = usbnet_cdc_unbind, .status = usbnet_cdc_status, .set_rx_mode = usbnet_cdc_update_filter, .manage_power = usbnet_manage_power, }; static const struct driver_info zte_cdc_info = { .description = "ZTE CDC Ethernet Device", .flags = FLAG_ETHER | FLAG_POINTTOPOINT, .bind = usbnet_cdc_zte_bind, .unbind = usbnet_cdc_unbind, .status = usbnet_cdc_zte_status, .set_rx_mode = usbnet_cdc_update_filter, .manage_power = usbnet_manage_power, .rx_fixup = usbnet_cdc_zte_rx_fixup, }; static const struct driver_info wwan_info = { .description = "Mobile Broadband Network Device", .flags = FLAG_WWAN, .bind = usbnet_cdc_bind, .unbind = usbnet_cdc_unbind, .status = usbnet_cdc_status, .set_rx_mode = usbnet_cdc_update_filter, .manage_power = usbnet_manage_power, }; /*-------------------------------------------------------------------------*/ #define HUAWEI_VENDOR_ID 0x12D1 #define NOVATEL_VENDOR_ID 0x1410 #define ZTE_VENDOR_ID 0x19D2 #define DELL_VENDOR_ID 0x413C #define REALTEK_VENDOR_ID 0x0bda #define SAMSUNG_VENDOR_ID 0x04e8 #define LENOVO_VENDOR_ID 0x17ef #define LINKSYS_VENDOR_ID 0x13b1 #define NVIDIA_VENDOR_ID 0x0955 #define HP_VENDOR_ID 0x03f0 #define MICROSOFT_VENDOR_ID 0x045e #define UBLOX_VENDOR_ID 0x1546 #define TPLINK_VENDOR_ID 0x2357 #define AQUANTIA_VENDOR_ID 0x2eca #define ASIX_VENDOR_ID 0x0b95 static const struct usb_device_id products[] = { /* BLACKLIST !! * * First blacklist any products that are egregiously nonconformant * with the CDC Ethernet specs. Minor braindamage we cope with; when * they're not even trying, needing a separate driver is only the first * of the differences to show up. */ #define ZAURUS_MASTER_INTERFACE \ .bInterfaceClass = USB_CLASS_COMM, \ .bInterfaceSubClass = USB_CDC_SUBCLASS_ETHERNET, \ .bInterfaceProtocol = USB_CDC_PROTO_NONE #define ZAURUS_FAKE_INTERFACE \ .bInterfaceClass = USB_CLASS_COMM, \ .bInterfaceSubClass = USB_CDC_SUBCLASS_MDLM, \ .bInterfaceProtocol = USB_CDC_PROTO_NONE /* SA-1100 based Sharp Zaurus ("collie"), or compatible; * wire-incompatible with true CDC Ethernet implementations. * (And, it seems, needlessly so...) */ { .match_flags = USB_DEVICE_ID_MATCH_INT_INFO | USB_DEVICE_ID_MATCH_DEVICE, .idVendor = 0x04DD, .idProduct = 0x8004, ZAURUS_MASTER_INTERFACE, .driver_info = 0, }, /* PXA-25x based Sharp Zaurii. Note that it seems some of these * (later models especially) may have shipped only with firmware * advertising false "CDC MDLM" compatibility ... but we're not * clear which models did that, so for now let's assume the worst. */ { .match_flags = USB_DEVICE_ID_MATCH_INT_INFO | USB_DEVICE_ID_MATCH_DEVICE, .idVendor = 0x04DD, .idProduct = 0x8005, /* A-300 */ ZAURUS_MASTER_INTERFACE, .driver_info = 0, }, { .match_flags = USB_DEVICE_ID_MATCH_INT_INFO | USB_DEVICE_ID_MATCH_DEVICE, .idVendor = 0x04DD, .idProduct = 0x8005, /* A-300 */ ZAURUS_FAKE_INTERFACE, .driver_info = 0, }, { .match_flags = USB_DEVICE_ID_MATCH_INT_INFO | USB_DEVICE_ID_MATCH_DEVICE, .idVendor = 0x04DD, .idProduct = 0x8006, /* B-500/SL-5600 */ ZAURUS_MASTER_INTERFACE, .driver_info = 0, }, { .match_flags = USB_DEVICE_ID_MATCH_INT_INFO | USB_DEVICE_ID_MATCH_DEVICE, .idVendor = 0x04DD, .idProduct = 0x8006, /* B-500/SL-5600 */ ZAURUS_FAKE_INTERFACE, .driver_info = 0, }, { .match_flags = USB_DEVICE_ID_MATCH_INT_INFO | USB_DEVICE_ID_MATCH_DEVICE, .idVendor = 0x04DD, .idProduct = 0x8007, /* C-700 */ ZAURUS_MASTER_INTERFACE, .driver_info = 0, }, { .match_flags = USB_DEVICE_ID_MATCH_INT_INFO | USB_DEVICE_ID_MATCH_DEVICE, .idVendor = 0x04DD, .idProduct = 0x8007, /* C-700 */ ZAURUS_FAKE_INTERFACE, .driver_info = 0, }, { .match_flags = USB_DEVICE_ID_MATCH_INT_INFO | USB_DEVICE_ID_MATCH_DEVICE, .idVendor = 0x04DD, .idProduct = 0x9031, /* C-750 C-760 */ ZAURUS_MASTER_INTERFACE, .driver_info = 0, }, { .match_flags = USB_DEVICE_ID_MATCH_INT_INFO | USB_DEVICE_ID_MATCH_DEVICE, .idVendor = 0x04DD, .idProduct = 0x9032, /* SL-6000 */ ZAURUS_MASTER_INTERFACE, .driver_info = 0, }, { .match_flags = USB_DEVICE_ID_MATCH_INT_INFO | USB_DEVICE_ID_MATCH_DEVICE, .idVendor = 0x04DD, .idProduct = 0x9032, /* SL-6000 */ ZAURUS_FAKE_INTERFACE, .driver_info = 0, }, { .match_flags = USB_DEVICE_ID_MATCH_INT_INFO | USB_DEVICE_ID_MATCH_DEVICE, .idVendor = 0x04DD, /* reported with some C860 units */ .idProduct = 0x9050, /* C-860 */ ZAURUS_MASTER_INTERFACE, .driver_info = 0, }, /* Olympus has some models with a Zaurus-compatible option. * R-1000 uses a FreeScale i.MXL cpu (ARMv4T) */ { .match_flags = USB_DEVICE_ID_MATCH_INT_INFO | USB_DEVICE_ID_MATCH_DEVICE, .idVendor = 0x07B4, .idProduct = 0x0F02, /* R-1000 */ ZAURUS_MASTER_INTERFACE, .driver_info = 0, }, /* LG Electronics VL600 wants additional headers on every frame */ { USB_DEVICE_AND_INTERFACE_INFO(0x1004, 0x61aa, USB_CLASS_COMM, USB_CDC_SUBCLASS_ETHERNET, USB_CDC_PROTO_NONE), .driver_info = 0, }, /* Logitech Harmony 900 - uses the pseudo-MDLM (BLAN) driver */ { USB_DEVICE_AND_INTERFACE_INFO(0x046d, 0xc11f, USB_CLASS_COMM, USB_CDC_SUBCLASS_MDLM, USB_CDC_PROTO_NONE), .driver_info = 0, }, /* Novatel USB551L and MC551 - handled by qmi_wwan */ { USB_DEVICE_AND_INTERFACE_INFO(NOVATEL_VENDOR_ID, 0xB001, USB_CLASS_COMM, USB_CDC_SUBCLASS_ETHERNET, USB_CDC_PROTO_NONE), .driver_info = 0, }, /* Novatel E362 - handled by qmi_wwan */ { USB_DEVICE_AND_INTERFACE_INFO(NOVATEL_VENDOR_ID, 0x9010, USB_CLASS_COMM, USB_CDC_SUBCLASS_ETHERNET, USB_CDC_PROTO_NONE), .driver_info = 0, }, /* Dell Wireless 5800 (Novatel E362) - handled by qmi_wwan */ { USB_DEVICE_AND_INTERFACE_INFO(DELL_VENDOR_ID, 0x8195, USB_CLASS_COMM, USB_CDC_SUBCLASS_ETHERNET, USB_CDC_PROTO_NONE), .driver_info = 0, }, /* Dell Wireless 5800 (Novatel E362) - handled by qmi_wwan */ { USB_DEVICE_AND_INTERFACE_INFO(DELL_VENDOR_ID, 0x8196, USB_CLASS_COMM, USB_CDC_SUBCLASS_ETHERNET, USB_CDC_PROTO_NONE), .driver_info = 0, }, /* Dell Wireless 5804 (Novatel E371) - handled by qmi_wwan */ { USB_DEVICE_AND_INTERFACE_INFO(DELL_VENDOR_ID, 0x819b, USB_CLASS_COMM, USB_CDC_SUBCLASS_ETHERNET, USB_CDC_PROTO_NONE), .driver_info = 0, }, /* Novatel Expedite E371 - handled by qmi_wwan */ { USB_DEVICE_AND_INTERFACE_INFO(NOVATEL_VENDOR_ID, 0x9011, USB_CLASS_COMM, USB_CDC_SUBCLASS_ETHERNET, USB_CDC_PROTO_NONE), .driver_info = 0, }, /* HP lt2523 (Novatel E371) - handled by qmi_wwan */ { USB_DEVICE_AND_INTERFACE_INFO(HP_VENDOR_ID, 0x421d, USB_CLASS_COMM, USB_CDC_SUBCLASS_ETHERNET, USB_CDC_PROTO_NONE), .driver_info = 0, }, /* AnyDATA ADU960S - handled by qmi_wwan */ { USB_DEVICE_AND_INTERFACE_INFO(0x16d5, 0x650a, USB_CLASS_COMM, USB_CDC_SUBCLASS_ETHERNET, USB_CDC_PROTO_NONE), .driver_info = 0, }, /* Huawei E1820 - handled by qmi_wwan */ { USB_DEVICE_INTERFACE_NUMBER(HUAWEI_VENDOR_ID, 0x14ac, 1), .driver_info = 0, }, /* Realtek RTL8153 Based USB 3.0 Ethernet Adapters */ { USB_DEVICE_AND_INTERFACE_INFO(REALTEK_VENDOR_ID, 0x8153, USB_CLASS_COMM, USB_CDC_SUBCLASS_ETHERNET, USB_CDC_PROTO_NONE), .driver_info = 0, }, /* Lenovo Powered USB-C Travel Hub (4X90S92381, based on Realtek RTL8153) */ { USB_DEVICE_AND_INTERFACE_INFO(LENOVO_VENDOR_ID, 0x721e, USB_CLASS_COMM, USB_CDC_SUBCLASS_ETHERNET, USB_CDC_PROTO_NONE), .driver_info = 0, }, /* Aquantia AQtion USB to 5GbE Controller (based on AQC111U) */ { USB_DEVICE_AND_INTERFACE_INFO(AQUANTIA_VENDOR_ID, 0xc101, USB_CLASS_COMM, USB_CDC_SUBCLASS_ETHERNET, USB_CDC_PROTO_NONE), .driver_info = 0, }, /* ASIX USB 3.1 Gen1 to 5G Multi-Gigabit Ethernet Adapter(based on AQC111U) */ { USB_DEVICE_AND_INTERFACE_INFO(ASIX_VENDOR_ID, 0x2790, USB_CLASS_COMM, USB_CDC_SUBCLASS_ETHERNET, USB_CDC_PROTO_NONE), .driver_info = 0, }, /* ASIX USB 3.1 Gen1 to 2.5G Multi-Gigabit Ethernet Adapter(based on AQC112U) */ { USB_DEVICE_AND_INTERFACE_INFO(ASIX_VENDOR_ID, 0x2791, USB_CLASS_COMM, USB_CDC_SUBCLASS_ETHERNET, USB_CDC_PROTO_NONE), .driver_info = 0, }, /* USB-C 3.1 to 5GBASE-T Ethernet Adapter (based on AQC111U) */ { USB_DEVICE_AND_INTERFACE_INFO(0x20f4, 0xe05a, USB_CLASS_COMM, USB_CDC_SUBCLASS_ETHERNET, USB_CDC_PROTO_NONE), .driver_info = 0, }, /* QNAP QNA-UC5G1T USB to 5GbE Adapter (based on AQC111U) */ { USB_DEVICE_AND_INTERFACE_INFO(0x1c04, 0x0015, USB_CLASS_COMM, USB_CDC_SUBCLASS_ETHERNET, USB_CDC_PROTO_NONE), .driver_info = 0, }, /* WHITELIST!!! * * CDC Ether uses two interfaces, not necessarily consecutive. * We match the main interface, ignoring the optional device * class so we could handle devices that aren't exclusively * CDC ether. * * NOTE: this match must come AFTER entries blacklisting devices * because of bugs/quirks in a given product (like Zaurus, above). */ { /* ZTE (Vodafone) K3805-Z */ USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1003, USB_CLASS_COMM, USB_CDC_SUBCLASS_ETHERNET, USB_CDC_PROTO_NONE), .driver_info = (unsigned long)&wwan_info, }, { /* ZTE (Vodafone) K3806-Z */ USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1015, USB_CLASS_COMM, USB_CDC_SUBCLASS_ETHERNET, USB_CDC_PROTO_NONE), .driver_info = (unsigned long)&wwan_info, }, { /* ZTE (Vodafone) K4510-Z */ USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1173, USB_CLASS_COMM, USB_CDC_SUBCLASS_ETHERNET, USB_CDC_PROTO_NONE), .driver_info = (unsigned long)&wwan_info, }, { /* ZTE (Vodafone) K3770-Z */ USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1177, USB_CLASS_COMM, USB_CDC_SUBCLASS_ETHERNET, USB_CDC_PROTO_NONE), .driver_info = (unsigned long)&wwan_info, }, { /* ZTE (Vodafone) K3772-Z */ USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1181, USB_CLASS_COMM, USB_CDC_SUBCLASS_ETHERNET, USB_CDC_PROTO_NONE), .driver_info = (unsigned long)&wwan_info, }, { /* Telit modules */ USB_VENDOR_AND_INTERFACE_INFO(0x1bc7, USB_CLASS_COMM, USB_CDC_SUBCLASS_ETHERNET, USB_CDC_PROTO_NONE), .driver_info = (kernel_ulong_t) &wwan_info, }, { /* Dell DW5580 modules */ USB_DEVICE_AND_INTERFACE_INFO(DELL_VENDOR_ID, 0x81ba, USB_CLASS_COMM, USB_CDC_SUBCLASS_ETHERNET, USB_CDC_PROTO_NONE), .driver_info = (kernel_ulong_t)&wwan_info, }, { /* Huawei ME906 and ME909 */ USB_DEVICE_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0x15c1, USB_CLASS_COMM, USB_CDC_SUBCLASS_ETHERNET, USB_CDC_PROTO_NONE), .driver_info = (unsigned long)&wwan_info, }, { /* ZTE modules */ USB_VENDOR_AND_INTERFACE_INFO(ZTE_VENDOR_ID, USB_CLASS_COMM, USB_CDC_SUBCLASS_ETHERNET, USB_CDC_PROTO_NONE), .driver_info = (unsigned long)&zte_cdc_info, }, { /* U-blox TOBY-L2 */ USB_DEVICE_AND_INTERFACE_INFO(UBLOX_VENDOR_ID, 0x1143, USB_CLASS_COMM, USB_CDC_SUBCLASS_ETHERNET, USB_CDC_PROTO_NONE), .driver_info = (unsigned long)&wwan_info, }, { /* U-blox SARA-U2 */ USB_DEVICE_AND_INTERFACE_INFO(UBLOX_VENDOR_ID, 0x1104, USB_CLASS_COMM, USB_CDC_SUBCLASS_ETHERNET, USB_CDC_PROTO_NONE), .driver_info = (unsigned long)&wwan_info, }, { /* U-blox LARA-R6 01B */ USB_DEVICE_AND_INTERFACE_INFO(UBLOX_VENDOR_ID, 0x1313, USB_CLASS_COMM, USB_CDC_SUBCLASS_ETHERNET, USB_CDC_PROTO_NONE), .driver_info = (unsigned long)&wwan_info, }, { /* U-blox LARA-L6 */ USB_DEVICE_AND_INTERFACE_INFO(UBLOX_VENDOR_ID, 0x1343, USB_CLASS_COMM, USB_CDC_SUBCLASS_ETHERNET, USB_CDC_PROTO_NONE), .driver_info = (unsigned long)&wwan_info, }, { /* Cinterion PLS8 modem by GEMALTO */ USB_DEVICE_AND_INTERFACE_INFO(0x1e2d, 0x0061, USB_CLASS_COMM, USB_CDC_SUBCLASS_ETHERNET, USB_CDC_PROTO_NONE), .driver_info = (unsigned long)&wwan_info, }, { /* Cinterion AHS3 modem by GEMALTO */ USB_DEVICE_AND_INTERFACE_INFO(0x1e2d, 0x0055, USB_CLASS_COMM, USB_CDC_SUBCLASS_ETHERNET, USB_CDC_PROTO_NONE), .driver_info = (unsigned long)&wwan_info, }, { /* Cinterion PLS62-W modem by GEMALTO/THALES */ USB_DEVICE_AND_INTERFACE_INFO(0x1e2d, 0x005b, USB_CLASS_COMM, USB_CDC_SUBCLASS_ETHERNET, USB_CDC_PROTO_NONE), .driver_info = (unsigned long)&wwan_info, }, { /* Cinterion PLS83/PLS63 modem by GEMALTO/THALES */ USB_DEVICE_AND_INTERFACE_INFO(0x1e2d, 0x0069, USB_CLASS_COMM, USB_CDC_SUBCLASS_ETHERNET, USB_CDC_PROTO_NONE), .driver_info = (unsigned long)&wwan_info, }, { USB_INTERFACE_INFO(USB_CLASS_COMM, USB_CDC_SUBCLASS_ETHERNET, USB_CDC_PROTO_NONE), .driver_info = (unsigned long) &cdc_info, }, { USB_INTERFACE_INFO(USB_CLASS_COMM, USB_CDC_SUBCLASS_MDLM, USB_CDC_PROTO_NONE), .driver_info = (unsigned long)&wwan_info, }, { /* Various Huawei modems with a network port like the UMG1831 */ USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, USB_CLASS_COMM, USB_CDC_SUBCLASS_ETHERNET, 255), .driver_info = (unsigned long)&wwan_info, }, { }, /* END */ }; MODULE_DEVICE_TABLE(usb, products); static struct usb_driver cdc_driver = { .name = "cdc_ether", .id_table = products, .probe = usbnet_probe, .disconnect = usbnet_disconnect, .suspend = usbnet_suspend, .resume = usbnet_resume, .reset_resume = usbnet_resume, .supports_autosuspend = 1, .disable_hub_initiated_lpm = 1, }; module_usb_driver(cdc_driver); MODULE_AUTHOR("David Brownell"); MODULE_DESCRIPTION("USB CDC Ethernet devices"); MODULE_LICENSE("GPL");
5 5 5 5 5 8 2 1 2 4 5 2 2 2 5 3 9 9 8 2 1 9 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 // SPDX-License-Identifier: GPL-2.0-only /* * PS/2 driver library * * Copyright (c) 1999-2002 Vojtech Pavlik * Copyright (c) 2004 Dmitry Torokhov */ #include <linux/delay.h> #include <linux/module.h> #include <linux/sched.h> #include <linux/interrupt.h> #include <linux/input.h> #include <linux/kmsan-checks.h> #include <linux/serio.h> #include <linux/i8042.h> #include <linux/libps2.h> #define DRIVER_DESC "PS/2 driver library" #define PS2_CMD_SETSCALE11 0x00e6 #define PS2_CMD_SETRES 0x10e8 #define PS2_CMD_EX_SETLEDS 0x20eb #define PS2_CMD_SETLEDS 0x10ed #define PS2_CMD_GETID 0x02f2 #define PS2_CMD_SETREP 0x10f3 /* Set repeat rate/set report rate */ #define PS2_CMD_RESET_BAT 0x02ff #define PS2_RET_BAT 0xaa #define PS2_RET_ID 0x00 #define PS2_RET_ACK 0xfa #define PS2_RET_NAK 0xfe #define PS2_RET_ERR 0xfc #define PS2_FLAG_ACK BIT(0) /* Waiting for ACK/NAK */ #define PS2_FLAG_CMD BIT(1) /* Waiting for a command to finish */ #define PS2_FLAG_CMD1 BIT(2) /* Waiting for the first byte of command response */ #define PS2_FLAG_WAITID BIT(3) /* Command executing is GET ID */ #define PS2_FLAG_NAK BIT(4) /* Last transmission was NAKed */ #define PS2_FLAG_PASS_NOACK BIT(5) /* Pass non-ACK byte to receive handler */ static int ps2_do_sendbyte(struct ps2dev *ps2dev, u8 byte, unsigned int timeout, unsigned int max_attempts) __releases(&ps2dev->serio->lock) __acquires(&ps2dev->serio->lock) { int attempt = 0; int error; lockdep_assert_held(&ps2dev->serio->lock); do { ps2dev->nak = 1; ps2dev->flags |= PS2_FLAG_ACK; serio_continue_rx(ps2dev->serio); error = serio_write(ps2dev->serio, byte); if (error) dev_dbg(&ps2dev->serio->dev, "failed to write %#02x: %d\n", byte, error); else wait_event_timeout(ps2dev->wait, !(ps2dev->flags & PS2_FLAG_ACK), msecs_to_jiffies(timeout)); serio_pause_rx(ps2dev->serio); } while (ps2dev->nak == PS2_RET_NAK && ++attempt < max_attempts); ps2dev->flags &= ~PS2_FLAG_ACK; if (!error) { switch (ps2dev->nak) { case 0: break; case PS2_RET_NAK: error = -EAGAIN; break; case PS2_RET_ERR: error = -EPROTO; break; default: error = -EIO; break; } } if (error || attempt > 1) dev_dbg(&ps2dev->serio->dev, "%02x - %d (%x), attempt %d\n", byte, error, ps2dev->nak, attempt); return error; } /** * ps2_sendbyte - sends a byte to the device and wait for acknowledgement * @ps2dev: a PS/2 device to send the data to * @byte: data to be sent to the device * @timeout: timeout for sending the data and receiving an acknowledge * * The function doesn't handle retransmission, the caller is expected to handle * it when needed. * * ps2_sendbyte() can only be called from a process context. */ int ps2_sendbyte(struct ps2dev *ps2dev, u8 byte, unsigned int timeout) { int retval; guard(serio_pause_rx)(ps2dev->serio); retval = ps2_do_sendbyte(ps2dev, byte, timeout, 1); dev_dbg(&ps2dev->serio->dev, "%02x - %x\n", byte, ps2dev->nak); return retval; } EXPORT_SYMBOL(ps2_sendbyte); /** * ps2_begin_command - mark beginning of execution of a complex command * @ps2dev: a PS/2 device executing the command * * Serializes a complex/compound command. Once command is finished * ps2_end_command() should be called. */ void ps2_begin_command(struct ps2dev *ps2dev) { struct mutex *m = ps2dev->serio->ps2_cmd_mutex ?: &ps2dev->cmd_mutex; mutex_lock(m); } EXPORT_SYMBOL(ps2_begin_command); /** * ps2_end_command - mark end of execution of a complex command * @ps2dev: a PS/2 device executing the command */ void ps2_end_command(struct ps2dev *ps2dev) { struct mutex *m = ps2dev->serio->ps2_cmd_mutex ?: &ps2dev->cmd_mutex; mutex_unlock(m); } EXPORT_SYMBOL(ps2_end_command); /** * ps2_drain - waits for device to transmit requested number of bytes * and discards them * @ps2dev: the PS/2 device that should be drained * @maxbytes: maximum number of bytes to be drained * @timeout: time to drain the device */ void ps2_drain(struct ps2dev *ps2dev, size_t maxbytes, unsigned int timeout) { if (maxbytes > sizeof(ps2dev->cmdbuf)) { WARN_ON(1); maxbytes = sizeof(ps2dev->cmdbuf); } ps2_begin_command(ps2dev); scoped_guard(serio_pause_rx, ps2dev->serio) { ps2dev->flags = PS2_FLAG_CMD; ps2dev->cmdcnt = maxbytes; } wait_event_timeout(ps2dev->wait, !(ps2dev->flags & PS2_FLAG_CMD), msecs_to_jiffies(timeout)); ps2_end_command(ps2dev); } EXPORT_SYMBOL(ps2_drain); /** * ps2_is_keyboard_id - checks received ID byte against the list of * known keyboard IDs * @id_byte: data byte that should be checked */ bool ps2_is_keyboard_id(u8 id_byte) { static const u8 keyboard_ids[] = { 0xab, /* Regular keyboards */ 0xac, /* NCD Sun keyboard */ 0x2b, /* Trust keyboard, translated */ 0x5d, /* Trust keyboard */ 0x60, /* NMB SGI keyboard, translated */ 0x47, /* NMB SGI keyboard */ }; return memchr(keyboard_ids, id_byte, sizeof(keyboard_ids)) != NULL; } EXPORT_SYMBOL(ps2_is_keyboard_id); /* * ps2_adjust_timeout() is called after receiving 1st byte of command * response and tries to reduce remaining timeout to speed up command * completion. */ static int ps2_adjust_timeout(struct ps2dev *ps2dev, unsigned int command, unsigned int timeout) { switch (command) { case PS2_CMD_RESET_BAT: /* * Device has sent the first response byte after * reset command, reset is thus done, so we can * shorten the timeout. * The next byte will come soon (keyboard) or not * at all (mouse). */ if (timeout > msecs_to_jiffies(100)) timeout = msecs_to_jiffies(100); break; case PS2_CMD_GETID: /* * Microsoft Natural Elite keyboard responds to * the GET ID command as it were a mouse, with * a single byte. Fail the command so atkbd will * use alternative probe to detect it. */ if (ps2dev->cmdbuf[1] == 0xaa) { scoped_guard(serio_pause_rx, ps2dev->serio) ps2dev->flags = 0; timeout = 0; } /* * If device behind the port is not a keyboard there * won't be 2nd byte of ID response. */ if (!ps2_is_keyboard_id(ps2dev->cmdbuf[1])) { scoped_guard(serio_pause_rx, ps2dev->serio) ps2dev->flags = ps2dev->cmdcnt = 0; timeout = 0; } break; default: break; } return timeout; } /** * __ps2_command - send a command to PS/2 device * @ps2dev: the PS/2 device that should execute the command * @param: a buffer containing parameters to be sent along with the command, * or place where the results of the command execution will be deposited, * or both * @command: command word that encodes the command itself, as well as number of * additional parameter bytes that should be sent to the device and expected * length of the command response * * Not serialized. Callers should use ps2_begin_command() and ps2_end_command() * to ensure proper serialization for complex commands. */ int __ps2_command(struct ps2dev *ps2dev, u8 *param, unsigned int command) { unsigned int timeout; unsigned int send = (command >> 12) & 0xf; unsigned int receive = (command >> 8) & 0xf; int rc; int i; u8 send_param[16]; if (receive > sizeof(ps2dev->cmdbuf)) { WARN_ON(1); return -EINVAL; } if (send && !param) { WARN_ON(1); return -EINVAL; } memcpy(send_param, param, send); /* * Not using guard notation because we need to break critical * section below while waiting for the response. */ serio_pause_rx(ps2dev->serio); ps2dev->cmdcnt = receive; switch (command) { case PS2_CMD_GETID: /* * Some mice do not ACK the "get ID" command, prepare to * handle this. */ ps2dev->flags = PS2_FLAG_WAITID; break; case PS2_CMD_SETLEDS: case PS2_CMD_EX_SETLEDS: case PS2_CMD_SETREP: ps2dev->flags = PS2_FLAG_PASS_NOACK; break; default: ps2dev->flags = 0; break; } if (receive) { /* Indicate that we expect response to the command. */ ps2dev->flags |= PS2_FLAG_CMD | PS2_FLAG_CMD1; if (param) for (i = 0; i < receive; i++) ps2dev->cmdbuf[(receive - 1) - i] = param[i]; } /* * Some devices (Synaptics) perform the reset before * ACKing the reset command, and so it can take a long * time before the ACK arrives. */ timeout = command == PS2_CMD_RESET_BAT ? 1000 : 200; rc = ps2_do_sendbyte(ps2dev, command & 0xff, timeout, 2); if (rc) goto out_reset_flags; /* Send command parameters, if any. */ for (i = 0; i < send; i++) { rc = ps2_do_sendbyte(ps2dev, param[i], 200, 2); if (rc) goto out_reset_flags; } serio_continue_rx(ps2dev->serio); /* * The reset command takes a long time to execute. */ timeout = msecs_to_jiffies(command == PS2_CMD_RESET_BAT ? 4000 : 500); timeout = wait_event_timeout(ps2dev->wait, !(ps2dev->flags & PS2_FLAG_CMD1), timeout); if (ps2dev->cmdcnt && !(ps2dev->flags & PS2_FLAG_CMD1)) { timeout = ps2_adjust_timeout(ps2dev, command, timeout); wait_event_timeout(ps2dev->wait, !(ps2dev->flags & PS2_FLAG_CMD), timeout); } serio_pause_rx(ps2dev->serio); if (param) { for (i = 0; i < receive; i++) param[i] = ps2dev->cmdbuf[(receive - 1) - i]; kmsan_unpoison_memory(param, receive); } if (ps2dev->cmdcnt && (command != PS2_CMD_RESET_BAT || ps2dev->cmdcnt != 1)) { rc = -EPROTO; goto out_reset_flags; } rc = 0; out_reset_flags: ps2dev->flags = 0; serio_continue_rx(ps2dev->serio); dev_dbg(&ps2dev->serio->dev, "%02x [%*ph] - %x/%08lx [%*ph]\n", command & 0xff, send, send_param, ps2dev->nak, ps2dev->flags, receive, param ?: send_param); /* * ps_command() handles resends itself, so do not leak -EAGAIN * to the callers. */ return rc != -EAGAIN ? rc : -EPROTO; } EXPORT_SYMBOL(__ps2_command); /** * ps2_command - send a command to PS/2 device * @ps2dev: the PS/2 device that should execute the command * @param: a buffer containing parameters to be sent along with the command, * or place where the results of the command execution will be deposited, * or both * @command: command word that encodes the command itself, as well as number of * additional parameter bytes that should be sent to the device and expected * length of the command response * * Note: ps2_command() serializes the command execution so that only one * command can be executed at a time for either individual port or the entire * 8042 controller. */ int ps2_command(struct ps2dev *ps2dev, u8 *param, unsigned int command) { int rc; ps2_begin_command(ps2dev); rc = __ps2_command(ps2dev, param, command); ps2_end_command(ps2dev); return rc; } EXPORT_SYMBOL(ps2_command); /** * ps2_sliced_command - sends an extended PS/2 command to a mouse * @ps2dev: the PS/2 device that should execute the command * @command: command byte * * The command is sent using "sliced" syntax understood by advanced devices, * such as Logitech or Synaptics touchpads. The command is encoded as: * 0xE6 0xE8 rr 0xE8 ss 0xE8 tt 0xE8 uu where (rr*64)+(ss*16)+(tt*4)+uu * is the command. */ int ps2_sliced_command(struct ps2dev *ps2dev, u8 command) { int i; int retval; ps2_begin_command(ps2dev); retval = __ps2_command(ps2dev, NULL, PS2_CMD_SETSCALE11); if (retval) goto out; for (i = 6; i >= 0; i -= 2) { u8 d = (command >> i) & 3; retval = __ps2_command(ps2dev, &d, PS2_CMD_SETRES); if (retval) break; } out: dev_dbg(&ps2dev->serio->dev, "%02x - %d\n", command, retval); ps2_end_command(ps2dev); return retval; } EXPORT_SYMBOL(ps2_sliced_command); /** * ps2_init - initializes ps2dev structure * @ps2dev: structure to be initialized * @serio: serio port associated with the PS/2 device * @pre_receive_handler: validation handler to check basic communication state * @receive_handler: main protocol handler * * Prepares ps2dev structure for use in drivers for PS/2 devices. */ void ps2_init(struct ps2dev *ps2dev, struct serio *serio, ps2_pre_receive_handler_t pre_receive_handler, ps2_receive_handler_t receive_handler) { ps2dev->pre_receive_handler = pre_receive_handler; ps2dev->receive_handler = receive_handler; mutex_init(&ps2dev->cmd_mutex); lockdep_set_subclass(&ps2dev->cmd_mutex, serio->depth); init_waitqueue_head(&ps2dev->wait); ps2dev->serio = serio; serio_set_drvdata(serio, ps2dev); } EXPORT_SYMBOL(ps2_init); /* * ps2_handle_response() stores device's response to a command and notifies * the process waiting for completion of the command. Note that there is a * distinction between waiting for the first byte of the response, and * waiting for subsequent bytes. It is done so that callers could shorten * timeouts once first byte of response is received. */ static void ps2_handle_response(struct ps2dev *ps2dev, u8 data) { if (ps2dev->cmdcnt) ps2dev->cmdbuf[--ps2dev->cmdcnt] = data; if (ps2dev->flags & PS2_FLAG_CMD1) { ps2dev->flags &= ~PS2_FLAG_CMD1; if (ps2dev->cmdcnt) wake_up(&ps2dev->wait); } if (!ps2dev->cmdcnt) { ps2dev->flags &= ~PS2_FLAG_CMD; wake_up(&ps2dev->wait); } } /* * ps2_handle_ack() processes ACK/NAK of a command from a PS/2 device, * possibly applying workarounds for mice not acknowledging the "get ID" * command. */ static void ps2_handle_ack(struct ps2dev *ps2dev, u8 data) { switch (data) { case PS2_RET_ACK: ps2dev->nak = 0; break; case PS2_RET_NAK: ps2dev->flags |= PS2_FLAG_NAK; ps2dev->nak = PS2_RET_NAK; break; case PS2_RET_ERR: if (ps2dev->flags & PS2_FLAG_NAK) { ps2dev->flags &= ~PS2_FLAG_NAK; ps2dev->nak = PS2_RET_ERR; break; } fallthrough; /* * Workaround for mice which don't ACK the Get ID command. * These are valid mouse IDs that we recognize. */ case 0x00: case 0x03: case 0x04: if (ps2dev->flags & PS2_FLAG_WAITID) { ps2dev->nak = 0; break; } fallthrough; default: /* * Do not signal errors if we get unexpected reply while * waiting for an ACK to the initial (first) command byte: * the device might not be quiesced yet and continue * delivering data. For certain commands (such as set leds and * set repeat rate) that can be used during normal device * operation, we even pass this data byte to the normal receive * handler. * Note that we reset PS2_FLAG_WAITID flag, so the workaround * for mice not acknowledging the Get ID command only triggers * on the 1st byte; if device spews data we really want to see * a real ACK from it. */ dev_dbg(&ps2dev->serio->dev, "unexpected %#02x\n", data); if (ps2dev->flags & PS2_FLAG_PASS_NOACK) ps2dev->receive_handler(ps2dev, data); ps2dev->flags &= ~(PS2_FLAG_WAITID | PS2_FLAG_PASS_NOACK); return; } if (!ps2dev->nak) ps2dev->flags &= ~PS2_FLAG_NAK; ps2dev->flags &= ~PS2_FLAG_ACK; if (!ps2dev->nak && data != PS2_RET_ACK) ps2_handle_response(ps2dev, data); else wake_up(&ps2dev->wait); } /* * Clears state of PS/2 device after communication error by resetting majority * of flags and waking up waiters, if any. */ static void ps2_cleanup(struct ps2dev *ps2dev) { unsigned long old_flags = ps2dev->flags; /* reset all flags except last nak */ ps2dev->flags &= PS2_FLAG_NAK; if (old_flags & PS2_FLAG_ACK) ps2dev->nak = 1; if (old_flags & (PS2_FLAG_ACK | PS2_FLAG_CMD)) wake_up(&ps2dev->wait); } /** * ps2_interrupt - common interrupt handler for PS/2 devices * @serio: serio port for the device * @data: a data byte received from the device * @flags: flags such as %SERIO_PARITY or %SERIO_TIMEOUT indicating state of * the data transfer * * ps2_interrupt() invokes pre-receive handler, optionally handles command * acknowledgement and response from the device, and finally passes the data * to the main protocol handler for future processing. */ irqreturn_t ps2_interrupt(struct serio *serio, u8 data, unsigned int flags) { struct ps2dev *ps2dev = serio_get_drvdata(serio); enum ps2_disposition rc; rc = ps2dev->pre_receive_handler(ps2dev, data, flags); switch (rc) { case PS2_ERROR: ps2_cleanup(ps2dev); break; case PS2_IGNORE: break; case PS2_PROCESS: if (ps2dev->flags & PS2_FLAG_ACK) ps2_handle_ack(ps2dev, data); else if (ps2dev->flags & PS2_FLAG_CMD) ps2_handle_response(ps2dev, data); else ps2dev->receive_handler(ps2dev, data); break; } return IRQ_HANDLED; } EXPORT_SYMBOL(ps2_interrupt); MODULE_AUTHOR("Dmitry Torokhov <dtor@mail.ru>"); MODULE_DESCRIPTION("PS/2 driver library"); MODULE_LICENSE("GPL");
7 7 7 7 7 7 7 7 39 39 39 39 39 39 39 39 39 39 39 25 24 36 36 36 2 2 57 57 57 55 2 57 2 57 57 57 57 57 57 57 20 59 26 57 36 59 36 7 7 6 7 7 7 7 7 6 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 14 14 14 14 14 14 14 7 7 7 7 7 7 7 7 7 7 5 7 7 7 59 59 59 7 7 7 7 7 7 7 7 7 7 7 7 7 7 45 45 45 45 39 39 39 5 5 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 // SPDX-License-Identifier: GPL-2.0-only /* * Copyright 2002-2005, Instant802 Networks, Inc. * Copyright 2005-2006, Devicescape Software, Inc. * Copyright (c) 2006 Jiri Benc <jbenc@suse.cz> * Copyright 2017 Intel Deutschland GmbH * Copyright (C) 2019, 2022-2024 Intel Corporation */ #include <linux/kernel.h> #include <linux/rtnetlink.h> #include <linux/module.h> #include <linux/slab.h> #include "rate.h" #include "ieee80211_i.h" #include "debugfs.h" struct rate_control_alg { struct list_head list; const struct rate_control_ops *ops; }; static LIST_HEAD(rate_ctrl_algs); static DEFINE_MUTEX(rate_ctrl_mutex); static char *ieee80211_default_rc_algo = CONFIG_MAC80211_RC_DEFAULT; module_param(ieee80211_default_rc_algo, charp, 0644); MODULE_PARM_DESC(ieee80211_default_rc_algo, "Default rate control algorithm for mac80211 to use"); void rate_control_rate_init(struct link_sta_info *link_sta) { struct sta_info *sta = link_sta->sta; struct ieee80211_local *local = sta->sdata->local; struct rate_control_ref *ref = sta->rate_ctrl; struct ieee80211_sta *ista = &sta->sta; void *priv_sta = sta->rate_ctrl_priv; struct ieee80211_supported_band *sband; struct ieee80211_chanctx_conf *chanctx_conf; ieee80211_sta_init_nss(link_sta); if (!ref) return; /* SW rate control isn't supported with MLO right now */ if (WARN_ON(ieee80211_vif_is_mld(&sta->sdata->vif))) return; rcu_read_lock(); chanctx_conf = rcu_dereference(sta->sdata->vif.bss_conf.chanctx_conf); if (WARN_ON(!chanctx_conf)) { rcu_read_unlock(); return; } sband = local->hw.wiphy->bands[chanctx_conf->def.chan->band]; /* TODO: check for minstrel_s1g ? */ if (sband->band == NL80211_BAND_S1GHZ) { ieee80211_s1g_sta_rate_init(sta); rcu_read_unlock(); return; } spin_lock_bh(&sta->rate_ctrl_lock); ref->ops->rate_init(ref->priv, sband, &chanctx_conf->def, ista, priv_sta); spin_unlock_bh(&sta->rate_ctrl_lock); rcu_read_unlock(); set_sta_flag(sta, WLAN_STA_RATE_CONTROL); } void rate_control_rate_init_all_links(struct sta_info *sta) { int link_id; for (link_id = 0; link_id < ARRAY_SIZE(sta->link); link_id++) { struct link_sta_info *link_sta; link_sta = sdata_dereference(sta->link[link_id], sta->sdata); if (!link_sta) continue; rate_control_rate_init(link_sta); } } void rate_control_tx_status(struct ieee80211_local *local, struct ieee80211_tx_status *st) { struct rate_control_ref *ref = local->rate_ctrl; struct sta_info *sta = container_of(st->sta, struct sta_info, sta); void *priv_sta = sta->rate_ctrl_priv; struct ieee80211_supported_band *sband; if (!ref || !test_sta_flag(sta, WLAN_STA_RATE_CONTROL)) return; sband = local->hw.wiphy->bands[st->info->band]; spin_lock_bh(&sta->rate_ctrl_lock); if (ref->ops->tx_status_ext) ref->ops->tx_status_ext(ref->priv, sband, priv_sta, st); else if (st->skb) ref->ops->tx_status(ref->priv, sband, st->sta, priv_sta, st->skb); else WARN_ON_ONCE(1); spin_unlock_bh(&sta->rate_ctrl_lock); } void rate_control_rate_update(struct ieee80211_local *local, struct ieee80211_supported_band *sband, struct link_sta_info *link_sta, u32 changed) { struct rate_control_ref *ref = local->rate_ctrl; struct sta_info *sta = link_sta->sta; struct ieee80211_sta *ista = &sta->sta; void *priv_sta = sta->rate_ctrl_priv; struct ieee80211_chanctx_conf *chanctx_conf; if (ref && ref->ops->rate_update) { rcu_read_lock(); chanctx_conf = rcu_dereference(sta->sdata->vif.bss_conf.chanctx_conf); if (WARN_ON(!chanctx_conf)) { rcu_read_unlock(); return; } spin_lock_bh(&sta->rate_ctrl_lock); ref->ops->rate_update(ref->priv, sband, &chanctx_conf->def, ista, priv_sta, changed); spin_unlock_bh(&sta->rate_ctrl_lock); rcu_read_unlock(); } if (sta->uploaded) drv_link_sta_rc_update(local, sta->sdata, link_sta->pub, changed); } int ieee80211_rate_control_register(const struct rate_control_ops *ops) { struct rate_control_alg *alg; if (!ops->name) return -EINVAL; mutex_lock(&rate_ctrl_mutex); list_for_each_entry(alg, &rate_ctrl_algs, list) { if (!strcmp(alg->ops->name, ops->name)) { /* don't register an algorithm twice */ WARN_ON(1); mutex_unlock(&rate_ctrl_mutex); return -EALREADY; } } alg = kzalloc(sizeof(*alg), GFP_KERNEL); if (alg == NULL) { mutex_unlock(&rate_ctrl_mutex); return -ENOMEM; } alg->ops = ops; list_add_tail(&alg->list, &rate_ctrl_algs); mutex_unlock(&rate_ctrl_mutex); return 0; } EXPORT_SYMBOL(ieee80211_rate_control_register); void ieee80211_rate_control_unregister(const struct rate_control_ops *ops) { struct rate_control_alg *alg; mutex_lock(&rate_ctrl_mutex); list_for_each_entry(alg, &rate_ctrl_algs, list) { if (alg->ops == ops) { list_del(&alg->list); kfree(alg); break; } } mutex_unlock(&rate_ctrl_mutex); } EXPORT_SYMBOL(ieee80211_rate_control_unregister); static const struct rate_control_ops * ieee80211_try_rate_control_ops_get(const char *name) { struct rate_control_alg *alg; const struct rate_control_ops *ops = NULL; if (!name) return NULL; mutex_lock(&rate_ctrl_mutex); list_for_each_entry(alg, &rate_ctrl_algs, list) { if (!strcmp(alg->ops->name, name)) { ops = alg->ops; break; } } mutex_unlock(&rate_ctrl_mutex); return ops; } /* Get the rate control algorithm. */ static const struct rate_control_ops * ieee80211_rate_control_ops_get(const char *name) { const struct rate_control_ops *ops; const char *alg_name; kernel_param_lock(THIS_MODULE); if (!name) alg_name = ieee80211_default_rc_algo; else alg_name = name; ops = ieee80211_try_rate_control_ops_get(alg_name); if (!ops && name) /* try default if specific alg requested but not found */ ops = ieee80211_try_rate_control_ops_get(ieee80211_default_rc_algo); /* Note: check for > 0 is intentional to avoid clang warning */ if (!ops && (strlen(CONFIG_MAC80211_RC_DEFAULT) > 0)) /* try built-in one if specific alg requested but not found */ ops = ieee80211_try_rate_control_ops_get(CONFIG_MAC80211_RC_DEFAULT); kernel_param_unlock(THIS_MODULE); return ops; } #ifdef CONFIG_MAC80211_DEBUGFS static ssize_t rcname_read(struct file *file, char __user *userbuf, size_t count, loff_t *ppos) { struct rate_control_ref *ref = file->private_data; int len = strlen(ref->ops->name); return simple_read_from_buffer(userbuf, count, ppos, ref->ops->name, len); } const struct debugfs_short_fops rcname_ops = { .read = rcname_read, .llseek = default_llseek, }; #endif static struct rate_control_ref * rate_control_alloc(const char *name, struct ieee80211_local *local) { struct rate_control_ref *ref; ref = kmalloc(sizeof(struct rate_control_ref), GFP_KERNEL); if (!ref) return NULL; ref->ops = ieee80211_rate_control_ops_get(name); if (!ref->ops) goto free; ref->priv = ref->ops->alloc(&local->hw); if (!ref->priv) goto free; return ref; free: kfree(ref); return NULL; } static void rate_control_free(struct ieee80211_local *local, struct rate_control_ref *ctrl_ref) { ctrl_ref->ops->free(ctrl_ref->priv); #ifdef CONFIG_MAC80211_DEBUGFS debugfs_remove_recursive(local->debugfs.rcdir); local->debugfs.rcdir = NULL; #endif kfree(ctrl_ref); } void ieee80211_check_rate_mask(struct ieee80211_link_data *link) { struct ieee80211_sub_if_data *sdata = link->sdata; struct ieee80211_local *local = sdata->local; struct ieee80211_supported_band *sband; u32 user_mask, basic_rates = link->conf->basic_rates; enum nl80211_band band; if (WARN_ON(!link->conf->chanreq.oper.chan)) return; band = link->conf->chanreq.oper.chan->band; if (band == NL80211_BAND_S1GHZ) { /* TODO */ return; } if (WARN_ON_ONCE(!basic_rates)) return; user_mask = sdata->rc_rateidx_mask[band]; sband = local->hw.wiphy->bands[band]; if (user_mask & basic_rates) return; sdata_dbg(sdata, "no overlap between basic rates (0x%x) and user mask (0x%x on band %d) - clearing the latter", basic_rates, user_mask, band); sdata->rc_rateidx_mask[band] = (1 << sband->n_bitrates) - 1; } static bool rc_no_data_or_no_ack_use_min(struct ieee80211_tx_rate_control *txrc) { struct sk_buff *skb = txrc->skb; struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); return (info->flags & (IEEE80211_TX_CTL_NO_ACK | IEEE80211_TX_CTL_USE_MINRATE)) || !ieee80211_is_tx_data(skb); } static void rc_send_low_basicrate(struct ieee80211_tx_rate *rate, u32 basic_rates, struct ieee80211_supported_band *sband) { u8 i; if (sband->band == NL80211_BAND_S1GHZ) { /* TODO */ rate->flags |= IEEE80211_TX_RC_S1G_MCS; rate->idx = 0; return; } if (basic_rates == 0) return; /* assume basic rates unknown and accept rate */ if (rate->idx < 0) return; if (basic_rates & (1 << rate->idx)) return; /* selected rate is a basic rate */ for (i = rate->idx + 1; i <= sband->n_bitrates; i++) { if (basic_rates & (1 << i)) { rate->idx = i; return; } } /* could not find a basic rate; use original selection */ } static void __rate_control_send_low(struct ieee80211_hw *hw, struct ieee80211_supported_band *sband, struct ieee80211_sta *sta, struct ieee80211_tx_info *info, u32 rate_mask) { int i; u32 rate_flags = ieee80211_chandef_rate_flags(&hw->conf.chandef); if (sband->band == NL80211_BAND_S1GHZ) { info->control.rates[0].flags |= IEEE80211_TX_RC_S1G_MCS; info->control.rates[0].idx = 0; return; } if ((sband->band == NL80211_BAND_2GHZ) && (info->flags & IEEE80211_TX_CTL_NO_CCK_RATE)) rate_flags |= IEEE80211_RATE_ERP_G; info->control.rates[0].idx = 0; for (i = 0; i < sband->n_bitrates; i++) { if (!(rate_mask & BIT(i))) continue; if ((rate_flags & sband->bitrates[i].flags) != rate_flags) continue; if (!rate_supported(sta, sband->band, i)) continue; info->control.rates[0].idx = i; break; } WARN_ONCE(i == sband->n_bitrates, "no supported rates for sta %pM (0x%x, band %d) in rate_mask 0x%x with flags 0x%x\n", sta ? sta->addr : NULL, sta ? sta->deflink.supp_rates[sband->band] : -1, sband->band, rate_mask, rate_flags); info->control.rates[0].count = (info->flags & IEEE80211_TX_CTL_NO_ACK) ? 1 : hw->max_rate_tries; info->control.skip_table = 1; } static bool rate_control_send_low(struct ieee80211_sta *pubsta, struct ieee80211_tx_rate_control *txrc) { struct ieee80211_tx_info *info = IEEE80211_SKB_CB(txrc->skb); struct ieee80211_supported_band *sband = txrc->sband; struct sta_info *sta; int mcast_rate; bool use_basicrate = false; if (!pubsta || rc_no_data_or_no_ack_use_min(txrc)) { __rate_control_send_low(txrc->hw, sband, pubsta, info, txrc->rate_idx_mask); if (!pubsta && txrc->bss) { mcast_rate = txrc->bss_conf->mcast_rate[sband->band]; if (mcast_rate > 0) { info->control.rates[0].idx = mcast_rate - 1; return true; } use_basicrate = true; } else if (pubsta) { sta = container_of(pubsta, struct sta_info, sta); if (ieee80211_vif_is_mesh(&sta->sdata->vif)) use_basicrate = true; } if (use_basicrate) rc_send_low_basicrate(&info->control.rates[0], txrc->bss_conf->basic_rates, sband); return true; } return false; } static bool rate_idx_match_legacy_mask(s8 *rate_idx, int n_bitrates, u32 mask) { int j; /* See whether the selected rate or anything below it is allowed. */ for (j = *rate_idx; j >= 0; j--) { if (mask & (1 << j)) { /* Okay, found a suitable rate. Use it. */ *rate_idx = j; return true; } } /* Try to find a higher rate that would be allowed */ for (j = *rate_idx + 1; j < n_bitrates; j++) { if (mask & (1 << j)) { /* Okay, found a suitable rate. Use it. */ *rate_idx = j; return true; } } return false; } static bool rate_idx_match_mcs_mask(s8 *rate_idx, u8 *mcs_mask) { int i, j; int ridx, rbit; ridx = *rate_idx / 8; rbit = *rate_idx % 8; /* sanity check */ if (ridx < 0 || ridx >= IEEE80211_HT_MCS_MASK_LEN) return false; /* See whether the selected rate or anything below it is allowed. */ for (i = ridx; i >= 0; i--) { for (j = rbit; j >= 0; j--) if (mcs_mask[i] & BIT(j)) { *rate_idx = i * 8 + j; return true; } rbit = 7; } /* Try to find a higher rate that would be allowed */ ridx = (*rate_idx + 1) / 8; rbit = (*rate_idx + 1) % 8; for (i = ridx; i < IEEE80211_HT_MCS_MASK_LEN; i++) { for (j = rbit; j < 8; j++) if (mcs_mask[i] & BIT(j)) { *rate_idx = i * 8 + j; return true; } rbit = 0; } return false; } static bool rate_idx_match_vht_mcs_mask(s8 *rate_idx, u16 *vht_mask) { int i, j; int ridx, rbit; ridx = *rate_idx >> 4; rbit = *rate_idx & 0xf; if (ridx < 0 || ridx >= NL80211_VHT_NSS_MAX) return false; /* See whether the selected rate or anything below it is allowed. */ for (i = ridx; i >= 0; i--) { for (j = rbit; j >= 0; j--) { if (vht_mask[i] & BIT(j)) { *rate_idx = (i << 4) | j; return true; } } rbit = 15; } /* Try to find a higher rate that would be allowed */ ridx = (*rate_idx + 1) >> 4; rbit = (*rate_idx + 1) & 0xf; for (i = ridx; i < NL80211_VHT_NSS_MAX; i++) { for (j = rbit; j < 16; j++) { if (vht_mask[i] & BIT(j)) { *rate_idx = (i << 4) | j; return true; } } rbit = 0; } return false; } static void rate_idx_match_mask(s8 *rate_idx, u16 *rate_flags, struct ieee80211_supported_band *sband, enum nl80211_chan_width chan_width, u32 mask, u8 mcs_mask[IEEE80211_HT_MCS_MASK_LEN], u16 vht_mask[NL80211_VHT_NSS_MAX]) { if (*rate_flags & IEEE80211_TX_RC_VHT_MCS) { /* handle VHT rates */ if (rate_idx_match_vht_mcs_mask(rate_idx, vht_mask)) return; *rate_idx = 0; /* keep protection flags */ *rate_flags &= (IEEE80211_TX_RC_USE_RTS_CTS | IEEE80211_TX_RC_USE_CTS_PROTECT | IEEE80211_TX_RC_USE_SHORT_PREAMBLE); *rate_flags |= IEEE80211_TX_RC_MCS; if (chan_width == NL80211_CHAN_WIDTH_40) *rate_flags |= IEEE80211_TX_RC_40_MHZ_WIDTH; if (rate_idx_match_mcs_mask(rate_idx, mcs_mask)) return; /* also try the legacy rates. */ *rate_flags &= ~(IEEE80211_TX_RC_MCS | IEEE80211_TX_RC_40_MHZ_WIDTH); if (rate_idx_match_legacy_mask(rate_idx, sband->n_bitrates, mask)) return; } else if (*rate_flags & IEEE80211_TX_RC_MCS) { /* handle HT rates */ if (rate_idx_match_mcs_mask(rate_idx, mcs_mask)) return; /* also try the legacy rates. */ *rate_idx = 0; /* keep protection flags */ *rate_flags &= (IEEE80211_TX_RC_USE_RTS_CTS | IEEE80211_TX_RC_USE_CTS_PROTECT | IEEE80211_TX_RC_USE_SHORT_PREAMBLE); if (rate_idx_match_legacy_mask(rate_idx, sband->n_bitrates, mask)) return; } else { /* handle legacy rates */ if (rate_idx_match_legacy_mask(rate_idx, sband->n_bitrates, mask)) return; /* if HT BSS, and we handle a data frame, also try HT rates */ switch (chan_width) { case NL80211_CHAN_WIDTH_20_NOHT: case NL80211_CHAN_WIDTH_5: case NL80211_CHAN_WIDTH_10: return; default: break; } *rate_idx = 0; /* keep protection flags */ *rate_flags &= (IEEE80211_TX_RC_USE_RTS_CTS | IEEE80211_TX_RC_USE_CTS_PROTECT | IEEE80211_TX_RC_USE_SHORT_PREAMBLE); *rate_flags |= IEEE80211_TX_RC_MCS; if (chan_width == NL80211_CHAN_WIDTH_40) *rate_flags |= IEEE80211_TX_RC_40_MHZ_WIDTH; if (rate_idx_match_mcs_mask(rate_idx, mcs_mask)) return; } /* * Uh.. No suitable rate exists. This should not really happen with * sane TX rate mask configurations. However, should someone manage to * configure supported rates and TX rate mask in incompatible way, * allow the frame to be transmitted with whatever the rate control * selected. */ } static void rate_fixup_ratelist(struct ieee80211_vif *vif, struct ieee80211_supported_band *sband, struct ieee80211_tx_info *info, struct ieee80211_tx_rate *rates, int max_rates) { struct ieee80211_rate *rate; bool inval = false; int i; /* * Set up the RTS/CTS rate as the fastest basic rate * that is not faster than the data rate unless there * is no basic rate slower than the data rate, in which * case we pick the slowest basic rate * * XXX: Should this check all retry rates? */ if (!(rates[0].flags & (IEEE80211_TX_RC_MCS | IEEE80211_TX_RC_VHT_MCS))) { u32 basic_rates = vif->bss_conf.basic_rates; s8 baserate = basic_rates ? ffs(basic_rates) - 1 : 0; rate = &sband->bitrates[rates[0].idx]; for (i = 0; i < sband->n_bitrates; i++) { /* must be a basic rate */ if (!(basic_rates & BIT(i))) continue; /* must not be faster than the data rate */ if (sband->bitrates[i].bitrate > rate->bitrate) continue; /* maximum */ if (sband->bitrates[baserate].bitrate < sband->bitrates[i].bitrate) baserate = i; } info->control.rts_cts_rate_idx = baserate; } for (i = 0; i < max_rates; i++) { /* * make sure there's no valid rate following * an invalid one, just in case drivers don't * take the API seriously to stop at -1. */ if (inval) { rates[i].idx = -1; continue; } if (rates[i].idx < 0) { inval = true; continue; } /* * For now assume MCS is already set up correctly, this * needs to be fixed. */ if (rates[i].flags & IEEE80211_TX_RC_MCS) { WARN_ON(rates[i].idx > 76); if (!(rates[i].flags & IEEE80211_TX_RC_USE_RTS_CTS) && info->control.use_cts_prot) rates[i].flags |= IEEE80211_TX_RC_USE_CTS_PROTECT; continue; } if (rates[i].flags & IEEE80211_TX_RC_VHT_MCS) { WARN_ON(ieee80211_rate_get_vht_mcs(&rates[i]) > 9); continue; } /* set up RTS protection if desired */ if (info->control.use_rts) { rates[i].flags |= IEEE80211_TX_RC_USE_RTS_CTS; info->control.use_cts_prot = false; } /* RC is busted */ if (WARN_ON_ONCE(rates[i].idx >= sband->n_bitrates)) { rates[i].idx = -1; continue; } rate = &sband->bitrates[rates[i].idx]; /* set up short preamble */ if (info->control.short_preamble && rate->flags & IEEE80211_RATE_SHORT_PREAMBLE) rates[i].flags |= IEEE80211_TX_RC_USE_SHORT_PREAMBLE; /* set up G protection */ if (!(rates[i].flags & IEEE80211_TX_RC_USE_RTS_CTS) && info->control.use_cts_prot && rate->flags & IEEE80211_RATE_ERP_G) rates[i].flags |= IEEE80211_TX_RC_USE_CTS_PROTECT; } } static void rate_control_fill_sta_table(struct ieee80211_sta *sta, struct ieee80211_tx_info *info, struct ieee80211_tx_rate *rates, int max_rates) { struct ieee80211_sta_rates *ratetbl = NULL; int i; if (sta && !info->control.skip_table) ratetbl = rcu_dereference(sta->rates); /* Fill remaining rate slots with data from the sta rate table. */ max_rates = min_t(int, max_rates, IEEE80211_TX_RATE_TABLE_SIZE); for (i = 0; i < max_rates; i++) { if (i < ARRAY_SIZE(info->control.rates) && info->control.rates[i].idx >= 0 && info->control.rates[i].count) { if (rates != info->control.rates) rates[i] = info->control.rates[i]; } else if (ratetbl) { rates[i].idx = ratetbl->rate[i].idx; rates[i].flags = ratetbl->rate[i].flags; if (info->control.use_rts) rates[i].count = ratetbl->rate[i].count_rts; else if (info->control.use_cts_prot) rates[i].count = ratetbl->rate[i].count_cts; else rates[i].count = ratetbl->rate[i].count; } else { rates[i].idx = -1; rates[i].count = 0; } if (rates[i].idx < 0 || !rates[i].count) break; } } static bool rate_control_cap_mask(struct ieee80211_sub_if_data *sdata, struct ieee80211_supported_band *sband, struct ieee80211_sta *sta, u32 *mask, u8 mcs_mask[IEEE80211_HT_MCS_MASK_LEN], u16 vht_mask[NL80211_VHT_NSS_MAX]) { u32 i, flags; *mask = sdata->rc_rateidx_mask[sband->band]; flags = ieee80211_chandef_rate_flags(&sdata->vif.bss_conf.chanreq.oper); for (i = 0; i < sband->n_bitrates; i++) { if ((flags & sband->bitrates[i].flags) != flags) *mask &= ~BIT(i); } if (*mask == (1 << sband->n_bitrates) - 1 && !sdata->rc_has_mcs_mask[sband->band] && !sdata->rc_has_vht_mcs_mask[sband->band]) return false; if (sdata->rc_has_mcs_mask[sband->band]) memcpy(mcs_mask, sdata->rc_rateidx_mcs_mask[sband->band], IEEE80211_HT_MCS_MASK_LEN); else memset(mcs_mask, 0xff, IEEE80211_HT_MCS_MASK_LEN); if (sdata->rc_has_vht_mcs_mask[sband->band]) memcpy(vht_mask, sdata->rc_rateidx_vht_mcs_mask[sband->band], sizeof(u16) * NL80211_VHT_NSS_MAX); else memset(vht_mask, 0xff, sizeof(u16) * NL80211_VHT_NSS_MAX); if (sta) { __le16 sta_vht_cap; u16 sta_vht_mask[NL80211_VHT_NSS_MAX]; /* Filter out rates that the STA does not support */ *mask &= sta->deflink.supp_rates[sband->band]; for (i = 0; i < IEEE80211_HT_MCS_MASK_LEN; i++) mcs_mask[i] &= sta->deflink.ht_cap.mcs.rx_mask[i]; sta_vht_cap = sta->deflink.vht_cap.vht_mcs.rx_mcs_map; ieee80211_get_vht_mask_from_cap(sta_vht_cap, sta_vht_mask); for (i = 0; i < NL80211_VHT_NSS_MAX; i++) vht_mask[i] &= sta_vht_mask[i]; } return true; } static void rate_control_apply_mask_ratetbl(struct sta_info *sta, struct ieee80211_supported_band *sband, struct ieee80211_sta_rates *rates) { int i; u32 mask; u8 mcs_mask[IEEE80211_HT_MCS_MASK_LEN]; u16 vht_mask[NL80211_VHT_NSS_MAX]; enum nl80211_chan_width chan_width; if (!rate_control_cap_mask(sta->sdata, sband, &sta->sta, &mask, mcs_mask, vht_mask)) return; chan_width = sta->sdata->vif.bss_conf.chanreq.oper.width; for (i = 0; i < IEEE80211_TX_RATE_TABLE_SIZE; i++) { if (rates->rate[i].idx < 0) break; rate_idx_match_mask(&rates->rate[i].idx, &rates->rate[i].flags, sband, chan_width, mask, mcs_mask, vht_mask); } } static void rate_control_apply_mask(struct ieee80211_sub_if_data *sdata, struct ieee80211_sta *sta, struct ieee80211_supported_band *sband, struct ieee80211_tx_rate *rates, int max_rates) { enum nl80211_chan_width chan_width; u8 mcs_mask[IEEE80211_HT_MCS_MASK_LEN]; u32 mask; u16 rate_flags, vht_mask[NL80211_VHT_NSS_MAX]; int i; /* * Try to enforce the rateidx mask the user wanted. skip this if the * default mask (allow all rates) is used to save some processing for * the common case. */ if (!rate_control_cap_mask(sdata, sband, sta, &mask, mcs_mask, vht_mask)) return; /* * Make sure the rate index selected for each TX rate is * included in the configured mask and change the rate indexes * if needed. */ chan_width = sdata->vif.bss_conf.chanreq.oper.width; for (i = 0; i < max_rates; i++) { /* Skip invalid rates */ if (rates[i].idx < 0) break; rate_flags = rates[i].flags; rate_idx_match_mask(&rates[i].idx, &rate_flags, sband, chan_width, mask, mcs_mask, vht_mask); rates[i].flags = rate_flags; } } void ieee80211_get_tx_rates(struct ieee80211_vif *vif, struct ieee80211_sta *sta, struct sk_buff *skb, struct ieee80211_tx_rate *dest, int max_rates) { struct ieee80211_sub_if_data *sdata; struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); struct ieee80211_supported_band *sband; u32 mask = ~0; rate_control_fill_sta_table(sta, info, dest, max_rates); if (!vif) return; sdata = vif_to_sdata(vif); sband = sdata->local->hw.wiphy->bands[info->band]; if (ieee80211_is_tx_data(skb)) rate_control_apply_mask(sdata, sta, sband, dest, max_rates); if (!(info->control.flags & IEEE80211_TX_CTRL_DONT_USE_RATE_MASK)) mask = sdata->rc_rateidx_mask[info->band]; if (dest[0].idx < 0) __rate_control_send_low(&sdata->local->hw, sband, sta, info, mask); if (sta) rate_fixup_ratelist(vif, sband, info, dest, max_rates); } EXPORT_SYMBOL(ieee80211_get_tx_rates); void rate_control_get_rate(struct ieee80211_sub_if_data *sdata, struct sta_info *sta, struct ieee80211_tx_rate_control *txrc) { struct rate_control_ref *ref = sdata->local->rate_ctrl; void *priv_sta = NULL; struct ieee80211_sta *ista = NULL; struct ieee80211_tx_info *info = IEEE80211_SKB_CB(txrc->skb); int i; for (i = 0; i < IEEE80211_TX_MAX_RATES; i++) { info->control.rates[i].idx = -1; info->control.rates[i].flags = 0; info->control.rates[i].count = 0; } if (rate_control_send_low(sta ? &sta->sta : NULL, txrc)) return; if (ieee80211_hw_check(&sdata->local->hw, HAS_RATE_CONTROL)) return; if (sta && test_sta_flag(sta, WLAN_STA_RATE_CONTROL)) { ista = &sta->sta; priv_sta = sta->rate_ctrl_priv; } if (ista) { spin_lock_bh(&sta->rate_ctrl_lock); ref->ops->get_rate(ref->priv, ista, priv_sta, txrc); spin_unlock_bh(&sta->rate_ctrl_lock); } else { rate_control_send_low(NULL, txrc); } if (ieee80211_hw_check(&sdata->local->hw, SUPPORTS_RC_TABLE)) return; ieee80211_get_tx_rates(&sdata->vif, ista, txrc->skb, info->control.rates, ARRAY_SIZE(info->control.rates)); } int rate_control_set_rates(struct ieee80211_hw *hw, struct ieee80211_sta *pubsta, struct ieee80211_sta_rates *rates) { struct sta_info *sta = container_of(pubsta, struct sta_info, sta); struct ieee80211_sta_rates *old; struct ieee80211_supported_band *sband; sband = ieee80211_get_sband(sta->sdata); if (!sband) return -EINVAL; rate_control_apply_mask_ratetbl(sta, sband, rates); /* * mac80211 guarantees that this function will not be called * concurrently, so the following RCU access is safe, even without * extra locking. This can not be checked easily, so we just set * the condition to true. */ old = rcu_dereference_protected(pubsta->rates, true); rcu_assign_pointer(pubsta->rates, rates); if (old) kfree_rcu(old, rcu_head); if (sta->uploaded) drv_sta_rate_tbl_update(hw_to_local(hw), sta->sdata, pubsta); ieee80211_sta_set_expected_throughput(pubsta, sta_get_expected_throughput(sta)); return 0; } EXPORT_SYMBOL(rate_control_set_rates); int ieee80211_init_rate_ctrl_alg(struct ieee80211_local *local, const char *name) { struct rate_control_ref *ref; ASSERT_RTNL(); if (local->open_count) return -EBUSY; if (ieee80211_hw_check(&local->hw, HAS_RATE_CONTROL)) { if (WARN_ON(!local->ops->set_rts_threshold)) return -EINVAL; return 0; } ref = rate_control_alloc(name, local); if (!ref) { wiphy_warn(local->hw.wiphy, "Failed to select rate control algorithm\n"); return -ENOENT; } WARN_ON(local->rate_ctrl); local->rate_ctrl = ref; wiphy_debug(local->hw.wiphy, "Selected rate control algorithm '%s'\n", ref->ops->name); return 0; } void rate_control_deinitialize(struct ieee80211_local *local) { struct rate_control_ref *ref; ref = local->rate_ctrl; if (!ref) return; local->rate_ctrl = NULL; rate_control_free(local, ref); }
17 18 17 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 // SPDX-License-Identifier: GPL-2.0-only /* * linux/lib/crc-ccitt.c */ #include <linux/types.h> #include <linux/module.h> #include <linux/crc-ccitt.h> /* * This mysterious table is just the CRC of each possible byte. It can be * computed using the standard bit-at-a-time methods. The polynomial can * be seen in entry 128, 0x8408. This corresponds to x^0 + x^5 + x^12. * Add the implicit x^16, and you have the standard CRC-CCITT. */ u16 const crc_ccitt_table[256] = { 0x0000, 0x1189, 0x2312, 0x329b, 0x4624, 0x57ad, 0x6536, 0x74bf, 0x8c48, 0x9dc1, 0xaf5a, 0xbed3, 0xca6c, 0xdbe5, 0xe97e, 0xf8f7, 0x1081, 0x0108, 0x3393, 0x221a, 0x56a5, 0x472c, 0x75b7, 0x643e, 0x9cc9, 0x8d40, 0xbfdb, 0xae52, 0xdaed, 0xcb64, 0xf9ff, 0xe876, 0x2102, 0x308b, 0x0210, 0x1399, 0x6726, 0x76af, 0x4434, 0x55bd, 0xad4a, 0xbcc3, 0x8e58, 0x9fd1, 0xeb6e, 0xfae7, 0xc87c, 0xd9f5, 0x3183, 0x200a, 0x1291, 0x0318, 0x77a7, 0x662e, 0x54b5, 0x453c, 0xbdcb, 0xac42, 0x9ed9, 0x8f50, 0xfbef, 0xea66, 0xd8fd, 0xc974, 0x4204, 0x538d, 0x6116, 0x709f, 0x0420, 0x15a9, 0x2732, 0x36bb, 0xce4c, 0xdfc5, 0xed5e, 0xfcd7, 0x8868, 0x99e1, 0xab7a, 0xbaf3, 0x5285, 0x430c, 0x7197, 0x601e, 0x14a1, 0x0528, 0x37b3, 0x263a, 0xdecd, 0xcf44, 0xfddf, 0xec56, 0x98e9, 0x8960, 0xbbfb, 0xaa72, 0x6306, 0x728f, 0x4014, 0x519d, 0x2522, 0x34ab, 0x0630, 0x17b9, 0xef4e, 0xfec7, 0xcc5c, 0xddd5, 0xa96a, 0xb8e3, 0x8a78, 0x9bf1, 0x7387, 0x620e, 0x5095, 0x411c, 0x35a3, 0x242a, 0x16b1, 0x0738, 0xffcf, 0xee46, 0xdcdd, 0xcd54, 0xb9eb, 0xa862, 0x9af9, 0x8b70, 0x8408, 0x9581, 0xa71a, 0xb693, 0xc22c, 0xd3a5, 0xe13e, 0xf0b7, 0x0840, 0x19c9, 0x2b52, 0x3adb, 0x4e64, 0x5fed, 0x6d76, 0x7cff, 0x9489, 0x8500, 0xb79b, 0xa612, 0xd2ad, 0xc324, 0xf1bf, 0xe036, 0x18c1, 0x0948, 0x3bd3, 0x2a5a, 0x5ee5, 0x4f6c, 0x7df7, 0x6c7e, 0xa50a, 0xb483, 0x8618, 0x9791, 0xe32e, 0xf2a7, 0xc03c, 0xd1b5, 0x2942, 0x38cb, 0x0a50, 0x1bd9, 0x6f66, 0x7eef, 0x4c74, 0x5dfd, 0xb58b, 0xa402, 0x9699, 0x8710, 0xf3af, 0xe226, 0xd0bd, 0xc134, 0x39c3, 0x284a, 0x1ad1, 0x0b58, 0x7fe7, 0x6e6e, 0x5cf5, 0x4d7c, 0xc60c, 0xd785, 0xe51e, 0xf497, 0x8028, 0x91a1, 0xa33a, 0xb2b3, 0x4a44, 0x5bcd, 0x6956, 0x78df, 0x0c60, 0x1de9, 0x2f72, 0x3efb, 0xd68d, 0xc704, 0xf59f, 0xe416, 0x90a9, 0x8120, 0xb3bb, 0xa232, 0x5ac5, 0x4b4c, 0x79d7, 0x685e, 0x1ce1, 0x0d68, 0x3ff3, 0x2e7a, 0xe70e, 0xf687, 0xc41c, 0xd595, 0xa12a, 0xb0a3, 0x8238, 0x93b1, 0x6b46, 0x7acf, 0x4854, 0x59dd, 0x2d62, 0x3ceb, 0x0e70, 0x1ff9, 0xf78f, 0xe606, 0xd49d, 0xc514, 0xb1ab, 0xa022, 0x92b9, 0x8330, 0x7bc7, 0x6a4e, 0x58d5, 0x495c, 0x3de3, 0x2c6a, 0x1ef1, 0x0f78 }; EXPORT_SYMBOL(crc_ccitt_table); /** * crc_ccitt - recompute the CRC (CRC-CCITT variant) for the data * buffer * @crc: previous CRC value * @buffer: data pointer * @len: number of bytes in the buffer */ u16 crc_ccitt(u16 crc, u8 const *buffer, size_t len) { while (len--) crc = crc_ccitt_byte(crc, *buffer++); return crc; } EXPORT_SYMBOL(crc_ccitt); MODULE_DESCRIPTION("CRC-CCITT calculations"); MODULE_LICENSE("GPL");
1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 // SPDX-License-Identifier: GPL-2.0-or-later /* * Abilis Systems Single DVB-T Receiver * Copyright (C) 2008 Pierrick Hascoet <pierrick.hascoet@abilis.com> * Copyright (C) 2010 Devin Heitmueller <dheitmueller@kernellabs.com> */ #include <media/dvb_frontend.h> #include "as102_fe.h" struct as102_state { struct dvb_frontend frontend; struct as10x_demod_stats demod_stats; const struct as102_fe_ops *ops; void *priv; uint8_t elna_cfg; /* signal strength */ uint16_t signal_strength; /* bit error rate */ uint32_t ber; }; static uint8_t as102_fe_get_code_rate(enum fe_code_rate arg) { uint8_t c; switch (arg) { case FEC_1_2: c = CODE_RATE_1_2; break; case FEC_2_3: c = CODE_RATE_2_3; break; case FEC_3_4: c = CODE_RATE_3_4; break; case FEC_5_6: c = CODE_RATE_5_6; break; case FEC_7_8: c = CODE_RATE_7_8; break; default: c = CODE_RATE_UNKNOWN; break; } return c; } static int as102_fe_set_frontend(struct dvb_frontend *fe) { struct as102_state *state = fe->demodulator_priv; struct dtv_frontend_properties *c = &fe->dtv_property_cache; struct as10x_tune_args tune_args = { 0 }; /* set frequency */ tune_args.freq = c->frequency / 1000; /* fix interleaving_mode */ tune_args.interleaving_mode = INTLV_NATIVE; switch (c->bandwidth_hz) { case 8000000: tune_args.bandwidth = BW_8_MHZ; break; case 7000000: tune_args.bandwidth = BW_7_MHZ; break; case 6000000: tune_args.bandwidth = BW_6_MHZ; break; default: tune_args.bandwidth = BW_8_MHZ; } switch (c->guard_interval) { case GUARD_INTERVAL_1_32: tune_args.guard_interval = GUARD_INT_1_32; break; case GUARD_INTERVAL_1_16: tune_args.guard_interval = GUARD_INT_1_16; break; case GUARD_INTERVAL_1_8: tune_args.guard_interval = GUARD_INT_1_8; break; case GUARD_INTERVAL_1_4: tune_args.guard_interval = GUARD_INT_1_4; break; case GUARD_INTERVAL_AUTO: default: tune_args.guard_interval = GUARD_UNKNOWN; break; } switch (c->modulation) { case QPSK: tune_args.modulation = CONST_QPSK; break; case QAM_16: tune_args.modulation = CONST_QAM16; break; case QAM_64: tune_args.modulation = CONST_QAM64; break; default: tune_args.modulation = CONST_UNKNOWN; break; } switch (c->transmission_mode) { case TRANSMISSION_MODE_2K: tune_args.transmission_mode = TRANS_MODE_2K; break; case TRANSMISSION_MODE_8K: tune_args.transmission_mode = TRANS_MODE_8K; break; default: tune_args.transmission_mode = TRANS_MODE_UNKNOWN; } switch (c->hierarchy) { case HIERARCHY_NONE: tune_args.hierarchy = HIER_NONE; break; case HIERARCHY_1: tune_args.hierarchy = HIER_ALPHA_1; break; case HIERARCHY_2: tune_args.hierarchy = HIER_ALPHA_2; break; case HIERARCHY_4: tune_args.hierarchy = HIER_ALPHA_4; break; case HIERARCHY_AUTO: tune_args.hierarchy = HIER_UNKNOWN; break; } pr_debug("as102: tuner parameters: freq: %d bw: 0x%02x gi: 0x%02x\n", c->frequency, tune_args.bandwidth, tune_args.guard_interval); /* * Detect a hierarchy selection * if HP/LP are both set to FEC_NONE, HP will be selected. */ if ((tune_args.hierarchy != HIER_NONE) && ((c->code_rate_LP == FEC_NONE) || (c->code_rate_HP == FEC_NONE))) { if (c->code_rate_LP == FEC_NONE) { tune_args.hier_select = HIER_HIGH_PRIORITY; tune_args.code_rate = as102_fe_get_code_rate(c->code_rate_HP); } if (c->code_rate_HP == FEC_NONE) { tune_args.hier_select = HIER_LOW_PRIORITY; tune_args.code_rate = as102_fe_get_code_rate(c->code_rate_LP); } pr_debug("as102: \thierarchy: 0x%02x selected: %s code_rate_%s: 0x%02x\n", tune_args.hierarchy, tune_args.hier_select == HIER_HIGH_PRIORITY ? "HP" : "LP", tune_args.hier_select == HIER_HIGH_PRIORITY ? "HP" : "LP", tune_args.code_rate); } else { tune_args.code_rate = as102_fe_get_code_rate(c->code_rate_HP); } /* Set frontend arguments */ return state->ops->set_tune(state->priv, &tune_args); } static int as102_fe_get_frontend(struct dvb_frontend *fe, struct dtv_frontend_properties *c) { struct as102_state *state = fe->demodulator_priv; int ret = 0; struct as10x_tps tps = { 0 }; /* send abilis command: GET_TPS */ ret = state->ops->get_tps(state->priv, &tps); if (ret < 0) return ret; /* extract constellation */ switch (tps.modulation) { case CONST_QPSK: c->modulation = QPSK; break; case CONST_QAM16: c->modulation = QAM_16; break; case CONST_QAM64: c->modulation = QAM_64; break; } /* extract hierarchy */ switch (tps.hierarchy) { case HIER_NONE: c->hierarchy = HIERARCHY_NONE; break; case HIER_ALPHA_1: c->hierarchy = HIERARCHY_1; break; case HIER_ALPHA_2: c->hierarchy = HIERARCHY_2; break; case HIER_ALPHA_4: c->hierarchy = HIERARCHY_4; break; } /* extract code rate HP */ switch (tps.code_rate_HP) { case CODE_RATE_1_2: c->code_rate_HP = FEC_1_2; break; case CODE_RATE_2_3: c->code_rate_HP = FEC_2_3; break; case CODE_RATE_3_4: c->code_rate_HP = FEC_3_4; break; case CODE_RATE_5_6: c->code_rate_HP = FEC_5_6; break; case CODE_RATE_7_8: c->code_rate_HP = FEC_7_8; break; } /* extract code rate LP */ switch (tps.code_rate_LP) { case CODE_RATE_1_2: c->code_rate_LP = FEC_1_2; break; case CODE_RATE_2_3: c->code_rate_LP = FEC_2_3; break; case CODE_RATE_3_4: c->code_rate_LP = FEC_3_4; break; case CODE_RATE_5_6: c->code_rate_LP = FEC_5_6; break; case CODE_RATE_7_8: c->code_rate_LP = FEC_7_8; break; } /* extract guard interval */ switch (tps.guard_interval) { case GUARD_INT_1_32: c->guard_interval = GUARD_INTERVAL_1_32; break; case GUARD_INT_1_16: c->guard_interval = GUARD_INTERVAL_1_16; break; case GUARD_INT_1_8: c->guard_interval = GUARD_INTERVAL_1_8; break; case GUARD_INT_1_4: c->guard_interval = GUARD_INTERVAL_1_4; break; } /* extract transmission mode */ switch (tps.transmission_mode) { case TRANS_MODE_2K: c->transmission_mode = TRANSMISSION_MODE_2K; break; case TRANS_MODE_8K: c->transmission_mode = TRANSMISSION_MODE_8K; break; } return 0; } static int as102_fe_get_tune_settings(struct dvb_frontend *fe, struct dvb_frontend_tune_settings *settings) { settings->min_delay_ms = 1000; return 0; } static int as102_fe_read_status(struct dvb_frontend *fe, enum fe_status *status) { int ret = 0; struct as102_state *state = fe->demodulator_priv; struct as10x_tune_status tstate = { 0 }; /* send abilis command: GET_TUNE_STATUS */ ret = state->ops->get_status(state->priv, &tstate); if (ret < 0) return ret; state->signal_strength = tstate.signal_strength; state->ber = tstate.BER; switch (tstate.tune_state) { case TUNE_STATUS_SIGNAL_DVB_OK: *status = FE_HAS_SIGNAL | FE_HAS_CARRIER; break; case TUNE_STATUS_STREAM_DETECTED: *status = FE_HAS_SIGNAL | FE_HAS_CARRIER | FE_HAS_SYNC | FE_HAS_VITERBI; break; case TUNE_STATUS_STREAM_TUNED: *status = FE_HAS_SIGNAL | FE_HAS_CARRIER | FE_HAS_SYNC | FE_HAS_LOCK | FE_HAS_VITERBI; break; default: *status = TUNE_STATUS_NOT_TUNED; } pr_debug("as102: tuner status: 0x%02x, strength %d, per: %d, ber: %d\n", tstate.tune_state, tstate.signal_strength, tstate.PER, tstate.BER); if (!(*status & FE_HAS_LOCK)) { memset(&state->demod_stats, 0, sizeof(state->demod_stats)); return 0; } ret = state->ops->get_stats(state->priv, &state->demod_stats); if (ret < 0) memset(&state->demod_stats, 0, sizeof(state->demod_stats)); return ret; } /* * Note: * - in AS102 SNR=MER * - the SNR will be returned in linear terms, i.e. not in dB * - the accuracy equals ±2dB for a SNR range from 4dB to 30dB * - the accuracy is >2dB for SNR values outside this range */ static int as102_fe_read_snr(struct dvb_frontend *fe, u16 *snr) { struct as102_state *state = fe->demodulator_priv; *snr = state->demod_stats.mer; return 0; } static int as102_fe_read_ber(struct dvb_frontend *fe, u32 *ber) { struct as102_state *state = fe->demodulator_priv; *ber = state->ber; return 0; } static int as102_fe_read_signal_strength(struct dvb_frontend *fe, u16 *strength) { struct as102_state *state = fe->demodulator_priv; *strength = (((0xffff * 400) * state->signal_strength + 41000) * 2); return 0; } static int as102_fe_read_ucblocks(struct dvb_frontend *fe, u32 *ucblocks) { struct as102_state *state = fe->demodulator_priv; if (state->demod_stats.has_started) *ucblocks = state->demod_stats.bad_frame_count; else *ucblocks = 0; return 0; } static int as102_fe_ts_bus_ctrl(struct dvb_frontend *fe, int acquire) { struct as102_state *state = fe->demodulator_priv; return state->ops->stream_ctrl(state->priv, acquire, state->elna_cfg); } static void as102_fe_release(struct dvb_frontend *fe) { struct as102_state *state = fe->demodulator_priv; kfree(state); } static const struct dvb_frontend_ops as102_fe_ops = { .delsys = { SYS_DVBT }, .info = { .name = "Abilis AS102 DVB-T", .frequency_min_hz = 174 * MHz, .frequency_max_hz = 862 * MHz, .frequency_stepsize_hz = 166667, .caps = FE_CAN_INVERSION_AUTO | FE_CAN_FEC_1_2 | FE_CAN_FEC_2_3 | FE_CAN_FEC_3_4 | FE_CAN_FEC_5_6 | FE_CAN_FEC_7_8 | FE_CAN_FEC_AUTO | FE_CAN_QAM_16 | FE_CAN_QAM_64 | FE_CAN_QPSK | FE_CAN_QAM_AUTO | FE_CAN_TRANSMISSION_MODE_AUTO | FE_CAN_GUARD_INTERVAL_AUTO | FE_CAN_HIERARCHY_AUTO | FE_CAN_RECOVER | FE_CAN_MUTE_TS }, .set_frontend = as102_fe_set_frontend, .get_frontend = as102_fe_get_frontend, .get_tune_settings = as102_fe_get_tune_settings, .read_status = as102_fe_read_status, .read_snr = as102_fe_read_snr, .read_ber = as102_fe_read_ber, .read_signal_strength = as102_fe_read_signal_strength, .read_ucblocks = as102_fe_read_ucblocks, .ts_bus_ctrl = as102_fe_ts_bus_ctrl, .release = as102_fe_release, }; struct dvb_frontend *as102_attach(const char *name, const struct as102_fe_ops *ops, void *priv, uint8_t elna_cfg) { struct as102_state *state; struct dvb_frontend *fe; state = kzalloc(sizeof(*state), GFP_KERNEL); if (!state) return NULL; fe = &state->frontend; fe->demodulator_priv = state; state->ops = ops; state->priv = priv; state->elna_cfg = elna_cfg; /* init frontend callback ops */ memcpy(&fe->ops, &as102_fe_ops, sizeof(struct dvb_frontend_ops)); strscpy(fe->ops.info.name, name, sizeof(fe->ops.info.name)); return fe; } EXPORT_SYMBOL_GPL(as102_attach); MODULE_DESCRIPTION("as102-fe"); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Pierrick Hascoet <pierrick.hascoet@abilis.com>");
20 44 44 44 44 44 44 20 44 20 4 4 43 43 43 43 42 44 44 44 8 8 8 8 8 5 8 5 4 4 6 6 6 8 8 8 8 8 42 42 44 44 44 45 42 42 42 42 45 45 9 6 5 5 5 5 4 4 3 4 2 2 2 2 5 5 3 2 2 1 1 34 34 34 32 4 32 32 32 32 32 5 33 8 8 8 8 8 8 6 3 3 8 8 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 // SPDX-License-Identifier: GPL-2.0 /* Copyright (c) 2017 - 2018 Covalent IO, Inc. http://covalent.io */ #include <linux/skmsg.h> #include <linux/skbuff.h> #include <linux/scatterlist.h> #include <net/sock.h> #include <net/tcp.h> #include <net/tls.h> #include <trace/events/sock.h> static bool sk_msg_try_coalesce_ok(struct sk_msg *msg, int elem_first_coalesce) { if (msg->sg.end > msg->sg.start && elem_first_coalesce < msg->sg.end) return true; if (msg->sg.end < msg->sg.start && (elem_first_coalesce > msg->sg.start || elem_first_coalesce < msg->sg.end)) return true; return false; } int sk_msg_alloc(struct sock *sk, struct sk_msg *msg, int len, int elem_first_coalesce) { struct page_frag *pfrag = sk_page_frag(sk); u32 osize = msg->sg.size; int ret = 0; len -= msg->sg.size; while (len > 0) { struct scatterlist *sge; u32 orig_offset; int use, i; if (!sk_page_frag_refill(sk, pfrag)) { ret = -ENOMEM; goto msg_trim; } orig_offset = pfrag->offset; use = min_t(int, len, pfrag->size - orig_offset); if (!sk_wmem_schedule(sk, use)) { ret = -ENOMEM; goto msg_trim; } i = msg->sg.end; sk_msg_iter_var_prev(i); sge = &msg->sg.data[i]; if (sk_msg_try_coalesce_ok(msg, elem_first_coalesce) && sg_page(sge) == pfrag->page && sge->offset + sge->length == orig_offset) { sge->length += use; } else { if (sk_msg_full(msg)) { ret = -ENOSPC; break; } sge = &msg->sg.data[msg->sg.end]; sg_unmark_end(sge); sg_set_page(sge, pfrag->page, use, orig_offset); get_page(pfrag->page); sk_msg_iter_next(msg, end); } sk_mem_charge(sk, use); msg->sg.size += use; pfrag->offset += use; len -= use; } return ret; msg_trim: sk_msg_trim(sk, msg, osize); return ret; } EXPORT_SYMBOL_GPL(sk_msg_alloc); int sk_msg_clone(struct sock *sk, struct sk_msg *dst, struct sk_msg *src, u32 off, u32 len) { int i = src->sg.start; struct scatterlist *sge = sk_msg_elem(src, i); struct scatterlist *sgd = NULL; u32 sge_len, sge_off; while (off) { if (sge->length > off) break; off -= sge->length; sk_msg_iter_var_next(i); if (i == src->sg.end && off) return -ENOSPC; sge = sk_msg_elem(src, i); } while (len) { sge_len = sge->length - off; if (sge_len > len) sge_len = len; if (dst->sg.end) sgd = sk_msg_elem(dst, dst->sg.end - 1); if (sgd && (sg_page(sge) == sg_page(sgd)) && (sg_virt(sge) + off == sg_virt(sgd) + sgd->length)) { sgd->length += sge_len; dst->sg.size += sge_len; } else if (!sk_msg_full(dst)) { sge_off = sge->offset + off; sk_msg_page_add(dst, sg_page(sge), sge_len, sge_off); } else { return -ENOSPC; } off = 0; len -= sge_len; sk_mem_charge(sk, sge_len); sk_msg_iter_var_next(i); if (i == src->sg.end && len) return -ENOSPC; sge = sk_msg_elem(src, i); } return 0; } EXPORT_SYMBOL_GPL(sk_msg_clone); void sk_msg_return_zero(struct sock *sk, struct sk_msg *msg, int bytes) { int i = msg->sg.start; do { struct scatterlist *sge = sk_msg_elem(msg, i); if (bytes < sge->length) { sge->length -= bytes; sge->offset += bytes; sk_mem_uncharge(sk, bytes); break; } sk_mem_uncharge(sk, sge->length); bytes -= sge->length; sge->length = 0; sge->offset = 0; sk_msg_iter_var_next(i); } while (bytes && i != msg->sg.end); msg->sg.start = i; } EXPORT_SYMBOL_GPL(sk_msg_return_zero); void sk_msg_return(struct sock *sk, struct sk_msg *msg, int bytes) { int i = msg->sg.start; do { struct scatterlist *sge = &msg->sg.data[i]; int uncharge = (bytes < sge->length) ? bytes : sge->length; sk_mem_uncharge(sk, uncharge); bytes -= uncharge; sk_msg_iter_var_next(i); } while (i != msg->sg.end); } EXPORT_SYMBOL_GPL(sk_msg_return); static int sk_msg_free_elem(struct sock *sk, struct sk_msg *msg, u32 i, bool charge) { struct scatterlist *sge = sk_msg_elem(msg, i); u32 len = sge->length; /* When the skb owns the memory we free it from consume_skb path. */ if (!msg->skb) { if (charge) sk_mem_uncharge(sk, len); put_page(sg_page(sge)); } memset(sge, 0, sizeof(*sge)); return len; } static int __sk_msg_free(struct sock *sk, struct sk_msg *msg, u32 i, bool charge) { struct scatterlist *sge = sk_msg_elem(msg, i); int freed = 0; while (msg->sg.size) { msg->sg.size -= sge->length; freed += sk_msg_free_elem(sk, msg, i, charge); sk_msg_iter_var_next(i); sk_msg_check_to_free(msg, i, msg->sg.size); sge = sk_msg_elem(msg, i); } consume_skb(msg->skb); sk_msg_init(msg); return freed; } int sk_msg_free_nocharge(struct sock *sk, struct sk_msg *msg) { return __sk_msg_free(sk, msg, msg->sg.start, false); } EXPORT_SYMBOL_GPL(sk_msg_free_nocharge); int sk_msg_free(struct sock *sk, struct sk_msg *msg) { return __sk_msg_free(sk, msg, msg->sg.start, true); } EXPORT_SYMBOL_GPL(sk_msg_free); static void __sk_msg_free_partial(struct sock *sk, struct sk_msg *msg, u32 bytes, bool charge) { struct scatterlist *sge; u32 i = msg->sg.start; while (bytes) { sge = sk_msg_elem(msg, i); if (!sge->length) break; if (bytes < sge->length) { if (charge) sk_mem_uncharge(sk, bytes); sge->length -= bytes; sge->offset += bytes; msg->sg.size -= bytes; break; } msg->sg.size -= sge->length; bytes -= sge->length; sk_msg_free_elem(sk, msg, i, charge); sk_msg_iter_var_next(i); sk_msg_check_to_free(msg, i, bytes); } msg->sg.start = i; } void sk_msg_free_partial(struct sock *sk, struct sk_msg *msg, u32 bytes) { __sk_msg_free_partial(sk, msg, bytes, true); } EXPORT_SYMBOL_GPL(sk_msg_free_partial); void sk_msg_free_partial_nocharge(struct sock *sk, struct sk_msg *msg, u32 bytes) { __sk_msg_free_partial(sk, msg, bytes, false); } void sk_msg_trim(struct sock *sk, struct sk_msg *msg, int len) { int trim = msg->sg.size - len; u32 i = msg->sg.end; if (trim <= 0) { WARN_ON(trim < 0); return; } sk_msg_iter_var_prev(i); msg->sg.size = len; while (msg->sg.data[i].length && trim >= msg->sg.data[i].length) { trim -= msg->sg.data[i].length; sk_msg_free_elem(sk, msg, i, true); sk_msg_iter_var_prev(i); if (!trim) goto out; } msg->sg.data[i].length -= trim; sk_mem_uncharge(sk, trim); /* Adjust copybreak if it falls into the trimmed part of last buf */ if (msg->sg.curr == i && msg->sg.copybreak > msg->sg.data[i].length) msg->sg.copybreak = msg->sg.data[i].length; out: sk_msg_iter_var_next(i); msg->sg.end = i; /* If we trim data a full sg elem before curr pointer update * copybreak and current so that any future copy operations * start at new copy location. * However trimmed data that has not yet been used in a copy op * does not require an update. */ if (!msg->sg.size) { msg->sg.curr = msg->sg.start; msg->sg.copybreak = 0; } else if (sk_msg_iter_dist(msg->sg.start, msg->sg.curr) >= sk_msg_iter_dist(msg->sg.start, msg->sg.end)) { sk_msg_iter_var_prev(i); msg->sg.curr = i; msg->sg.copybreak = msg->sg.data[i].length; } } EXPORT_SYMBOL_GPL(sk_msg_trim); int sk_msg_zerocopy_from_iter(struct sock *sk, struct iov_iter *from, struct sk_msg *msg, u32 bytes) { int i, maxpages, ret = 0, num_elems = sk_msg_elem_used(msg); const int to_max_pages = MAX_MSG_FRAGS; struct page *pages[MAX_MSG_FRAGS]; ssize_t orig, copied, use, offset; orig = msg->sg.size; while (bytes > 0) { i = 0; maxpages = to_max_pages - num_elems; if (maxpages == 0) { ret = -EFAULT; goto out; } copied = iov_iter_get_pages2(from, pages, bytes, maxpages, &offset); if (copied <= 0) { ret = -EFAULT; goto out; } bytes -= copied; msg->sg.size += copied; while (copied) { use = min_t(int, copied, PAGE_SIZE - offset); sg_set_page(&msg->sg.data[msg->sg.end], pages[i], use, offset); sg_unmark_end(&msg->sg.data[msg->sg.end]); sk_mem_charge(sk, use); offset = 0; copied -= use; sk_msg_iter_next(msg, end); num_elems++; i++; } /* When zerocopy is mixed with sk_msg_*copy* operations we * may have a copybreak set in this case clear and prefer * zerocopy remainder when possible. */ msg->sg.copybreak = 0; msg->sg.curr = msg->sg.end; } out: /* Revert iov_iter updates, msg will need to use 'trim' later if it * also needs to be cleared. */ if (ret) iov_iter_revert(from, msg->sg.size - orig); return ret; } EXPORT_SYMBOL_GPL(sk_msg_zerocopy_from_iter); int sk_msg_memcopy_from_iter(struct sock *sk, struct iov_iter *from, struct sk_msg *msg, u32 bytes) { int ret = -ENOSPC, i = msg->sg.curr; u32 copy, buf_size, copied = 0; struct scatterlist *sge; void *to; do { sge = sk_msg_elem(msg, i); /* This is possible if a trim operation shrunk the buffer */ if (msg->sg.copybreak >= sge->length) { msg->sg.copybreak = 0; sk_msg_iter_var_next(i); if (i == msg->sg.end) break; sge = sk_msg_elem(msg, i); } buf_size = sge->length - msg->sg.copybreak; copy = (buf_size > bytes) ? bytes : buf_size; to = sg_virt(sge) + msg->sg.copybreak; msg->sg.copybreak += copy; if (sk->sk_route_caps & NETIF_F_NOCACHE_COPY) ret = copy_from_iter_nocache(to, copy, from); else ret = copy_from_iter(to, copy, from); if (ret != copy) { ret = -EFAULT; goto out; } bytes -= copy; copied += copy; if (!bytes) break; msg->sg.copybreak = 0; sk_msg_iter_var_next(i); } while (i != msg->sg.end); out: msg->sg.curr = i; return (ret < 0) ? ret : copied; } EXPORT_SYMBOL_GPL(sk_msg_memcopy_from_iter); /* Receive sk_msg from psock->ingress_msg to @msg. */ int sk_msg_recvmsg(struct sock *sk, struct sk_psock *psock, struct msghdr *msg, int len, int flags) { struct iov_iter *iter = &msg->msg_iter; int peek = flags & MSG_PEEK; struct sk_msg *msg_rx; int i, copied = 0; msg_rx = sk_psock_peek_msg(psock); while (copied != len) { struct scatterlist *sge; if (unlikely(!msg_rx)) break; i = msg_rx->sg.start; do { struct page *page; int copy; sge = sk_msg_elem(msg_rx, i); copy = sge->length; page = sg_page(sge); if (copied + copy > len) copy = len - copied; if (copy) copy = copy_page_to_iter(page, sge->offset, copy, iter); if (!copy) { copied = copied ? copied : -EFAULT; goto out; } copied += copy; if (likely(!peek)) { sge->offset += copy; sge->length -= copy; if (!msg_rx->skb) { sk_mem_uncharge(sk, copy); atomic_sub(copy, &sk->sk_rmem_alloc); } msg_rx->sg.size -= copy; if (!sge->length) { sk_msg_iter_var_next(i); if (!msg_rx->skb) put_page(page); } } else { /* Lets not optimize peek case if copy_page_to_iter * didn't copy the entire length lets just break. */ if (copy != sge->length) goto out; sk_msg_iter_var_next(i); } if (copied == len) break; } while ((i != msg_rx->sg.end) && !sg_is_last(sge)); if (unlikely(peek)) { msg_rx = sk_psock_next_msg(psock, msg_rx); if (!msg_rx) break; continue; } msg_rx->sg.start = i; if (!sge->length && (i == msg_rx->sg.end || sg_is_last(sge))) { msg_rx = sk_psock_dequeue_msg(psock); kfree_sk_msg(msg_rx); } msg_rx = sk_psock_peek_msg(psock); } out: return copied; } EXPORT_SYMBOL_GPL(sk_msg_recvmsg); bool sk_msg_is_readable(struct sock *sk) { struct sk_psock *psock; bool empty = true; rcu_read_lock(); psock = sk_psock(sk); if (likely(psock)) empty = list_empty(&psock->ingress_msg); rcu_read_unlock(); return !empty; } EXPORT_SYMBOL_GPL(sk_msg_is_readable); static struct sk_msg *alloc_sk_msg(gfp_t gfp) { struct sk_msg *msg; msg = kzalloc(sizeof(*msg), gfp | __GFP_NOWARN); if (unlikely(!msg)) return NULL; sg_init_marker(msg->sg.data, NR_MSG_FRAG_IDS); return msg; } static struct sk_msg *sk_psock_create_ingress_msg(struct sock *sk, struct sk_buff *skb) { if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf) return NULL; if (!sk_rmem_schedule(sk, skb, skb->truesize)) return NULL; return alloc_sk_msg(GFP_KERNEL); } static int sk_psock_skb_ingress_enqueue(struct sk_buff *skb, u32 off, u32 len, struct sk_psock *psock, struct sock *sk, struct sk_msg *msg) { int num_sge, copied; num_sge = skb_to_sgvec(skb, msg->sg.data, off, len); if (num_sge < 0) { /* skb linearize may fail with ENOMEM, but lets simply try again * later if this happens. Under memory pressure we don't want to * drop the skb. We need to linearize the skb so that the mapping * in skb_to_sgvec can not error. */ if (skb_linearize(skb)) return -EAGAIN; num_sge = skb_to_sgvec(skb, msg->sg.data, off, len); if (unlikely(num_sge < 0)) return num_sge; } copied = len; msg->sg.start = 0; msg->sg.size = copied; msg->sg.end = num_sge; msg->skb = skb; sk_psock_queue_msg(psock, msg); sk_psock_data_ready(sk, psock); return copied; } static int sk_psock_skb_ingress_self(struct sk_psock *psock, struct sk_buff *skb, u32 off, u32 len); static int sk_psock_skb_ingress(struct sk_psock *psock, struct sk_buff *skb, u32 off, u32 len) { struct sock *sk = psock->sk; struct sk_msg *msg; int err; /* If we are receiving on the same sock skb->sk is already assigned, * skip memory accounting and owner transition seeing it already set * correctly. */ if (unlikely(skb->sk == sk)) return sk_psock_skb_ingress_self(psock, skb, off, len); msg = sk_psock_create_ingress_msg(sk, skb); if (!msg) return -EAGAIN; /* This will transition ownership of the data from the socket where * the BPF program was run initiating the redirect to the socket * we will eventually receive this data on. The data will be released * from skb_consume found in __tcp_bpf_recvmsg() after its been copied * into user buffers. */ skb_set_owner_r(skb, sk); err = sk_psock_skb_ingress_enqueue(skb, off, len, psock, sk, msg); if (err < 0) kfree(msg); return err; } /* Puts an skb on the ingress queue of the socket already assigned to the * skb. In this case we do not need to check memory limits or skb_set_owner_r * because the skb is already accounted for here. */ static int sk_psock_skb_ingress_self(struct sk_psock *psock, struct sk_buff *skb, u32 off, u32 len) { struct sk_msg *msg = alloc_sk_msg(GFP_ATOMIC); struct sock *sk = psock->sk; int err; if (unlikely(!msg)) return -EAGAIN; skb_set_owner_r(skb, sk); err = sk_psock_skb_ingress_enqueue(skb, off, len, psock, sk, msg); if (err < 0) kfree(msg); return err; } static int sk_psock_handle_skb(struct sk_psock *psock, struct sk_buff *skb, u32 off, u32 len, bool ingress) { int err = 0; if (!ingress) { if (!sock_writeable(psock->sk)) return -EAGAIN; return skb_send_sock(psock->sk, skb, off, len); } skb_get(skb); err = sk_psock_skb_ingress(psock, skb, off, len); if (err < 0) kfree_skb(skb); return err; } static void sk_psock_skb_state(struct sk_psock *psock, struct sk_psock_work_state *state, int len, int off) { spin_lock_bh(&psock->ingress_lock); if (sk_psock_test_state(psock, SK_PSOCK_TX_ENABLED)) { state->len = len; state->off = off; } spin_unlock_bh(&psock->ingress_lock); } static void sk_psock_backlog(struct work_struct *work) { struct delayed_work *dwork = to_delayed_work(work); struct sk_psock *psock = container_of(dwork, struct sk_psock, work); struct sk_psock_work_state *state = &psock->work_state; struct sk_buff *skb = NULL; u32 len = 0, off = 0; bool ingress; int ret; mutex_lock(&psock->work_mutex); if (unlikely(state->len)) { len = state->len; off = state->off; } while ((skb = skb_peek(&psock->ingress_skb))) { len = skb->len; off = 0; if (skb_bpf_strparser(skb)) { struct strp_msg *stm = strp_msg(skb); off = stm->offset; len = stm->full_len; } ingress = skb_bpf_ingress(skb); skb_bpf_redirect_clear(skb); do { ret = -EIO; if (!sock_flag(psock->sk, SOCK_DEAD)) ret = sk_psock_handle_skb(psock, skb, off, len, ingress); if (ret <= 0) { if (ret == -EAGAIN) { sk_psock_skb_state(psock, state, len, off); /* Delay slightly to prioritize any * other work that might be here. */ if (sk_psock_test_state(psock, SK_PSOCK_TX_ENABLED)) schedule_delayed_work(&psock->work, 1); goto end; } /* Hard errors break pipe and stop xmit. */ sk_psock_report_error(psock, ret ? -ret : EPIPE); sk_psock_clear_state(psock, SK_PSOCK_TX_ENABLED); goto end; } off += ret; len -= ret; } while (len); skb = skb_dequeue(&psock->ingress_skb); kfree_skb(skb); } end: mutex_unlock(&psock->work_mutex); } struct sk_psock *sk_psock_init(struct sock *sk, int node) { struct sk_psock *psock; struct proto *prot; write_lock_bh(&sk->sk_callback_lock); if (sk_is_inet(sk) && inet_csk_has_ulp(sk)) { psock = ERR_PTR(-EINVAL); goto out; } if (sk->sk_user_data) { psock = ERR_PTR(-EBUSY); goto out; } psock = kzalloc_node(sizeof(*psock), GFP_ATOMIC | __GFP_NOWARN, node); if (!psock) { psock = ERR_PTR(-ENOMEM); goto out; } prot = READ_ONCE(sk->sk_prot); psock->sk = sk; psock->eval = __SK_NONE; psock->sk_proto = prot; psock->saved_unhash = prot->unhash; psock->saved_destroy = prot->destroy; psock->saved_close = prot->close; psock->saved_write_space = sk->sk_write_space; INIT_LIST_HEAD(&psock->link); spin_lock_init(&psock->link_lock); INIT_DELAYED_WORK(&psock->work, sk_psock_backlog); mutex_init(&psock->work_mutex); INIT_LIST_HEAD(&psock->ingress_msg); spin_lock_init(&psock->ingress_lock); skb_queue_head_init(&psock->ingress_skb); sk_psock_set_state(psock, SK_PSOCK_TX_ENABLED); refcount_set(&psock->refcnt, 1); __rcu_assign_sk_user_data_with_flags(sk, psock, SK_USER_DATA_NOCOPY | SK_USER_DATA_PSOCK); sock_hold(sk); out: write_unlock_bh(&sk->sk_callback_lock); return psock; } EXPORT_SYMBOL_GPL(sk_psock_init); struct sk_psock_link *sk_psock_link_pop(struct sk_psock *psock) { struct sk_psock_link *link; spin_lock_bh(&psock->link_lock); link = list_first_entry_or_null(&psock->link, struct sk_psock_link, list); if (link) list_del(&link->list); spin_unlock_bh(&psock->link_lock); return link; } static void __sk_psock_purge_ingress_msg(struct sk_psock *psock) { struct sk_msg *msg, *tmp; list_for_each_entry_safe(msg, tmp, &psock->ingress_msg, list) { list_del(&msg->list); if (!msg->skb) atomic_sub(msg->sg.size, &psock->sk->sk_rmem_alloc); sk_msg_free(psock->sk, msg); kfree(msg); } } static void __sk_psock_zap_ingress(struct sk_psock *psock) { struct sk_buff *skb; while ((skb = skb_dequeue(&psock->ingress_skb)) != NULL) { skb_bpf_redirect_clear(skb); sock_drop(psock->sk, skb); } __sk_psock_purge_ingress_msg(psock); } static void sk_psock_link_destroy(struct sk_psock *psock) { struct sk_psock_link *link, *tmp; list_for_each_entry_safe(link, tmp, &psock->link, list) { list_del(&link->list); sk_psock_free_link(link); } } void sk_psock_stop(struct sk_psock *psock) { spin_lock_bh(&psock->ingress_lock); sk_psock_clear_state(psock, SK_PSOCK_TX_ENABLED); sk_psock_cork_free(psock); spin_unlock_bh(&psock->ingress_lock); } static void sk_psock_done_strp(struct sk_psock *psock); static void sk_psock_destroy(struct work_struct *work) { struct sk_psock *psock = container_of(to_rcu_work(work), struct sk_psock, rwork); /* No sk_callback_lock since already detached. */ sk_psock_done_strp(psock); cancel_delayed_work_sync(&psock->work); __sk_psock_zap_ingress(psock); mutex_destroy(&psock->work_mutex); psock_progs_drop(&psock->progs); sk_psock_link_destroy(psock); sk_psock_cork_free(psock); if (psock->sk_redir) sock_put(psock->sk_redir); if (psock->sk_pair) sock_put(psock->sk_pair); sock_put(psock->sk); kfree(psock); } void sk_psock_drop(struct sock *sk, struct sk_psock *psock) { write_lock_bh(&sk->sk_callback_lock); sk_psock_restore_proto(sk, psock); rcu_assign_sk_user_data(sk, NULL); if (psock->progs.stream_parser) sk_psock_stop_strp(sk, psock); else if (psock->progs.stream_verdict || psock->progs.skb_verdict) sk_psock_stop_verdict(sk, psock); write_unlock_bh(&sk->sk_callback_lock); sk_psock_stop(psock); INIT_RCU_WORK(&psock->rwork, sk_psock_destroy); queue_rcu_work(system_wq, &psock->rwork); } EXPORT_SYMBOL_GPL(sk_psock_drop); static int sk_psock_map_verd(int verdict, bool redir) { switch (verdict) { case SK_PASS: return redir ? __SK_REDIRECT : __SK_PASS; case SK_DROP: default: break; } return __SK_DROP; } int sk_psock_msg_verdict(struct sock *sk, struct sk_psock *psock, struct sk_msg *msg) { struct bpf_prog *prog; int ret; rcu_read_lock(); prog = READ_ONCE(psock->progs.msg_parser); if (unlikely(!prog)) { ret = __SK_PASS; goto out; } sk_msg_compute_data_pointers(msg); msg->sk = sk; ret = bpf_prog_run_pin_on_cpu(prog, msg); ret = sk_psock_map_verd(ret, msg->sk_redir); psock->apply_bytes = msg->apply_bytes; if (ret == __SK_REDIRECT) { if (psock->sk_redir) { sock_put(psock->sk_redir); psock->sk_redir = NULL; } if (!msg->sk_redir) { ret = __SK_DROP; goto out; } psock->redir_ingress = sk_msg_to_ingress(msg); psock->sk_redir = msg->sk_redir; sock_hold(psock->sk_redir); } out: rcu_read_unlock(); return ret; } EXPORT_SYMBOL_GPL(sk_psock_msg_verdict); static int sk_psock_skb_redirect(struct sk_psock *from, struct sk_buff *skb) { struct sk_psock *psock_other; struct sock *sk_other; sk_other = skb_bpf_redirect_fetch(skb); /* This error is a buggy BPF program, it returned a redirect * return code, but then didn't set a redirect interface. */ if (unlikely(!sk_other)) { skb_bpf_redirect_clear(skb); sock_drop(from->sk, skb); return -EIO; } psock_other = sk_psock(sk_other); /* This error indicates the socket is being torn down or had another * error that caused the pipe to break. We can't send a packet on * a socket that is in this state so we drop the skb. */ if (!psock_other || sock_flag(sk_other, SOCK_DEAD)) { skb_bpf_redirect_clear(skb); sock_drop(from->sk, skb); return -EIO; } spin_lock_bh(&psock_other->ingress_lock); if (!sk_psock_test_state(psock_other, SK_PSOCK_TX_ENABLED)) { spin_unlock_bh(&psock_other->ingress_lock); skb_bpf_redirect_clear(skb); sock_drop(from->sk, skb); return -EIO; } skb_queue_tail(&psock_other->ingress_skb, skb); schedule_delayed_work(&psock_other->work, 0); spin_unlock_bh(&psock_other->ingress_lock); return 0; } static void sk_psock_tls_verdict_apply(struct sk_buff *skb, struct sk_psock *from, int verdict) { switch (verdict) { case __SK_REDIRECT: sk_psock_skb_redirect(from, skb); break; case __SK_PASS: case __SK_DROP: default: break; } } int sk_psock_tls_strp_read(struct sk_psock *psock, struct sk_buff *skb) { struct bpf_prog *prog; int ret = __SK_PASS; rcu_read_lock(); prog = READ_ONCE(psock->progs.stream_verdict); if (likely(prog)) { skb->sk = psock->sk; skb_dst_drop(skb); skb_bpf_redirect_clear(skb); ret = bpf_prog_run_pin_on_cpu(prog, skb); ret = sk_psock_map_verd(ret, skb_bpf_redirect_fetch(skb)); skb->sk = NULL; } sk_psock_tls_verdict_apply(skb, psock, ret); rcu_read_unlock(); return ret; } EXPORT_SYMBOL_GPL(sk_psock_tls_strp_read); static int sk_psock_verdict_apply(struct sk_psock *psock, struct sk_buff *skb, int verdict) { struct sock *sk_other; int err = 0; u32 len, off; switch (verdict) { case __SK_PASS: err = -EIO; sk_other = psock->sk; if (sock_flag(sk_other, SOCK_DEAD) || !sk_psock_test_state(psock, SK_PSOCK_TX_ENABLED)) goto out_free; skb_bpf_set_ingress(skb); /* If the queue is empty then we can submit directly * into the msg queue. If its not empty we have to * queue work otherwise we may get OOO data. Otherwise, * if sk_psock_skb_ingress errors will be handled by * retrying later from workqueue. */ if (skb_queue_empty(&psock->ingress_skb)) { len = skb->len; off = 0; if (skb_bpf_strparser(skb)) { struct strp_msg *stm = strp_msg(skb); off = stm->offset; len = stm->full_len; } err = sk_psock_skb_ingress_self(psock, skb, off, len); } if (err < 0) { spin_lock_bh(&psock->ingress_lock); if (sk_psock_test_state(psock, SK_PSOCK_TX_ENABLED)) { skb_queue_tail(&psock->ingress_skb, skb); schedule_delayed_work(&psock->work, 0); err = 0; } spin_unlock_bh(&psock->ingress_lock); if (err < 0) goto out_free; } break; case __SK_REDIRECT: tcp_eat_skb(psock->sk, skb); err = sk_psock_skb_redirect(psock, skb); break; case __SK_DROP: default: out_free: skb_bpf_redirect_clear(skb); tcp_eat_skb(psock->sk, skb); sock_drop(psock->sk, skb); } return err; } static void sk_psock_write_space(struct sock *sk) { struct sk_psock *psock; void (*write_space)(struct sock *sk) = NULL; rcu_read_lock(); psock = sk_psock(sk); if (likely(psock)) { if (sk_psock_test_state(psock, SK_PSOCK_TX_ENABLED)) schedule_delayed_work(&psock->work, 0); write_space = psock->saved_write_space; } rcu_read_unlock(); if (write_space) write_space(sk); } #if IS_ENABLED(CONFIG_BPF_STREAM_PARSER) static void sk_psock_strp_read(struct strparser *strp, struct sk_buff *skb) { struct sk_psock *psock; struct bpf_prog *prog; int ret = __SK_DROP; struct sock *sk; rcu_read_lock(); sk = strp->sk; psock = sk_psock(sk); if (unlikely(!psock)) { sock_drop(sk, skb); goto out; } prog = READ_ONCE(psock->progs.stream_verdict); if (likely(prog)) { skb->sk = sk; skb_dst_drop(skb); skb_bpf_redirect_clear(skb); ret = bpf_prog_run_pin_on_cpu(prog, skb); skb_bpf_set_strparser(skb); ret = sk_psock_map_verd(ret, skb_bpf_redirect_fetch(skb)); skb->sk = NULL; } sk_psock_verdict_apply(psock, skb, ret); out: rcu_read_unlock(); } static int sk_psock_strp_read_done(struct strparser *strp, int err) { return err; } static int sk_psock_strp_parse(struct strparser *strp, struct sk_buff *skb) { struct sk_psock *psock = container_of(strp, struct sk_psock, strp); struct bpf_prog *prog; int ret = skb->len; rcu_read_lock(); prog = READ_ONCE(psock->progs.stream_parser); if (likely(prog)) { skb->sk = psock->sk; ret = bpf_prog_run_pin_on_cpu(prog, skb); skb->sk = NULL; } rcu_read_unlock(); return ret; } /* Called with socket lock held. */ static void sk_psock_strp_data_ready(struct sock *sk) { struct sk_psock *psock; trace_sk_data_ready(sk); rcu_read_lock(); psock = sk_psock(sk); if (likely(psock)) { if (tls_sw_has_ctx_rx(sk)) { psock->saved_data_ready(sk); } else { read_lock_bh(&sk->sk_callback_lock); strp_data_ready(&psock->strp); read_unlock_bh(&sk->sk_callback_lock); } } rcu_read_unlock(); } int sk_psock_init_strp(struct sock *sk, struct sk_psock *psock) { int ret; static const struct strp_callbacks cb = { .rcv_msg = sk_psock_strp_read, .read_sock_done = sk_psock_strp_read_done, .parse_msg = sk_psock_strp_parse, }; ret = strp_init(&psock->strp, sk, &cb); if (!ret) sk_psock_set_state(psock, SK_PSOCK_RX_STRP_ENABLED); return ret; } void sk_psock_start_strp(struct sock *sk, struct sk_psock *psock) { if (psock->saved_data_ready) return; psock->saved_data_ready = sk->sk_data_ready; sk->sk_data_ready = sk_psock_strp_data_ready; sk->sk_write_space = sk_psock_write_space; } void sk_psock_stop_strp(struct sock *sk, struct sk_psock *psock) { psock_set_prog(&psock->progs.stream_parser, NULL); if (!psock->saved_data_ready) return; sk->sk_data_ready = psock->saved_data_ready; psock->saved_data_ready = NULL; strp_stop(&psock->strp); } static void sk_psock_done_strp(struct sk_psock *psock) { /* Parser has been stopped */ if (sk_psock_test_state(psock, SK_PSOCK_RX_STRP_ENABLED)) strp_done(&psock->strp); } #else static void sk_psock_done_strp(struct sk_psock *psock) { } #endif /* CONFIG_BPF_STREAM_PARSER */ static int sk_psock_verdict_recv(struct sock *sk, struct sk_buff *skb) { struct sk_psock *psock; struct bpf_prog *prog; int ret = __SK_DROP; int len = skb->len; rcu_read_lock(); psock = sk_psock(sk); if (unlikely(!psock)) { len = 0; tcp_eat_skb(sk, skb); sock_drop(sk, skb); goto out; } prog = READ_ONCE(psock->progs.stream_verdict); if (!prog) prog = READ_ONCE(psock->progs.skb_verdict); if (likely(prog)) { skb_dst_drop(skb); skb_bpf_redirect_clear(skb); ret = bpf_prog_run_pin_on_cpu(prog, skb); ret = sk_psock_map_verd(ret, skb_bpf_redirect_fetch(skb)); } ret = sk_psock_verdict_apply(psock, skb, ret); if (ret < 0) len = ret; out: rcu_read_unlock(); return len; } static void sk_psock_verdict_data_ready(struct sock *sk) { struct socket *sock = sk->sk_socket; const struct proto_ops *ops; int copied; trace_sk_data_ready(sk); if (unlikely(!sock)) return; ops = READ_ONCE(sock->ops); if (!ops || !ops->read_skb) return; copied = ops->read_skb(sk, sk_psock_verdict_recv); if (copied >= 0) { struct sk_psock *psock; rcu_read_lock(); psock = sk_psock(sk); if (psock) sk_psock_data_ready(sk, psock); rcu_read_unlock(); } } void sk_psock_start_verdict(struct sock *sk, struct sk_psock *psock) { if (psock->saved_data_ready) return; psock->saved_data_ready = sk->sk_data_ready; sk->sk_data_ready = sk_psock_verdict_data_ready; sk->sk_write_space = sk_psock_write_space; } void sk_psock_stop_verdict(struct sock *sk, struct sk_psock *psock) { psock_set_prog(&psock->progs.stream_verdict, NULL); psock_set_prog(&psock->progs.skb_verdict, NULL); if (!psock->saved_data_ready) return; sk->sk_data_ready = psock->saved_data_ready; psock->saved_data_ready = NULL; }
32 32 32 32 32 32 32 32 32 32 18 18 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 /* SPDX-License-Identifier: GPL-2.0 */ #include <linux/ceph/ceph_debug.h> #include <linux/types.h> #include <linux/percpu_counter.h> #include <linux/math64.h> #include "metric.h" #include "mds_client.h" static void ktime_to_ceph_timespec(struct ceph_timespec *ts, ktime_t val) { struct timespec64 t = ktime_to_timespec64(val); ceph_encode_timespec64(ts, &t); } static bool ceph_mdsc_send_metrics(struct ceph_mds_client *mdsc, struct ceph_mds_session *s) { struct ceph_metric_head *head; struct ceph_metric_cap *cap; struct ceph_metric_read_latency *read; struct ceph_metric_write_latency *write; struct ceph_metric_metadata_latency *meta; struct ceph_metric_dlease *dlease; struct ceph_opened_files *files; struct ceph_pinned_icaps *icaps; struct ceph_opened_inodes *inodes; struct ceph_read_io_size *rsize; struct ceph_write_io_size *wsize; struct ceph_client_metric *m = &mdsc->metric; u64 nr_caps = atomic64_read(&m->total_caps); u32 header_len = sizeof(struct ceph_metric_header); struct ceph_client *cl = mdsc->fsc->client; struct ceph_msg *msg; s64 sum; s32 items = 0; s32 len; /* Do not send the metrics until the MDS rank is ready */ mutex_lock(&mdsc->mutex); if (ceph_mdsmap_get_state(mdsc->mdsmap, s->s_mds) != CEPH_MDS_STATE_ACTIVE) { mutex_unlock(&mdsc->mutex); return false; } mutex_unlock(&mdsc->mutex); len = sizeof(*head) + sizeof(*cap) + sizeof(*read) + sizeof(*write) + sizeof(*meta) + sizeof(*dlease) + sizeof(*files) + sizeof(*icaps) + sizeof(*inodes) + sizeof(*rsize) + sizeof(*wsize); msg = ceph_msg_new(CEPH_MSG_CLIENT_METRICS, len, GFP_NOFS, true); if (!msg) { pr_err_client(cl, "to mds%d, failed to allocate message\n", s->s_mds); return false; } head = msg->front.iov_base; /* encode the cap metric */ cap = (struct ceph_metric_cap *)(head + 1); cap->header.type = cpu_to_le32(CLIENT_METRIC_TYPE_CAP_INFO); cap->header.ver = 1; cap->header.compat = 1; cap->header.data_len = cpu_to_le32(sizeof(*cap) - header_len); cap->hit = cpu_to_le64(percpu_counter_sum(&m->i_caps_hit)); cap->mis = cpu_to_le64(percpu_counter_sum(&m->i_caps_mis)); cap->total = cpu_to_le64(nr_caps); items++; /* encode the read latency metric */ read = (struct ceph_metric_read_latency *)(cap + 1); read->header.type = cpu_to_le32(CLIENT_METRIC_TYPE_READ_LATENCY); read->header.ver = 2; read->header.compat = 1; read->header.data_len = cpu_to_le32(sizeof(*read) - header_len); sum = m->metric[METRIC_READ].latency_sum; ktime_to_ceph_timespec(&read->lat, sum); ktime_to_ceph_timespec(&read->avg, m->metric[METRIC_READ].latency_avg); read->sq_sum = cpu_to_le64(m->metric[METRIC_READ].latency_sq_sum); read->count = cpu_to_le64(m->metric[METRIC_READ].total); items++; /* encode the write latency metric */ write = (struct ceph_metric_write_latency *)(read + 1); write->header.type = cpu_to_le32(CLIENT_METRIC_TYPE_WRITE_LATENCY); write->header.ver = 2; write->header.compat = 1; write->header.data_len = cpu_to_le32(sizeof(*write) - header_len); sum = m->metric[METRIC_WRITE].latency_sum; ktime_to_ceph_timespec(&write->lat, sum); ktime_to_ceph_timespec(&write->avg, m->metric[METRIC_WRITE].latency_avg); write->sq_sum = cpu_to_le64(m->metric[METRIC_WRITE].latency_sq_sum); write->count = cpu_to_le64(m->metric[METRIC_WRITE].total); items++; /* encode the metadata latency metric */ meta = (struct ceph_metric_metadata_latency *)(write + 1); meta->header.type = cpu_to_le32(CLIENT_METRIC_TYPE_METADATA_LATENCY); meta->header.ver = 2; meta->header.compat = 1; meta->header.data_len = cpu_to_le32(sizeof(*meta) - header_len); sum = m->metric[METRIC_METADATA].latency_sum; ktime_to_ceph_timespec(&meta->lat, sum); ktime_to_ceph_timespec(&meta->avg, m->metric[METRIC_METADATA].latency_avg); meta->sq_sum = cpu_to_le64(m->metric[METRIC_METADATA].latency_sq_sum); meta->count = cpu_to_le64(m->metric[METRIC_METADATA].total); items++; /* encode the dentry lease metric */ dlease = (struct ceph_metric_dlease *)(meta + 1); dlease->header.type = cpu_to_le32(CLIENT_METRIC_TYPE_DENTRY_LEASE); dlease->header.ver = 1; dlease->header.compat = 1; dlease->header.data_len = cpu_to_le32(sizeof(*dlease) - header_len); dlease->hit = cpu_to_le64(percpu_counter_sum(&m->d_lease_hit)); dlease->mis = cpu_to_le64(percpu_counter_sum(&m->d_lease_mis)); dlease->total = cpu_to_le64(atomic64_read(&m->total_dentries)); items++; sum = percpu_counter_sum(&m->total_inodes); /* encode the opened files metric */ files = (struct ceph_opened_files *)(dlease + 1); files->header.type = cpu_to_le32(CLIENT_METRIC_TYPE_OPENED_FILES); files->header.ver = 1; files->header.compat = 1; files->header.data_len = cpu_to_le32(sizeof(*files) - header_len); files->opened_files = cpu_to_le64(atomic64_read(&m->opened_files)); files->total = cpu_to_le64(sum); items++; /* encode the pinned icaps metric */ icaps = (struct ceph_pinned_icaps *)(files + 1); icaps->header.type = cpu_to_le32(CLIENT_METRIC_TYPE_PINNED_ICAPS); icaps->header.ver = 1; icaps->header.compat = 1; icaps->header.data_len = cpu_to_le32(sizeof(*icaps) - header_len); icaps->pinned_icaps = cpu_to_le64(nr_caps); icaps->total = cpu_to_le64(sum); items++; /* encode the opened inodes metric */ inodes = (struct ceph_opened_inodes *)(icaps + 1); inodes->header.type = cpu_to_le32(CLIENT_METRIC_TYPE_OPENED_INODES); inodes->header.ver = 1; inodes->header.compat = 1; inodes->header.data_len = cpu_to_le32(sizeof(*inodes) - header_len); inodes->opened_inodes = cpu_to_le64(percpu_counter_sum(&m->opened_inodes)); inodes->total = cpu_to_le64(sum); items++; /* encode the read io size metric */ rsize = (struct ceph_read_io_size *)(inodes + 1); rsize->header.type = cpu_to_le32(CLIENT_METRIC_TYPE_READ_IO_SIZES); rsize->header.ver = 1; rsize->header.compat = 1; rsize->header.data_len = cpu_to_le32(sizeof(*rsize) - header_len); rsize->total_ops = cpu_to_le64(m->metric[METRIC_READ].total); rsize->total_size = cpu_to_le64(m->metric[METRIC_READ].size_sum); items++; /* encode the write io size metric */ wsize = (struct ceph_write_io_size *)(rsize + 1); wsize->header.type = cpu_to_le32(CLIENT_METRIC_TYPE_WRITE_IO_SIZES); wsize->header.ver = 1; wsize->header.compat = 1; wsize->header.data_len = cpu_to_le32(sizeof(*wsize) - header_len); wsize->total_ops = cpu_to_le64(m->metric[METRIC_WRITE].total); wsize->total_size = cpu_to_le64(m->metric[METRIC_WRITE].size_sum); items++; put_unaligned_le32(items, &head->num); msg->front.iov_len = len; msg->hdr.version = cpu_to_le16(1); msg->hdr.compat_version = cpu_to_le16(1); msg->hdr.front_len = cpu_to_le32(msg->front.iov_len); ceph_con_send(&s->s_con, msg); return true; } static void metric_get_session(struct ceph_mds_client *mdsc) { struct ceph_mds_session *s; int i; mutex_lock(&mdsc->mutex); for (i = 0; i < mdsc->max_sessions; i++) { s = __ceph_lookup_mds_session(mdsc, i); if (!s) continue; /* * Skip it if MDS doesn't support the metric collection, * or the MDS will close the session's socket connection * directly when it get this message. */ if (check_session_state(s) && test_bit(CEPHFS_FEATURE_METRIC_COLLECT, &s->s_features)) { mdsc->metric.session = s; break; } ceph_put_mds_session(s); } mutex_unlock(&mdsc->mutex); } static void metric_delayed_work(struct work_struct *work) { struct ceph_client_metric *m = container_of(work, struct ceph_client_metric, delayed_work.work); struct ceph_mds_client *mdsc = container_of(m, struct ceph_mds_client, metric); if (mdsc->stopping || disable_send_metrics) return; if (!m->session || !check_session_state(m->session)) { if (m->session) { ceph_put_mds_session(m->session); m->session = NULL; } metric_get_session(mdsc); } if (m->session) { ceph_mdsc_send_metrics(mdsc, m->session); metric_schedule_delayed(m); } } int ceph_metric_init(struct ceph_client_metric *m) { struct ceph_metric *metric; int ret, i; if (!m) return -EINVAL; atomic64_set(&m->total_dentries, 0); ret = percpu_counter_init(&m->d_lease_hit, 0, GFP_KERNEL); if (ret) return ret; ret = percpu_counter_init(&m->d_lease_mis, 0, GFP_KERNEL); if (ret) goto err_d_lease_mis; atomic64_set(&m->total_caps, 0); ret = percpu_counter_init(&m->i_caps_hit, 0, GFP_KERNEL); if (ret) goto err_i_caps_hit; ret = percpu_counter_init(&m->i_caps_mis, 0, GFP_KERNEL); if (ret) goto err_i_caps_mis; for (i = 0; i < METRIC_MAX; i++) { metric = &m->metric[i]; spin_lock_init(&metric->lock); metric->size_sum = 0; metric->size_min = U64_MAX; metric->size_max = 0; metric->total = 0; metric->latency_sum = 0; metric->latency_avg = 0; metric->latency_sq_sum = 0; metric->latency_min = KTIME_MAX; metric->latency_max = 0; } atomic64_set(&m->opened_files, 0); ret = percpu_counter_init(&m->opened_inodes, 0, GFP_KERNEL); if (ret) goto err_opened_inodes; ret = percpu_counter_init(&m->total_inodes, 0, GFP_KERNEL); if (ret) goto err_total_inodes; m->session = NULL; INIT_DELAYED_WORK(&m->delayed_work, metric_delayed_work); return 0; err_total_inodes: percpu_counter_destroy(&m->opened_inodes); err_opened_inodes: percpu_counter_destroy(&m->i_caps_mis); err_i_caps_mis: percpu_counter_destroy(&m->i_caps_hit); err_i_caps_hit: percpu_counter_destroy(&m->d_lease_mis); err_d_lease_mis: percpu_counter_destroy(&m->d_lease_hit); return ret; } void ceph_metric_destroy(struct ceph_client_metric *m) { if (!m) return; cancel_delayed_work_sync(&m->delayed_work); percpu_counter_destroy(&m->total_inodes); percpu_counter_destroy(&m->opened_inodes); percpu_counter_destroy(&m->i_caps_mis); percpu_counter_destroy(&m->i_caps_hit); percpu_counter_destroy(&m->d_lease_mis); percpu_counter_destroy(&m->d_lease_hit); ceph_put_mds_session(m->session); } #define METRIC_UPDATE_MIN_MAX(min, max, new) \ { \ if (unlikely(new < min)) \ min = new; \ if (unlikely(new > max)) \ max = new; \ } static inline void __update_mean_and_stdev(ktime_t total, ktime_t *lavg, ktime_t *sq_sump, ktime_t lat) { ktime_t avg; if (unlikely(total == 1)) { *lavg = lat; } else { /* the sq is (lat - old_avg) * (lat - new_avg) */ avg = *lavg + div64_s64(lat - *lavg, total); *sq_sump += (lat - *lavg)*(lat - avg); *lavg = avg; } } void ceph_update_metrics(struct ceph_metric *m, ktime_t r_start, ktime_t r_end, unsigned int size, int rc) { ktime_t lat = ktime_sub(r_end, r_start); ktime_t total; if (unlikely(rc < 0 && rc != -ENOENT && rc != -ETIMEDOUT)) return; spin_lock(&m->lock); total = ++m->total; m->size_sum += size; METRIC_UPDATE_MIN_MAX(m->size_min, m->size_max, size); m->latency_sum += lat; METRIC_UPDATE_MIN_MAX(m->latency_min, m->latency_max, lat); __update_mean_and_stdev(total, &m->latency_avg, &m->latency_sq_sum, lat); spin_unlock(&m->lock); }
1 1 1 1 17 9 6 9 21 17 12 2 6 7 7 7 2 2 2 1 2 2 3 10 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 // SPDX-License-Identifier: GPL-2.0 // Copyright (c) 2020 Facebook #include <linux/debugfs.h> #include <linux/ethtool.h> #include <linux/random.h> #include "netdevsim.h" static void nsim_get_pause_stats(struct net_device *dev, struct ethtool_pause_stats *pause_stats) { struct netdevsim *ns = netdev_priv(dev); if (ns->ethtool.pauseparam.report_stats_rx) pause_stats->rx_pause_frames = 1; if (ns->ethtool.pauseparam.report_stats_tx) pause_stats->tx_pause_frames = 2; } static void nsim_get_pauseparam(struct net_device *dev, struct ethtool_pauseparam *pause) { struct netdevsim *ns = netdev_priv(dev); pause->autoneg = 0; /* We don't support ksettings, so can't pretend */ pause->rx_pause = ns->ethtool.pauseparam.rx; pause->tx_pause = ns->ethtool.pauseparam.tx; } static int nsim_set_pauseparam(struct net_device *dev, struct ethtool_pauseparam *pause) { struct netdevsim *ns = netdev_priv(dev); if (pause->autoneg) return -EINVAL; ns->ethtool.pauseparam.rx = pause->rx_pause; ns->ethtool.pauseparam.tx = pause->tx_pause; return 0; } static int nsim_get_coalesce(struct net_device *dev, struct ethtool_coalesce *coal, struct kernel_ethtool_coalesce *kernel_coal, struct netlink_ext_ack *extack) { struct netdevsim *ns = netdev_priv(dev); memcpy(coal, &ns->ethtool.coalesce, sizeof(ns->ethtool.coalesce)); return 0; } static int nsim_set_coalesce(struct net_device *dev, struct ethtool_coalesce *coal, struct kernel_ethtool_coalesce *kernel_coal, struct netlink_ext_ack *extack) { struct netdevsim *ns = netdev_priv(dev); memcpy(&ns->ethtool.coalesce, coal, sizeof(ns->ethtool.coalesce)); return 0; } static void nsim_get_ringparam(struct net_device *dev, struct ethtool_ringparam *ring, struct kernel_ethtool_ringparam *kernel_ring, struct netlink_ext_ack *extack) { struct netdevsim *ns = netdev_priv(dev); memcpy(ring, &ns->ethtool.ring, sizeof(ns->ethtool.ring)); } static int nsim_set_ringparam(struct net_device *dev, struct ethtool_ringparam *ring, struct kernel_ethtool_ringparam *kernel_ring, struct netlink_ext_ack *extack) { struct netdevsim *ns = netdev_priv(dev); ns->ethtool.ring.rx_pending = ring->rx_pending; ns->ethtool.ring.rx_jumbo_pending = ring->rx_jumbo_pending; ns->ethtool.ring.rx_mini_pending = ring->rx_mini_pending; ns->ethtool.ring.tx_pending = ring->tx_pending; return 0; } static void nsim_get_channels(struct net_device *dev, struct ethtool_channels *ch) { struct netdevsim *ns = netdev_priv(dev); ch->max_combined = ns->nsim_bus_dev->num_queues; ch->combined_count = ns->ethtool.channels; } static int nsim_set_channels(struct net_device *dev, struct ethtool_channels *ch) { struct netdevsim *ns = netdev_priv(dev); int err; mutex_lock(&dev->lock); err = netif_set_real_num_queues(dev, ch->combined_count, ch->combined_count); mutex_unlock(&dev->lock); if (err) return err; ns->ethtool.channels = ch->combined_count; return 0; } static int nsim_get_fecparam(struct net_device *dev, struct ethtool_fecparam *fecparam) { struct netdevsim *ns = netdev_priv(dev); if (ns->ethtool.get_err) return -ns->ethtool.get_err; memcpy(fecparam, &ns->ethtool.fec, sizeof(ns->ethtool.fec)); return 0; } static int nsim_set_fecparam(struct net_device *dev, struct ethtool_fecparam *fecparam) { struct netdevsim *ns = netdev_priv(dev); u32 fec; if (ns->ethtool.set_err) return -ns->ethtool.set_err; memcpy(&ns->ethtool.fec, fecparam, sizeof(ns->ethtool.fec)); fec = fecparam->fec; if (fec == ETHTOOL_FEC_AUTO) fec |= ETHTOOL_FEC_OFF; fec |= ETHTOOL_FEC_NONE; ns->ethtool.fec.active_fec = 1 << (fls(fec) - 1); return 0; } static void nsim_get_fec_stats(struct net_device *dev, struct ethtool_fec_stats *fec_stats) { fec_stats->corrected_blocks.total = 123; fec_stats->uncorrectable_blocks.total = 4; } static int nsim_get_ts_info(struct net_device *dev, struct kernel_ethtool_ts_info *info) { struct netdevsim *ns = netdev_priv(dev); info->phc_index = mock_phc_index(ns->phc); return 0; } static const struct ethtool_ops nsim_ethtool_ops = { .supported_coalesce_params = ETHTOOL_COALESCE_ALL_PARAMS, .get_pause_stats = nsim_get_pause_stats, .get_pauseparam = nsim_get_pauseparam, .set_pauseparam = nsim_set_pauseparam, .set_coalesce = nsim_set_coalesce, .get_coalesce = nsim_get_coalesce, .get_ringparam = nsim_get_ringparam, .set_ringparam = nsim_set_ringparam, .get_channels = nsim_get_channels, .set_channels = nsim_set_channels, .get_fecparam = nsim_get_fecparam, .set_fecparam = nsim_set_fecparam, .get_fec_stats = nsim_get_fec_stats, .get_ts_info = nsim_get_ts_info, }; static void nsim_ethtool_ring_init(struct netdevsim *ns) { ns->ethtool.ring.rx_max_pending = 4096; ns->ethtool.ring.rx_jumbo_max_pending = 4096; ns->ethtool.ring.rx_mini_max_pending = 4096; ns->ethtool.ring.tx_max_pending = 4096; } void nsim_ethtool_init(struct netdevsim *ns) { struct dentry *ethtool, *dir; ns->netdev->ethtool_ops = &nsim_ethtool_ops; nsim_ethtool_ring_init(ns); ns->ethtool.pauseparam.report_stats_rx = true; ns->ethtool.pauseparam.report_stats_tx = true; ns->ethtool.fec.fec = ETHTOOL_FEC_NONE; ns->ethtool.fec.active_fec = ETHTOOL_FEC_NONE; ns->ethtool.channels = ns->nsim_bus_dev->num_queues; ethtool = debugfs_create_dir("ethtool", ns->nsim_dev_port->ddir); debugfs_create_u32("get_err", 0600, ethtool, &ns->ethtool.get_err); debugfs_create_u32("set_err", 0600, ethtool, &ns->ethtool.set_err); dir = debugfs_create_dir("pause", ethtool); debugfs_create_bool("report_stats_rx", 0600, dir, &ns->ethtool.pauseparam.report_stats_rx); debugfs_create_bool("report_stats_tx", 0600, dir, &ns->ethtool.pauseparam.report_stats_tx); dir = debugfs_create_dir("ring", ethtool); debugfs_create_u32("rx_max_pending", 0600, dir, &ns->ethtool.ring.rx_max_pending); debugfs_create_u32("rx_jumbo_max_pending", 0600, dir, &ns->ethtool.ring.rx_jumbo_max_pending); debugfs_create_u32("rx_mini_max_pending", 0600, dir, &ns->ethtool.ring.rx_mini_max_pending); debugfs_create_u32("tx_max_pending", 0600, dir, &ns->ethtool.ring.tx_max_pending); }
271 283 10 271 271 271 271 10 21 21 3 21 21 21 21 9 21 21 1 21 383 390 386 126 539 316 159 158 229 124 80 124 126 330 34 20 17 15 2 15 34 48 48 48 48 445 444 438 309 309 19 440 447 336 332 2 330 329 324 11 3 2 325 1 326 325 321 322 323 320 333 41 41 41 41 41 4 33 1 1 33 9 9 9 8 7 7 5 4 4 8 2 1 1 49 49 49 48 47 46 45 44 41 41 33 5 1 367 367 261 184 1 367 368 365 364 3 2 3 2 3 2 3 1 4 365 15 14 12 3 1 11 9 15 129 129 128 127 126 125 121 4 108 121 118 14 14 13 12 11 7 110 84 110 109 109 8 8 114 113 110 110 110 114 9 99 99 3 97 95 96 2 94 32 64 64 67 67 44 44 38 44 34 33 32 17 33 67 7 67 67 67 67 66 36 34 31 290 4 305 304 34 6 6 298 28 15 299 7 299 2 294 299 18 17 6 14 251 250 167 248 249 249 246 5 1 5 5 5 2 5 3 2 2 2 4 54 54 54 1 1 54 6 3 3 273 270 257 272 271 271 20 271 11 11 3 267 242 241 241 265 6 264 263 264 264 264 1 263 263 239 255 292 259 261 273 270 270 269 271 271 273 306 307 292 255 287 4 3 2 2 2 1 254 8 251 2 245 79 52 44 271 26 11 271 66 16 16 1 1 16 1 48 48 2 47 1 1 47 29 25 1 45 29 28 29 29 29 29 19 10 29 1 29 7 29 5 29 28 16 7 3 3 3 12 2 46 48 2 1 9 7 7 9 5 9 4 2 1 1 4 3 2 1 68 29 69 7 4 4 5 2 4 1 3 64 31 14 38 12 31 60 6 11 34 9 29 37 40 24 6 24 24 13 17 34 65 3 3 5 2 3 63 30 14 33 31 29 21 17 18 16 7 45 3 8 7 4 9 7 7 7 7 3 65 11 8 2 8 8 7 5 69 78 76 75 77 77 76 75 73 74 11 71 60 4 60 69 69 27 27 5 3 5 1 4 2 5 5 3 5 1 4 2 15 13 12 3 15 11 7 4 8 2 8 1 7 5 11 3 2 15 5 5 11 11 10 4 3 2 3 2 3 1 3 2 10 9 8 7 1 6 3 3 5 5 2 1 8 8 8 6 2 5 2 3 4 5 2 1 4 3 2 2 1 2 1 14 13 9 3 12 2 6 4 2 7 3 2 1 2 7 3 7 3 7 6 3 1 3 2 2 2 1 2 3 1 2 1 2 1 5 4 1 3 8 1 1 7 7 4 3 7 2 4 4 3 2 2 3 4 3 2 1 1 1 8 7 5 2 2 4 2 2 3 4 2 1 4 3 1 5 4 3 11 2 2 9 9 7 2 8 4 7 2 2 1 5 4 3 2 6 4 4 4 3 17 16 15 13 2 2 12 5 7 10 7 2 1 1 16 12 11 9 2 2 9 4 5 6 5 3 2 2 9 9 9 2 2 8 4 4 5 5 3 2 2 12 11 9 2 2 8 5 3 6 3 2 1 1 6 7 5 3 1 1 3 3 3 3 3 10 9 8 2 10 6 3 2 3 3 1 3 2 3 3 1 3 2 3 2 1 2 1 2 4 2 2 1 6 5 5 2 5 13 12 11 2 11 10 2 9 3 6 7 9 2 1 13 13 11 2 12 13 13 12 12 1 1 11 8 3 10 11 2 1 22 22 18 18 6 5 5 8 8 8 21 21 20 7 2 14 6 3 3 4 6 2 1 15 14 11 10 3 10 8 2 5 4 2 3 3 2 1 1 10 10 8 6 6 10 17 16 15 13 2 1 13 2 11 13 13 13 10 9 6 5 4 3 4 1 13 12 11 10 11 9 9 1 11 5 4 3 2 3 6 5 4 3 2 1 2 8 7 2 5 3 2 2 2 2 2 8 7 7 1 6 4 2 3 3 3 3 3 528 533 42 529 494 448 455 493 45 6 10 10 2 9 4 79 16 5 4 10 8 4 4 2 14 7 5 8 1 8 4 11 5 6 17 12 10 12 7 5 5 4 2 6 13 13 13 22 6 8 21 15 5 3 17 6 13 5 6 8 8 492 20 20 19 16 17 21 20 21 2 1 2 20 17 19 17 17 17 13 13 20 7 5 6 3 1 1 7 253 253 252 253 253 68 69 69 1 1 69 12 3 33 24 26 15 15 12 12 7 12 15 21 21 21 9 18 18 18 16 9 21 21 21 16 9 18 18 10 10 18 18 2 2 2 2 2 21 21 21 16 16 16 2 2 21 16 8 9 21 6 5 4 2 2 1 2 2 2 6 4 3 2 1 1 1 1 1 4 3 2 1 2 2 2 2 2 2 4 3 2 2 11 10 9 8 8 8 8 11 11 8 1 7 7 7 11 11 10 10 6 1 5 1 4 2 1 1 1 1 1 18 2 18 17 7 2 15 12 5 3 2 4 1 4 3 1 3 6 1 6 3 13 19 5 1 4 3 3 4 3 1 2 2 4 1 2 1 4 5 4 4 2 2 2 2 1 2 2 2 2 2 7 7 6 6 2 4 4 2 6 6 6 3 7 17 16 16 11 5 4 15 15 9 7 8 6 3 8 8 8 14 7 6 5 3 3 7 2 1 1 8 7 7 5 2 2 6 4 5 3 3 3 2 1 1 2 1 2 4 4 4 6 5 5 4 2 2 4 3 4 5 4 4 3 1 2 2 2 2 3 3 7 6 5 6 5 5 3 2 4 4 4 6 1 1 5 4 5 3 2 4 2 4 1 3 3 2 1 1 1 1 9 1 1 8 7 8 6 4 7 1 6 3 2 1 1 1 1 5 4 4 3 2 2 2 2 2 4 3 3 3 2 2 2 2 2 4 4 3 2 1 2 1 2 1 2 2 2 2 2 2 2 2 1 2 2 3 2 1 2 2 14 13 12 8 12 11 11 7 11 2 9 9 8 7 2 4 5 3 2 2 2 9 6 5 4 2 2 6 3 2 1 2 2 3 2 1 2 2 6 5 5 3 2 4 2 4 8 7 6 4 2 2 5 3 3 5 4 4 3 2 2 2 2 2 9 8 8 7 5 4 2 1 1 1 5 5 5 4 2 4 3 3 8 7 7 5 3 5 3 5 6 5 5 3 2 4 2 4 5 4 4 3 3 3 4 3 3 3 2 2 2 2 2 1 1 5 4 4 3 2 1 2 2 2 2 2 2 2 1 1 1 1 4 3 3 2 1 3 2 3 5 4 4 3 2 3 2 3 7 6 5 5 3 5 4 4 278 10 9 8 3 3 5 4 2 1 1 3 4 10 8 7 6 2 2 4 4 3 1 1 3 4 8 386 386 17 377 372 366 367 6 3 2 4 11 2 19 5 5 3 17 99 8 3 7 4 6 5 7 6 4 2 6 3 1 9 3 5 4 4 2 14 3 9 6 3 3 6 8 5 9 5 8 6 5 4 2 5 2 4 5 278 10 8 357 343 278 343 343 279 278 279 279 278 6 279 341 98 342 44 41 41 41 1 40 35 35 11 1 11 1 10 10 30 320 334 320 2 318 30 1 30 2 2 338 322 322 341 30 30 30 30 30 10 20 29 29 1 34 34 34 33 32 3 2 2 31 1 30 34 34 43 42 43 43 3 40 1 40 2 40 1 40 5 40 19 1 1 43 319 319 318 48 36 36 12 48 48 48 48 48 271 34 34 34 33 3 3 16 16 10 9 4 3 2 1 1 5 20 21 21 21 18 18 13 13 7 10 46 45 46 46 13 45 30 30 30 28 21 8 87 88 83 16 83 83 83 83 83 6 83 75 83 146 81 78 78 78 78 145 146 145 146 146 1 1 145 146 146 232 232 11 11 11 8 7 7 3 7 5 9 2 105 105 105 105 7 105 105 85 10 100 346 346 346 249 212 212 212 209 210 308 39 17 17 5 17 17 16 3 3 1 17 7 7 3 2 7 4 1 1 4 21 1 21 20 21 21 1 21 4 4 4 21 2 2 2 21 21 21 21 20 12 222 222 222 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282 2283 2284 2285 2286 2287 2288 2289 2290 2291 2292 2293 2294 2295 2296 2297 2298 2299 2300 2301 2302 2303 2304 2305 2306 2307 2308 2309 2310 2311 2312 2313 2314 2315 2316 2317 2318 2319 2320 2321 2322 2323 2324 2325 2326 2327 2328 2329 2330 2331 2332 2333 2334 2335 2336 2337 2338 2339 2340 2341 2342 2343 2344 2345 2346 2347 2348 2349 2350 2351 2352 2353 2354 2355 2356 2357 2358 2359 2360 2361 2362 2363 2364 2365 2366 2367 2368 2369 2370 2371 2372 2373 2374 2375 2376 2377 2378 2379 2380 2381 2382 2383 2384 2385 2386 2387 2388 2389 2390 2391 2392 2393 2394 2395 2396 2397 2398 2399 2400 2401 2402 2403 2404 2405 2406 2407 2408 2409 2410 2411 2412 2413 2414 2415 2416 2417 2418 2419 2420 2421 2422 2423 2424 2425 2426 2427 2428 2429 2430 2431 2432 2433 2434 2435 2436 2437 2438 2439 2440 2441 2442 2443 2444 2445 2446 2447 2448 2449 2450 2451 2452 2453 2454 2455 2456 2457 2458 2459 2460 2461 2462 2463 2464 2465 2466 2467 2468 2469 2470 2471 2472 2473 2474 2475 2476 2477 2478 2479 2480 2481 2482 2483 2484 2485 2486 2487 2488 2489 2490 2491 2492 2493 2494 2495 2496 2497 2498 2499 2500 2501 2502 2503 2504 2505 2506 2507 2508 2509 2510 2511 2512 2513 2514 2515 2516 2517 2518 2519 2520 2521 2522 2523 2524 2525 2526 2527 2528 2529 2530 2531 2532 2533 2534 2535 2536 2537 2538 2539 2540 2541 2542 2543 2544 2545 2546 2547 2548 2549 2550 2551 2552 2553 2554 2555 2556 2557 2558 2559 2560 2561 2562 2563 2564 2565 2566 2567 2568 2569 2570 2571 2572 2573 2574 2575 2576 2577 2578 2579 2580 2581 2582 2583 2584 2585 2586 2587 2588 2589 2590 2591 2592 2593 2594 2595 2596 2597 2598 2599 2600 2601 2602 2603 2604 2605 2606 2607 2608 2609 2610 2611 2612 2613 2614 2615 2616 2617 2618 2619 2620 2621 2622 2623 2624 2625 2626 2627 2628 2629 2630 2631 2632 2633 2634 2635 2636 2637 2638 2639 2640 2641 2642 2643 2644 2645 2646 2647 2648 2649 2650 2651 2652 2653 2654 2655 2656 2657 2658 2659 2660 2661 2662 2663 2664 2665 2666 2667 2668 2669 2670 2671 2672 2673 2674 2675 2676 2677 2678 2679 2680 2681 2682 2683 2684 2685 2686 2687 2688 2689 2690 2691 2692 2693 2694 2695 2696 2697 2698 2699 2700 2701 2702 2703 2704 2705 2706 2707 2708 2709 2710 2711 2712 2713 2714 2715 2716 2717 2718 2719 2720 2721 2722 2723 2724 2725 2726 2727 2728 2729 2730 2731 2732 2733 2734 2735 2736 2737 2738 2739 2740 2741 2742 2743 2744 2745 2746 2747 2748 2749 2750 2751 2752 2753 2754 2755 2756 2757 2758 2759 2760 2761 2762 2763 2764 2765 2766 2767 2768 2769 2770 2771 2772 2773 2774 2775 2776 2777 2778 2779 2780 2781 2782 2783 2784 2785 2786 2787 2788 2789 2790 2791 2792 2793 2794 2795 2796 2797 2798 2799 2800 2801 2802 2803 2804 2805 2806 2807 2808 2809 2810 2811 2812 2813 2814 2815 2816 2817 2818 2819 2820 2821 2822 2823 2824 2825 2826 2827 2828 2829 2830 2831 2832 2833 2834 2835 2836 2837 2838 2839 2840 2841 2842 2843 2844 2845 2846 2847 2848 2849 2850 2851 2852 2853 2854 2855 2856 2857 2858 2859 2860 2861 2862 2863 2864 2865 2866 2867 2868 2869 2870 2871 2872 2873 2874 2875 2876 2877 2878 2879 2880 2881 2882 2883 2884 2885 2886 2887 2888 2889 2890 2891 2892 2893 2894 2895 2896 2897 2898 2899 2900 2901 2902 2903 2904 2905 2906 2907 2908 2909 2910 2911 2912 2913 2914 2915 2916 2917 2918 2919 2920 2921 2922 2923 2924 2925 2926 2927 2928 2929 2930 2931 2932 2933 2934 2935 2936 2937 2938 2939 2940 2941 2942 2943 2944 2945 2946 2947 2948 2949 2950 2951 2952 2953 2954 2955 2956 2957 2958 2959 2960 2961 2962 2963 2964 2965 2966 2967 2968 2969 2970 2971 2972 2973 2974 2975 2976 2977 2978 2979 2980 2981 2982 2983 2984 2985 2986 2987 2988 2989 2990 2991 2992 2993 2994 2995 2996 2997 2998 2999 3000 3001 3002 3003 3004 3005 3006 3007 3008 3009 3010 3011 3012 3013 3014 3015 3016 3017 3018 3019 3020 3021 3022 3023 3024 3025 3026 3027 3028 3029 3030 3031 3032 3033 3034 3035 3036 3037 3038 3039 3040 3041 3042 3043 3044 3045 3046 3047 3048 3049 3050 3051 3052 3053 3054 3055 3056 3057 3058 3059 3060 3061 3062 3063 3064 3065 3066 3067 3068 3069 3070 3071 3072 3073 3074 3075 3076 3077 3078 3079 3080 3081 3082 3083 3084 3085 3086 3087 3088 3089 3090 3091 3092 3093 3094 3095 3096 3097 3098 3099 3100 3101 3102 3103 3104 3105 3106 3107 3108 3109 3110 3111 3112 3113 3114 3115 3116 3117 3118 3119 3120 3121 3122 3123 3124 3125 3126 3127 3128 3129 3130 3131 3132 3133 3134 3135 3136 3137 3138 3139 3140 3141 3142 3143 3144 3145 3146 3147 3148 3149 3150 3151 3152 3153 3154 3155 3156 3157 3158 3159 3160 3161 3162 3163 3164 3165 3166 3167 3168 3169 3170 3171 3172 3173 3174 3175 3176 3177 3178 3179 3180 3181 3182 3183 3184 3185 3186 3187 3188 3189 3190 3191 3192 3193 3194 3195 3196 3197 3198 3199 3200 3201 3202 3203 3204 3205 3206 3207 3208 3209 3210 3211 3212 3213 3214 3215 3216 3217 3218 3219 3220 3221 3222 3223 3224 3225 3226 3227 3228 3229 3230 3231 3232 3233 3234 3235 3236 3237 3238 3239 3240 3241 3242 3243 3244 3245 3246 3247 3248 3249 3250 3251 3252 3253 3254 3255 3256 3257 3258 3259 3260 3261 3262 3263 3264 3265 3266 3267 3268 3269 3270 3271 3272 3273 3274 3275 3276 3277 3278 3279 3280 3281 3282 3283 3284 3285 3286 3287 3288 3289 3290 3291 3292 3293 3294 3295 3296 3297 3298 3299 3300 3301 3302 3303 3304 3305 3306 3307 3308 3309 3310 3311 3312 3313 3314 3315 3316 3317 3318 3319 3320 3321 3322 3323 3324 3325 3326 3327 3328 3329 3330 3331 3332 3333 3334 3335 3336 3337 3338 3339 3340 3341 3342 3343 3344 3345 3346 3347 3348 3349 3350 3351 3352 3353 3354 3355 3356 3357 3358 3359 3360 3361 3362 3363 3364 3365 3366 3367 3368 3369 3370 3371 3372 3373 3374 3375 3376 3377 3378 3379 3380 3381 3382 3383 3384 3385 3386 3387 3388 3389 3390 3391 3392 3393 3394 3395 3396 3397 3398 3399 3400 3401 3402 3403 3404 3405 3406 3407 3408 3409 3410 3411 3412 3413 3414 3415 3416 3417 3418 3419 3420 3421 3422 3423 3424 3425 3426 3427 3428 3429 3430 3431 3432 3433 3434 3435 3436 3437 3438 3439 3440 3441 3442 3443 3444 3445 3446 3447 3448 3449 3450 3451 3452 3453 3454 3455 3456 3457 3458 3459 3460 3461 3462 3463 3464 3465 3466 3467 3468 3469 3470 3471 3472 3473 3474 3475 3476 3477 3478 3479 3480 3481 3482 3483 3484 3485 3486 3487 3488 3489 3490 3491 3492 3493 3494 3495 3496 3497 3498 3499 3500 3501 3502 3503 3504 3505 3506 3507 3508 3509 3510 3511 3512 3513 3514 3515 3516 3517 3518 3519 3520 3521 3522 3523 3524 3525 3526 3527 3528 3529 3530 3531 3532 3533 3534 3535 3536 3537 3538 3539 3540 3541 3542 3543 3544 3545 3546 3547 3548 3549 3550 3551 3552 3553 3554 3555 3556 3557 3558 3559 3560 3561 3562 3563 3564 3565 3566 3567 3568 3569 3570 3571 3572 3573 3574 3575 3576 3577 3578 3579 3580 3581 3582 3583 3584 3585 3586 3587 3588 3589 3590 3591 3592 3593 3594 3595 3596 3597 3598 3599 3600 3601 3602 3603 3604 3605 3606 3607 3608 3609 3610 3611 3612 3613 3614 3615 3616 3617 3618 3619 3620 3621 3622 3623 3624 3625 3626 3627 3628 3629 3630 3631 3632 3633 3634 3635 3636 3637 3638 3639 3640 3641 3642 3643 3644 3645 3646 3647 3648 3649 3650 3651 3652 3653 3654 3655 3656 3657 3658 3659 3660 3661 3662 3663 3664 3665 3666 3667 3668 3669 3670 3671 3672 3673 3674 3675 3676 3677 3678 3679 3680 3681 3682 3683 3684 3685 3686 3687 3688 3689 3690 3691 3692 3693 3694 3695 3696 3697 3698 3699 3700 3701 3702 3703 3704 3705 3706 3707 3708 3709 3710 3711 3712 3713 3714 3715 3716 3717 3718 3719 3720 3721 3722 3723 3724 3725 3726 3727 3728 3729 3730 3731 3732 3733 3734 3735 3736 3737 3738 3739 3740 3741 3742 3743 3744 3745 3746 3747 3748 3749 3750 3751 3752 3753 3754 3755 3756 3757 3758 3759 3760 3761 3762 3763 3764 3765 3766 3767 3768 3769 3770 3771 3772 3773 3774 3775 3776 3777 3778 3779 3780 3781 3782 3783 3784 3785 3786 3787 3788 3789 3790 3791 3792 3793 3794 3795 3796 3797 3798 3799 3800 3801 3802 3803 3804 3805 3806 3807 3808 3809 3810 3811 3812 3813 3814 3815 3816 3817 3818 3819 3820 3821 3822 3823 3824 3825 3826 3827 3828 3829 3830 3831 3832 3833 3834 3835 3836 3837 3838 3839 3840 3841 3842 3843 3844 3845 3846 3847 3848 3849 3850 3851 3852 3853 3854 3855 3856 3857 3858 3859 3860 3861 3862 3863 3864 3865 3866 3867 3868 3869 3870 3871 3872 3873 3874 3875 3876 3877 3878 3879 3880 3881 3882 3883 3884 3885 3886 3887 3888 3889 3890 3891 3892 3893 3894 3895 3896 3897 3898 3899 3900 3901 3902 3903 3904 3905 3906 3907 3908 3909 3910 3911 3912 3913 3914 3915 3916 3917 3918 3919 3920 3921 3922 3923 3924 3925 3926 3927 3928 3929 3930 3931 3932 3933 3934 3935 3936 3937 3938 3939 3940 3941 3942 3943 3944 3945 3946 3947 3948 3949 3950 3951 3952 3953 3954 3955 3956 3957 3958 3959 3960 3961 3962 3963 3964 3965 3966 3967 3968 3969 3970 3971 3972 3973 3974 3975 3976 3977 3978 3979 3980 3981 3982 3983 3984 3985 3986 3987 3988 3989 3990 3991 3992 3993 3994 3995 3996 3997 3998 3999 4000 4001 4002 4003 4004 4005 4006 4007 4008 4009 4010 4011 4012 4013 4014 4015 4016 4017 4018 4019 4020 4021 4022 4023 4024 4025 4026 4027 4028 4029 4030 4031 4032 4033 4034 4035 4036 4037 4038 4039 4040 4041 4042 4043 4044 4045 4046 4047 4048 4049 4050 4051 4052 4053 4054 4055 4056 4057 4058 4059 4060 4061 4062 4063 4064 4065 4066 4067 4068 4069 4070 4071 4072 4073 4074 4075 4076 4077 4078 4079 4080 4081 4082 4083 4084 4085 4086 4087 4088 4089 4090 4091 4092 4093 4094 4095 4096 4097 4098 4099 4100 4101 4102 4103 4104 4105 4106 4107 4108 4109 4110 4111 4112 4113 4114 4115 4116 4117 4118 4119 4120 4121 4122 4123 4124 4125 4126 4127 4128 4129 4130 4131 4132 4133 4134 4135 4136 4137 4138 4139 4140 4141 4142 4143 4144 4145 4146 4147 4148 4149 4150 4151 4152 4153 4154 4155 4156 4157 4158 4159 4160 4161 4162 4163 4164 4165 4166 4167 4168 4169 4170 4171 4172 4173 4174 4175 4176 4177 4178 4179 4180 4181 4182 4183 4184 4185 4186 4187 4188 4189 4190 4191 4192 4193 4194 4195 4196 4197 4198 4199 4200 4201 4202 4203 4204 4205 4206 4207 4208 4209 4210 4211 4212 4213 4214 4215 4216 4217 4218 4219 4220 4221 4222 4223 4224 4225 4226 4227 4228 4229 4230 4231 4232 4233 4234 4235 4236 4237 4238 4239 4240 4241 4242 4243 4244 4245 4246 4247 4248 4249 4250 4251 4252 4253 4254 4255 4256 4257 4258 4259 4260 4261 4262 4263 4264 4265 4266 4267 4268 4269 4270 4271 4272 4273 4274 4275 4276 4277 4278 4279 4280 4281 4282 4283 4284 4285 4286 4287 4288 4289 4290 4291 4292 4293 4294 4295 4296 4297 4298 4299 4300 4301 4302 4303 4304 4305 4306 4307 4308 4309 4310 4311 4312 4313 4314 4315 4316 4317 4318 4319 4320 4321 4322 4323 4324 4325 4326 4327 4328 4329 4330 4331 4332 4333 4334 4335 4336 4337 4338 4339 4340 4341 4342 4343 4344 4345 4346 4347 4348 4349 4350 4351 4352 4353 4354 4355 4356 4357 4358 4359 4360 4361 4362 4363 4364 4365 4366 4367 4368 4369 4370 4371 4372 4373 4374 4375 4376 4377 4378 4379 4380 4381 4382 4383 4384 4385 4386 4387 4388 4389 4390 4391 4392 4393 4394 4395 4396 4397 4398 4399 4400 4401 4402 4403 4404 4405 4406 4407 4408 4409 4410 4411 4412 4413 4414 4415 4416 4417 4418 4419 4420 4421 4422 4423 4424 4425 4426 4427 4428 4429 4430 4431 4432 4433 4434 4435 4436 4437 4438 4439 4440 4441 4442 4443 4444 4445 4446 4447 4448 4449 4450 4451 4452 4453 4454 4455 4456 4457 4458 4459 4460 4461 4462 4463 4464 4465 4466 4467 4468 4469 4470 4471 4472 4473 4474 4475 4476 4477 4478 4479 4480 4481 4482 4483 4484 4485 4486 4487 4488 4489 4490 4491 4492 4493 4494 4495 4496 4497 4498 4499 4500 4501 4502 4503 4504 4505 4506 4507 4508 4509 4510 4511 4512 4513 4514 4515 4516 4517 4518 4519 4520 4521 4522 4523 4524 4525 4526 4527 4528 4529 4530 4531 4532 4533 4534 4535 4536 4537 4538 4539 4540 4541 4542 4543 4544 4545 4546 4547 4548 4549 4550 4551 4552 4553 4554 4555 4556 4557 4558 4559 4560 4561 4562 4563 4564 4565 4566 4567 4568 4569 4570 4571 4572 4573 4574 4575 4576 4577 4578 4579 4580 4581 4582 4583 4584 4585 4586 4587 4588 4589 4590 4591 4592 4593 4594 4595 4596 4597 4598 4599 4600 4601 4602 4603 4604 4605 4606 4607 4608 4609 4610 4611 4612 4613 4614 4615 4616 4617 4618 4619 4620 4621 4622 4623 4624 4625 4626 4627 4628 4629 4630 4631 4632 4633 4634 4635 4636 4637 4638 4639 4640 4641 4642 4643 4644 4645 4646 4647 4648 4649 4650 4651 4652 4653 4654 4655 4656 4657 4658 4659 4660 4661 4662 4663 4664 4665 4666 4667 4668 4669 4670 4671 4672 4673 4674 4675 4676 4677 4678 4679 4680 4681 4682 4683 4684 4685 4686 4687 4688 4689 4690 4691 4692 4693 4694 4695 4696 4697 4698 4699 4700 4701 4702 4703 4704 4705 4706 4707 4708 4709 4710 4711 4712 4713 4714 4715 4716 4717 4718 4719 4720 4721 4722 4723 4724 4725 4726 4727 4728 4729 4730 4731 4732 4733 4734 4735 4736 4737 4738 4739 4740 4741 4742 4743 4744 4745 4746 4747 4748 4749 4750 4751 4752 4753 4754 4755 4756 4757 4758 4759 4760 4761 4762 4763 4764 4765 4766 4767 4768 4769 4770 4771 4772 4773 4774 4775 4776 4777 4778 4779 4780 4781 4782 4783 4784 4785 4786 4787 4788 4789 4790 4791 4792 4793 4794 4795 4796 4797 4798 4799 4800 4801 4802 4803 4804 4805 4806 4807 4808 4809 4810 4811 4812 4813 4814 4815 4816 4817 4818 4819 4820 4821 4822 4823 4824 4825 4826 4827 4828 4829 4830 4831 4832 4833 4834 4835 4836 4837 4838 4839 4840 4841 4842 4843 4844 4845 4846 4847 4848 4849 4850 4851 4852 4853 4854 4855 4856 4857 4858 4859 4860 4861 4862 4863 4864 4865 4866 4867 4868 4869 4870 4871 4872 4873 4874 4875 4876 4877 4878 4879 4880 4881 4882 4883 4884 4885 4886 4887 4888 4889 4890 4891 4892 4893 4894 4895 4896 4897 4898 4899 4900 4901 4902 4903 4904 4905 4906 4907 4908 4909 4910 4911 4912 4913 4914 4915 4916 4917 4918 4919 4920 4921 4922 4923 4924 4925 4926 4927 4928 4929 4930 4931 4932 4933 4934 4935 4936 4937 4938 4939 4940 4941 4942 4943 4944 4945 4946 4947 4948 4949 4950 4951 4952 4953 4954 4955 4956 4957 4958 4959 4960 4961 4962 4963 4964 4965 4966 4967 4968 4969 4970 4971 4972 4973 4974 4975 4976 4977 4978 4979 4980 4981 4982 4983 4984 4985 4986 4987 4988 4989 4990 4991 4992 4993 4994 4995 4996 4997 4998 4999 5000 5001 5002 5003 5004 5005 5006 5007 5008 5009 5010 5011 5012 5013 5014 5015 5016 5017 5018 5019 5020 5021 5022 5023 5024 5025 5026 5027 5028 5029 5030 5031 5032 5033 5034 5035 5036 5037 5038 5039 5040 5041 5042 5043 5044 5045 5046 5047 5048 5049 5050 5051 5052 5053 5054 5055 5056 5057 5058 5059 5060 5061 5062 5063 5064 5065 5066 5067 5068 5069 5070 5071 5072 5073 5074 5075 5076 5077 5078 5079 5080 5081 5082 5083 5084 5085 5086 5087 5088 5089 5090 5091 5092 5093 5094 5095 5096 5097 5098 5099 5100 5101 5102 5103 5104 5105 5106 5107 5108 5109 5110 5111 5112 5113 5114 5115 5116 5117 5118 5119 5120 5121 5122 5123 5124 5125 5126 5127 5128 5129 5130 5131 5132 5133 5134 5135 5136 5137 5138 5139 5140 5141 5142 5143 5144 5145 5146 5147 5148 5149 5150 5151 5152 5153 5154 5155 5156 5157 5158 5159 5160 5161 5162 5163 5164 5165 5166 5167 5168 5169 5170 5171 5172 5173 5174 5175 5176 5177 5178 5179 5180 5181 5182 5183 5184 5185 5186 5187 5188 5189 5190 5191 5192 5193 5194 5195 5196 5197 5198 5199 5200 5201 5202 5203 5204 5205 5206 5207 5208 5209 5210 5211 5212 5213 5214 5215 5216 5217 5218 5219 5220 5221 5222 5223 5224 5225 5226 5227 5228 5229 5230 5231 5232 5233 5234 5235 5236 5237 5238 5239 5240 5241 5242 5243 5244 5245 5246 5247 5248 5249 5250 5251 5252 5253 5254 5255 5256 5257 5258 5259 5260 5261 5262 5263 5264 5265 5266 5267 5268 5269 5270 5271 5272 5273 5274 5275 5276 5277 5278 5279 5280 5281 5282 5283 5284 5285 5286 5287 5288 5289 5290 5291 5292 5293 5294 5295 5296 5297 5298 5299 5300 5301 5302 5303 5304 5305 5306 5307 5308 5309 5310 5311 5312 5313 5314 5315 5316 5317 5318 5319 5320 5321 5322 5323 5324 5325 5326 5327 5328 5329 5330 5331 5332 5333 5334 5335 5336 5337 5338 5339 5340 5341 5342 5343 5344 5345 5346 5347 5348 5349 5350 5351 5352 5353 5354 5355 5356 5357 5358 5359 5360 5361 5362 5363 5364 5365 5366 5367 5368 5369 5370 5371 5372 5373 5374 5375 5376 5377 5378 5379 5380 5381 5382 5383 5384 5385 5386 5387 5388 5389 5390 5391 5392 5393 5394 5395 5396 5397 5398 5399 5400 5401 5402 5403 5404 5405 5406 5407 5408 5409 5410 5411 5412 5413 5414 5415 5416 5417 5418 5419 5420 5421 5422 5423 5424 5425 5426 5427 5428 5429 5430 5431 5432 5433 5434 5435 5436 5437 5438 5439 5440 5441 5442 5443 5444 5445 5446 5447 5448 5449 5450 5451 5452 5453 5454 5455 5456 5457 5458 5459 5460 5461 5462 5463 5464 5465 5466 5467 5468 5469 5470 5471 5472 5473 5474 5475 5476 5477 5478 5479 5480 5481 5482 5483 5484 5485 5486 5487 5488 5489 5490 5491 5492 5493 5494 5495 5496 5497 5498 5499 5500 5501 5502 5503 5504 5505 5506 5507 5508 5509 5510 5511 5512 5513 5514 5515 5516 5517 5518 5519 5520 5521 5522 5523 5524 5525 5526 5527 5528 5529 5530 5531 5532 5533 5534 5535 5536 5537 5538 5539 5540 5541 5542 5543 5544 5545 5546 5547 5548 5549 5550 5551 5552 5553 5554 5555 5556 5557 5558 5559 5560 5561 5562 5563 5564 5565 5566 5567 5568 5569 5570 5571 5572 5573 5574 5575 5576 5577 5578 5579 5580 5581 5582 5583 5584 5585 5586 5587 5588 5589 5590 5591 5592 5593 5594 5595 5596 5597 5598 5599 5600 5601 5602 5603 5604 5605 5606 5607 5608 5609 5610 5611 5612 5613 5614 5615 5616 5617 5618 5619 5620 5621 5622 5623 5624 5625 5626 5627 5628 5629 5630 5631 5632 5633 5634 5635 5636 5637 5638 5639 5640 5641 5642 5643 5644 5645 5646 5647 5648 5649 5650 5651 5652 5653 5654 5655 5656 5657 5658 5659 5660 5661 5662 5663 5664 5665 5666 5667 5668 5669 5670 5671 5672 5673 5674 5675 5676 5677 5678 5679 5680 5681 5682 5683 5684 5685 5686 5687 5688 5689 5690 5691 5692 5693 5694 5695 5696 5697 5698 5699 5700 5701 5702 5703 5704 5705 5706 5707 5708 5709 5710 5711 5712 5713 5714 5715 5716 5717 5718 5719 5720 5721 5722 5723 5724 5725 5726 5727 5728 5729 5730 5731 5732 5733 5734 5735 5736 5737 5738 5739 5740 5741 5742 5743 5744 5745 5746 5747 5748 5749 5750 5751 5752 5753 5754 5755 5756 5757 5758 5759 5760 5761 5762 5763 5764 5765 5766 5767 5768 5769 5770 5771 5772 5773 5774 5775 5776 5777 5778 5779 5780 5781 5782 5783 5784 5785 5786 5787 5788 5789 5790 5791 5792 5793 5794 5795 5796 5797 5798 5799 5800 5801 5802 5803 5804 5805 5806 5807 5808 5809 5810 5811 5812 5813 5814 5815 5816 5817 5818 5819 5820 5821 5822 5823 5824 5825 5826 5827 5828 5829 5830 5831 5832 5833 5834 5835 5836 5837 5838 5839 5840 5841 5842 5843 5844 5845 5846 5847 5848 5849 5850 5851 5852 5853 5854 5855 5856 5857 5858 5859 5860 5861 5862 5863 5864 5865 5866 5867 5868 5869 5870 5871 5872 5873 5874 5875 5876 5877 5878 5879 5880 5881 5882 5883 5884 5885 5886 5887 5888 5889 5890 5891 5892 5893 5894 5895 5896 5897 5898 5899 5900 5901 5902 5903 5904 5905 5906 5907 5908 5909 5910 5911 5912 5913 5914 5915 5916 5917 5918 5919 5920 5921 5922 5923 5924 5925 5926 5927 5928 5929 5930 5931 5932 5933 5934 5935 5936 5937 5938 5939 5940 5941 5942 5943 5944 5945 5946 5947 5948 5949 5950 5951 5952 5953 5954 5955 5956 5957 5958 5959 5960 5961 5962 5963 5964 5965 5966 5967 5968 5969 5970 5971 5972 5973 5974 5975 5976 5977 5978 5979 5980 5981 5982 5983 5984 5985 5986 5987 5988 5989 5990 5991 5992 5993 5994 5995 5996 5997 5998 5999 6000 6001 6002 6003 6004 6005 6006 6007 6008 6009 6010 6011 6012 6013 6014 6015 6016 6017 6018 6019 6020 6021 6022 6023 6024 6025 6026 6027 6028 6029 6030 6031 6032 6033 6034 6035 6036 6037 6038 6039 6040 6041 6042 6043 6044 6045 6046 6047 6048 6049 6050 6051 6052 6053 6054 6055 6056 6057 6058 6059 6060 6061 6062 6063 6064 6065 6066 6067 6068 6069 6070 6071 6072 6073 6074 6075 6076 6077 6078 6079 6080 6081 6082 6083 6084 6085 6086 6087 6088 6089 6090 6091 6092 6093 6094 6095 6096 6097 6098 6099 6100 6101 6102 6103 6104 6105 6106 6107 6108 6109 6110 6111 6112 6113 6114 6115 6116 6117 6118 6119 6120 6121 6122 6123 6124 6125 6126 6127 6128 6129 6130 6131 6132 6133 6134 6135 6136 6137 6138 6139 6140 6141 6142 6143 6144 6145 6146 6147 6148 6149 6150 6151 6152 6153 6154 6155 6156 6157 6158 6159 6160 6161 6162 6163 6164 6165 6166 6167 6168 6169 6170 6171 6172 6173 6174 6175 6176 6177 6178 6179 6180 6181 6182 6183 6184 6185 6186 6187 6188 6189 6190 6191 6192 6193 6194 6195 6196 6197 6198 6199 6200 6201 6202 6203 6204 6205 6206 6207 6208 6209 6210 6211 6212 6213 6214 6215 6216 6217 6218 6219 6220 6221 6222 6223 6224 6225 6226 6227 6228 6229 6230 6231 6232 6233 6234 6235 6236 6237 6238 6239 6240 6241 6242 6243 6244 6245 6246 6247 6248 6249 6250 6251 6252 6253 6254 6255 6256 6257 6258 6259 6260 6261 6262 6263 6264 6265 6266 6267 6268 6269 6270 6271 6272 6273 6274 6275 6276 6277 6278 6279 6280 6281 6282 6283 6284 6285 6286 6287 6288 6289 6290 6291 6292 6293 6294 6295 6296 6297 6298 6299 6300 6301 6302 6303 6304 6305 6306 6307 6308 6309 6310 6311 6312 6313 6314 6315 6316 6317 6318 6319 6320 6321 6322 6323 6324 6325 6326 6327 6328 6329 6330 6331 6332 6333 6334 6335 6336 6337 6338 6339 6340 6341 6342 6343 6344 6345 6346 6347 6348 6349 6350 6351 6352 6353 6354 6355 6356 6357 6358 6359 6360 6361 6362 6363 6364 6365 6366 6367 6368 6369 6370 6371 6372 6373 6374 6375 6376 6377 6378 6379 6380 6381 6382 6383 6384 6385 6386 6387 6388 6389 6390 6391 6392 6393 6394 6395 6396 6397 6398 6399 6400 6401 6402 6403 6404 6405 6406 6407 6408 6409 6410 6411 6412 6413 6414 6415 6416 6417 6418 6419 6420 6421 6422 6423 6424 6425 6426 6427 6428 6429 6430 6431 6432 6433 6434 6435 6436 6437 6438 6439 6440 6441 6442 6443 6444 6445 6446 6447 6448 6449 6450 6451 6452 6453 6454 6455 6456 6457 6458 6459 6460 6461 6462 6463 6464 6465 6466 6467 6468 6469 6470 6471 6472 6473 6474 6475 6476 6477 6478 6479 6480 6481 6482 6483 6484 6485 6486 6487 6488 6489 6490 6491 6492 6493 6494 6495 6496 6497 6498 6499 6500 6501 6502 6503 6504 6505 6506 6507 6508 6509 6510 6511 6512 6513 6514 6515 6516 6517 6518 6519 6520 6521 6522 6523 6524 6525 6526 6527 6528 6529 6530 6531 6532 6533 6534 6535 6536 6537 6538 6539 6540 6541 6542 6543 6544 6545 6546 6547 6548 6549 6550 6551 6552 6553 6554 6555 6556 6557 6558 6559 6560 6561 6562 6563 6564 6565 6566 6567 6568 6569 6570 6571 6572 6573 6574 6575 6576 6577 6578 6579 6580 6581 6582 6583 6584 6585 6586 6587 6588 6589 6590 6591 6592 6593 6594 6595 6596 6597 6598 6599 6600 6601 6602 6603 6604 6605 6606 6607 6608 6609 6610 6611 6612 6613 6614 6615 6616 6617 6618 6619 6620 6621 6622 6623 6624 6625 6626 6627 6628 6629 6630 6631 6632 6633 6634 6635 6636 6637 6638 6639 6640 6641 6642 6643 6644 6645 6646 6647 6648 6649 6650 6651 6652 6653 6654 6655 6656 6657 6658 6659 6660 6661 6662 6663 6664 6665 6666 6667 6668 6669 6670 6671 6672 6673 6674 6675 6676 6677 6678 6679 6680 6681 6682 6683 6684 6685 6686 6687 6688 6689 6690 6691 6692 6693 6694 6695 6696 6697 6698 6699 6700 6701 6702 6703 6704 6705 6706 6707 6708 6709 6710 6711 6712 6713 6714 6715 6716 6717 6718 6719 6720 6721 6722 6723 6724 6725 6726 6727 6728 6729 6730 6731 6732 6733 6734 6735 6736 6737 6738 6739 6740 6741 6742 6743 6744 6745 6746 6747 6748 6749 6750 6751 6752 6753 6754 6755 6756 6757 6758 6759 6760 6761 6762 6763 6764 6765 6766 6767 6768 6769 6770 6771 6772 6773 6774 6775 6776 6777 6778 6779 6780 6781 6782 6783 6784 6785 6786 6787 6788 6789 6790 6791 6792 6793 6794 6795 6796 6797 6798 6799 6800 6801 6802 6803 6804 6805 6806 6807 6808 6809 6810 6811 6812 6813 6814 6815 6816 6817 6818 6819 6820 6821 6822 6823 6824 6825 6826 6827 6828 6829 6830 6831 6832 6833 6834 6835 6836 6837 6838 6839 6840 6841 6842 6843 6844 6845 6846 6847 6848 6849 6850 6851 6852 6853 6854 6855 6856 6857 6858 6859 6860 6861 6862 6863 6864 6865 6866 6867 6868 6869 6870 6871 6872 6873 6874 6875 6876 6877 6878 6879 6880 6881 6882 6883 6884 6885 6886 6887 6888 6889 6890 6891 6892 6893 6894 6895 6896 6897 6898 6899 6900 6901 6902 6903 6904 6905 6906 6907 6908 6909 6910 6911 6912 6913 6914 6915 6916 6917 6918 6919 6920 6921 6922 6923 6924 6925 6926 6927 6928 6929 6930 6931 6932 6933 6934 6935 6936 6937 6938 6939 6940 6941 6942 6943 6944 6945 6946 6947 6948 6949 6950 6951 6952 6953 6954 6955 6956 6957 6958 6959 6960 6961 6962 6963 6964 6965 6966 6967 6968 6969 6970 6971 6972 6973 6974 6975 6976 6977 6978 6979 6980 6981 6982 6983 6984 6985 6986 6987 6988 6989 6990 6991 6992 6993 6994 6995 6996 6997 6998 6999 7000 7001 7002 7003 7004 7005 7006 7007 7008 7009 7010 7011 7012 7013 7014 7015 7016 7017 7018 7019 7020 7021 7022 7023 7024 7025 7026 7027 7028 7029 7030 7031 7032 7033 7034 7035 7036 7037 7038 7039 7040 7041 7042 7043 7044 7045 7046 7047 7048 7049 7050 7051 7052 7053 7054 7055 7056 7057 7058 7059 7060 7061 7062 7063 7064 7065 7066 7067 7068 7069 7070 7071 7072 7073 7074 7075 7076 7077 7078 7079 7080 7081 7082 7083 7084 7085 7086 7087 7088 7089 7090 7091 7092 7093 7094 7095 7096 7097 7098 7099 7100 7101 7102 7103 7104 7105 7106 7107 7108 7109 7110 7111 7112 7113 7114 7115 7116 7117 7118 7119 7120 7121 7122 7123 7124 7125 7126 7127 7128 7129 7130 7131 7132 7133 7134 7135 7136 7137 7138 7139 7140 7141 7142 7143 7144 7145 7146 7147 7148 7149 7150 7151 7152 7153 7154 7155 7156 7157 7158 7159 7160 7161 7162 7163 7164 7165 7166 7167 7168 7169 7170 7171 7172 7173 7174 7175 7176 7177 7178 7179 7180 7181 7182 7183 7184 7185 7186 7187 7188 7189 7190 7191 7192 7193 7194 7195 7196 7197 7198 7199 7200 7201 7202 7203 7204 7205 7206 7207 7208 7209 7210 7211 7212 7213 7214 7215 7216 7217 7218 7219 7220 7221 7222 7223 7224 7225 7226 7227 7228 7229 7230 7231 7232 7233 7234 7235 7236 7237 7238 7239 7240 7241 7242 7243 7244 7245 7246 7247 7248 7249 7250 7251 7252 7253 7254 7255 7256 7257 7258 7259 7260 7261 7262 7263 7264 7265 7266 7267 7268 7269 7270 7271 7272 7273 7274 7275 7276 7277 7278 7279 7280 7281 7282 7283 7284 7285 7286 7287 7288 7289 7290 7291 7292 7293 7294 7295 7296 7297 7298 7299 7300 7301 7302 7303 7304 7305 7306 7307 7308 7309 7310 7311 7312 7313 7314 7315 7316 7317 7318 7319 7320 7321 7322 7323 7324 7325 7326 7327 7328 7329 7330 7331 7332 7333 7334 7335 7336 7337 7338 7339 7340 7341 7342 7343 7344 7345 7346 7347 7348 7349 7350 7351 7352 7353 7354 7355 7356 7357 7358 7359 7360 7361 7362 7363 7364 7365 7366 7367 7368 7369 7370 7371 7372 7373 7374 7375 7376 7377 7378 7379 7380 7381 7382 7383 7384 7385 7386 7387 7388 7389 7390 7391 7392 7393 7394 7395 7396 7397 7398 7399 7400 7401 7402 7403 7404 7405 7406 7407 7408 7409 7410 7411 7412 7413 7414 7415 7416 7417 7418 7419 7420 7421 7422 7423 7424 7425 7426 7427 7428 7429 7430 7431 7432 7433 7434 7435 7436 7437 7438 7439 7440 7441 7442 7443 7444 7445 7446 7447 7448 7449 7450 7451 7452 7453 7454 7455 7456 7457 7458 7459 7460 7461 7462 7463 7464 7465 7466 7467 7468 7469 7470 7471 7472 7473 7474 7475 7476 7477 7478 7479 7480 7481 7482 7483 7484 7485 7486 7487 7488 7489 7490 7491 7492 7493 7494 7495 7496 7497 7498 7499 7500 7501 7502 7503 7504 7505 7506 7507 7508 7509 7510 7511 7512 7513 7514 7515 7516 7517 7518 7519 7520 7521 7522 7523 7524 7525 7526 7527 7528 7529 7530 7531 7532 7533 7534 7535 7536 7537 7538 7539 7540 7541 7542 7543 7544 7545 7546 7547 7548 7549 7550 7551 7552 7553 7554 7555 7556 7557 7558 7559 7560 7561 7562 7563 7564 7565 7566 7567 7568 7569 7570 7571 7572 7573 7574 7575 7576 7577 7578 7579 7580 7581 7582 7583 7584 7585 7586 7587 7588 7589 7590 7591 7592 7593 7594 7595 7596 7597 7598 7599 7600 7601 7602 7603 7604 7605 7606 7607 7608 7609 7610 7611 7612 7613 7614 7615 7616 7617 7618 7619 7620 7621 7622 7623 7624 7625 7626 7627 7628 7629 7630 7631 7632 7633 7634 7635 7636 7637 7638 7639 7640 7641 7642 7643 7644 7645 7646 7647 7648 7649 7650 7651 7652 7653 7654 7655 7656 7657 7658 7659 7660 7661 7662 7663 7664 7665 7666 7667 7668 7669 7670 7671 7672 7673 7674 7675 7676 7677 7678 7679 7680 7681 7682 7683 7684 7685 7686 7687 7688 7689 7690 7691 7692 7693 7694 7695 7696 7697 7698 7699 7700 7701 7702 7703 7704 7705 7706 7707 7708 7709 7710 7711 7712 7713 7714 7715 7716 7717 7718 7719 7720 7721 7722 7723 7724 7725 7726 7727 7728 7729 7730 7731 7732 7733 7734 7735 7736 7737 7738 7739 7740 7741 7742 7743 7744 7745 7746 7747 7748 7749 7750 7751 7752 7753 7754 7755 7756 7757 7758 7759 7760 7761 7762 7763 7764 7765 7766 7767 7768 7769 7770 7771 7772 7773 7774 7775 7776 7777 7778 7779 7780 7781 7782 7783 7784 7785 7786 7787 7788 7789 7790 7791 7792 7793 7794 7795 7796 7797 7798 7799 7800 7801 7802 7803 7804 7805 7806 7807 7808 7809 7810 7811 7812 7813 7814 7815 7816 7817 7818 7819 7820 7821 7822 7823 7824 7825 7826 7827 7828 7829 7830 7831 7832 7833 7834 7835 7836 7837 7838 7839 7840 7841 7842 7843 7844 7845 7846 7847 7848 7849 7850 7851 7852 7853 7854 7855 7856 7857 7858 7859 7860 7861 7862 7863 7864 7865 7866 7867 7868 7869 7870 7871 7872 7873 7874 7875 7876 7877 7878 7879 7880 7881 7882 7883 7884 7885 7886 7887 7888 7889 7890 7891 7892 7893 7894 7895 7896 7897 7898 7899 7900 7901 7902 7903 7904 7905 7906 7907 7908 7909 7910 7911 7912 7913 7914 7915 7916 7917 7918 7919 7920 7921 7922 7923 7924 7925 7926 7927 7928 7929 7930 7931 7932 7933 7934 7935 7936 7937 7938 7939 7940 7941 7942 7943 7944 7945 7946 7947 7948 7949 7950 7951 7952 7953 7954 7955 7956 7957 7958 7959 7960 7961 7962 7963 7964 7965 7966 7967 7968 7969 7970 7971 7972 7973 7974 7975 7976 7977 7978 7979 7980 7981 7982 7983 7984 7985 7986 7987 7988 7989 7990 7991 7992 7993 7994 7995 7996 7997 7998 7999 8000 8001 8002 8003 8004 8005 8006 8007 8008 8009 8010 8011 8012 8013 8014 8015 8016 8017 8018 8019 8020 8021 8022 8023 8024 8025 8026 8027 8028 8029 8030 8031 8032 8033 8034 8035 8036 8037 8038 8039 8040 8041 8042 8043 8044 8045 8046 8047 8048 8049 8050 8051 8052 8053 8054 8055 8056 8057 8058 8059 8060 8061 8062 8063 8064 8065 8066 8067 8068 8069 8070 8071 8072 8073 8074 8075 8076 8077 8078 8079 8080 8081 8082 8083 8084 8085 8086 8087 8088 8089 8090 8091 8092 8093 8094 8095 8096 8097 8098 8099 8100 8101 8102 8103 8104 8105 8106 8107 8108 8109 8110 8111 8112 8113 8114 8115 8116 8117 8118 8119 8120 8121 8122 8123 8124 8125 8126 8127 8128 8129 8130 8131 8132 8133 8134 8135 8136 8137 8138 8139 8140 8141 8142 8143 8144 8145 8146 8147 8148 8149 8150 8151 8152 8153 8154 8155 8156 8157 8158 8159 8160 8161 8162 8163 8164 8165 8166 8167 8168 8169 8170 8171 8172 8173 8174 8175 8176 8177 8178 8179 8180 8181 8182 8183 8184 8185 8186 8187 8188 8189 8190 8191 8192 8193 8194 8195 8196 8197 8198 8199 8200 8201 8202 8203 8204 8205 8206 8207 8208 8209 8210 8211 8212 8213 8214 8215 8216 8217 8218 8219 8220 8221 8222 8223 8224 8225 8226 8227 8228 8229 8230 8231 8232 8233 8234 8235 8236 8237 8238 8239 8240 8241 8242 8243 8244 8245 8246 8247 8248 8249 8250 8251 8252 8253 8254 8255 8256 8257 8258 8259 8260 8261 8262 8263 8264 8265 8266 8267 8268 8269 8270 8271 8272 8273 8274 8275 8276 8277 8278 8279 8280 8281 8282 8283 8284 8285 8286 8287 8288 8289 8290 8291 8292 8293 8294 8295 8296 8297 8298 8299 8300 8301 8302 8303 8304 8305 8306 8307 8308 8309 8310 8311 8312 8313 8314 8315 8316 8317 8318 8319 8320 8321 8322 8323 8324 8325 8326 8327 8328 8329 8330 8331 8332 8333 8334 8335 8336 8337 8338 8339 8340 8341 8342 8343 8344 8345 8346 8347 8348 8349 8350 8351 8352 8353 8354 8355 8356 8357 8358 8359 8360 8361 8362 8363 8364 8365 8366 8367 8368 8369 8370 8371 8372 8373 8374 8375 8376 8377 8378 8379 8380 8381 8382 8383 8384 8385 8386 8387 8388 8389 8390 8391 8392 8393 8394 8395 8396 8397 8398 8399 8400 8401 8402 8403 8404 8405 8406 8407 8408 8409 8410 8411 8412 8413 8414 8415 8416 8417 8418 8419 8420 8421 8422 8423 8424 8425 8426 8427 8428 8429 8430 8431 8432 8433 8434 8435 8436 8437 8438 8439 8440 8441 8442 8443 8444 8445 8446 8447 8448 8449 8450 8451 8452 8453 8454 8455 8456 8457 8458 8459 8460 8461 8462 8463 8464 8465 8466 8467 8468 8469 8470 8471 8472 8473 8474 8475 8476 8477 8478 8479 8480 8481 8482 8483 8484 8485 8486 8487 8488 8489 8490 8491 8492 8493 8494 8495 8496 8497 8498 8499 8500 8501 8502 8503 8504 8505 8506 8507 8508 8509 8510 8511 8512 8513 8514 8515 8516 8517 8518 8519 8520 8521 8522 8523 8524 8525 8526 8527 8528 8529 8530 8531 8532 8533 8534 8535 8536 8537 8538 8539 8540 8541 8542 8543 8544 8545 8546 8547 8548 8549 8550 8551 8552 8553 8554 8555 8556 8557 8558 8559 8560 8561 8562 8563 8564 8565 8566 8567 8568 8569 8570 8571 8572 8573 8574 8575 8576 8577 8578 8579 8580 8581 8582 8583 8584 8585 8586 8587 8588 8589 8590 8591 8592 8593 8594 8595 8596 8597 8598 8599 8600 8601 8602 8603 8604 8605 8606 8607 8608 8609 8610 8611 8612 8613 8614 8615 8616 8617 8618 8619 8620 8621 8622 8623 8624 8625 8626 8627 8628 8629 8630 8631 8632 8633 8634 8635 8636 8637 8638 8639 8640 8641 8642 8643 8644 8645 8646 8647 8648 8649 8650 8651 8652 8653 8654 8655 8656 8657 8658 8659 8660 8661 8662 8663 8664 8665 8666 8667 8668 8669 8670 8671 8672 8673 8674 8675 8676 8677 8678 8679 8680 8681 8682 8683 8684 8685 8686 8687 8688 8689 8690 8691 8692 8693 8694 8695 8696 8697 8698 8699 8700 8701 8702 8703 8704 8705 8706 8707 8708 8709 8710 8711 8712 8713 8714 8715 8716 8717 8718 8719 8720 8721 8722 8723 8724 8725 8726 8727 8728 8729 8730 8731 8732 8733 8734 8735 8736 8737 8738 8739 8740 8741 8742 8743 8744 8745 8746 8747 8748 8749 8750 8751 8752 8753 8754 8755 8756 8757 8758 8759 8760 8761 8762 8763 8764 8765 8766 8767 8768 8769 8770 8771 8772 8773 8774 8775 8776 8777 8778 8779 8780 8781 8782 8783 8784 8785 8786 8787 8788 8789 8790 8791 8792 8793 8794 8795 8796 8797 8798 8799 8800 8801 8802 8803 8804 8805 8806 8807 8808 8809 8810 8811 8812 8813 8814 8815 8816 8817 8818 8819 8820 8821 8822 8823 8824 8825 8826 8827 8828 8829 8830 8831 8832 8833 8834 8835 8836 8837 8838 8839 8840 8841 8842 8843 8844 8845 8846 8847 8848 8849 8850 8851 8852 8853 8854 8855 8856 8857 8858 8859 8860 8861 8862 8863 8864 8865 8866 8867 8868 8869 8870 8871 8872 8873 8874 8875 8876 8877 8878 8879 8880 8881 8882 8883 8884 8885 8886 8887 8888 8889 8890 8891 8892 8893 8894 8895 8896 8897 8898 8899 8900 8901 8902 8903 8904 8905 8906 8907 8908 8909 8910 8911 8912 8913 8914 8915 8916 8917 8918 8919 8920 8921 8922 8923 8924 8925 8926 8927 8928 8929 8930 8931 8932 8933 8934 8935 8936 8937 8938 8939 8940 8941 8942 8943 8944 8945 8946 8947 8948 8949 8950 8951 8952 8953 8954 8955 8956 8957 8958 8959 8960 8961 8962 8963 8964 8965 8966 8967 8968 8969 8970 8971 8972 8973 8974 8975 8976 8977 8978 8979 8980 8981 8982 8983 8984 8985 8986 8987 8988 8989 8990 8991 8992 8993 8994 8995 8996 8997 8998 8999 9000 9001 9002 9003 9004 9005 9006 9007 9008 9009 9010 9011 9012 9013 9014 9015 9016 9017 9018 9019 9020 9021 9022 9023 9024 9025 9026 9027 9028 9029 9030 9031 9032 9033 9034 9035 9036 9037 9038 9039 9040 9041 9042 9043 9044 9045 9046 9047 9048 9049 9050 9051 9052 9053 9054 9055 9056 9057 9058 9059 9060 9061 9062 9063 9064 9065 9066 9067 9068 9069 9070 9071 9072 9073 9074 9075 9076 9077 9078 9079 9080 9081 9082 9083 9084 9085 9086 9087 9088 9089 9090 9091 9092 9093 9094 9095 9096 9097 9098 9099 9100 9101 9102 9103 9104 9105 9106 9107 9108 9109 9110 9111 9112 9113 9114 9115 9116 9117 9118 9119 9120 9121 9122 9123 9124 9125 9126 9127 9128 9129 9130 9131 9132 9133 9134 9135 9136 9137 9138 9139 9140 9141 9142 9143 9144 9145 9146 9147 9148 9149 9150 9151 9152 9153 9154 9155 9156 9157 9158 9159 9160 9161 9162 9163 9164 9165 9166 9167 9168 9169 9170 9171 9172 9173 9174 9175 9176 9177 9178 9179 9180 9181 9182 9183 9184 9185 9186 9187 9188 9189 9190 9191 9192 9193 9194 9195 9196 9197 9198 9199 9200 9201 9202 9203 9204 9205 9206 9207 9208 9209 9210 9211 9212 9213 9214 9215 9216 9217 9218 9219 9220 9221 9222 9223 9224 9225 9226 9227 9228 9229 9230 9231 9232 9233 9234 9235 9236 9237 9238 9239 9240 9241 9242 9243 9244 9245 9246 9247 9248 9249 9250 9251 9252 9253 9254 9255 9256 9257 9258 9259 9260 9261 9262 9263 9264 9265 9266 9267 9268 9269 9270 9271 9272 9273 9274 9275 9276 9277 9278 9279 9280 9281 9282 9283 9284 9285 9286 9287 9288 9289 9290 9291 9292 9293 9294 9295 9296 9297 9298 9299 9300 9301 9302 9303 9304 9305 9306 9307 9308 9309 9310 9311 9312 9313 9314 9315 9316 9317 9318 9319 9320 9321 9322 9323 9324 9325 9326 9327 9328 9329 9330 9331 9332 9333 9334 9335 9336 9337 9338 9339 9340 9341 9342 9343 9344 9345 9346 9347 9348 9349 9350 9351 9352 9353 9354 9355 9356 9357 9358 9359 9360 9361 9362 9363 9364 9365 9366 9367 9368 9369 9370 9371 9372 9373 9374 9375 9376 9377 9378 9379 9380 9381 9382 9383 9384 9385 9386 9387 9388 9389 9390 9391 9392 9393 9394 9395 9396 9397 9398 9399 9400 9401 9402 9403 9404 9405 9406 9407 9408 9409 9410 9411 9412 9413 9414 9415 9416 9417 9418 9419 9420 9421 9422 9423 9424 9425 9426 9427 9428 9429 9430 9431 9432 9433 9434 9435 9436 9437 9438 9439 9440 9441 9442 9443 9444 9445 9446 9447 9448 9449 9450 9451 9452 9453 9454 9455 9456 9457 9458 9459 9460 9461 9462 9463 9464 9465 9466 9467 9468 9469 9470 9471 9472 9473 9474 9475 9476 9477 9478 9479 9480 9481 9482 9483 9484 9485 9486 9487 9488 9489 9490 9491 9492 9493 9494 9495 9496 9497 9498 9499 9500 9501 9502 9503 9504 9505 9506 9507 9508 9509 9510 9511 9512 9513 9514 9515 9516 9517 9518 9519 9520 9521 9522 9523 9524 9525 9526 9527 9528 9529 9530 9531 9532 9533 9534 9535 9536 9537 9538 9539 9540 9541 9542 9543 9544 9545 9546 9547 9548 9549 9550 9551 9552 9553 9554 9555 9556 9557 9558 9559 9560 9561 9562 9563 9564 9565 9566 9567 9568 9569 9570 9571 9572 9573 9574 9575 9576 9577 9578 9579 9580 9581 9582 9583 9584 9585 9586 9587 9588 9589 9590 9591 9592 9593 9594 9595 9596 9597 9598 9599 9600 9601 9602 9603 9604 9605 9606 9607 9608 9609 9610 9611 9612 9613 9614 9615 9616 9617 9618 9619 9620 9621 9622 9623 9624 9625 9626 9627 9628 9629 9630 9631 9632 9633 9634 9635 9636 9637 9638 9639 9640 9641 9642 9643 9644 9645 9646 9647 9648 9649 9650 9651 9652 9653 9654 9655 9656 9657 9658 9659 9660 9661 9662 9663 9664 9665 9666 9667 9668 9669 9670 9671 9672 9673 9674 9675 9676 9677 9678 9679 9680 9681 9682 9683 9684 9685 9686 9687 9688 9689 9690 9691 9692 9693 9694 9695 9696 9697 9698 9699 9700 9701 9702 9703 9704 9705 9706 9707 9708 9709 9710 9711 9712 9713 9714 9715 9716 9717 9718 9719 9720 9721 9722 9723 9724 9725 9726 9727 9728 9729 9730 9731 9732 9733 9734 9735 9736 9737 9738 9739 9740 9741 9742 9743 9744 9745 9746 9747 9748 9749 9750 9751 9752 9753 9754 9755 9756 9757 9758 9759 9760 9761 9762 9763 9764 9765 9766 9767 9768 9769 9770 9771 // SPDX-License-Identifier: GPL-2.0-or-later /* SCTP kernel implementation * (C) Copyright IBM Corp. 2001, 2004 * Copyright (c) 1999-2000 Cisco, Inc. * Copyright (c) 1999-2001 Motorola, Inc. * Copyright (c) 2001-2003 Intel Corp. * Copyright (c) 2001-2002 Nokia, Inc. * Copyright (c) 2001 La Monte H.P. Yarroll * * This file is part of the SCTP kernel implementation * * These functions interface with the sockets layer to implement the * SCTP Extensions for the Sockets API. * * Note that the descriptions from the specification are USER level * functions--this file is the functions which populate the struct proto * for SCTP which is the BOTTOM of the sockets interface. * * Please send any bug reports or fixes you make to the * email address(es): * lksctp developers <linux-sctp@vger.kernel.org> * * Written or modified by: * La Monte H.P. Yarroll <piggy@acm.org> * Narasimha Budihal <narsi@refcode.org> * Karl Knutson <karl@athena.chicago.il.us> * Jon Grimm <jgrimm@us.ibm.com> * Xingang Guo <xingang.guo@intel.com> * Daisy Chang <daisyc@us.ibm.com> * Sridhar Samudrala <samudrala@us.ibm.com> * Inaky Perez-Gonzalez <inaky.gonzalez@intel.com> * Ardelle Fan <ardelle.fan@intel.com> * Ryan Layer <rmlayer@us.ibm.com> * Anup Pemmaiah <pemmaiah@cc.usu.edu> * Kevin Gao <kevin.gao@intel.com> */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <crypto/hash.h> #include <linux/types.h> #include <linux/kernel.h> #include <linux/wait.h> #include <linux/time.h> #include <linux/sched/signal.h> #include <linux/ip.h> #include <linux/capability.h> #include <linux/fcntl.h> #include <linux/poll.h> #include <linux/init.h> #include <linux/slab.h> #include <linux/file.h> #include <linux/compat.h> #include <linux/rhashtable.h> #include <net/ip.h> #include <net/icmp.h> #include <net/route.h> #include <net/ipv6.h> #include <net/inet_common.h> #include <net/busy_poll.h> #include <trace/events/sock.h> #include <linux/socket.h> /* for sa_family_t */ #include <linux/export.h> #include <net/sock.h> #include <net/sctp/sctp.h> #include <net/sctp/sm.h> #include <net/sctp/stream_sched.h> #include <net/rps.h> /* Forward declarations for internal helper functions. */ static bool sctp_writeable(const struct sock *sk); static void sctp_wfree(struct sk_buff *skb); static int sctp_wait_for_sndbuf(struct sctp_association *asoc, long *timeo_p, size_t msg_len); static int sctp_wait_for_packet(struct sock *sk, int *err, long *timeo_p); static int sctp_wait_for_connect(struct sctp_association *, long *timeo_p); static int sctp_wait_for_accept(struct sock *sk, long timeo); static void sctp_wait_for_close(struct sock *sk, long timeo); static void sctp_destruct_sock(struct sock *sk); static struct sctp_af *sctp_sockaddr_af(struct sctp_sock *opt, union sctp_addr *addr, int len); static int sctp_bindx_add(struct sock *, struct sockaddr *, int); static int sctp_bindx_rem(struct sock *, struct sockaddr *, int); static int sctp_send_asconf_add_ip(struct sock *, struct sockaddr *, int); static int sctp_send_asconf_del_ip(struct sock *, struct sockaddr *, int); static int sctp_send_asconf(struct sctp_association *asoc, struct sctp_chunk *chunk); static int sctp_do_bind(struct sock *, union sctp_addr *, int); static int sctp_autobind(struct sock *sk); static int sctp_sock_migrate(struct sock *oldsk, struct sock *newsk, struct sctp_association *assoc, enum sctp_socket_type type); static unsigned long sctp_memory_pressure; static atomic_long_t sctp_memory_allocated; static DEFINE_PER_CPU(int, sctp_memory_per_cpu_fw_alloc); struct percpu_counter sctp_sockets_allocated; static void sctp_enter_memory_pressure(struct sock *sk) { WRITE_ONCE(sctp_memory_pressure, 1); } /* Get the sndbuf space available at the time on the association. */ static inline int sctp_wspace(struct sctp_association *asoc) { struct sock *sk = asoc->base.sk; return asoc->ep->sndbuf_policy ? sk->sk_sndbuf - asoc->sndbuf_used : sk_stream_wspace(sk); } /* Increment the used sndbuf space count of the corresponding association by * the size of the outgoing data chunk. * Also, set the skb destructor for sndbuf accounting later. * * Since it is always 1-1 between chunk and skb, and also a new skb is always * allocated for chunk bundling in sctp_packet_transmit(), we can use the * destructor in the data chunk skb for the purpose of the sndbuf space * tracking. */ static inline void sctp_set_owner_w(struct sctp_chunk *chunk) { struct sctp_association *asoc = chunk->asoc; struct sock *sk = asoc->base.sk; /* The sndbuf space is tracked per association. */ sctp_association_hold(asoc); if (chunk->shkey) sctp_auth_shkey_hold(chunk->shkey); skb_set_owner_w(chunk->skb, sk); chunk->skb->destructor = sctp_wfree; /* Save the chunk pointer in skb for sctp_wfree to use later. */ skb_shinfo(chunk->skb)->destructor_arg = chunk; refcount_add(sizeof(struct sctp_chunk), &sk->sk_wmem_alloc); asoc->sndbuf_used += chunk->skb->truesize + sizeof(struct sctp_chunk); sk_wmem_queued_add(sk, chunk->skb->truesize + sizeof(struct sctp_chunk)); sk_mem_charge(sk, chunk->skb->truesize); } static void sctp_clear_owner_w(struct sctp_chunk *chunk) { skb_orphan(chunk->skb); } #define traverse_and_process() \ do { \ msg = chunk->msg; \ if (msg == prev_msg) \ continue; \ list_for_each_entry(c, &msg->chunks, frag_list) { \ if ((clear && asoc->base.sk == c->skb->sk) || \ (!clear && asoc->base.sk != c->skb->sk)) \ cb(c); \ } \ prev_msg = msg; \ } while (0) static void sctp_for_each_tx_datachunk(struct sctp_association *asoc, bool clear, void (*cb)(struct sctp_chunk *)) { struct sctp_datamsg *msg, *prev_msg = NULL; struct sctp_outq *q = &asoc->outqueue; struct sctp_chunk *chunk, *c; struct sctp_transport *t; list_for_each_entry(t, &asoc->peer.transport_addr_list, transports) list_for_each_entry(chunk, &t->transmitted, transmitted_list) traverse_and_process(); list_for_each_entry(chunk, &q->retransmit, transmitted_list) traverse_and_process(); list_for_each_entry(chunk, &q->sacked, transmitted_list) traverse_and_process(); list_for_each_entry(chunk, &q->abandoned, transmitted_list) traverse_and_process(); list_for_each_entry(chunk, &q->out_chunk_list, list) traverse_and_process(); } static void sctp_for_each_rx_skb(struct sctp_association *asoc, struct sock *sk, void (*cb)(struct sk_buff *, struct sock *)) { struct sk_buff *skb, *tmp; sctp_skb_for_each(skb, &asoc->ulpq.lobby, tmp) cb(skb, sk); sctp_skb_for_each(skb, &asoc->ulpq.reasm, tmp) cb(skb, sk); sctp_skb_for_each(skb, &asoc->ulpq.reasm_uo, tmp) cb(skb, sk); } /* Verify that this is a valid address. */ static inline int sctp_verify_addr(struct sock *sk, union sctp_addr *addr, int len) { struct sctp_af *af; /* Verify basic sockaddr. */ af = sctp_sockaddr_af(sctp_sk(sk), addr, len); if (!af) return -EINVAL; /* Is this a valid SCTP address? */ if (!af->addr_valid(addr, sctp_sk(sk), NULL)) return -EINVAL; if (!sctp_sk(sk)->pf->send_verify(sctp_sk(sk), (addr))) return -EINVAL; return 0; } /* Look up the association by its id. If this is not a UDP-style * socket, the ID field is always ignored. */ struct sctp_association *sctp_id2assoc(struct sock *sk, sctp_assoc_t id) { struct sctp_association *asoc = NULL; /* If this is not a UDP-style socket, assoc id should be ignored. */ if (!sctp_style(sk, UDP)) { /* Return NULL if the socket state is not ESTABLISHED. It * could be a TCP-style listening socket or a socket which * hasn't yet called connect() to establish an association. */ if (!sctp_sstate(sk, ESTABLISHED) && !sctp_sstate(sk, CLOSING)) return NULL; /* Get the first and the only association from the list. */ if (!list_empty(&sctp_sk(sk)->ep->asocs)) asoc = list_entry(sctp_sk(sk)->ep->asocs.next, struct sctp_association, asocs); return asoc; } /* Otherwise this is a UDP-style socket. */ if (id <= SCTP_ALL_ASSOC) return NULL; spin_lock_bh(&sctp_assocs_id_lock); asoc = (struct sctp_association *)idr_find(&sctp_assocs_id, (int)id); if (asoc && (asoc->base.sk != sk || asoc->base.dead)) asoc = NULL; spin_unlock_bh(&sctp_assocs_id_lock); return asoc; } /* Look up the transport from an address and an assoc id. If both address and * id are specified, the associations matching the address and the id should be * the same. */ static struct sctp_transport *sctp_addr_id2transport(struct sock *sk, struct sockaddr_storage *addr, sctp_assoc_t id) { struct sctp_association *addr_asoc = NULL, *id_asoc = NULL; struct sctp_af *af = sctp_get_af_specific(addr->ss_family); union sctp_addr *laddr = (union sctp_addr *)addr; struct sctp_transport *transport; if (!af || sctp_verify_addr(sk, laddr, af->sockaddr_len)) return NULL; addr_asoc = sctp_endpoint_lookup_assoc(sctp_sk(sk)->ep, laddr, &transport); if (!addr_asoc) return NULL; id_asoc = sctp_id2assoc(sk, id); if (id_asoc && (id_asoc != addr_asoc)) return NULL; sctp_get_pf_specific(sk->sk_family)->addr_to_user(sctp_sk(sk), (union sctp_addr *)addr); return transport; } /* API 3.1.2 bind() - UDP Style Syntax * The syntax of bind() is, * * ret = bind(int sd, struct sockaddr *addr, int addrlen); * * sd - the socket descriptor returned by socket(). * addr - the address structure (struct sockaddr_in or struct * sockaddr_in6 [RFC 2553]), * addr_len - the size of the address structure. */ static int sctp_bind(struct sock *sk, struct sockaddr *addr, int addr_len) { int retval = 0; lock_sock(sk); pr_debug("%s: sk:%p, addr:%p, addr_len:%d\n", __func__, sk, addr, addr_len); /* Disallow binding twice. */ if (!sctp_sk(sk)->ep->base.bind_addr.port) retval = sctp_do_bind(sk, (union sctp_addr *)addr, addr_len); else retval = -EINVAL; release_sock(sk); return retval; } static int sctp_get_port_local(struct sock *, union sctp_addr *); /* Verify this is a valid sockaddr. */ static struct sctp_af *sctp_sockaddr_af(struct sctp_sock *opt, union sctp_addr *addr, int len) { struct sctp_af *af; /* Check minimum size. */ if (len < sizeof (struct sockaddr)) return NULL; if (!opt->pf->af_supported(addr->sa.sa_family, opt)) return NULL; if (addr->sa.sa_family == AF_INET6) { if (len < SIN6_LEN_RFC2133) return NULL; /* V4 mapped address are really of AF_INET family */ if (ipv6_addr_v4mapped(&addr->v6.sin6_addr) && !opt->pf->af_supported(AF_INET, opt)) return NULL; } /* If we get this far, af is valid. */ af = sctp_get_af_specific(addr->sa.sa_family); if (len < af->sockaddr_len) return NULL; return af; } static void sctp_auto_asconf_init(struct sctp_sock *sp) { struct net *net = sock_net(&sp->inet.sk); if (net->sctp.default_auto_asconf) { spin_lock_bh(&net->sctp.addr_wq_lock); list_add_tail(&sp->auto_asconf_list, &net->sctp.auto_asconf_splist); spin_unlock_bh(&net->sctp.addr_wq_lock); sp->do_auto_asconf = 1; } } /* Bind a local address either to an endpoint or to an association. */ static int sctp_do_bind(struct sock *sk, union sctp_addr *addr, int len) { struct net *net = sock_net(sk); struct sctp_sock *sp = sctp_sk(sk); struct sctp_endpoint *ep = sp->ep; struct sctp_bind_addr *bp = &ep->base.bind_addr; struct sctp_af *af; unsigned short snum; int ret = 0; /* Common sockaddr verification. */ af = sctp_sockaddr_af(sp, addr, len); if (!af) { pr_debug("%s: sk:%p, newaddr:%p, len:%d EINVAL\n", __func__, sk, addr, len); return -EINVAL; } snum = ntohs(addr->v4.sin_port); pr_debug("%s: sk:%p, new addr:%pISc, port:%d, new port:%d, len:%d\n", __func__, sk, &addr->sa, bp->port, snum, len); /* PF specific bind() address verification. */ if (!sp->pf->bind_verify(sp, addr)) return -EADDRNOTAVAIL; /* We must either be unbound, or bind to the same port. * It's OK to allow 0 ports if we are already bound. * We'll just inhert an already bound port in this case */ if (bp->port) { if (!snum) snum = bp->port; else if (snum != bp->port) { pr_debug("%s: new port %d doesn't match existing port " "%d\n", __func__, snum, bp->port); return -EINVAL; } } if (snum && inet_port_requires_bind_service(net, snum) && !ns_capable(net->user_ns, CAP_NET_BIND_SERVICE)) return -EACCES; /* See if the address matches any of the addresses we may have * already bound before checking against other endpoints. */ if (sctp_bind_addr_match(bp, addr, sp)) return -EINVAL; /* Make sure we are allowed to bind here. * The function sctp_get_port_local() does duplicate address * detection. */ addr->v4.sin_port = htons(snum); if (sctp_get_port_local(sk, addr)) return -EADDRINUSE; /* Refresh ephemeral port. */ if (!bp->port) { bp->port = inet_sk(sk)->inet_num; sctp_auto_asconf_init(sp); } /* Add the address to the bind address list. * Use GFP_ATOMIC since BHs will be disabled. */ ret = sctp_add_bind_addr(bp, addr, af->sockaddr_len, SCTP_ADDR_SRC, GFP_ATOMIC); if (ret) { sctp_put_port(sk); return ret; } /* Copy back into socket for getsockname() use. */ inet_sk(sk)->inet_sport = htons(inet_sk(sk)->inet_num); sp->pf->to_sk_saddr(addr, sk); return ret; } /* ADDIP Section 4.1.1 Congestion Control of ASCONF Chunks * * R1) One and only one ASCONF Chunk MAY be in transit and unacknowledged * at any one time. If a sender, after sending an ASCONF chunk, decides * it needs to transfer another ASCONF Chunk, it MUST wait until the * ASCONF-ACK Chunk returns from the previous ASCONF Chunk before sending a * subsequent ASCONF. Note this restriction binds each side, so at any * time two ASCONF may be in-transit on any given association (one sent * from each endpoint). */ static int sctp_send_asconf(struct sctp_association *asoc, struct sctp_chunk *chunk) { int retval = 0; /* If there is an outstanding ASCONF chunk, queue it for later * transmission. */ if (asoc->addip_last_asconf) { list_add_tail(&chunk->list, &asoc->addip_chunk_list); goto out; } /* Hold the chunk until an ASCONF_ACK is received. */ sctp_chunk_hold(chunk); retval = sctp_primitive_ASCONF(asoc->base.net, asoc, chunk); if (retval) sctp_chunk_free(chunk); else asoc->addip_last_asconf = chunk; out: return retval; } /* Add a list of addresses as bind addresses to local endpoint or * association. * * Basically run through each address specified in the addrs/addrcnt * array/length pair, determine if it is IPv6 or IPv4 and call * sctp_do_bind() on it. * * If any of them fails, then the operation will be reversed and the * ones that were added will be removed. * * Only sctp_setsockopt_bindx() is supposed to call this function. */ static int sctp_bindx_add(struct sock *sk, struct sockaddr *addrs, int addrcnt) { int cnt; int retval = 0; void *addr_buf; struct sockaddr *sa_addr; struct sctp_af *af; pr_debug("%s: sk:%p, addrs:%p, addrcnt:%d\n", __func__, sk, addrs, addrcnt); addr_buf = addrs; for (cnt = 0; cnt < addrcnt; cnt++) { /* The list may contain either IPv4 or IPv6 address; * determine the address length for walking thru the list. */ sa_addr = addr_buf; af = sctp_get_af_specific(sa_addr->sa_family); if (!af) { retval = -EINVAL; goto err_bindx_add; } retval = sctp_do_bind(sk, (union sctp_addr *)sa_addr, af->sockaddr_len); addr_buf += af->sockaddr_len; err_bindx_add: if (retval < 0) { /* Failed. Cleanup the ones that have been added */ if (cnt > 0) sctp_bindx_rem(sk, addrs, cnt); return retval; } } return retval; } /* Send an ASCONF chunk with Add IP address parameters to all the peers of the * associations that are part of the endpoint indicating that a list of local * addresses are added to the endpoint. * * If any of the addresses is already in the bind address list of the * association, we do not send the chunk for that association. But it will not * affect other associations. * * Only sctp_setsockopt_bindx() is supposed to call this function. */ static int sctp_send_asconf_add_ip(struct sock *sk, struct sockaddr *addrs, int addrcnt) { struct sctp_sock *sp; struct sctp_endpoint *ep; struct sctp_association *asoc; struct sctp_bind_addr *bp; struct sctp_chunk *chunk; struct sctp_sockaddr_entry *laddr; union sctp_addr *addr; union sctp_addr saveaddr; void *addr_buf; struct sctp_af *af; struct list_head *p; int i; int retval = 0; sp = sctp_sk(sk); ep = sp->ep; if (!ep->asconf_enable) return retval; pr_debug("%s: sk:%p, addrs:%p, addrcnt:%d\n", __func__, sk, addrs, addrcnt); list_for_each_entry(asoc, &ep->asocs, asocs) { if (!asoc->peer.asconf_capable) continue; if (asoc->peer.addip_disabled_mask & SCTP_PARAM_ADD_IP) continue; if (!sctp_state(asoc, ESTABLISHED)) continue; /* Check if any address in the packed array of addresses is * in the bind address list of the association. If so, * do not send the asconf chunk to its peer, but continue with * other associations. */ addr_buf = addrs; for (i = 0; i < addrcnt; i++) { addr = addr_buf; af = sctp_get_af_specific(addr->v4.sin_family); if (!af) { retval = -EINVAL; goto out; } if (sctp_assoc_lookup_laddr(asoc, addr)) break; addr_buf += af->sockaddr_len; } if (i < addrcnt) continue; /* Use the first valid address in bind addr list of * association as Address Parameter of ASCONF CHUNK. */ bp = &asoc->base.bind_addr; p = bp->address_list.next; laddr = list_entry(p, struct sctp_sockaddr_entry, list); chunk = sctp_make_asconf_update_ip(asoc, &laddr->a, addrs, addrcnt, SCTP_PARAM_ADD_IP); if (!chunk) { retval = -ENOMEM; goto out; } /* Add the new addresses to the bind address list with * use_as_src set to 0. */ addr_buf = addrs; for (i = 0; i < addrcnt; i++) { addr = addr_buf; af = sctp_get_af_specific(addr->v4.sin_family); memcpy(&saveaddr, addr, af->sockaddr_len); retval = sctp_add_bind_addr(bp, &saveaddr, sizeof(saveaddr), SCTP_ADDR_NEW, GFP_ATOMIC); addr_buf += af->sockaddr_len; } if (asoc->src_out_of_asoc_ok) { struct sctp_transport *trans; list_for_each_entry(trans, &asoc->peer.transport_addr_list, transports) { trans->cwnd = min(4*asoc->pathmtu, max_t(__u32, 2*asoc->pathmtu, 4380)); trans->ssthresh = asoc->peer.i.a_rwnd; trans->rto = asoc->rto_initial; sctp_max_rto(asoc, trans); trans->rtt = trans->srtt = trans->rttvar = 0; /* Clear the source and route cache */ sctp_transport_route(trans, NULL, sctp_sk(asoc->base.sk)); } } retval = sctp_send_asconf(asoc, chunk); } out: return retval; } /* Remove a list of addresses from bind addresses list. Do not remove the * last address. * * Basically run through each address specified in the addrs/addrcnt * array/length pair, determine if it is IPv6 or IPv4 and call * sctp_del_bind() on it. * * If any of them fails, then the operation will be reversed and the * ones that were removed will be added back. * * At least one address has to be left; if only one address is * available, the operation will return -EBUSY. * * Only sctp_setsockopt_bindx() is supposed to call this function. */ static int sctp_bindx_rem(struct sock *sk, struct sockaddr *addrs, int addrcnt) { struct sctp_sock *sp = sctp_sk(sk); struct sctp_endpoint *ep = sp->ep; int cnt; struct sctp_bind_addr *bp = &ep->base.bind_addr; int retval = 0; void *addr_buf; union sctp_addr *sa_addr; struct sctp_af *af; pr_debug("%s: sk:%p, addrs:%p, addrcnt:%d\n", __func__, sk, addrs, addrcnt); addr_buf = addrs; for (cnt = 0; cnt < addrcnt; cnt++) { /* If the bind address list is empty or if there is only one * bind address, there is nothing more to be removed (we need * at least one address here). */ if (list_empty(&bp->address_list) || (sctp_list_single_entry(&bp->address_list))) { retval = -EBUSY; goto err_bindx_rem; } sa_addr = addr_buf; af = sctp_get_af_specific(sa_addr->sa.sa_family); if (!af) { retval = -EINVAL; goto err_bindx_rem; } if (!af->addr_valid(sa_addr, sp, NULL)) { retval = -EADDRNOTAVAIL; goto err_bindx_rem; } if (sa_addr->v4.sin_port && sa_addr->v4.sin_port != htons(bp->port)) { retval = -EINVAL; goto err_bindx_rem; } if (!sa_addr->v4.sin_port) sa_addr->v4.sin_port = htons(bp->port); /* FIXME - There is probably a need to check if sk->sk_saddr and * sk->sk_rcv_addr are currently set to one of the addresses to * be removed. This is something which needs to be looked into * when we are fixing the outstanding issues with multi-homing * socket routing and failover schemes. Refer to comments in * sctp_do_bind(). -daisy */ retval = sctp_del_bind_addr(bp, sa_addr); addr_buf += af->sockaddr_len; err_bindx_rem: if (retval < 0) { /* Failed. Add the ones that has been removed back */ if (cnt > 0) sctp_bindx_add(sk, addrs, cnt); return retval; } } return retval; } /* Send an ASCONF chunk with Delete IP address parameters to all the peers of * the associations that are part of the endpoint indicating that a list of * local addresses are removed from the endpoint. * * If any of the addresses is already in the bind address list of the * association, we do not send the chunk for that association. But it will not * affect other associations. * * Only sctp_setsockopt_bindx() is supposed to call this function. */ static int sctp_send_asconf_del_ip(struct sock *sk, struct sockaddr *addrs, int addrcnt) { struct sctp_sock *sp; struct sctp_endpoint *ep; struct sctp_association *asoc; struct sctp_transport *transport; struct sctp_bind_addr *bp; struct sctp_chunk *chunk; union sctp_addr *laddr; void *addr_buf; struct sctp_af *af; struct sctp_sockaddr_entry *saddr; int i; int retval = 0; int stored = 0; chunk = NULL; sp = sctp_sk(sk); ep = sp->ep; if (!ep->asconf_enable) return retval; pr_debug("%s: sk:%p, addrs:%p, addrcnt:%d\n", __func__, sk, addrs, addrcnt); list_for_each_entry(asoc, &ep->asocs, asocs) { if (!asoc->peer.asconf_capable) continue; if (asoc->peer.addip_disabled_mask & SCTP_PARAM_DEL_IP) continue; if (!sctp_state(asoc, ESTABLISHED)) continue; /* Check if any address in the packed array of addresses is * not present in the bind address list of the association. * If so, do not send the asconf chunk to its peer, but * continue with other associations. */ addr_buf = addrs; for (i = 0; i < addrcnt; i++) { laddr = addr_buf; af = sctp_get_af_specific(laddr->v4.sin_family); if (!af) { retval = -EINVAL; goto out; } if (!sctp_assoc_lookup_laddr(asoc, laddr)) break; addr_buf += af->sockaddr_len; } if (i < addrcnt) continue; /* Find one address in the association's bind address list * that is not in the packed array of addresses. This is to * make sure that we do not delete all the addresses in the * association. */ bp = &asoc->base.bind_addr; laddr = sctp_find_unmatch_addr(bp, (union sctp_addr *)addrs, addrcnt, sp); if ((laddr == NULL) && (addrcnt == 1)) { if (asoc->asconf_addr_del_pending) continue; asoc->asconf_addr_del_pending = kzalloc(sizeof(union sctp_addr), GFP_ATOMIC); if (asoc->asconf_addr_del_pending == NULL) { retval = -ENOMEM; goto out; } asoc->asconf_addr_del_pending->sa.sa_family = addrs->sa_family; asoc->asconf_addr_del_pending->v4.sin_port = htons(bp->port); if (addrs->sa_family == AF_INET) { struct sockaddr_in *sin; sin = (struct sockaddr_in *)addrs; asoc->asconf_addr_del_pending->v4.sin_addr.s_addr = sin->sin_addr.s_addr; } else if (addrs->sa_family == AF_INET6) { struct sockaddr_in6 *sin6; sin6 = (struct sockaddr_in6 *)addrs; asoc->asconf_addr_del_pending->v6.sin6_addr = sin6->sin6_addr; } pr_debug("%s: keep the last address asoc:%p %pISc at %p\n", __func__, asoc, &asoc->asconf_addr_del_pending->sa, asoc->asconf_addr_del_pending); asoc->src_out_of_asoc_ok = 1; stored = 1; goto skip_mkasconf; } if (laddr == NULL) return -EINVAL; /* We do not need RCU protection throughout this loop * because this is done under a socket lock from the * setsockopt call. */ chunk = sctp_make_asconf_update_ip(asoc, laddr, addrs, addrcnt, SCTP_PARAM_DEL_IP); if (!chunk) { retval = -ENOMEM; goto out; } skip_mkasconf: /* Reset use_as_src flag for the addresses in the bind address * list that are to be deleted. */ addr_buf = addrs; for (i = 0; i < addrcnt; i++) { laddr = addr_buf; af = sctp_get_af_specific(laddr->v4.sin_family); list_for_each_entry(saddr, &bp->address_list, list) { if (sctp_cmp_addr_exact(&saddr->a, laddr)) saddr->state = SCTP_ADDR_DEL; } addr_buf += af->sockaddr_len; } /* Update the route and saddr entries for all the transports * as some of the addresses in the bind address list are * about to be deleted and cannot be used as source addresses. */ list_for_each_entry(transport, &asoc->peer.transport_addr_list, transports) { sctp_transport_route(transport, NULL, sctp_sk(asoc->base.sk)); } if (stored) /* We don't need to transmit ASCONF */ continue; retval = sctp_send_asconf(asoc, chunk); } out: return retval; } /* set addr events to assocs in the endpoint. ep and addr_wq must be locked */ int sctp_asconf_mgmt(struct sctp_sock *sp, struct sctp_sockaddr_entry *addrw) { struct sock *sk = sctp_opt2sk(sp); union sctp_addr *addr; struct sctp_af *af; /* It is safe to write port space in caller. */ addr = &addrw->a; addr->v4.sin_port = htons(sp->ep->base.bind_addr.port); af = sctp_get_af_specific(addr->sa.sa_family); if (!af) return -EINVAL; if (sctp_verify_addr(sk, addr, af->sockaddr_len)) return -EINVAL; if (addrw->state == SCTP_ADDR_NEW) return sctp_send_asconf_add_ip(sk, (struct sockaddr *)addr, 1); else return sctp_send_asconf_del_ip(sk, (struct sockaddr *)addr, 1); } /* Helper for tunneling sctp_bindx() requests through sctp_setsockopt() * * API 8.1 * int sctp_bindx(int sd, struct sockaddr *addrs, int addrcnt, * int flags); * * If sd is an IPv4 socket, the addresses passed must be IPv4 addresses. * If the sd is an IPv6 socket, the addresses passed can either be IPv4 * or IPv6 addresses. * * A single address may be specified as INADDR_ANY or IN6ADDR_ANY, see * Section 3.1.2 for this usage. * * addrs is a pointer to an array of one or more socket addresses. Each * address is contained in its appropriate structure (i.e. struct * sockaddr_in or struct sockaddr_in6) the family of the address type * must be used to distinguish the address length (note that this * representation is termed a "packed array" of addresses). The caller * specifies the number of addresses in the array with addrcnt. * * On success, sctp_bindx() returns 0. On failure, sctp_bindx() returns * -1, and sets errno to the appropriate error code. * * For SCTP, the port given in each socket address must be the same, or * sctp_bindx() will fail, setting errno to EINVAL. * * The flags parameter is formed from the bitwise OR of zero or more of * the following currently defined flags: * * SCTP_BINDX_ADD_ADDR * * SCTP_BINDX_REM_ADDR * * SCTP_BINDX_ADD_ADDR directs SCTP to add the given addresses to the * association, and SCTP_BINDX_REM_ADDR directs SCTP to remove the given * addresses from the association. The two flags are mutually exclusive; * if both are given, sctp_bindx() will fail with EINVAL. A caller may * not remove all addresses from an association; sctp_bindx() will * reject such an attempt with EINVAL. * * An application can use sctp_bindx(SCTP_BINDX_ADD_ADDR) to associate * additional addresses with an endpoint after calling bind(). Or use * sctp_bindx(SCTP_BINDX_REM_ADDR) to remove some addresses a listening * socket is associated with so that no new association accepted will be * associated with those addresses. If the endpoint supports dynamic * address a SCTP_BINDX_REM_ADDR or SCTP_BINDX_ADD_ADDR may cause a * endpoint to send the appropriate message to the peer to change the * peers address lists. * * Adding and removing addresses from a connected association is * optional functionality. Implementations that do not support this * functionality should return EOPNOTSUPP. * * Basically do nothing but copying the addresses from user to kernel * land and invoking either sctp_bindx_add() or sctp_bindx_rem() on the sk. * This is used for tunneling the sctp_bindx() request through sctp_setsockopt() * from userspace. * * On exit there is no need to do sockfd_put(), sys_setsockopt() does * it. * * sk The sk of the socket * addrs The pointer to the addresses * addrssize Size of the addrs buffer * op Operation to perform (add or remove, see the flags of * sctp_bindx) * * Returns 0 if ok, <0 errno code on error. */ static int sctp_setsockopt_bindx(struct sock *sk, struct sockaddr *addrs, int addrs_size, int op) { int err; int addrcnt = 0; int walk_size = 0; struct sockaddr *sa_addr; void *addr_buf = addrs; struct sctp_af *af; pr_debug("%s: sk:%p addrs:%p addrs_size:%d opt:%d\n", __func__, sk, addr_buf, addrs_size, op); if (unlikely(addrs_size <= 0)) return -EINVAL; /* Walk through the addrs buffer and count the number of addresses. */ while (walk_size < addrs_size) { if (walk_size + sizeof(sa_family_t) > addrs_size) return -EINVAL; sa_addr = addr_buf; af = sctp_get_af_specific(sa_addr->sa_family); /* If the address family is not supported or if this address * causes the address buffer to overflow return EINVAL. */ if (!af || (walk_size + af->sockaddr_len) > addrs_size) return -EINVAL; addrcnt++; addr_buf += af->sockaddr_len; walk_size += af->sockaddr_len; } /* Do the work. */ switch (op) { case SCTP_BINDX_ADD_ADDR: /* Allow security module to validate bindx addresses. */ err = security_sctp_bind_connect(sk, SCTP_SOCKOPT_BINDX_ADD, addrs, addrs_size); if (err) return err; err = sctp_bindx_add(sk, addrs, addrcnt); if (err) return err; return sctp_send_asconf_add_ip(sk, addrs, addrcnt); case SCTP_BINDX_REM_ADDR: err = sctp_bindx_rem(sk, addrs, addrcnt); if (err) return err; return sctp_send_asconf_del_ip(sk, addrs, addrcnt); default: return -EINVAL; } } static int sctp_bind_add(struct sock *sk, struct sockaddr *addrs, int addrlen) { int err; lock_sock(sk); err = sctp_setsockopt_bindx(sk, addrs, addrlen, SCTP_BINDX_ADD_ADDR); release_sock(sk); return err; } static int sctp_connect_new_asoc(struct sctp_endpoint *ep, const union sctp_addr *daddr, const struct sctp_initmsg *init, struct sctp_transport **tp) { struct sctp_association *asoc; struct sock *sk = ep->base.sk; struct net *net = sock_net(sk); enum sctp_scope scope; int err; if (sctp_endpoint_is_peeled_off(ep, daddr)) return -EADDRNOTAVAIL; if (!ep->base.bind_addr.port) { if (sctp_autobind(sk)) return -EAGAIN; } else { if (inet_port_requires_bind_service(net, ep->base.bind_addr.port) && !ns_capable(net->user_ns, CAP_NET_BIND_SERVICE)) return -EACCES; } scope = sctp_scope(daddr); asoc = sctp_association_new(ep, sk, scope, GFP_KERNEL); if (!asoc) return -ENOMEM; err = sctp_assoc_set_bind_addr_from_ep(asoc, scope, GFP_KERNEL); if (err < 0) goto free; *tp = sctp_assoc_add_peer(asoc, daddr, GFP_KERNEL, SCTP_UNKNOWN); if (!*tp) { err = -ENOMEM; goto free; } if (!init) return 0; if (init->sinit_num_ostreams) { __u16 outcnt = init->sinit_num_ostreams; asoc->c.sinit_num_ostreams = outcnt; /* outcnt has been changed, need to re-init stream */ err = sctp_stream_init(&asoc->stream, outcnt, 0, GFP_KERNEL); if (err) goto free; } if (init->sinit_max_instreams) asoc->c.sinit_max_instreams = init->sinit_max_instreams; if (init->sinit_max_attempts) asoc->max_init_attempts = init->sinit_max_attempts; if (init->sinit_max_init_timeo) asoc->max_init_timeo = msecs_to_jiffies(init->sinit_max_init_timeo); return 0; free: sctp_association_free(asoc); return err; } static int sctp_connect_add_peer(struct sctp_association *asoc, union sctp_addr *daddr, int addr_len) { struct sctp_endpoint *ep = asoc->ep; struct sctp_association *old; struct sctp_transport *t; int err; err = sctp_verify_addr(ep->base.sk, daddr, addr_len); if (err) return err; old = sctp_endpoint_lookup_assoc(ep, daddr, &t); if (old && old != asoc) return old->state >= SCTP_STATE_ESTABLISHED ? -EISCONN : -EALREADY; if (sctp_endpoint_is_peeled_off(ep, daddr)) return -EADDRNOTAVAIL; t = sctp_assoc_add_peer(asoc, daddr, GFP_KERNEL, SCTP_UNKNOWN); if (!t) return -ENOMEM; return 0; } /* __sctp_connect(struct sock* sk, struct sockaddr *kaddrs, int addrs_size) * * Common routine for handling connect() and sctp_connectx(). * Connect will come in with just a single address. */ static int __sctp_connect(struct sock *sk, struct sockaddr *kaddrs, int addrs_size, int flags, sctp_assoc_t *assoc_id) { struct sctp_sock *sp = sctp_sk(sk); struct sctp_endpoint *ep = sp->ep; struct sctp_transport *transport; struct sctp_association *asoc; void *addr_buf = kaddrs; union sctp_addr *daddr; struct sctp_af *af; int walk_size, err; long timeo; if (sctp_sstate(sk, ESTABLISHED) || sctp_sstate(sk, CLOSING) || (sctp_style(sk, TCP) && sctp_sstate(sk, LISTENING))) return -EISCONN; daddr = addr_buf; af = sctp_get_af_specific(daddr->sa.sa_family); if (!af || af->sockaddr_len > addrs_size) return -EINVAL; err = sctp_verify_addr(sk, daddr, af->sockaddr_len); if (err) return err; asoc = sctp_endpoint_lookup_assoc(ep, daddr, &transport); if (asoc) return asoc->state >= SCTP_STATE_ESTABLISHED ? -EISCONN : -EALREADY; err = sctp_connect_new_asoc(ep, daddr, NULL, &transport); if (err) return err; asoc = transport->asoc; addr_buf += af->sockaddr_len; walk_size = af->sockaddr_len; while (walk_size < addrs_size) { err = -EINVAL; if (walk_size + sizeof(sa_family_t) > addrs_size) goto out_free; daddr = addr_buf; af = sctp_get_af_specific(daddr->sa.sa_family); if (!af || af->sockaddr_len + walk_size > addrs_size) goto out_free; if (asoc->peer.port != ntohs(daddr->v4.sin_port)) goto out_free; err = sctp_connect_add_peer(asoc, daddr, af->sockaddr_len); if (err) goto out_free; addr_buf += af->sockaddr_len; walk_size += af->sockaddr_len; } /* In case the user of sctp_connectx() wants an association * id back, assign one now. */ if (assoc_id) { err = sctp_assoc_set_id(asoc, GFP_KERNEL); if (err < 0) goto out_free; } err = sctp_primitive_ASSOCIATE(sock_net(sk), asoc, NULL); if (err < 0) goto out_free; /* Initialize sk's dport and daddr for getpeername() */ inet_sk(sk)->inet_dport = htons(asoc->peer.port); sp->pf->to_sk_daddr(daddr, sk); sk->sk_err = 0; if (assoc_id) *assoc_id = asoc->assoc_id; timeo = sock_sndtimeo(sk, flags & O_NONBLOCK); return sctp_wait_for_connect(asoc, &timeo); out_free: pr_debug("%s: took out_free path with asoc:%p kaddrs:%p err:%d\n", __func__, asoc, kaddrs, err); sctp_association_free(asoc); return err; } /* Helper for tunneling sctp_connectx() requests through sctp_setsockopt() * * API 8.9 * int sctp_connectx(int sd, struct sockaddr *addrs, int addrcnt, * sctp_assoc_t *asoc); * * If sd is an IPv4 socket, the addresses passed must be IPv4 addresses. * If the sd is an IPv6 socket, the addresses passed can either be IPv4 * or IPv6 addresses. * * A single address may be specified as INADDR_ANY or IN6ADDR_ANY, see * Section 3.1.2 for this usage. * * addrs is a pointer to an array of one or more socket addresses. Each * address is contained in its appropriate structure (i.e. struct * sockaddr_in or struct sockaddr_in6) the family of the address type * must be used to distengish the address length (note that this * representation is termed a "packed array" of addresses). The caller * specifies the number of addresses in the array with addrcnt. * * On success, sctp_connectx() returns 0. It also sets the assoc_id to * the association id of the new association. On failure, sctp_connectx() * returns -1, and sets errno to the appropriate error code. The assoc_id * is not touched by the kernel. * * For SCTP, the port given in each socket address must be the same, or * sctp_connectx() will fail, setting errno to EINVAL. * * An application can use sctp_connectx to initiate an association with * an endpoint that is multi-homed. Much like sctp_bindx() this call * allows a caller to specify multiple addresses at which a peer can be * reached. The way the SCTP stack uses the list of addresses to set up * the association is implementation dependent. This function only * specifies that the stack will try to make use of all the addresses in * the list when needed. * * Note that the list of addresses passed in is only used for setting up * the association. It does not necessarily equal the set of addresses * the peer uses for the resulting association. If the caller wants to * find out the set of peer addresses, it must use sctp_getpaddrs() to * retrieve them after the association has been set up. * * Basically do nothing but copying the addresses from user to kernel * land and invoking either sctp_connectx(). This is used for tunneling * the sctp_connectx() request through sctp_setsockopt() from userspace. * * On exit there is no need to do sockfd_put(), sys_setsockopt() does * it. * * sk The sk of the socket * addrs The pointer to the addresses * addrssize Size of the addrs buffer * * Returns >=0 if ok, <0 errno code on error. */ static int __sctp_setsockopt_connectx(struct sock *sk, struct sockaddr *kaddrs, int addrs_size, sctp_assoc_t *assoc_id) { int err = 0, flags = 0; pr_debug("%s: sk:%p addrs:%p addrs_size:%d\n", __func__, sk, kaddrs, addrs_size); /* make sure the 1st addr's sa_family is accessible later */ if (unlikely(addrs_size < sizeof(sa_family_t))) return -EINVAL; /* Allow security module to validate connectx addresses. */ err = security_sctp_bind_connect(sk, SCTP_SOCKOPT_CONNECTX, (struct sockaddr *)kaddrs, addrs_size); if (err) return err; /* in-kernel sockets don't generally have a file allocated to them * if all they do is call sock_create_kern(). */ if (sk->sk_socket->file) flags = sk->sk_socket->file->f_flags; return __sctp_connect(sk, kaddrs, addrs_size, flags, assoc_id); } /* * This is an older interface. It's kept for backward compatibility * to the option that doesn't provide association id. */ static int sctp_setsockopt_connectx_old(struct sock *sk, struct sockaddr *kaddrs, int addrs_size) { return __sctp_setsockopt_connectx(sk, kaddrs, addrs_size, NULL); } /* * New interface for the API. The since the API is done with a socket * option, to make it simple we feed back the association id is as a return * indication to the call. Error is always negative and association id is * always positive. */ static int sctp_setsockopt_connectx(struct sock *sk, struct sockaddr *kaddrs, int addrs_size) { sctp_assoc_t assoc_id = 0; int err = 0; err = __sctp_setsockopt_connectx(sk, kaddrs, addrs_size, &assoc_id); if (err) return err; else return assoc_id; } /* * New (hopefully final) interface for the API. * We use the sctp_getaddrs_old structure so that use-space library * can avoid any unnecessary allocations. The only different part * is that we store the actual length of the address buffer into the * addrs_num structure member. That way we can re-use the existing * code. */ #ifdef CONFIG_COMPAT struct compat_sctp_getaddrs_old { sctp_assoc_t assoc_id; s32 addr_num; compat_uptr_t addrs; /* struct sockaddr * */ }; #endif static int sctp_getsockopt_connectx3(struct sock *sk, int len, char __user *optval, int __user *optlen) { struct sctp_getaddrs_old param; sctp_assoc_t assoc_id = 0; struct sockaddr *kaddrs; int err = 0; #ifdef CONFIG_COMPAT if (in_compat_syscall()) { struct compat_sctp_getaddrs_old param32; if (len < sizeof(param32)) return -EINVAL; if (copy_from_user(&param32, optval, sizeof(param32))) return -EFAULT; param.assoc_id = param32.assoc_id; param.addr_num = param32.addr_num; param.addrs = compat_ptr(param32.addrs); } else #endif { if (len < sizeof(param)) return -EINVAL; if (copy_from_user(&param, optval, sizeof(param))) return -EFAULT; } kaddrs = memdup_user(param.addrs, param.addr_num); if (IS_ERR(kaddrs)) return PTR_ERR(kaddrs); err = __sctp_setsockopt_connectx(sk, kaddrs, param.addr_num, &assoc_id); kfree(kaddrs); if (err == 0 || err == -EINPROGRESS) { if (copy_to_user(optval, &assoc_id, sizeof(assoc_id))) return -EFAULT; if (put_user(sizeof(assoc_id), optlen)) return -EFAULT; } return err; } /* API 3.1.4 close() - UDP Style Syntax * Applications use close() to perform graceful shutdown (as described in * Section 10.1 of [SCTP]) on ALL the associations currently represented * by a UDP-style socket. * * The syntax is * * ret = close(int sd); * * sd - the socket descriptor of the associations to be closed. * * To gracefully shutdown a specific association represented by the * UDP-style socket, an application should use the sendmsg() call, * passing no user data, but including the appropriate flag in the * ancillary data (see Section xxxx). * * If sd in the close() call is a branched-off socket representing only * one association, the shutdown is performed on that association only. * * 4.1.6 close() - TCP Style Syntax * * Applications use close() to gracefully close down an association. * * The syntax is: * * int close(int sd); * * sd - the socket descriptor of the association to be closed. * * After an application calls close() on a socket descriptor, no further * socket operations will succeed on that descriptor. * * API 7.1.4 SO_LINGER * * An application using the TCP-style socket can use this option to * perform the SCTP ABORT primitive. The linger option structure is: * * struct linger { * int l_onoff; // option on/off * int l_linger; // linger time * }; * * To enable the option, set l_onoff to 1. If the l_linger value is set * to 0, calling close() is the same as the ABORT primitive. If the * value is set to a negative value, the setsockopt() call will return * an error. If the value is set to a positive value linger_time, the * close() can be blocked for at most linger_time ms. If the graceful * shutdown phase does not finish during this period, close() will * return but the graceful shutdown phase continues in the system. */ static void sctp_close(struct sock *sk, long timeout) { struct net *net = sock_net(sk); struct sctp_endpoint *ep; struct sctp_association *asoc; struct list_head *pos, *temp; unsigned int data_was_unread; pr_debug("%s: sk:%p, timeout:%ld\n", __func__, sk, timeout); lock_sock_nested(sk, SINGLE_DEPTH_NESTING); sk->sk_shutdown = SHUTDOWN_MASK; inet_sk_set_state(sk, SCTP_SS_CLOSING); ep = sctp_sk(sk)->ep; /* Clean up any skbs sitting on the receive queue. */ data_was_unread = sctp_queue_purge_ulpevents(&sk->sk_receive_queue); data_was_unread += sctp_queue_purge_ulpevents(&sctp_sk(sk)->pd_lobby); /* Walk all associations on an endpoint. */ list_for_each_safe(pos, temp, &ep->asocs) { asoc = list_entry(pos, struct sctp_association, asocs); if (sctp_style(sk, TCP)) { /* A closed association can still be in the list if * it belongs to a TCP-style listening socket that is * not yet accepted. If so, free it. If not, send an * ABORT or SHUTDOWN based on the linger options. */ if (sctp_state(asoc, CLOSED)) { sctp_association_free(asoc); continue; } } if (data_was_unread || !skb_queue_empty(&asoc->ulpq.lobby) || !skb_queue_empty(&asoc->ulpq.reasm) || !skb_queue_empty(&asoc->ulpq.reasm_uo) || (sock_flag(sk, SOCK_LINGER) && !sk->sk_lingertime)) { struct sctp_chunk *chunk; chunk = sctp_make_abort_user(asoc, NULL, 0); sctp_primitive_ABORT(net, asoc, chunk); } else sctp_primitive_SHUTDOWN(net, asoc, NULL); } /* On a TCP-style socket, block for at most linger_time if set. */ if (sctp_style(sk, TCP) && timeout) sctp_wait_for_close(sk, timeout); /* This will run the backlog queue. */ release_sock(sk); /* Supposedly, no process has access to the socket, but * the net layers still may. * Also, sctp_destroy_sock() needs to be called with addr_wq_lock * held and that should be grabbed before socket lock. */ spin_lock_bh(&net->sctp.addr_wq_lock); bh_lock_sock_nested(sk); /* Hold the sock, since sk_common_release() will put sock_put() * and we have just a little more cleanup. */ sock_hold(sk); sk_common_release(sk); bh_unlock_sock(sk); spin_unlock_bh(&net->sctp.addr_wq_lock); sock_put(sk); SCTP_DBG_OBJCNT_DEC(sock); } /* Handle EPIPE error. */ static int sctp_error(struct sock *sk, int flags, int err) { if (err == -EPIPE) err = sock_error(sk) ? : -EPIPE; if (err == -EPIPE && !(flags & MSG_NOSIGNAL)) send_sig(SIGPIPE, current, 0); return err; } /* API 3.1.3 sendmsg() - UDP Style Syntax * * An application uses sendmsg() and recvmsg() calls to transmit data to * and receive data from its peer. * * ssize_t sendmsg(int socket, const struct msghdr *message, * int flags); * * socket - the socket descriptor of the endpoint. * message - pointer to the msghdr structure which contains a single * user message and possibly some ancillary data. * * See Section 5 for complete description of the data * structures. * * flags - flags sent or received with the user message, see Section * 5 for complete description of the flags. * * Note: This function could use a rewrite especially when explicit * connect support comes in. */ /* BUG: We do not implement the equivalent of sk_stream_wait_memory(). */ static int sctp_msghdr_parse(const struct msghdr *msg, struct sctp_cmsgs *cmsgs); static int sctp_sendmsg_parse(struct sock *sk, struct sctp_cmsgs *cmsgs, struct sctp_sndrcvinfo *srinfo, const struct msghdr *msg, size_t msg_len) { __u16 sflags; int err; if (sctp_sstate(sk, LISTENING) && sctp_style(sk, TCP)) return -EPIPE; if (msg_len > sk->sk_sndbuf) return -EMSGSIZE; memset(cmsgs, 0, sizeof(*cmsgs)); err = sctp_msghdr_parse(msg, cmsgs); if (err) { pr_debug("%s: msghdr parse err:%x\n", __func__, err); return err; } memset(srinfo, 0, sizeof(*srinfo)); if (cmsgs->srinfo) { srinfo->sinfo_stream = cmsgs->srinfo->sinfo_stream; srinfo->sinfo_flags = cmsgs->srinfo->sinfo_flags; srinfo->sinfo_ppid = cmsgs->srinfo->sinfo_ppid; srinfo->sinfo_context = cmsgs->srinfo->sinfo_context; srinfo->sinfo_assoc_id = cmsgs->srinfo->sinfo_assoc_id; srinfo->sinfo_timetolive = cmsgs->srinfo->sinfo_timetolive; } if (cmsgs->sinfo) { srinfo->sinfo_stream = cmsgs->sinfo->snd_sid; srinfo->sinfo_flags = cmsgs->sinfo->snd_flags; srinfo->sinfo_ppid = cmsgs->sinfo->snd_ppid; srinfo->sinfo_context = cmsgs->sinfo->snd_context; srinfo->sinfo_assoc_id = cmsgs->sinfo->snd_assoc_id; } if (cmsgs->prinfo) { srinfo->sinfo_timetolive = cmsgs->prinfo->pr_value; SCTP_PR_SET_POLICY(srinfo->sinfo_flags, cmsgs->prinfo->pr_policy); } sflags = srinfo->sinfo_flags; if (!sflags && msg_len) return 0; if (sctp_style(sk, TCP) && (sflags & (SCTP_EOF | SCTP_ABORT))) return -EINVAL; if (((sflags & SCTP_EOF) && msg_len > 0) || (!(sflags & (SCTP_EOF | SCTP_ABORT)) && msg_len == 0)) return -EINVAL; if ((sflags & SCTP_ADDR_OVER) && !msg->msg_name) return -EINVAL; return 0; } static int sctp_sendmsg_new_asoc(struct sock *sk, __u16 sflags, struct sctp_cmsgs *cmsgs, union sctp_addr *daddr, struct sctp_transport **tp) { struct sctp_endpoint *ep = sctp_sk(sk)->ep; struct sctp_association *asoc; struct cmsghdr *cmsg; __be32 flowinfo = 0; struct sctp_af *af; int err; *tp = NULL; if (sflags & (SCTP_EOF | SCTP_ABORT)) return -EINVAL; if (sctp_style(sk, TCP) && (sctp_sstate(sk, ESTABLISHED) || sctp_sstate(sk, CLOSING))) return -EADDRNOTAVAIL; /* Label connection socket for first association 1-to-many * style for client sequence socket()->sendmsg(). This * needs to be done before sctp_assoc_add_peer() as that will * set up the initial packet that needs to account for any * security ip options (CIPSO/CALIPSO) added to the packet. */ af = sctp_get_af_specific(daddr->sa.sa_family); if (!af) return -EINVAL; err = security_sctp_bind_connect(sk, SCTP_SENDMSG_CONNECT, (struct sockaddr *)daddr, af->sockaddr_len); if (err < 0) return err; err = sctp_connect_new_asoc(ep, daddr, cmsgs->init, tp); if (err) return err; asoc = (*tp)->asoc; if (!cmsgs->addrs_msg) return 0; if (daddr->sa.sa_family == AF_INET6) flowinfo = daddr->v6.sin6_flowinfo; /* sendv addr list parse */ for_each_cmsghdr(cmsg, cmsgs->addrs_msg) { union sctp_addr _daddr; int dlen; if (cmsg->cmsg_level != IPPROTO_SCTP || (cmsg->cmsg_type != SCTP_DSTADDRV4 && cmsg->cmsg_type != SCTP_DSTADDRV6)) continue; daddr = &_daddr; memset(daddr, 0, sizeof(*daddr)); dlen = cmsg->cmsg_len - sizeof(struct cmsghdr); if (cmsg->cmsg_type == SCTP_DSTADDRV4) { if (dlen < sizeof(struct in_addr)) { err = -EINVAL; goto free; } dlen = sizeof(struct in_addr); daddr->v4.sin_family = AF_INET; daddr->v4.sin_port = htons(asoc->peer.port); memcpy(&daddr->v4.sin_addr, CMSG_DATA(cmsg), dlen); } else { if (dlen < sizeof(struct in6_addr)) { err = -EINVAL; goto free; } dlen = sizeof(struct in6_addr); daddr->v6.sin6_flowinfo = flowinfo; daddr->v6.sin6_family = AF_INET6; daddr->v6.sin6_port = htons(asoc->peer.port); memcpy(&daddr->v6.sin6_addr, CMSG_DATA(cmsg), dlen); } err = sctp_connect_add_peer(asoc, daddr, sizeof(*daddr)); if (err) goto free; } return 0; free: sctp_association_free(asoc); return err; } static int sctp_sendmsg_check_sflags(struct sctp_association *asoc, __u16 sflags, struct msghdr *msg, size_t msg_len) { struct sock *sk = asoc->base.sk; struct net *net = sock_net(sk); if (sctp_state(asoc, CLOSED) && sctp_style(sk, TCP)) return -EPIPE; if ((sflags & SCTP_SENDALL) && sctp_style(sk, UDP) && !sctp_state(asoc, ESTABLISHED)) return 0; if (sflags & SCTP_EOF) { pr_debug("%s: shutting down association:%p\n", __func__, asoc); sctp_primitive_SHUTDOWN(net, asoc, NULL); return 0; } if (sflags & SCTP_ABORT) { struct sctp_chunk *chunk; chunk = sctp_make_abort_user(asoc, msg, msg_len); if (!chunk) return -ENOMEM; pr_debug("%s: aborting association:%p\n", __func__, asoc); sctp_primitive_ABORT(net, asoc, chunk); iov_iter_revert(&msg->msg_iter, msg_len); return 0; } return 1; } static int sctp_sendmsg_to_asoc(struct sctp_association *asoc, struct msghdr *msg, size_t msg_len, struct sctp_transport *transport, struct sctp_sndrcvinfo *sinfo) { struct sock *sk = asoc->base.sk; struct sctp_sock *sp = sctp_sk(sk); struct net *net = sock_net(sk); struct sctp_datamsg *datamsg; bool wait_connect = false; struct sctp_chunk *chunk; long timeo; int err; if (sinfo->sinfo_stream >= asoc->stream.outcnt) { err = -EINVAL; goto err; } if (unlikely(!SCTP_SO(&asoc->stream, sinfo->sinfo_stream)->ext)) { err = sctp_stream_init_ext(&asoc->stream, sinfo->sinfo_stream); if (err) goto err; } if (sp->disable_fragments && msg_len > asoc->frag_point) { err = -EMSGSIZE; goto err; } if (asoc->pmtu_pending) { if (sp->param_flags & SPP_PMTUD_ENABLE) sctp_assoc_sync_pmtu(asoc); asoc->pmtu_pending = 0; } if (sctp_wspace(asoc) < (int)msg_len) sctp_prsctp_prune(asoc, sinfo, msg_len - sctp_wspace(asoc)); if (sctp_wspace(asoc) <= 0 || !sk_wmem_schedule(sk, msg_len)) { timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT); err = sctp_wait_for_sndbuf(asoc, &timeo, msg_len); if (err) goto err; if (unlikely(sinfo->sinfo_stream >= asoc->stream.outcnt)) { err = -EINVAL; goto err; } } if (sctp_state(asoc, CLOSED)) { err = sctp_primitive_ASSOCIATE(net, asoc, NULL); if (err) goto err; if (asoc->ep->intl_enable) { timeo = sock_sndtimeo(sk, 0); err = sctp_wait_for_connect(asoc, &timeo); if (err) { err = -ESRCH; goto err; } } else { wait_connect = true; } pr_debug("%s: we associated primitively\n", __func__); } datamsg = sctp_datamsg_from_user(asoc, sinfo, &msg->msg_iter); if (IS_ERR(datamsg)) { err = PTR_ERR(datamsg); goto err; } asoc->force_delay = !!(msg->msg_flags & MSG_MORE); list_for_each_entry(chunk, &datamsg->chunks, frag_list) { sctp_chunk_hold(chunk); sctp_set_owner_w(chunk); chunk->transport = transport; } err = sctp_primitive_SEND(net, asoc, datamsg); if (err) { sctp_datamsg_free(datamsg); goto err; } pr_debug("%s: we sent primitively\n", __func__); sctp_datamsg_put(datamsg); if (unlikely(wait_connect)) { timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT); sctp_wait_for_connect(asoc, &timeo); } err = msg_len; err: return err; } static union sctp_addr *sctp_sendmsg_get_daddr(struct sock *sk, const struct msghdr *msg, struct sctp_cmsgs *cmsgs) { union sctp_addr *daddr = NULL; int err; if (!sctp_style(sk, UDP_HIGH_BANDWIDTH) && msg->msg_name) { int len = msg->msg_namelen; if (len > sizeof(*daddr)) len = sizeof(*daddr); daddr = (union sctp_addr *)msg->msg_name; err = sctp_verify_addr(sk, daddr, len); if (err) return ERR_PTR(err); } return daddr; } static void sctp_sendmsg_update_sinfo(struct sctp_association *asoc, struct sctp_sndrcvinfo *sinfo, struct sctp_cmsgs *cmsgs) { if (!cmsgs->srinfo && !cmsgs->sinfo) { sinfo->sinfo_stream = asoc->default_stream; sinfo->sinfo_ppid = asoc->default_ppid; sinfo->sinfo_context = asoc->default_context; sinfo->sinfo_assoc_id = sctp_assoc2id(asoc); if (!cmsgs->prinfo) sinfo->sinfo_flags = asoc->default_flags; } if (!cmsgs->srinfo && !cmsgs->prinfo) sinfo->sinfo_timetolive = asoc->default_timetolive; if (cmsgs->authinfo) { /* Reuse sinfo_tsn to indicate that authinfo was set and * sinfo_ssn to save the keyid on tx path. */ sinfo->sinfo_tsn = 1; sinfo->sinfo_ssn = cmsgs->authinfo->auth_keynumber; } } static int sctp_sendmsg(struct sock *sk, struct msghdr *msg, size_t msg_len) { struct sctp_endpoint *ep = sctp_sk(sk)->ep; struct sctp_transport *transport = NULL; struct sctp_sndrcvinfo _sinfo, *sinfo; struct sctp_association *asoc, *tmp; struct sctp_cmsgs cmsgs; union sctp_addr *daddr; bool new = false; __u16 sflags; int err; /* Parse and get snd_info */ err = sctp_sendmsg_parse(sk, &cmsgs, &_sinfo, msg, msg_len); if (err) goto out; sinfo = &_sinfo; sflags = sinfo->sinfo_flags; /* Get daddr from msg */ daddr = sctp_sendmsg_get_daddr(sk, msg, &cmsgs); if (IS_ERR(daddr)) { err = PTR_ERR(daddr); goto out; } lock_sock(sk); /* SCTP_SENDALL process */ if ((sflags & SCTP_SENDALL) && sctp_style(sk, UDP)) { list_for_each_entry_safe(asoc, tmp, &ep->asocs, asocs) { err = sctp_sendmsg_check_sflags(asoc, sflags, msg, msg_len); if (err == 0) continue; if (err < 0) goto out_unlock; sctp_sendmsg_update_sinfo(asoc, sinfo, &cmsgs); err = sctp_sendmsg_to_asoc(asoc, msg, msg_len, NULL, sinfo); if (err < 0) goto out_unlock; iov_iter_revert(&msg->msg_iter, err); } goto out_unlock; } /* Get and check or create asoc */ if (daddr) { asoc = sctp_endpoint_lookup_assoc(ep, daddr, &transport); if (asoc) { err = sctp_sendmsg_check_sflags(asoc, sflags, msg, msg_len); if (err <= 0) goto out_unlock; } else { err = sctp_sendmsg_new_asoc(sk, sflags, &cmsgs, daddr, &transport); if (err) goto out_unlock; asoc = transport->asoc; new = true; } if (!sctp_style(sk, TCP) && !(sflags & SCTP_ADDR_OVER)) transport = NULL; } else { asoc = sctp_id2assoc(sk, sinfo->sinfo_assoc_id); if (!asoc) { err = -EPIPE; goto out_unlock; } err = sctp_sendmsg_check_sflags(asoc, sflags, msg, msg_len); if (err <= 0) goto out_unlock; } /* Update snd_info with the asoc */ sctp_sendmsg_update_sinfo(asoc, sinfo, &cmsgs); /* Send msg to the asoc */ err = sctp_sendmsg_to_asoc(asoc, msg, msg_len, transport, sinfo); if (err < 0 && err != -ESRCH && new) sctp_association_free(asoc); out_unlock: release_sock(sk); out: return sctp_error(sk, msg->msg_flags, err); } /* This is an extended version of skb_pull() that removes the data from the * start of a skb even when data is spread across the list of skb's in the * frag_list. len specifies the total amount of data that needs to be removed. * when 'len' bytes could be removed from the skb, it returns 0. * If 'len' exceeds the total skb length, it returns the no. of bytes that * could not be removed. */ static int sctp_skb_pull(struct sk_buff *skb, int len) { struct sk_buff *list; int skb_len = skb_headlen(skb); int rlen; if (len <= skb_len) { __skb_pull(skb, len); return 0; } len -= skb_len; __skb_pull(skb, skb_len); skb_walk_frags(skb, list) { rlen = sctp_skb_pull(list, len); skb->len -= (len-rlen); skb->data_len -= (len-rlen); if (!rlen) return 0; len = rlen; } return len; } /* API 3.1.3 recvmsg() - UDP Style Syntax * * ssize_t recvmsg(int socket, struct msghdr *message, * int flags); * * socket - the socket descriptor of the endpoint. * message - pointer to the msghdr structure which contains a single * user message and possibly some ancillary data. * * See Section 5 for complete description of the data * structures. * * flags - flags sent or received with the user message, see Section * 5 for complete description of the flags. */ static int sctp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int flags, int *addr_len) { struct sctp_ulpevent *event = NULL; struct sctp_sock *sp = sctp_sk(sk); struct sk_buff *skb, *head_skb; int copied; int err = 0; int skb_len; pr_debug("%s: sk:%p, msghdr:%p, len:%zd, flags:0x%x, addr_len:%p)\n", __func__, sk, msg, len, flags, addr_len); if (unlikely(flags & MSG_ERRQUEUE)) return inet_recv_error(sk, msg, len, addr_len); if (sk_can_busy_loop(sk) && skb_queue_empty_lockless(&sk->sk_receive_queue)) sk_busy_loop(sk, flags & MSG_DONTWAIT); lock_sock(sk); if (sctp_style(sk, TCP) && !sctp_sstate(sk, ESTABLISHED) && !sctp_sstate(sk, CLOSING) && !sctp_sstate(sk, CLOSED)) { err = -ENOTCONN; goto out; } skb = sctp_skb_recv_datagram(sk, flags, &err); if (!skb) goto out; /* Get the total length of the skb including any skb's in the * frag_list. */ skb_len = skb->len; copied = skb_len; if (copied > len) copied = len; err = skb_copy_datagram_msg(skb, 0, msg, copied); event = sctp_skb2event(skb); if (err) goto out_free; if (event->chunk && event->chunk->head_skb) head_skb = event->chunk->head_skb; else head_skb = skb; sock_recv_cmsgs(msg, sk, head_skb); if (sctp_ulpevent_is_notification(event)) { msg->msg_flags |= MSG_NOTIFICATION; sp->pf->event_msgname(event, msg->msg_name, addr_len); } else { sp->pf->skb_msgname(head_skb, msg->msg_name, addr_len); } /* Check if we allow SCTP_NXTINFO. */ if (sp->recvnxtinfo) sctp_ulpevent_read_nxtinfo(event, msg, sk); /* Check if we allow SCTP_RCVINFO. */ if (sp->recvrcvinfo) sctp_ulpevent_read_rcvinfo(event, msg); /* Check if we allow SCTP_SNDRCVINFO. */ if (sctp_ulpevent_type_enabled(sp->subscribe, SCTP_DATA_IO_EVENT)) sctp_ulpevent_read_sndrcvinfo(event, msg); err = copied; /* If skb's length exceeds the user's buffer, update the skb and * push it back to the receive_queue so that the next call to * recvmsg() will return the remaining data. Don't set MSG_EOR. */ if (skb_len > copied) { msg->msg_flags &= ~MSG_EOR; if (flags & MSG_PEEK) goto out_free; sctp_skb_pull(skb, copied); skb_queue_head(&sk->sk_receive_queue, skb); /* When only partial message is copied to the user, increase * rwnd by that amount. If all the data in the skb is read, * rwnd is updated when the event is freed. */ if (!sctp_ulpevent_is_notification(event)) sctp_assoc_rwnd_increase(event->asoc, copied); goto out; } else if ((event->msg_flags & MSG_NOTIFICATION) || (event->msg_flags & MSG_EOR)) msg->msg_flags |= MSG_EOR; else msg->msg_flags &= ~MSG_EOR; out_free: if (flags & MSG_PEEK) { /* Release the skb reference acquired after peeking the skb in * sctp_skb_recv_datagram(). */ kfree_skb(skb); } else { /* Free the event which includes releasing the reference to * the owner of the skb, freeing the skb and updating the * rwnd. */ sctp_ulpevent_free(event); } out: release_sock(sk); return err; } /* 7.1.12 Enable/Disable message fragmentation (SCTP_DISABLE_FRAGMENTS) * * This option is a on/off flag. If enabled no SCTP message * fragmentation will be performed. Instead if a message being sent * exceeds the current PMTU size, the message will NOT be sent and * instead a error will be indicated to the user. */ static int sctp_setsockopt_disable_fragments(struct sock *sk, int *val, unsigned int optlen) { if (optlen < sizeof(int)) return -EINVAL; sctp_sk(sk)->disable_fragments = (*val == 0) ? 0 : 1; return 0; } static int sctp_setsockopt_events(struct sock *sk, __u8 *sn_type, unsigned int optlen) { struct sctp_sock *sp = sctp_sk(sk); struct sctp_association *asoc; int i; if (optlen > sizeof(struct sctp_event_subscribe)) return -EINVAL; for (i = 0; i < optlen; i++) sctp_ulpevent_type_set(&sp->subscribe, SCTP_SN_TYPE_BASE + i, sn_type[i]); list_for_each_entry(asoc, &sp->ep->asocs, asocs) asoc->subscribe = sctp_sk(sk)->subscribe; /* At the time when a user app subscribes to SCTP_SENDER_DRY_EVENT, * if there is no data to be sent or retransmit, the stack will * immediately send up this notification. */ if (sctp_ulpevent_type_enabled(sp->subscribe, SCTP_SENDER_DRY_EVENT)) { struct sctp_ulpevent *event; asoc = sctp_id2assoc(sk, 0); if (asoc && sctp_outq_is_empty(&asoc->outqueue)) { event = sctp_ulpevent_make_sender_dry_event(asoc, GFP_USER | __GFP_NOWARN); if (!event) return -ENOMEM; asoc->stream.si->enqueue_event(&asoc->ulpq, event); } } return 0; } /* 7.1.8 Automatic Close of associations (SCTP_AUTOCLOSE) * * This socket option is applicable to the UDP-style socket only. When * set it will cause associations that are idle for more than the * specified number of seconds to automatically close. An association * being idle is defined an association that has NOT sent or received * user data. The special value of '0' indicates that no automatic * close of any associations should be performed. The option expects an * integer defining the number of seconds of idle time before an * association is closed. */ static int sctp_setsockopt_autoclose(struct sock *sk, u32 *optval, unsigned int optlen) { struct sctp_sock *sp = sctp_sk(sk); struct net *net = sock_net(sk); /* Applicable to UDP-style socket only */ if (sctp_style(sk, TCP)) return -EOPNOTSUPP; if (optlen != sizeof(int)) return -EINVAL; sp->autoclose = *optval; if (sp->autoclose > net->sctp.max_autoclose) sp->autoclose = net->sctp.max_autoclose; return 0; } /* 7.1.13 Peer Address Parameters (SCTP_PEER_ADDR_PARAMS) * * Applications can enable or disable heartbeats for any peer address of * an association, modify an address's heartbeat interval, force a * heartbeat to be sent immediately, and adjust the address's maximum * number of retransmissions sent before an address is considered * unreachable. The following structure is used to access and modify an * address's parameters: * * struct sctp_paddrparams { * sctp_assoc_t spp_assoc_id; * struct sockaddr_storage spp_address; * uint32_t spp_hbinterval; * uint16_t spp_pathmaxrxt; * uint32_t spp_pathmtu; * uint32_t spp_sackdelay; * uint32_t spp_flags; * uint32_t spp_ipv6_flowlabel; * uint8_t spp_dscp; * }; * * spp_assoc_id - (one-to-many style socket) This is filled in the * application, and identifies the association for * this query. * spp_address - This specifies which address is of interest. * spp_hbinterval - This contains the value of the heartbeat interval, * in milliseconds. If a value of zero * is present in this field then no changes are to * be made to this parameter. * spp_pathmaxrxt - This contains the maximum number of * retransmissions before this address shall be * considered unreachable. If a value of zero * is present in this field then no changes are to * be made to this parameter. * spp_pathmtu - When Path MTU discovery is disabled the value * specified here will be the "fixed" path mtu. * Note that if the spp_address field is empty * then all associations on this address will * have this fixed path mtu set upon them. * * spp_sackdelay - When delayed sack is enabled, this value specifies * the number of milliseconds that sacks will be delayed * for. This value will apply to all addresses of an * association if the spp_address field is empty. Note * also, that if delayed sack is enabled and this * value is set to 0, no change is made to the last * recorded delayed sack timer value. * * spp_flags - These flags are used to control various features * on an association. The flag field may contain * zero or more of the following options. * * SPP_HB_ENABLE - Enable heartbeats on the * specified address. Note that if the address * field is empty all addresses for the association * have heartbeats enabled upon them. * * SPP_HB_DISABLE - Disable heartbeats on the * speicifed address. Note that if the address * field is empty all addresses for the association * will have their heartbeats disabled. Note also * that SPP_HB_ENABLE and SPP_HB_DISABLE are * mutually exclusive, only one of these two should * be specified. Enabling both fields will have * undetermined results. * * SPP_HB_DEMAND - Request a user initiated heartbeat * to be made immediately. * * SPP_HB_TIME_IS_ZERO - Specify's that the time for * heartbeat delayis to be set to the value of 0 * milliseconds. * * SPP_PMTUD_ENABLE - This field will enable PMTU * discovery upon the specified address. Note that * if the address feild is empty then all addresses * on the association are effected. * * SPP_PMTUD_DISABLE - This field will disable PMTU * discovery upon the specified address. Note that * if the address feild is empty then all addresses * on the association are effected. Not also that * SPP_PMTUD_ENABLE and SPP_PMTUD_DISABLE are mutually * exclusive. Enabling both will have undetermined * results. * * SPP_SACKDELAY_ENABLE - Setting this flag turns * on delayed sack. The time specified in spp_sackdelay * is used to specify the sack delay for this address. Note * that if spp_address is empty then all addresses will * enable delayed sack and take on the sack delay * value specified in spp_sackdelay. * SPP_SACKDELAY_DISABLE - Setting this flag turns * off delayed sack. If the spp_address field is blank then * delayed sack is disabled for the entire association. Note * also that this field is mutually exclusive to * SPP_SACKDELAY_ENABLE, setting both will have undefined * results. * * SPP_IPV6_FLOWLABEL: Setting this flag enables the * setting of the IPV6 flow label value. The value is * contained in the spp_ipv6_flowlabel field. * Upon retrieval, this flag will be set to indicate that * the spp_ipv6_flowlabel field has a valid value returned. * If a specific destination address is set (in the * spp_address field), then the value returned is that of * the address. If just an association is specified (and * no address), then the association's default flow label * is returned. If neither an association nor a destination * is specified, then the socket's default flow label is * returned. For non-IPv6 sockets, this flag will be left * cleared. * * SPP_DSCP: Setting this flag enables the setting of the * Differentiated Services Code Point (DSCP) value * associated with either the association or a specific * address. The value is obtained in the spp_dscp field. * Upon retrieval, this flag will be set to indicate that * the spp_dscp field has a valid value returned. If a * specific destination address is set when called (in the * spp_address field), then that specific destination * address's DSCP value is returned. If just an association * is specified, then the association's default DSCP is * returned. If neither an association nor a destination is * specified, then the socket's default DSCP is returned. * * spp_ipv6_flowlabel * - This field is used in conjunction with the * SPP_IPV6_FLOWLABEL flag and contains the IPv6 flow label. * The 20 least significant bits are used for the flow * label. This setting has precedence over any IPv6-layer * setting. * * spp_dscp - This field is used in conjunction with the SPP_DSCP flag * and contains the DSCP. The 6 most significant bits are * used for the DSCP. This setting has precedence over any * IPv4- or IPv6- layer setting. */ static int sctp_apply_peer_addr_params(struct sctp_paddrparams *params, struct sctp_transport *trans, struct sctp_association *asoc, struct sctp_sock *sp, int hb_change, int pmtud_change, int sackdelay_change) { int error; if (params->spp_flags & SPP_HB_DEMAND && trans) { error = sctp_primitive_REQUESTHEARTBEAT(trans->asoc->base.net, trans->asoc, trans); if (error) return error; } /* Note that unless the spp_flag is set to SPP_HB_ENABLE the value of * this field is ignored. Note also that a value of zero indicates * the current setting should be left unchanged. */ if (params->spp_flags & SPP_HB_ENABLE) { /* Re-zero the interval if the SPP_HB_TIME_IS_ZERO is * set. This lets us use 0 value when this flag * is set. */ if (params->spp_flags & SPP_HB_TIME_IS_ZERO) params->spp_hbinterval = 0; if (params->spp_hbinterval || (params->spp_flags & SPP_HB_TIME_IS_ZERO)) { if (trans) { trans->hbinterval = msecs_to_jiffies(params->spp_hbinterval); sctp_transport_reset_hb_timer(trans); } else if (asoc) { asoc->hbinterval = msecs_to_jiffies(params->spp_hbinterval); } else { sp->hbinterval = params->spp_hbinterval; } } } if (hb_change) { if (trans) { trans->param_flags = (trans->param_flags & ~SPP_HB) | hb_change; } else if (asoc) { asoc->param_flags = (asoc->param_flags & ~SPP_HB) | hb_change; } else { sp->param_flags = (sp->param_flags & ~SPP_HB) | hb_change; } } /* When Path MTU discovery is disabled the value specified here will * be the "fixed" path mtu (i.e. the value of the spp_flags field must * include the flag SPP_PMTUD_DISABLE for this field to have any * effect). */ if ((params->spp_flags & SPP_PMTUD_DISABLE) && params->spp_pathmtu) { if (trans) { trans->pathmtu = params->spp_pathmtu; sctp_assoc_sync_pmtu(asoc); } else if (asoc) { sctp_assoc_set_pmtu(asoc, params->spp_pathmtu); } else { sp->pathmtu = params->spp_pathmtu; } } if (pmtud_change) { if (trans) { int update = (trans->param_flags & SPP_PMTUD_DISABLE) && (params->spp_flags & SPP_PMTUD_ENABLE); trans->param_flags = (trans->param_flags & ~SPP_PMTUD) | pmtud_change; if (update) { sctp_transport_pmtu(trans, sctp_opt2sk(sp)); sctp_assoc_sync_pmtu(asoc); } sctp_transport_pl_reset(trans); } else if (asoc) { asoc->param_flags = (asoc->param_flags & ~SPP_PMTUD) | pmtud_change; } else { sp->param_flags = (sp->param_flags & ~SPP_PMTUD) | pmtud_change; } } /* Note that unless the spp_flag is set to SPP_SACKDELAY_ENABLE the * value of this field is ignored. Note also that a value of zero * indicates the current setting should be left unchanged. */ if ((params->spp_flags & SPP_SACKDELAY_ENABLE) && params->spp_sackdelay) { if (trans) { trans->sackdelay = msecs_to_jiffies(params->spp_sackdelay); } else if (asoc) { asoc->sackdelay = msecs_to_jiffies(params->spp_sackdelay); } else { sp->sackdelay = params->spp_sackdelay; } } if (sackdelay_change) { if (trans) { trans->param_flags = (trans->param_flags & ~SPP_SACKDELAY) | sackdelay_change; } else if (asoc) { asoc->param_flags = (asoc->param_flags & ~SPP_SACKDELAY) | sackdelay_change; } else { sp->param_flags = (sp->param_flags & ~SPP_SACKDELAY) | sackdelay_change; } } /* Note that a value of zero indicates the current setting should be left unchanged. */ if (params->spp_pathmaxrxt) { if (trans) { trans->pathmaxrxt = params->spp_pathmaxrxt; } else if (asoc) { asoc->pathmaxrxt = params->spp_pathmaxrxt; } else { sp->pathmaxrxt = params->spp_pathmaxrxt; } } if (params->spp_flags & SPP_IPV6_FLOWLABEL) { if (trans) { if (trans->ipaddr.sa.sa_family == AF_INET6) { trans->flowlabel = params->spp_ipv6_flowlabel & SCTP_FLOWLABEL_VAL_MASK; trans->flowlabel |= SCTP_FLOWLABEL_SET_MASK; } } else if (asoc) { struct sctp_transport *t; list_for_each_entry(t, &asoc->peer.transport_addr_list, transports) { if (t->ipaddr.sa.sa_family != AF_INET6) continue; t->flowlabel = params->spp_ipv6_flowlabel & SCTP_FLOWLABEL_VAL_MASK; t->flowlabel |= SCTP_FLOWLABEL_SET_MASK; } asoc->flowlabel = params->spp_ipv6_flowlabel & SCTP_FLOWLABEL_VAL_MASK; asoc->flowlabel |= SCTP_FLOWLABEL_SET_MASK; } else if (sctp_opt2sk(sp)->sk_family == AF_INET6) { sp->flowlabel = params->spp_ipv6_flowlabel & SCTP_FLOWLABEL_VAL_MASK; sp->flowlabel |= SCTP_FLOWLABEL_SET_MASK; } } if (params->spp_flags & SPP_DSCP) { if (trans) { trans->dscp = params->spp_dscp & SCTP_DSCP_VAL_MASK; trans->dscp |= SCTP_DSCP_SET_MASK; } else if (asoc) { struct sctp_transport *t; list_for_each_entry(t, &asoc->peer.transport_addr_list, transports) { t->dscp = params->spp_dscp & SCTP_DSCP_VAL_MASK; t->dscp |= SCTP_DSCP_SET_MASK; } asoc->dscp = params->spp_dscp & SCTP_DSCP_VAL_MASK; asoc->dscp |= SCTP_DSCP_SET_MASK; } else { sp->dscp = params->spp_dscp & SCTP_DSCP_VAL_MASK; sp->dscp |= SCTP_DSCP_SET_MASK; } } return 0; } static int sctp_setsockopt_peer_addr_params(struct sock *sk, struct sctp_paddrparams *params, unsigned int optlen) { struct sctp_transport *trans = NULL; struct sctp_association *asoc = NULL; struct sctp_sock *sp = sctp_sk(sk); int error; int hb_change, pmtud_change, sackdelay_change; if (optlen == ALIGN(offsetof(struct sctp_paddrparams, spp_ipv6_flowlabel), 4)) { if (params->spp_flags & (SPP_DSCP | SPP_IPV6_FLOWLABEL)) return -EINVAL; } else if (optlen != sizeof(*params)) { return -EINVAL; } /* Validate flags and value parameters. */ hb_change = params->spp_flags & SPP_HB; pmtud_change = params->spp_flags & SPP_PMTUD; sackdelay_change = params->spp_flags & SPP_SACKDELAY; if (hb_change == SPP_HB || pmtud_change == SPP_PMTUD || sackdelay_change == SPP_SACKDELAY || params->spp_sackdelay > 500 || (params->spp_pathmtu && params->spp_pathmtu < SCTP_DEFAULT_MINSEGMENT)) return -EINVAL; /* If an address other than INADDR_ANY is specified, and * no transport is found, then the request is invalid. */ if (!sctp_is_any(sk, (union sctp_addr *)&params->spp_address)) { trans = sctp_addr_id2transport(sk, &params->spp_address, params->spp_assoc_id); if (!trans) return -EINVAL; } /* Get association, if assoc_id != SCTP_FUTURE_ASSOC and the * socket is a one to many style socket, and an association * was not found, then the id was invalid. */ asoc = sctp_id2assoc(sk, params->spp_assoc_id); if (!asoc && params->spp_assoc_id != SCTP_FUTURE_ASSOC && sctp_style(sk, UDP)) return -EINVAL; /* Heartbeat demand can only be sent on a transport or * association, but not a socket. */ if (params->spp_flags & SPP_HB_DEMAND && !trans && !asoc) return -EINVAL; /* Process parameters. */ error = sctp_apply_peer_addr_params(params, trans, asoc, sp, hb_change, pmtud_change, sackdelay_change); if (error) return error; /* If changes are for association, also apply parameters to each * transport. */ if (!trans && asoc) { list_for_each_entry(trans, &asoc->peer.transport_addr_list, transports) { sctp_apply_peer_addr_params(params, trans, asoc, sp, hb_change, pmtud_change, sackdelay_change); } } return 0; } static inline __u32 sctp_spp_sackdelay_enable(__u32 param_flags) { return (param_flags & ~SPP_SACKDELAY) | SPP_SACKDELAY_ENABLE; } static inline __u32 sctp_spp_sackdelay_disable(__u32 param_flags) { return (param_flags & ~SPP_SACKDELAY) | SPP_SACKDELAY_DISABLE; } static void sctp_apply_asoc_delayed_ack(struct sctp_sack_info *params, struct sctp_association *asoc) { struct sctp_transport *trans; if (params->sack_delay) { asoc->sackdelay = msecs_to_jiffies(params->sack_delay); asoc->param_flags = sctp_spp_sackdelay_enable(asoc->param_flags); } if (params->sack_freq == 1) { asoc->param_flags = sctp_spp_sackdelay_disable(asoc->param_flags); } else if (params->sack_freq > 1) { asoc->sackfreq = params->sack_freq; asoc->param_flags = sctp_spp_sackdelay_enable(asoc->param_flags); } list_for_each_entry(trans, &asoc->peer.transport_addr_list, transports) { if (params->sack_delay) { trans->sackdelay = msecs_to_jiffies(params->sack_delay); trans->param_flags = sctp_spp_sackdelay_enable(trans->param_flags); } if (params->sack_freq == 1) { trans->param_flags = sctp_spp_sackdelay_disable(trans->param_flags); } else if (params->sack_freq > 1) { trans->sackfreq = params->sack_freq; trans->param_flags = sctp_spp_sackdelay_enable(trans->param_flags); } } } /* * 7.1.23. Get or set delayed ack timer (SCTP_DELAYED_SACK) * * This option will effect the way delayed acks are performed. This * option allows you to get or set the delayed ack time, in * milliseconds. It also allows changing the delayed ack frequency. * Changing the frequency to 1 disables the delayed sack algorithm. If * the assoc_id is 0, then this sets or gets the endpoints default * values. If the assoc_id field is non-zero, then the set or get * effects the specified association for the one to many model (the * assoc_id field is ignored by the one to one model). Note that if * sack_delay or sack_freq are 0 when setting this option, then the * current values will remain unchanged. * * struct sctp_sack_info { * sctp_assoc_t sack_assoc_id; * uint32_t sack_delay; * uint32_t sack_freq; * }; * * sack_assoc_id - This parameter, indicates which association the user * is performing an action upon. Note that if this field's value is * zero then the endpoints default value is changed (effecting future * associations only). * * sack_delay - This parameter contains the number of milliseconds that * the user is requesting the delayed ACK timer be set to. Note that * this value is defined in the standard to be between 200 and 500 * milliseconds. * * sack_freq - This parameter contains the number of packets that must * be received before a sack is sent without waiting for the delay * timer to expire. The default value for this is 2, setting this * value to 1 will disable the delayed sack algorithm. */ static int __sctp_setsockopt_delayed_ack(struct sock *sk, struct sctp_sack_info *params) { struct sctp_sock *sp = sctp_sk(sk); struct sctp_association *asoc; /* Validate value parameter. */ if (params->sack_delay > 500) return -EINVAL; /* Get association, if sack_assoc_id != SCTP_FUTURE_ASSOC and the * socket is a one to many style socket, and an association * was not found, then the id was invalid. */ asoc = sctp_id2assoc(sk, params->sack_assoc_id); if (!asoc && params->sack_assoc_id > SCTP_ALL_ASSOC && sctp_style(sk, UDP)) return -EINVAL; if (asoc) { sctp_apply_asoc_delayed_ack(params, asoc); return 0; } if (sctp_style(sk, TCP)) params->sack_assoc_id = SCTP_FUTURE_ASSOC; if (params->sack_assoc_id == SCTP_FUTURE_ASSOC || params->sack_assoc_id == SCTP_ALL_ASSOC) { if (params->sack_delay) { sp->sackdelay = params->sack_delay; sp->param_flags = sctp_spp_sackdelay_enable(sp->param_flags); } if (params->sack_freq == 1) { sp->param_flags = sctp_spp_sackdelay_disable(sp->param_flags); } else if (params->sack_freq > 1) { sp->sackfreq = params->sack_freq; sp->param_flags = sctp_spp_sackdelay_enable(sp->param_flags); } } if (params->sack_assoc_id == SCTP_CURRENT_ASSOC || params->sack_assoc_id == SCTP_ALL_ASSOC) list_for_each_entry(asoc, &sp->ep->asocs, asocs) sctp_apply_asoc_delayed_ack(params, asoc); return 0; } static int sctp_setsockopt_delayed_ack(struct sock *sk, struct sctp_sack_info *params, unsigned int optlen) { if (optlen == sizeof(struct sctp_assoc_value)) { struct sctp_assoc_value *v = (struct sctp_assoc_value *)params; struct sctp_sack_info p; pr_warn_ratelimited(DEPRECATED "%s (pid %d) " "Use of struct sctp_assoc_value in delayed_ack socket option.\n" "Use struct sctp_sack_info instead\n", current->comm, task_pid_nr(current)); p.sack_assoc_id = v->assoc_id; p.sack_delay = v->assoc_value; p.sack_freq = v->assoc_value ? 0 : 1; return __sctp_setsockopt_delayed_ack(sk, &p); } if (optlen != sizeof(struct sctp_sack_info)) return -EINVAL; if (params->sack_delay == 0 && params->sack_freq == 0) return 0; return __sctp_setsockopt_delayed_ack(sk, params); } /* 7.1.3 Initialization Parameters (SCTP_INITMSG) * * Applications can specify protocol parameters for the default association * initialization. The option name argument to setsockopt() and getsockopt() * is SCTP_INITMSG. * * Setting initialization parameters is effective only on an unconnected * socket (for UDP-style sockets only future associations are effected * by the change). With TCP-style sockets, this option is inherited by * sockets derived from a listener socket. */ static int sctp_setsockopt_initmsg(struct sock *sk, struct sctp_initmsg *sinit, unsigned int optlen) { struct sctp_sock *sp = sctp_sk(sk); if (optlen != sizeof(struct sctp_initmsg)) return -EINVAL; if (sinit->sinit_num_ostreams) sp->initmsg.sinit_num_ostreams = sinit->sinit_num_ostreams; if (sinit->sinit_max_instreams) sp->initmsg.sinit_max_instreams = sinit->sinit_max_instreams; if (sinit->sinit_max_attempts) sp->initmsg.sinit_max_attempts = sinit->sinit_max_attempts; if (sinit->sinit_max_init_timeo) sp->initmsg.sinit_max_init_timeo = sinit->sinit_max_init_timeo; return 0; } /* * 7.1.14 Set default send parameters (SCTP_DEFAULT_SEND_PARAM) * * Applications that wish to use the sendto() system call may wish to * specify a default set of parameters that would normally be supplied * through the inclusion of ancillary data. This socket option allows * such an application to set the default sctp_sndrcvinfo structure. * The application that wishes to use this socket option simply passes * in to this call the sctp_sndrcvinfo structure defined in Section * 5.2.2) The input parameters accepted by this call include * sinfo_stream, sinfo_flags, sinfo_ppid, sinfo_context, * sinfo_timetolive. The user must provide the sinfo_assoc_id field in * to this call if the caller is using the UDP model. */ static int sctp_setsockopt_default_send_param(struct sock *sk, struct sctp_sndrcvinfo *info, unsigned int optlen) { struct sctp_sock *sp = sctp_sk(sk); struct sctp_association *asoc; if (optlen != sizeof(*info)) return -EINVAL; if (info->sinfo_flags & ~(SCTP_UNORDERED | SCTP_ADDR_OVER | SCTP_ABORT | SCTP_EOF)) return -EINVAL; asoc = sctp_id2assoc(sk, info->sinfo_assoc_id); if (!asoc && info->sinfo_assoc_id > SCTP_ALL_ASSOC && sctp_style(sk, UDP)) return -EINVAL; if (asoc) { asoc->default_stream = info->sinfo_stream; asoc->default_flags = info->sinfo_flags; asoc->default_ppid = info->sinfo_ppid; asoc->default_context = info->sinfo_context; asoc->default_timetolive = info->sinfo_timetolive; return 0; } if (sctp_style(sk, TCP)) info->sinfo_assoc_id = SCTP_FUTURE_ASSOC; if (info->sinfo_assoc_id == SCTP_FUTURE_ASSOC || info->sinfo_assoc_id == SCTP_ALL_ASSOC) { sp->default_stream = info->sinfo_stream; sp->default_flags = info->sinfo_flags; sp->default_ppid = info->sinfo_ppid; sp->default_context = info->sinfo_context; sp->default_timetolive = info->sinfo_timetolive; } if (info->sinfo_assoc_id == SCTP_CURRENT_ASSOC || info->sinfo_assoc_id == SCTP_ALL_ASSOC) { list_for_each_entry(asoc, &sp->ep->asocs, asocs) { asoc->default_stream = info->sinfo_stream; asoc->default_flags = info->sinfo_flags; asoc->default_ppid = info->sinfo_ppid; asoc->default_context = info->sinfo_context; asoc->default_timetolive = info->sinfo_timetolive; } } return 0; } /* RFC6458, Section 8.1.31. Set/get Default Send Parameters * (SCTP_DEFAULT_SNDINFO) */ static int sctp_setsockopt_default_sndinfo(struct sock *sk, struct sctp_sndinfo *info, unsigned int optlen) { struct sctp_sock *sp = sctp_sk(sk); struct sctp_association *asoc; if (optlen != sizeof(*info)) return -EINVAL; if (info->snd_flags & ~(SCTP_UNORDERED | SCTP_ADDR_OVER | SCTP_ABORT | SCTP_EOF)) return -EINVAL; asoc = sctp_id2assoc(sk, info->snd_assoc_id); if (!asoc && info->snd_assoc_id > SCTP_ALL_ASSOC && sctp_style(sk, UDP)) return -EINVAL; if (asoc) { asoc->default_stream = info->snd_sid; asoc->default_flags = info->snd_flags; asoc->default_ppid = info->snd_ppid; asoc->default_context = info->snd_context; return 0; } if (sctp_style(sk, TCP)) info->snd_assoc_id = SCTP_FUTURE_ASSOC; if (info->snd_assoc_id == SCTP_FUTURE_ASSOC || info->snd_assoc_id == SCTP_ALL_ASSOC) { sp->default_stream = info->snd_sid; sp->default_flags = info->snd_flags; sp->default_ppid = info->snd_ppid; sp->default_context = info->snd_context; } if (info->snd_assoc_id == SCTP_CURRENT_ASSOC || info->snd_assoc_id == SCTP_ALL_ASSOC) { list_for_each_entry(asoc, &sp->ep->asocs, asocs) { asoc->default_stream = info->snd_sid; asoc->default_flags = info->snd_flags; asoc->default_ppid = info->snd_ppid; asoc->default_context = info->snd_context; } } return 0; } /* 7.1.10 Set Primary Address (SCTP_PRIMARY_ADDR) * * Requests that the local SCTP stack use the enclosed peer address as * the association primary. The enclosed address must be one of the * association peer's addresses. */ static int sctp_setsockopt_primary_addr(struct sock *sk, struct sctp_prim *prim, unsigned int optlen) { struct sctp_transport *trans; struct sctp_af *af; int err; if (optlen != sizeof(struct sctp_prim)) return -EINVAL; /* Allow security module to validate address but need address len. */ af = sctp_get_af_specific(prim->ssp_addr.ss_family); if (!af) return -EINVAL; err = security_sctp_bind_connect(sk, SCTP_PRIMARY_ADDR, (struct sockaddr *)&prim->ssp_addr, af->sockaddr_len); if (err) return err; trans = sctp_addr_id2transport(sk, &prim->ssp_addr, prim->ssp_assoc_id); if (!trans) return -EINVAL; sctp_assoc_set_primary(trans->asoc, trans); return 0; } /* * 7.1.5 SCTP_NODELAY * * Turn on/off any Nagle-like algorithm. This means that packets are * generally sent as soon as possible and no unnecessary delays are * introduced, at the cost of more packets in the network. Expects an * integer boolean flag. */ static int sctp_setsockopt_nodelay(struct sock *sk, int *val, unsigned int optlen) { if (optlen < sizeof(int)) return -EINVAL; sctp_sk(sk)->nodelay = (*val == 0) ? 0 : 1; return 0; } /* * * 7.1.1 SCTP_RTOINFO * * The protocol parameters used to initialize and bound retransmission * timeout (RTO) are tunable. sctp_rtoinfo structure is used to access * and modify these parameters. * All parameters are time values, in milliseconds. A value of 0, when * modifying the parameters, indicates that the current value should not * be changed. * */ static int sctp_setsockopt_rtoinfo(struct sock *sk, struct sctp_rtoinfo *rtoinfo, unsigned int optlen) { struct sctp_association *asoc; unsigned long rto_min, rto_max; struct sctp_sock *sp = sctp_sk(sk); if (optlen != sizeof (struct sctp_rtoinfo)) return -EINVAL; asoc = sctp_id2assoc(sk, rtoinfo->srto_assoc_id); /* Set the values to the specific association */ if (!asoc && rtoinfo->srto_assoc_id != SCTP_FUTURE_ASSOC && sctp_style(sk, UDP)) return -EINVAL; rto_max = rtoinfo->srto_max; rto_min = rtoinfo->srto_min; if (rto_max) rto_max = asoc ? msecs_to_jiffies(rto_max) : rto_max; else rto_max = asoc ? asoc->rto_max : sp->rtoinfo.srto_max; if (rto_min) rto_min = asoc ? msecs_to_jiffies(rto_min) : rto_min; else rto_min = asoc ? asoc->rto_min : sp->rtoinfo.srto_min; if (rto_min > rto_max) return -EINVAL; if (asoc) { if (rtoinfo->srto_initial != 0) asoc->rto_initial = msecs_to_jiffies(rtoinfo->srto_initial); asoc->rto_max = rto_max; asoc->rto_min = rto_min; } else { /* If there is no association or the association-id = 0 * set the values to the endpoint. */ if (rtoinfo->srto_initial != 0) sp->rtoinfo.srto_initial = rtoinfo->srto_initial; sp->rtoinfo.srto_max = rto_max; sp->rtoinfo.srto_min = rto_min; } return 0; } /* * * 7.1.2 SCTP_ASSOCINFO * * This option is used to tune the maximum retransmission attempts * of the association. * Returns an error if the new association retransmission value is * greater than the sum of the retransmission value of the peer. * See [SCTP] for more information. * */ static int sctp_setsockopt_associnfo(struct sock *sk, struct sctp_assocparams *assocparams, unsigned int optlen) { struct sctp_association *asoc; if (optlen != sizeof(struct sctp_assocparams)) return -EINVAL; asoc = sctp_id2assoc(sk, assocparams->sasoc_assoc_id); if (!asoc && assocparams->sasoc_assoc_id != SCTP_FUTURE_ASSOC && sctp_style(sk, UDP)) return -EINVAL; /* Set the values to the specific association */ if (asoc) { if (assocparams->sasoc_asocmaxrxt != 0) { __u32 path_sum = 0; int paths = 0; struct sctp_transport *peer_addr; list_for_each_entry(peer_addr, &asoc->peer.transport_addr_list, transports) { path_sum += peer_addr->pathmaxrxt; paths++; } /* Only validate asocmaxrxt if we have more than * one path/transport. We do this because path * retransmissions are only counted when we have more * then one path. */ if (paths > 1 && assocparams->sasoc_asocmaxrxt > path_sum) return -EINVAL; asoc->max_retrans = assocparams->sasoc_asocmaxrxt; } if (assocparams->sasoc_cookie_life != 0) asoc->cookie_life = ms_to_ktime(assocparams->sasoc_cookie_life); } else { /* Set the values to the endpoint */ struct sctp_sock *sp = sctp_sk(sk); if (assocparams->sasoc_asocmaxrxt != 0) sp->assocparams.sasoc_asocmaxrxt = assocparams->sasoc_asocmaxrxt; if (assocparams->sasoc_cookie_life != 0) sp->assocparams.sasoc_cookie_life = assocparams->sasoc_cookie_life; } return 0; } /* * 7.1.16 Set/clear IPv4 mapped addresses (SCTP_I_WANT_MAPPED_V4_ADDR) * * This socket option is a boolean flag which turns on or off mapped V4 * addresses. If this option is turned on and the socket is type * PF_INET6, then IPv4 addresses will be mapped to V6 representation. * If this option is turned off, then no mapping will be done of V4 * addresses and a user will receive both PF_INET6 and PF_INET type * addresses on the socket. */ static int sctp_setsockopt_mappedv4(struct sock *sk, int *val, unsigned int optlen) { struct sctp_sock *sp = sctp_sk(sk); if (optlen < sizeof(int)) return -EINVAL; if (*val) sp->v4mapped = 1; else sp->v4mapped = 0; return 0; } /* * 8.1.16. Get or Set the Maximum Fragmentation Size (SCTP_MAXSEG) * This option will get or set the maximum size to put in any outgoing * SCTP DATA chunk. If a message is larger than this size it will be * fragmented by SCTP into the specified size. Note that the underlying * SCTP implementation may fragment into smaller sized chunks when the * PMTU of the underlying association is smaller than the value set by * the user. The default value for this option is '0' which indicates * the user is NOT limiting fragmentation and only the PMTU will effect * SCTP's choice of DATA chunk size. Note also that values set larger * than the maximum size of an IP datagram will effectively let SCTP * control fragmentation (i.e. the same as setting this option to 0). * * The following structure is used to access and modify this parameter: * * struct sctp_assoc_value { * sctp_assoc_t assoc_id; * uint32_t assoc_value; * }; * * assoc_id: This parameter is ignored for one-to-one style sockets. * For one-to-many style sockets this parameter indicates which * association the user is performing an action upon. Note that if * this field's value is zero then the endpoints default value is * changed (effecting future associations only). * assoc_value: This parameter specifies the maximum size in bytes. */ static int sctp_setsockopt_maxseg(struct sock *sk, struct sctp_assoc_value *params, unsigned int optlen) { struct sctp_sock *sp = sctp_sk(sk); struct sctp_association *asoc; sctp_assoc_t assoc_id; int val; if (optlen == sizeof(int)) { pr_warn_ratelimited(DEPRECATED "%s (pid %d) " "Use of int in maxseg socket option.\n" "Use struct sctp_assoc_value instead\n", current->comm, task_pid_nr(current)); assoc_id = SCTP_FUTURE_ASSOC; val = *(int *)params; } else if (optlen == sizeof(struct sctp_assoc_value)) { assoc_id = params->assoc_id; val = params->assoc_value; } else { return -EINVAL; } asoc = sctp_id2assoc(sk, assoc_id); if (!asoc && assoc_id != SCTP_FUTURE_ASSOC && sctp_style(sk, UDP)) return -EINVAL; if (val) { int min_len, max_len; __u16 datasize = asoc ? sctp_datachk_len(&asoc->stream) : sizeof(struct sctp_data_chunk); min_len = sctp_min_frag_point(sp, datasize); max_len = SCTP_MAX_CHUNK_LEN - datasize; if (val < min_len || val > max_len) return -EINVAL; } if (asoc) { asoc->user_frag = val; sctp_assoc_update_frag_point(asoc); } else { sp->user_frag = val; } return 0; } /* * 7.1.9 Set Peer Primary Address (SCTP_SET_PEER_PRIMARY_ADDR) * * Requests that the peer mark the enclosed address as the association * primary. The enclosed address must be one of the association's * locally bound addresses. The following structure is used to make a * set primary request: */ static int sctp_setsockopt_peer_primary_addr(struct sock *sk, struct sctp_setpeerprim *prim, unsigned int optlen) { struct sctp_sock *sp; struct sctp_association *asoc = NULL; struct sctp_chunk *chunk; struct sctp_af *af; int err; sp = sctp_sk(sk); if (!sp->ep->asconf_enable) return -EPERM; if (optlen != sizeof(struct sctp_setpeerprim)) return -EINVAL; asoc = sctp_id2assoc(sk, prim->sspp_assoc_id); if (!asoc) return -EINVAL; if (!asoc->peer.asconf_capable) return -EPERM; if (asoc->peer.addip_disabled_mask & SCTP_PARAM_SET_PRIMARY) return -EPERM; if (!sctp_state(asoc, ESTABLISHED)) return -ENOTCONN; af = sctp_get_af_specific(prim->sspp_addr.ss_family); if (!af) return -EINVAL; if (!af->addr_valid((union sctp_addr *)&prim->sspp_addr, sp, NULL)) return -EADDRNOTAVAIL; if (!sctp_assoc_lookup_laddr(asoc, (union sctp_addr *)&prim->sspp_addr)) return -EADDRNOTAVAIL; /* Allow security module to validate address. */ err = security_sctp_bind_connect(sk, SCTP_SET_PEER_PRIMARY_ADDR, (struct sockaddr *)&prim->sspp_addr, af->sockaddr_len); if (err) return err; /* Create an ASCONF chunk with SET_PRIMARY parameter */ chunk = sctp_make_asconf_set_prim(asoc, (union sctp_addr *)&prim->sspp_addr); if (!chunk) return -ENOMEM; err = sctp_send_asconf(asoc, chunk); pr_debug("%s: we set peer primary addr primitively\n", __func__); return err; } static int sctp_setsockopt_adaptation_layer(struct sock *sk, struct sctp_setadaptation *adapt, unsigned int optlen) { if (optlen != sizeof(struct sctp_setadaptation)) return -EINVAL; sctp_sk(sk)->adaptation_ind = adapt->ssb_adaptation_ind; return 0; } /* * 7.1.29. Set or Get the default context (SCTP_CONTEXT) * * The context field in the sctp_sndrcvinfo structure is normally only * used when a failed message is retrieved holding the value that was * sent down on the actual send call. This option allows the setting of * a default context on an association basis that will be received on * reading messages from the peer. This is especially helpful in the * one-2-many model for an application to keep some reference to an * internal state machine that is processing messages on the * association. Note that the setting of this value only effects * received messages from the peer and does not effect the value that is * saved with outbound messages. */ static int sctp_setsockopt_context(struct sock *sk, struct sctp_assoc_value *params, unsigned int optlen) { struct sctp_sock *sp = sctp_sk(sk); struct sctp_association *asoc; if (optlen != sizeof(struct sctp_assoc_value)) return -EINVAL; asoc = sctp_id2assoc(sk, params->assoc_id); if (!asoc && params->assoc_id > SCTP_ALL_ASSOC && sctp_style(sk, UDP)) return -EINVAL; if (asoc) { asoc->default_rcv_context = params->assoc_value; return 0; } if (sctp_style(sk, TCP)) params->assoc_id = SCTP_FUTURE_ASSOC; if (params->assoc_id == SCTP_FUTURE_ASSOC || params->assoc_id == SCTP_ALL_ASSOC) sp->default_rcv_context = params->assoc_value; if (params->assoc_id == SCTP_CURRENT_ASSOC || params->assoc_id == SCTP_ALL_ASSOC) list_for_each_entry(asoc, &sp->ep->asocs, asocs) asoc->default_rcv_context = params->assoc_value; return 0; } /* * 7.1.24. Get or set fragmented interleave (SCTP_FRAGMENT_INTERLEAVE) * * This options will at a minimum specify if the implementation is doing * fragmented interleave. Fragmented interleave, for a one to many * socket, is when subsequent calls to receive a message may return * parts of messages from different associations. Some implementations * may allow you to turn this value on or off. If so, when turned off, * no fragment interleave will occur (which will cause a head of line * blocking amongst multiple associations sharing the same one to many * socket). When this option is turned on, then each receive call may * come from a different association (thus the user must receive data * with the extended calls (e.g. sctp_recvmsg) to keep track of which * association each receive belongs to. * * This option takes a boolean value. A non-zero value indicates that * fragmented interleave is on. A value of zero indicates that * fragmented interleave is off. * * Note that it is important that an implementation that allows this * option to be turned on, have it off by default. Otherwise an unaware * application using the one to many model may become confused and act * incorrectly. */ static int sctp_setsockopt_fragment_interleave(struct sock *sk, int *val, unsigned int optlen) { if (optlen != sizeof(int)) return -EINVAL; sctp_sk(sk)->frag_interleave = !!*val; if (!sctp_sk(sk)->frag_interleave) sctp_sk(sk)->ep->intl_enable = 0; return 0; } /* * 8.1.21. Set or Get the SCTP Partial Delivery Point * (SCTP_PARTIAL_DELIVERY_POINT) * * This option will set or get the SCTP partial delivery point. This * point is the size of a message where the partial delivery API will be * invoked to help free up rwnd space for the peer. Setting this to a * lower value will cause partial deliveries to happen more often. The * calls argument is an integer that sets or gets the partial delivery * point. Note also that the call will fail if the user attempts to set * this value larger than the socket receive buffer size. * * Note that any single message having a length smaller than or equal to * the SCTP partial delivery point will be delivered in one single read * call as long as the user provided buffer is large enough to hold the * message. */ static int sctp_setsockopt_partial_delivery_point(struct sock *sk, u32 *val, unsigned int optlen) { if (optlen != sizeof(u32)) return -EINVAL; /* Note: We double the receive buffer from what the user sets * it to be, also initial rwnd is based on rcvbuf/2. */ if (*val > (sk->sk_rcvbuf >> 1)) return -EINVAL; sctp_sk(sk)->pd_point = *val; return 0; /* is this the right error code? */ } /* * 7.1.28. Set or Get the maximum burst (SCTP_MAX_BURST) * * This option will allow a user to change the maximum burst of packets * that can be emitted by this association. Note that the default value * is 4, and some implementations may restrict this setting so that it * can only be lowered. * * NOTE: This text doesn't seem right. Do this on a socket basis with * future associations inheriting the socket value. */ static int sctp_setsockopt_maxburst(struct sock *sk, struct sctp_assoc_value *params, unsigned int optlen) { struct sctp_sock *sp = sctp_sk(sk); struct sctp_association *asoc; sctp_assoc_t assoc_id; u32 assoc_value; if (optlen == sizeof(int)) { pr_warn_ratelimited(DEPRECATED "%s (pid %d) " "Use of int in max_burst socket option deprecated.\n" "Use struct sctp_assoc_value instead\n", current->comm, task_pid_nr(current)); assoc_id = SCTP_FUTURE_ASSOC; assoc_value = *((int *)params); } else if (optlen == sizeof(struct sctp_assoc_value)) { assoc_id = params->assoc_id; assoc_value = params->assoc_value; } else return -EINVAL; asoc = sctp_id2assoc(sk, assoc_id); if (!asoc && assoc_id > SCTP_ALL_ASSOC && sctp_style(sk, UDP)) return -EINVAL; if (asoc) { asoc->max_burst = assoc_value; return 0; } if (sctp_style(sk, TCP)) assoc_id = SCTP_FUTURE_ASSOC; if (assoc_id == SCTP_FUTURE_ASSOC || assoc_id == SCTP_ALL_ASSOC) sp->max_burst = assoc_value; if (assoc_id == SCTP_CURRENT_ASSOC || assoc_id == SCTP_ALL_ASSOC) list_for_each_entry(asoc, &sp->ep->asocs, asocs) asoc->max_burst = assoc_value; return 0; } /* * 7.1.18. Add a chunk that must be authenticated (SCTP_AUTH_CHUNK) * * This set option adds a chunk type that the user is requesting to be * received only in an authenticated way. Changes to the list of chunks * will only effect future associations on the socket. */ static int sctp_setsockopt_auth_chunk(struct sock *sk, struct sctp_authchunk *val, unsigned int optlen) { struct sctp_endpoint *ep = sctp_sk(sk)->ep; if (!ep->auth_enable) return -EACCES; if (optlen != sizeof(struct sctp_authchunk)) return -EINVAL; switch (val->sauth_chunk) { case SCTP_CID_INIT: case SCTP_CID_INIT_ACK: case SCTP_CID_SHUTDOWN_COMPLETE: case SCTP_CID_AUTH: return -EINVAL; } /* add this chunk id to the endpoint */ return sctp_auth_ep_add_chunkid(ep, val->sauth_chunk); } /* * 7.1.19. Get or set the list of supported HMAC Identifiers (SCTP_HMAC_IDENT) * * This option gets or sets the list of HMAC algorithms that the local * endpoint requires the peer to use. */ static int sctp_setsockopt_hmac_ident(struct sock *sk, struct sctp_hmacalgo *hmacs, unsigned int optlen) { struct sctp_endpoint *ep = sctp_sk(sk)->ep; u32 idents; if (!ep->auth_enable) return -EACCES; if (optlen < sizeof(struct sctp_hmacalgo)) return -EINVAL; optlen = min_t(unsigned int, optlen, sizeof(struct sctp_hmacalgo) + SCTP_AUTH_NUM_HMACS * sizeof(u16)); idents = hmacs->shmac_num_idents; if (idents == 0 || idents > SCTP_AUTH_NUM_HMACS || (idents * sizeof(u16)) > (optlen - sizeof(struct sctp_hmacalgo))) return -EINVAL; return sctp_auth_ep_set_hmacs(ep, hmacs); } /* * 7.1.20. Set a shared key (SCTP_AUTH_KEY) * * This option will set a shared secret key which is used to build an * association shared key. */ static int sctp_setsockopt_auth_key(struct sock *sk, struct sctp_authkey *authkey, unsigned int optlen) { struct sctp_endpoint *ep = sctp_sk(sk)->ep; struct sctp_association *asoc; int ret = -EINVAL; if (optlen <= sizeof(struct sctp_authkey)) return -EINVAL; /* authkey->sca_keylength is u16, so optlen can't be bigger than * this. */ optlen = min_t(unsigned int, optlen, USHRT_MAX + sizeof(*authkey)); if (authkey->sca_keylength > optlen - sizeof(*authkey)) goto out; asoc = sctp_id2assoc(sk, authkey->sca_assoc_id); if (!asoc && authkey->sca_assoc_id > SCTP_ALL_ASSOC && sctp_style(sk, UDP)) goto out; if (asoc) { ret = sctp_auth_set_key(ep, asoc, authkey); goto out; } if (sctp_style(sk, TCP)) authkey->sca_assoc_id = SCTP_FUTURE_ASSOC; if (authkey->sca_assoc_id == SCTP_FUTURE_ASSOC || authkey->sca_assoc_id == SCTP_ALL_ASSOC) { ret = sctp_auth_set_key(ep, asoc, authkey); if (ret) goto out; } ret = 0; if (authkey->sca_assoc_id == SCTP_CURRENT_ASSOC || authkey->sca_assoc_id == SCTP_ALL_ASSOC) { list_for_each_entry(asoc, &ep->asocs, asocs) { int res = sctp_auth_set_key(ep, asoc, authkey); if (res && !ret) ret = res; } } out: memzero_explicit(authkey, optlen); return ret; } /* * 7.1.21. Get or set the active shared key (SCTP_AUTH_ACTIVE_KEY) * * This option will get or set the active shared key to be used to build * the association shared key. */ static int sctp_setsockopt_active_key(struct sock *sk, struct sctp_authkeyid *val, unsigned int optlen) { struct sctp_endpoint *ep = sctp_sk(sk)->ep; struct sctp_association *asoc; int ret = 0; if (optlen != sizeof(struct sctp_authkeyid)) return -EINVAL; asoc = sctp_id2assoc(sk, val->scact_assoc_id); if (!asoc && val->scact_assoc_id > SCTP_ALL_ASSOC && sctp_style(sk, UDP)) return -EINVAL; if (asoc) return sctp_auth_set_active_key(ep, asoc, val->scact_keynumber); if (sctp_style(sk, TCP)) val->scact_assoc_id = SCTP_FUTURE_ASSOC; if (val->scact_assoc_id == SCTP_FUTURE_ASSOC || val->scact_assoc_id == SCTP_ALL_ASSOC) { ret = sctp_auth_set_active_key(ep, asoc, val->scact_keynumber); if (ret) return ret; } if (val->scact_assoc_id == SCTP_CURRENT_ASSOC || val->scact_assoc_id == SCTP_ALL_ASSOC) { list_for_each_entry(asoc, &ep->asocs, asocs) { int res = sctp_auth_set_active_key(ep, asoc, val->scact_keynumber); if (res && !ret) ret = res; } } return ret; } /* * 7.1.22. Delete a shared key (SCTP_AUTH_DELETE_KEY) * * This set option will delete a shared secret key from use. */ static int sctp_setsockopt_del_key(struct sock *sk, struct sctp_authkeyid *val, unsigned int optlen) { struct sctp_endpoint *ep = sctp_sk(sk)->ep; struct sctp_association *asoc; int ret = 0; if (optlen != sizeof(struct sctp_authkeyid)) return -EINVAL; asoc = sctp_id2assoc(sk, val->scact_assoc_id); if (!asoc && val->scact_assoc_id > SCTP_ALL_ASSOC && sctp_style(sk, UDP)) return -EINVAL; if (asoc) return sctp_auth_del_key_id(ep, asoc, val->scact_keynumber); if (sctp_style(sk, TCP)) val->scact_assoc_id = SCTP_FUTURE_ASSOC; if (val->scact_assoc_id == SCTP_FUTURE_ASSOC || val->scact_assoc_id == SCTP_ALL_ASSOC) { ret = sctp_auth_del_key_id(ep, asoc, val->scact_keynumber); if (ret) return ret; } if (val->scact_assoc_id == SCTP_CURRENT_ASSOC || val->scact_assoc_id == SCTP_ALL_ASSOC) { list_for_each_entry(asoc, &ep->asocs, asocs) { int res = sctp_auth_del_key_id(ep, asoc, val->scact_keynumber); if (res && !ret) ret = res; } } return ret; } /* * 8.3.4 Deactivate a Shared Key (SCTP_AUTH_DEACTIVATE_KEY) * * This set option will deactivate a shared secret key. */ static int sctp_setsockopt_deactivate_key(struct sock *sk, struct sctp_authkeyid *val, unsigned int optlen) { struct sctp_endpoint *ep = sctp_sk(sk)->ep; struct sctp_association *asoc; int ret = 0; if (optlen != sizeof(struct sctp_authkeyid)) return -EINVAL; asoc = sctp_id2assoc(sk, val->scact_assoc_id); if (!asoc && val->scact_assoc_id > SCTP_ALL_ASSOC && sctp_style(sk, UDP)) return -EINVAL; if (asoc) return sctp_auth_deact_key_id(ep, asoc, val->scact_keynumber); if (sctp_style(sk, TCP)) val->scact_assoc_id = SCTP_FUTURE_ASSOC; if (val->scact_assoc_id == SCTP_FUTURE_ASSOC || val->scact_assoc_id == SCTP_ALL_ASSOC) { ret = sctp_auth_deact_key_id(ep, asoc, val->scact_keynumber); if (ret) return ret; } if (val->scact_assoc_id == SCTP_CURRENT_ASSOC || val->scact_assoc_id == SCTP_ALL_ASSOC) { list_for_each_entry(asoc, &ep->asocs, asocs) { int res = sctp_auth_deact_key_id(ep, asoc, val->scact_keynumber); if (res && !ret) ret = res; } } return ret; } /* * 8.1.23 SCTP_AUTO_ASCONF * * This option will enable or disable the use of the automatic generation of * ASCONF chunks to add and delete addresses to an existing association. Note * that this option has two caveats namely: a) it only affects sockets that * are bound to all addresses available to the SCTP stack, and b) the system * administrator may have an overriding control that turns the ASCONF feature * off no matter what setting the socket option may have. * This option expects an integer boolean flag, where a non-zero value turns on * the option, and a zero value turns off the option. * Note. In this implementation, socket operation overrides default parameter * being set by sysctl as well as FreeBSD implementation */ static int sctp_setsockopt_auto_asconf(struct sock *sk, int *val, unsigned int optlen) { struct sctp_sock *sp = sctp_sk(sk); if (optlen < sizeof(int)) return -EINVAL; if (!sctp_is_ep_boundall(sk) && *val) return -EINVAL; if ((*val && sp->do_auto_asconf) || (!*val && !sp->do_auto_asconf)) return 0; spin_lock_bh(&sock_net(sk)->sctp.addr_wq_lock); if (*val == 0 && sp->do_auto_asconf) { list_del(&sp->auto_asconf_list); sp->do_auto_asconf = 0; } else if (*val && !sp->do_auto_asconf) { list_add_tail(&sp->auto_asconf_list, &sock_net(sk)->sctp.auto_asconf_splist); sp->do_auto_asconf = 1; } spin_unlock_bh(&sock_net(sk)->sctp.addr_wq_lock); return 0; } /* * SCTP_PEER_ADDR_THLDS * * This option allows us to alter the partially failed threshold for one or all * transports in an association. See Section 6.1 of: * http://www.ietf.org/id/draft-nishida-tsvwg-sctp-failover-05.txt */ static int sctp_setsockopt_paddr_thresholds(struct sock *sk, struct sctp_paddrthlds_v2 *val, unsigned int optlen, bool v2) { struct sctp_transport *trans; struct sctp_association *asoc; int len; len = v2 ? sizeof(*val) : sizeof(struct sctp_paddrthlds); if (optlen < len) return -EINVAL; if (v2 && val->spt_pathpfthld > val->spt_pathcpthld) return -EINVAL; if (!sctp_is_any(sk, (const union sctp_addr *)&val->spt_address)) { trans = sctp_addr_id2transport(sk, &val->spt_address, val->spt_assoc_id); if (!trans) return -ENOENT; if (val->spt_pathmaxrxt) trans->pathmaxrxt = val->spt_pathmaxrxt; if (v2) trans->ps_retrans = val->spt_pathcpthld; trans->pf_retrans = val->spt_pathpfthld; return 0; } asoc = sctp_id2assoc(sk, val->spt_assoc_id); if (!asoc && val->spt_assoc_id != SCTP_FUTURE_ASSOC && sctp_style(sk, UDP)) return -EINVAL; if (asoc) { list_for_each_entry(trans, &asoc->peer.transport_addr_list, transports) { if (val->spt_pathmaxrxt) trans->pathmaxrxt = val->spt_pathmaxrxt; if (v2) trans->ps_retrans = val->spt_pathcpthld; trans->pf_retrans = val->spt_pathpfthld; } if (val->spt_pathmaxrxt) asoc->pathmaxrxt = val->spt_pathmaxrxt; if (v2) asoc->ps_retrans = val->spt_pathcpthld; asoc->pf_retrans = val->spt_pathpfthld; } else { struct sctp_sock *sp = sctp_sk(sk); if (val->spt_pathmaxrxt) sp->pathmaxrxt = val->spt_pathmaxrxt; if (v2) sp->ps_retrans = val->spt_pathcpthld; sp->pf_retrans = val->spt_pathpfthld; } return 0; } static int sctp_setsockopt_recvrcvinfo(struct sock *sk, int *val, unsigned int optlen) { if (optlen < sizeof(int)) return -EINVAL; sctp_sk(sk)->recvrcvinfo = (*val == 0) ? 0 : 1; return 0; } static int sctp_setsockopt_recvnxtinfo(struct sock *sk, int *val, unsigned int optlen) { if (optlen < sizeof(int)) return -EINVAL; sctp_sk(sk)->recvnxtinfo = (*val == 0) ? 0 : 1; return 0; } static int sctp_setsockopt_pr_supported(struct sock *sk, struct sctp_assoc_value *params, unsigned int optlen) { struct sctp_association *asoc; if (optlen != sizeof(*params)) return -EINVAL; asoc = sctp_id2assoc(sk, params->assoc_id); if (!asoc && params->assoc_id != SCTP_FUTURE_ASSOC && sctp_style(sk, UDP)) return -EINVAL; sctp_sk(sk)->ep->prsctp_enable = !!params->assoc_value; return 0; } static int sctp_setsockopt_default_prinfo(struct sock *sk, struct sctp_default_prinfo *info, unsigned int optlen) { struct sctp_sock *sp = sctp_sk(sk); struct sctp_association *asoc; int retval = -EINVAL; if (optlen != sizeof(*info)) goto out; if (info->pr_policy & ~SCTP_PR_SCTP_MASK) goto out; if (info->pr_policy == SCTP_PR_SCTP_NONE) info->pr_value = 0; asoc = sctp_id2assoc(sk, info->pr_assoc_id); if (!asoc && info->pr_assoc_id > SCTP_ALL_ASSOC && sctp_style(sk, UDP)) goto out; retval = 0; if (asoc) { SCTP_PR_SET_POLICY(asoc->default_flags, info->pr_policy); asoc->default_timetolive = info->pr_value; goto out; } if (sctp_style(sk, TCP)) info->pr_assoc_id = SCTP_FUTURE_ASSOC; if (info->pr_assoc_id == SCTP_FUTURE_ASSOC || info->pr_assoc_id == SCTP_ALL_ASSOC) { SCTP_PR_SET_POLICY(sp->default_flags, info->pr_policy); sp->default_timetolive = info->pr_value; } if (info->pr_assoc_id == SCTP_CURRENT_ASSOC || info->pr_assoc_id == SCTP_ALL_ASSOC) { list_for_each_entry(asoc, &sp->ep->asocs, asocs) { SCTP_PR_SET_POLICY(asoc->default_flags, info->pr_policy); asoc->default_timetolive = info->pr_value; } } out: return retval; } static int sctp_setsockopt_reconfig_supported(struct sock *sk, struct sctp_assoc_value *params, unsigned int optlen) { struct sctp_association *asoc; int retval = -EINVAL; if (optlen != sizeof(*params)) goto out; asoc = sctp_id2assoc(sk, params->assoc_id); if (!asoc && params->assoc_id != SCTP_FUTURE_ASSOC && sctp_style(sk, UDP)) goto out; sctp_sk(sk)->ep->reconf_enable = !!params->assoc_value; retval = 0; out: return retval; } static int sctp_setsockopt_enable_strreset(struct sock *sk, struct sctp_assoc_value *params, unsigned int optlen) { struct sctp_endpoint *ep = sctp_sk(sk)->ep; struct sctp_association *asoc; int retval = -EINVAL; if (optlen != sizeof(*params)) goto out; if (params->assoc_value & (~SCTP_ENABLE_STRRESET_MASK)) goto out; asoc = sctp_id2assoc(sk, params->assoc_id); if (!asoc && params->assoc_id > SCTP_ALL_ASSOC && sctp_style(sk, UDP)) goto out; retval = 0; if (asoc) { asoc->strreset_enable = params->assoc_value; goto out; } if (sctp_style(sk, TCP)) params->assoc_id = SCTP_FUTURE_ASSOC; if (params->assoc_id == SCTP_FUTURE_ASSOC || params->assoc_id == SCTP_ALL_ASSOC) ep->strreset_enable = params->assoc_value; if (params->assoc_id == SCTP_CURRENT_ASSOC || params->assoc_id == SCTP_ALL_ASSOC) list_for_each_entry(asoc, &ep->asocs, asocs) asoc->strreset_enable = params->assoc_value; out: return retval; } static int sctp_setsockopt_reset_streams(struct sock *sk, struct sctp_reset_streams *params, unsigned int optlen) { struct sctp_association *asoc; if (optlen < sizeof(*params)) return -EINVAL; /* srs_number_streams is u16, so optlen can't be bigger than this. */ optlen = min_t(unsigned int, optlen, USHRT_MAX + sizeof(__u16) * sizeof(*params)); if (params->srs_number_streams * sizeof(__u16) > optlen - sizeof(*params)) return -EINVAL; asoc = sctp_id2assoc(sk, params->srs_assoc_id); if (!asoc) return -EINVAL; return sctp_send_reset_streams(asoc, params); } static int sctp_setsockopt_reset_assoc(struct sock *sk, sctp_assoc_t *associd, unsigned int optlen) { struct sctp_association *asoc; if (optlen != sizeof(*associd)) return -EINVAL; asoc = sctp_id2assoc(sk, *associd); if (!asoc) return -EINVAL; return sctp_send_reset_assoc(asoc); } static int sctp_setsockopt_add_streams(struct sock *sk, struct sctp_add_streams *params, unsigned int optlen) { struct sctp_association *asoc; if (optlen != sizeof(*params)) return -EINVAL; asoc = sctp_id2assoc(sk, params->sas_assoc_id); if (!asoc) return -EINVAL; return sctp_send_add_streams(asoc, params); } static int sctp_setsockopt_scheduler(struct sock *sk, struct sctp_assoc_value *params, unsigned int optlen) { struct sctp_sock *sp = sctp_sk(sk); struct sctp_association *asoc; int retval = 0; if (optlen < sizeof(*params)) return -EINVAL; if (params->assoc_value > SCTP_SS_MAX) return -EINVAL; asoc = sctp_id2assoc(sk, params->assoc_id); if (!asoc && params->assoc_id > SCTP_ALL_ASSOC && sctp_style(sk, UDP)) return -EINVAL; if (asoc) return sctp_sched_set_sched(asoc, params->assoc_value); if (sctp_style(sk, TCP)) params->assoc_id = SCTP_FUTURE_ASSOC; if (params->assoc_id == SCTP_FUTURE_ASSOC || params->assoc_id == SCTP_ALL_ASSOC) sp->default_ss = params->assoc_value; if (params->assoc_id == SCTP_CURRENT_ASSOC || params->assoc_id == SCTP_ALL_ASSOC) { list_for_each_entry(asoc, &sp->ep->asocs, asocs) { int ret = sctp_sched_set_sched(asoc, params->assoc_value); if (ret && !retval) retval = ret; } } return retval; } static int sctp_setsockopt_scheduler_value(struct sock *sk, struct sctp_stream_value *params, unsigned int optlen) { struct sctp_association *asoc; int retval = -EINVAL; if (optlen < sizeof(*params)) goto out; asoc = sctp_id2assoc(sk, params->assoc_id); if (!asoc && params->assoc_id != SCTP_CURRENT_ASSOC && sctp_style(sk, UDP)) goto out; if (asoc) { retval = sctp_sched_set_value(asoc, params->stream_id, params->stream_value, GFP_KERNEL); goto out; } retval = 0; list_for_each_entry(asoc, &sctp_sk(sk)->ep->asocs, asocs) { int ret = sctp_sched_set_value(asoc, params->stream_id, params->stream_value, GFP_KERNEL); if (ret && !retval) /* try to return the 1st error. */ retval = ret; } out: return retval; } static int sctp_setsockopt_interleaving_supported(struct sock *sk, struct sctp_assoc_value *p, unsigned int optlen) { struct sctp_sock *sp = sctp_sk(sk); struct sctp_association *asoc; if (optlen < sizeof(*p)) return -EINVAL; asoc = sctp_id2assoc(sk, p->assoc_id); if (!asoc && p->assoc_id != SCTP_FUTURE_ASSOC && sctp_style(sk, UDP)) return -EINVAL; if (!sock_net(sk)->sctp.intl_enable || !sp->frag_interleave) { return -EPERM; } sp->ep->intl_enable = !!p->assoc_value; return 0; } static int sctp_setsockopt_reuse_port(struct sock *sk, int *val, unsigned int optlen) { if (!sctp_style(sk, TCP)) return -EOPNOTSUPP; if (sctp_sk(sk)->ep->base.bind_addr.port) return -EFAULT; if (optlen < sizeof(int)) return -EINVAL; sctp_sk(sk)->reuse = !!*val; return 0; } static int sctp_assoc_ulpevent_type_set(struct sctp_event *param, struct sctp_association *asoc) { struct sctp_ulpevent *event; sctp_ulpevent_type_set(&asoc->subscribe, param->se_type, param->se_on); if (param->se_type == SCTP_SENDER_DRY_EVENT && param->se_on) { if (sctp_outq_is_empty(&asoc->outqueue)) { event = sctp_ulpevent_make_sender_dry_event(asoc, GFP_USER | __GFP_NOWARN); if (!event) return -ENOMEM; asoc->stream.si->enqueue_event(&asoc->ulpq, event); } } return 0; } static int sctp_setsockopt_event(struct sock *sk, struct sctp_event *param, unsigned int optlen) { struct sctp_sock *sp = sctp_sk(sk); struct sctp_association *asoc; int retval = 0; if (optlen < sizeof(*param)) return -EINVAL; if (param->se_type < SCTP_SN_TYPE_BASE || param->se_type > SCTP_SN_TYPE_MAX) return -EINVAL; asoc = sctp_id2assoc(sk, param->se_assoc_id); if (!asoc && param->se_assoc_id > SCTP_ALL_ASSOC && sctp_style(sk, UDP)) return -EINVAL; if (asoc) return sctp_assoc_ulpevent_type_set(param, asoc); if (sctp_style(sk, TCP)) param->se_assoc_id = SCTP_FUTURE_ASSOC; if (param->se_assoc_id == SCTP_FUTURE_ASSOC || param->se_assoc_id == SCTP_ALL_ASSOC) sctp_ulpevent_type_set(&sp->subscribe, param->se_type, param->se_on); if (param->se_assoc_id == SCTP_CURRENT_ASSOC || param->se_assoc_id == SCTP_ALL_ASSOC) { list_for_each_entry(asoc, &sp->ep->asocs, asocs) { int ret = sctp_assoc_ulpevent_type_set(param, asoc); if (ret && !retval) retval = ret; } } return retval; } static int sctp_setsockopt_asconf_supported(struct sock *sk, struct sctp_assoc_value *params, unsigned int optlen) { struct sctp_association *asoc; struct sctp_endpoint *ep; int retval = -EINVAL; if (optlen != sizeof(*params)) goto out; asoc = sctp_id2assoc(sk, params->assoc_id); if (!asoc && params->assoc_id != SCTP_FUTURE_ASSOC && sctp_style(sk, UDP)) goto out; ep = sctp_sk(sk)->ep; ep->asconf_enable = !!params->assoc_value; if (ep->asconf_enable && ep->auth_enable) { sctp_auth_ep_add_chunkid(ep, SCTP_CID_ASCONF); sctp_auth_ep_add_chunkid(ep, SCTP_CID_ASCONF_ACK); } retval = 0; out: return retval; } static int sctp_setsockopt_auth_supported(struct sock *sk, struct sctp_assoc_value *params, unsigned int optlen) { struct sctp_association *asoc; struct sctp_endpoint *ep; int retval = -EINVAL; if (optlen != sizeof(*params)) goto out; asoc = sctp_id2assoc(sk, params->assoc_id); if (!asoc && params->assoc_id != SCTP_FUTURE_ASSOC && sctp_style(sk, UDP)) goto out; ep = sctp_sk(sk)->ep; if (params->assoc_value) { retval = sctp_auth_init(ep, GFP_KERNEL); if (retval) goto out; if (ep->asconf_enable) { sctp_auth_ep_add_chunkid(ep, SCTP_CID_ASCONF); sctp_auth_ep_add_chunkid(ep, SCTP_CID_ASCONF_ACK); } } ep->auth_enable = !!params->assoc_value; retval = 0; out: return retval; } static int sctp_setsockopt_ecn_supported(struct sock *sk, struct sctp_assoc_value *params, unsigned int optlen) { struct sctp_association *asoc; int retval = -EINVAL; if (optlen != sizeof(*params)) goto out; asoc = sctp_id2assoc(sk, params->assoc_id); if (!asoc && params->assoc_id != SCTP_FUTURE_ASSOC && sctp_style(sk, UDP)) goto out; sctp_sk(sk)->ep->ecn_enable = !!params->assoc_value; retval = 0; out: return retval; } static int sctp_setsockopt_pf_expose(struct sock *sk, struct sctp_assoc_value *params, unsigned int optlen) { struct sctp_association *asoc; int retval = -EINVAL; if (optlen != sizeof(*params)) goto out; if (params->assoc_value > SCTP_PF_EXPOSE_MAX) goto out; asoc = sctp_id2assoc(sk, params->assoc_id); if (!asoc && params->assoc_id != SCTP_FUTURE_ASSOC && sctp_style(sk, UDP)) goto out; if (asoc) asoc->pf_expose = params->assoc_value; else sctp_sk(sk)->pf_expose = params->assoc_value; retval = 0; out: return retval; } static int sctp_setsockopt_encap_port(struct sock *sk, struct sctp_udpencaps *encap, unsigned int optlen) { struct sctp_association *asoc; struct sctp_transport *t; __be16 encap_port; if (optlen != sizeof(*encap)) return -EINVAL; /* If an address other than INADDR_ANY is specified, and * no transport is found, then the request is invalid. */ encap_port = (__force __be16)encap->sue_port; if (!sctp_is_any(sk, (union sctp_addr *)&encap->sue_address)) { t = sctp_addr_id2transport(sk, &encap->sue_address, encap->sue_assoc_id); if (!t) return -EINVAL; t->encap_port = encap_port; return 0; } /* Get association, if assoc_id != SCTP_FUTURE_ASSOC and the * socket is a one to many style socket, and an association * was not found, then the id was invalid. */ asoc = sctp_id2assoc(sk, encap->sue_assoc_id); if (!asoc && encap->sue_assoc_id != SCTP_FUTURE_ASSOC && sctp_style(sk, UDP)) return -EINVAL; /* If changes are for association, also apply encap_port to * each transport. */ if (asoc) { list_for_each_entry(t, &asoc->peer.transport_addr_list, transports) t->encap_port = encap_port; asoc->encap_port = encap_port; return 0; } sctp_sk(sk)->encap_port = encap_port; return 0; } static int sctp_setsockopt_probe_interval(struct sock *sk, struct sctp_probeinterval *params, unsigned int optlen) { struct sctp_association *asoc; struct sctp_transport *t; __u32 probe_interval; if (optlen != sizeof(*params)) return -EINVAL; probe_interval = params->spi_interval; if (probe_interval && probe_interval < SCTP_PROBE_TIMER_MIN) return -EINVAL; /* If an address other than INADDR_ANY is specified, and * no transport is found, then the request is invalid. */ if (!sctp_is_any(sk, (union sctp_addr *)&params->spi_address)) { t = sctp_addr_id2transport(sk, &params->spi_address, params->spi_assoc_id); if (!t) return -EINVAL; t->probe_interval = msecs_to_jiffies(probe_interval); sctp_transport_pl_reset(t); return 0; } /* Get association, if assoc_id != SCTP_FUTURE_ASSOC and the * socket is a one to many style socket, and an association * was not found, then the id was invalid. */ asoc = sctp_id2assoc(sk, params->spi_assoc_id); if (!asoc && params->spi_assoc_id != SCTP_FUTURE_ASSOC && sctp_style(sk, UDP)) return -EINVAL; /* If changes are for association, also apply probe_interval to * each transport. */ if (asoc) { list_for_each_entry(t, &asoc->peer.transport_addr_list, transports) { t->probe_interval = msecs_to_jiffies(probe_interval); sctp_transport_pl_reset(t); } asoc->probe_interval = msecs_to_jiffies(probe_interval); return 0; } sctp_sk(sk)->probe_interval = probe_interval; return 0; } /* API 6.2 setsockopt(), getsockopt() * * Applications use setsockopt() and getsockopt() to set or retrieve * socket options. Socket options are used to change the default * behavior of sockets calls. They are described in Section 7. * * The syntax is: * * ret = getsockopt(int sd, int level, int optname, void __user *optval, * int __user *optlen); * ret = setsockopt(int sd, int level, int optname, const void __user *optval, * int optlen); * * sd - the socket descript. * level - set to IPPROTO_SCTP for all SCTP options. * optname - the option name. * optval - the buffer to store the value of the option. * optlen - the size of the buffer. */ static int sctp_setsockopt(struct sock *sk, int level, int optname, sockptr_t optval, unsigned int optlen) { void *kopt = NULL; int retval = 0; pr_debug("%s: sk:%p, optname:%d\n", __func__, sk, optname); /* I can hardly begin to describe how wrong this is. This is * so broken as to be worse than useless. The API draft * REALLY is NOT helpful here... I am not convinced that the * semantics of setsockopt() with a level OTHER THAN SOL_SCTP * are at all well-founded. */ if (level != SOL_SCTP) { struct sctp_af *af = sctp_sk(sk)->pf->af; return af->setsockopt(sk, level, optname, optval, optlen); } if (optlen > 0) { /* Trim it to the biggest size sctp sockopt may need if necessary */ optlen = min_t(unsigned int, optlen, PAGE_ALIGN(USHRT_MAX + sizeof(__u16) * sizeof(struct sctp_reset_streams))); kopt = memdup_sockptr(optval, optlen); if (IS_ERR(kopt)) return PTR_ERR(kopt); } lock_sock(sk); switch (optname) { case SCTP_SOCKOPT_BINDX_ADD: /* 'optlen' is the size of the addresses buffer. */ retval = sctp_setsockopt_bindx(sk, kopt, optlen, SCTP_BINDX_ADD_ADDR); break; case SCTP_SOCKOPT_BINDX_REM: /* 'optlen' is the size of the addresses buffer. */ retval = sctp_setsockopt_bindx(sk, kopt, optlen, SCTP_BINDX_REM_ADDR); break; case SCTP_SOCKOPT_CONNECTX_OLD: /* 'optlen' is the size of the addresses buffer. */ retval = sctp_setsockopt_connectx_old(sk, kopt, optlen); break; case SCTP_SOCKOPT_CONNECTX: /* 'optlen' is the size of the addresses buffer. */ retval = sctp_setsockopt_connectx(sk, kopt, optlen); break; case SCTP_DISABLE_FRAGMENTS: retval = sctp_setsockopt_disable_fragments(sk, kopt, optlen); break; case SCTP_EVENTS: retval = sctp_setsockopt_events(sk, kopt, optlen); break; case SCTP_AUTOCLOSE: retval = sctp_setsockopt_autoclose(sk, kopt, optlen); break; case SCTP_PEER_ADDR_PARAMS: retval = sctp_setsockopt_peer_addr_params(sk, kopt, optlen); break; case SCTP_DELAYED_SACK: retval = sctp_setsockopt_delayed_ack(sk, kopt, optlen); break; case SCTP_PARTIAL_DELIVERY_POINT: retval = sctp_setsockopt_partial_delivery_point(sk, kopt, optlen); break; case SCTP_INITMSG: retval = sctp_setsockopt_initmsg(sk, kopt, optlen); break; case SCTP_DEFAULT_SEND_PARAM: retval = sctp_setsockopt_default_send_param(sk, kopt, optlen); break; case SCTP_DEFAULT_SNDINFO: retval = sctp_setsockopt_default_sndinfo(sk, kopt, optlen); break; case SCTP_PRIMARY_ADDR: retval = sctp_setsockopt_primary_addr(sk, kopt, optlen); break; case SCTP_SET_PEER_PRIMARY_ADDR: retval = sctp_setsockopt_peer_primary_addr(sk, kopt, optlen); break; case SCTP_NODELAY: retval = sctp_setsockopt_nodelay(sk, kopt, optlen); break; case SCTP_RTOINFO: retval = sctp_setsockopt_rtoinfo(sk, kopt, optlen); break; case SCTP_ASSOCINFO: retval = sctp_setsockopt_associnfo(sk, kopt, optlen); break; case SCTP_I_WANT_MAPPED_V4_ADDR: retval = sctp_setsockopt_mappedv4(sk, kopt, optlen); break; case SCTP_MAXSEG: retval = sctp_setsockopt_maxseg(sk, kopt, optlen); break; case SCTP_ADAPTATION_LAYER: retval = sctp_setsockopt_adaptation_layer(sk, kopt, optlen); break; case SCTP_CONTEXT: retval = sctp_setsockopt_context(sk, kopt, optlen); break; case SCTP_FRAGMENT_INTERLEAVE: retval = sctp_setsockopt_fragment_interleave(sk, kopt, optlen); break; case SCTP_MAX_BURST: retval = sctp_setsockopt_maxburst(sk, kopt, optlen); break; case SCTP_AUTH_CHUNK: retval = sctp_setsockopt_auth_chunk(sk, kopt, optlen); break; case SCTP_HMAC_IDENT: retval = sctp_setsockopt_hmac_ident(sk, kopt, optlen); break; case SCTP_AUTH_KEY: retval = sctp_setsockopt_auth_key(sk, kopt, optlen); break; case SCTP_AUTH_ACTIVE_KEY: retval = sctp_setsockopt_active_key(sk, kopt, optlen); break; case SCTP_AUTH_DELETE_KEY: retval = sctp_setsockopt_del_key(sk, kopt, optlen); break; case SCTP_AUTH_DEACTIVATE_KEY: retval = sctp_setsockopt_deactivate_key(sk, kopt, optlen); break; case SCTP_AUTO_ASCONF: retval = sctp_setsockopt_auto_asconf(sk, kopt, optlen); break; case SCTP_PEER_ADDR_THLDS: retval = sctp_setsockopt_paddr_thresholds(sk, kopt, optlen, false); break; case SCTP_PEER_ADDR_THLDS_V2: retval = sctp_setsockopt_paddr_thresholds(sk, kopt, optlen, true); break; case SCTP_RECVRCVINFO: retval = sctp_setsockopt_recvrcvinfo(sk, kopt, optlen); break; case SCTP_RECVNXTINFO: retval = sctp_setsockopt_recvnxtinfo(sk, kopt, optlen); break; case SCTP_PR_SUPPORTED: retval = sctp_setsockopt_pr_supported(sk, kopt, optlen); break; case SCTP_DEFAULT_PRINFO: retval = sctp_setsockopt_default_prinfo(sk, kopt, optlen); break; case SCTP_RECONFIG_SUPPORTED: retval = sctp_setsockopt_reconfig_supported(sk, kopt, optlen); break; case SCTP_ENABLE_STREAM_RESET: retval = sctp_setsockopt_enable_strreset(sk, kopt, optlen); break; case SCTP_RESET_STREAMS: retval = sctp_setsockopt_reset_streams(sk, kopt, optlen); break; case SCTP_RESET_ASSOC: retval = sctp_setsockopt_reset_assoc(sk, kopt, optlen); break; case SCTP_ADD_STREAMS: retval = sctp_setsockopt_add_streams(sk, kopt, optlen); break; case SCTP_STREAM_SCHEDULER: retval = sctp_setsockopt_scheduler(sk, kopt, optlen); break; case SCTP_STREAM_SCHEDULER_VALUE: retval = sctp_setsockopt_scheduler_value(sk, kopt, optlen); break; case SCTP_INTERLEAVING_SUPPORTED: retval = sctp_setsockopt_interleaving_supported(sk, kopt, optlen); break; case SCTP_REUSE_PORT: retval = sctp_setsockopt_reuse_port(sk, kopt, optlen); break; case SCTP_EVENT: retval = sctp_setsockopt_event(sk, kopt, optlen); break; case SCTP_ASCONF_SUPPORTED: retval = sctp_setsockopt_asconf_supported(sk, kopt, optlen); break; case SCTP_AUTH_SUPPORTED: retval = sctp_setsockopt_auth_supported(sk, kopt, optlen); break; case SCTP_ECN_SUPPORTED: retval = sctp_setsockopt_ecn_supported(sk, kopt, optlen); break; case SCTP_EXPOSE_POTENTIALLY_FAILED_STATE: retval = sctp_setsockopt_pf_expose(sk, kopt, optlen); break; case SCTP_REMOTE_UDP_ENCAPS_PORT: retval = sctp_setsockopt_encap_port(sk, kopt, optlen); break; case SCTP_PLPMTUD_PROBE_INTERVAL: retval = sctp_setsockopt_probe_interval(sk, kopt, optlen); break; default: retval = -ENOPROTOOPT; break; } release_sock(sk); kfree(kopt); return retval; } /* API 3.1.6 connect() - UDP Style Syntax * * An application may use the connect() call in the UDP model to initiate an * association without sending data. * * The syntax is: * * ret = connect(int sd, const struct sockaddr *nam, socklen_t len); * * sd: the socket descriptor to have a new association added to. * * nam: the address structure (either struct sockaddr_in or struct * sockaddr_in6 defined in RFC2553 [7]). * * len: the size of the address. */ static int sctp_connect(struct sock *sk, struct sockaddr *addr, int addr_len, int flags) { struct sctp_af *af; int err = -EINVAL; lock_sock(sk); pr_debug("%s: sk:%p, sockaddr:%p, addr_len:%d\n", __func__, sk, addr, addr_len); /* Validate addr_len before calling common connect/connectx routine. */ af = sctp_get_af_specific(addr->sa_family); if (af && addr_len >= af->sockaddr_len) err = __sctp_connect(sk, addr, af->sockaddr_len, flags, NULL); release_sock(sk); return err; } int sctp_inet_connect(struct socket *sock, struct sockaddr *uaddr, int addr_len, int flags) { if (addr_len < sizeof(uaddr->sa_family)) return -EINVAL; if (uaddr->sa_family == AF_UNSPEC) return -EOPNOTSUPP; return sctp_connect(sock->sk, uaddr, addr_len, flags); } /* Only called when shutdown a listening SCTP socket. */ static int sctp_disconnect(struct sock *sk, int flags) { if (!sctp_style(sk, TCP)) return -EOPNOTSUPP; sk->sk_shutdown |= RCV_SHUTDOWN; return 0; } /* 4.1.4 accept() - TCP Style Syntax * * Applications use accept() call to remove an established SCTP * association from the accept queue of the endpoint. A new socket * descriptor will be returned from accept() to represent the newly * formed association. */ static struct sock *sctp_accept(struct sock *sk, struct proto_accept_arg *arg) { struct sctp_sock *sp; struct sctp_endpoint *ep; struct sock *newsk = NULL; struct sctp_association *asoc; long timeo; int error = 0; lock_sock(sk); sp = sctp_sk(sk); ep = sp->ep; if (!sctp_style(sk, TCP)) { error = -EOPNOTSUPP; goto out; } if (!sctp_sstate(sk, LISTENING) || (sk->sk_shutdown & RCV_SHUTDOWN)) { error = -EINVAL; goto out; } timeo = sock_rcvtimeo(sk, arg->flags & O_NONBLOCK); error = sctp_wait_for_accept(sk, timeo); if (error) goto out; /* We treat the list of associations on the endpoint as the accept * queue and pick the first association on the list. */ asoc = list_entry(ep->asocs.next, struct sctp_association, asocs); newsk = sp->pf->create_accept_sk(sk, asoc, arg->kern); if (!newsk) { error = -ENOMEM; goto out; } /* Populate the fields of the newsk from the oldsk and migrate the * asoc to the newsk. */ error = sctp_sock_migrate(sk, newsk, asoc, SCTP_SOCKET_TCP); if (error) { sk_common_release(newsk); newsk = NULL; } out: release_sock(sk); arg->err = error; return newsk; } /* The SCTP ioctl handler. */ static int sctp_ioctl(struct sock *sk, int cmd, int *karg) { int rc = -ENOTCONN; lock_sock(sk); /* * SEQPACKET-style sockets in LISTENING state are valid, for * SCTP, so only discard TCP-style sockets in LISTENING state. */ if (sctp_style(sk, TCP) && sctp_sstate(sk, LISTENING)) goto out; switch (cmd) { case SIOCINQ: { struct sk_buff *skb; *karg = 0; skb = skb_peek(&sk->sk_receive_queue); if (skb != NULL) { /* * We will only return the amount of this packet since * that is all that will be read. */ *karg = skb->len; } rc = 0; break; } default: rc = -ENOIOCTLCMD; break; } out: release_sock(sk); return rc; } /* This is the function which gets called during socket creation to * initialized the SCTP-specific portion of the sock. * The sock structure should already be zero-filled memory. */ static int sctp_init_sock(struct sock *sk) { struct net *net = sock_net(sk); struct sctp_sock *sp; pr_debug("%s: sk:%p\n", __func__, sk); sp = sctp_sk(sk); /* Initialize the SCTP per socket area. */ switch (sk->sk_type) { case SOCK_SEQPACKET: sp->type = SCTP_SOCKET_UDP; break; case SOCK_STREAM: sp->type = SCTP_SOCKET_TCP; break; default: return -ESOCKTNOSUPPORT; } sk->sk_gso_type = SKB_GSO_SCTP; /* Initialize default send parameters. These parameters can be * modified with the SCTP_DEFAULT_SEND_PARAM socket option. */ sp->default_stream = 0; sp->default_ppid = 0; sp->default_flags = 0; sp->default_context = 0; sp->default_timetolive = 0; sp->default_rcv_context = 0; sp->max_burst = net->sctp.max_burst; sp->sctp_hmac_alg = net->sctp.sctp_hmac_alg; /* Initialize default setup parameters. These parameters * can be modified with the SCTP_INITMSG socket option or * overridden by the SCTP_INIT CMSG. */ sp->initmsg.sinit_num_ostreams = sctp_max_outstreams; sp->initmsg.sinit_max_instreams = sctp_max_instreams; sp->initmsg.sinit_max_attempts = net->sctp.max_retrans_init; sp->initmsg.sinit_max_init_timeo = net->sctp.rto_max; /* Initialize default RTO related parameters. These parameters can * be modified for with the SCTP_RTOINFO socket option. */ sp->rtoinfo.srto_initial = net->sctp.rto_initial; sp->rtoinfo.srto_max = net->sctp.rto_max; sp->rtoinfo.srto_min = net->sctp.rto_min; /* Initialize default association related parameters. These parameters * can be modified with the SCTP_ASSOCINFO socket option. */ sp->assocparams.sasoc_asocmaxrxt = net->sctp.max_retrans_association; sp->assocparams.sasoc_number_peer_destinations = 0; sp->assocparams.sasoc_peer_rwnd = 0; sp->assocparams.sasoc_local_rwnd = 0; sp->assocparams.sasoc_cookie_life = net->sctp.valid_cookie_life; /* Initialize default event subscriptions. By default, all the * options are off. */ sp->subscribe = 0; /* Default Peer Address Parameters. These defaults can * be modified via SCTP_PEER_ADDR_PARAMS */ sp->hbinterval = net->sctp.hb_interval; sp->udp_port = htons(net->sctp.udp_port); sp->encap_port = htons(net->sctp.encap_port); sp->pathmaxrxt = net->sctp.max_retrans_path; sp->pf_retrans = net->sctp.pf_retrans; sp->ps_retrans = net->sctp.ps_retrans; sp->pf_expose = net->sctp.pf_expose; sp->pathmtu = 0; /* allow default discovery */ sp->sackdelay = net->sctp.sack_timeout; sp->sackfreq = 2; sp->param_flags = SPP_HB_ENABLE | SPP_PMTUD_ENABLE | SPP_SACKDELAY_ENABLE; sp->default_ss = SCTP_SS_DEFAULT; /* If enabled no SCTP message fragmentation will be performed. * Configure through SCTP_DISABLE_FRAGMENTS socket option. */ sp->disable_fragments = 0; /* Enable Nagle algorithm by default. */ sp->nodelay = 0; sp->recvrcvinfo = 0; sp->recvnxtinfo = 0; /* Enable by default. */ sp->v4mapped = 1; /* Auto-close idle associations after the configured * number of seconds. A value of 0 disables this * feature. Configure through the SCTP_AUTOCLOSE socket option, * for UDP-style sockets only. */ sp->autoclose = 0; /* User specified fragmentation limit. */ sp->user_frag = 0; sp->adaptation_ind = 0; sp->pf = sctp_get_pf_specific(sk->sk_family); /* Control variables for partial data delivery. */ atomic_set(&sp->pd_mode, 0); skb_queue_head_init(&sp->pd_lobby); sp->frag_interleave = 0; sp->probe_interval = net->sctp.probe_interval; /* Create a per socket endpoint structure. Even if we * change the data structure relationships, this may still * be useful for storing pre-connect address information. */ sp->ep = sctp_endpoint_new(sk, GFP_KERNEL); if (!sp->ep) return -ENOMEM; sp->hmac = NULL; sk->sk_destruct = sctp_destruct_sock; SCTP_DBG_OBJCNT_INC(sock); sk_sockets_allocated_inc(sk); sock_prot_inuse_add(net, sk->sk_prot, 1); return 0; } /* Cleanup any SCTP per socket resources. Must be called with * sock_net(sk)->sctp.addr_wq_lock held if sp->do_auto_asconf is true */ static void sctp_destroy_sock(struct sock *sk) { struct sctp_sock *sp; pr_debug("%s: sk:%p\n", __func__, sk); /* Release our hold on the endpoint. */ sp = sctp_sk(sk); /* This could happen during socket init, thus we bail out * early, since the rest of the below is not setup either. */ if (sp->ep == NULL) return; if (sp->do_auto_asconf) { sp->do_auto_asconf = 0; list_del(&sp->auto_asconf_list); } sctp_endpoint_free(sp->ep); sk_sockets_allocated_dec(sk); sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); } /* Triggered when there are no references on the socket anymore */ static void sctp_destruct_common(struct sock *sk) { struct sctp_sock *sp = sctp_sk(sk); /* Free up the HMAC transform. */ crypto_free_shash(sp->hmac); } static void sctp_destruct_sock(struct sock *sk) { sctp_destruct_common(sk); inet_sock_destruct(sk); } /* API 4.1.7 shutdown() - TCP Style Syntax * int shutdown(int socket, int how); * * sd - the socket descriptor of the association to be closed. * how - Specifies the type of shutdown. The values are * as follows: * SHUT_RD * Disables further receive operations. No SCTP * protocol action is taken. * SHUT_WR * Disables further send operations, and initiates * the SCTP shutdown sequence. * SHUT_RDWR * Disables further send and receive operations * and initiates the SCTP shutdown sequence. */ static void sctp_shutdown(struct sock *sk, int how) { struct net *net = sock_net(sk); struct sctp_endpoint *ep; if (!sctp_style(sk, TCP)) return; ep = sctp_sk(sk)->ep; if (how & SEND_SHUTDOWN && !list_empty(&ep->asocs)) { struct sctp_association *asoc; inet_sk_set_state(sk, SCTP_SS_CLOSING); asoc = list_entry(ep->asocs.next, struct sctp_association, asocs); sctp_primitive_SHUTDOWN(net, asoc, NULL); } } int sctp_get_sctp_info(struct sock *sk, struct sctp_association *asoc, struct sctp_info *info) { struct sctp_transport *prim; struct list_head *pos; int mask; memset(info, 0, sizeof(*info)); if (!asoc) { struct sctp_sock *sp = sctp_sk(sk); info->sctpi_s_autoclose = sp->autoclose; info->sctpi_s_adaptation_ind = sp->adaptation_ind; info->sctpi_s_pd_point = sp->pd_point; info->sctpi_s_nodelay = sp->nodelay; info->sctpi_s_disable_fragments = sp->disable_fragments; info->sctpi_s_v4mapped = sp->v4mapped; info->sctpi_s_frag_interleave = sp->frag_interleave; info->sctpi_s_type = sp->type; return 0; } info->sctpi_tag = asoc->c.my_vtag; info->sctpi_state = asoc->state; info->sctpi_rwnd = asoc->a_rwnd; info->sctpi_unackdata = asoc->unack_data; info->sctpi_penddata = sctp_tsnmap_pending(&asoc->peer.tsn_map); info->sctpi_instrms = asoc->stream.incnt; info->sctpi_outstrms = asoc->stream.outcnt; list_for_each(pos, &asoc->base.inqueue.in_chunk_list) info->sctpi_inqueue++; list_for_each(pos, &asoc->outqueue.out_chunk_list) info->sctpi_outqueue++; info->sctpi_overall_error = asoc->overall_error_count; info->sctpi_max_burst = asoc->max_burst; info->sctpi_maxseg = asoc->frag_point; info->sctpi_peer_rwnd = asoc->peer.rwnd; info->sctpi_peer_tag = asoc->c.peer_vtag; mask = asoc->peer.intl_capable << 1; mask = (mask | asoc->peer.ecn_capable) << 1; mask = (mask | asoc->peer.ipv4_address) << 1; mask = (mask | asoc->peer.ipv6_address) << 1; mask = (mask | asoc->peer.reconf_capable) << 1; mask = (mask | asoc->peer.asconf_capable) << 1; mask = (mask | asoc->peer.prsctp_capable) << 1; mask = (mask | asoc->peer.auth_capable); info->sctpi_peer_capable = mask; mask = asoc->peer.sack_needed << 1; mask = (mask | asoc->peer.sack_generation) << 1; mask = (mask | asoc->peer.zero_window_announced); info->sctpi_peer_sack = mask; info->sctpi_isacks = asoc->stats.isacks; info->sctpi_osacks = asoc->stats.osacks; info->sctpi_opackets = asoc->stats.opackets; info->sctpi_ipackets = asoc->stats.ipackets; info->sctpi_rtxchunks = asoc->stats.rtxchunks; info->sctpi_outofseqtsns = asoc->stats.outofseqtsns; info->sctpi_idupchunks = asoc->stats.idupchunks; info->sctpi_gapcnt = asoc->stats.gapcnt; info->sctpi_ouodchunks = asoc->stats.ouodchunks; info->sctpi_iuodchunks = asoc->stats.iuodchunks; info->sctpi_oodchunks = asoc->stats.oodchunks; info->sctpi_iodchunks = asoc->stats.iodchunks; info->sctpi_octrlchunks = asoc->stats.octrlchunks; info->sctpi_ictrlchunks = asoc->stats.ictrlchunks; prim = asoc->peer.primary_path; memcpy(&info->sctpi_p_address, &prim->ipaddr, sizeof(prim->ipaddr)); info->sctpi_p_state = prim->state; info->sctpi_p_cwnd = prim->cwnd; info->sctpi_p_srtt = prim->srtt; info->sctpi_p_rto = jiffies_to_msecs(prim->rto); info->sctpi_p_hbinterval = prim->hbinterval; info->sctpi_p_pathmaxrxt = prim->pathmaxrxt; info->sctpi_p_sackdelay = jiffies_to_msecs(prim->sackdelay); info->sctpi_p_ssthresh = prim->ssthresh; info->sctpi_p_partial_bytes_acked = prim->partial_bytes_acked; info->sctpi_p_flight_size = prim->flight_size; info->sctpi_p_error = prim->error_count; return 0; } EXPORT_SYMBOL_GPL(sctp_get_sctp_info); /* use callback to avoid exporting the core structure */ void sctp_transport_walk_start(struct rhashtable_iter *iter) __acquires(RCU) { rhltable_walk_enter(&sctp_transport_hashtable, iter); rhashtable_walk_start(iter); } void sctp_transport_walk_stop(struct rhashtable_iter *iter) __releases(RCU) { rhashtable_walk_stop(iter); rhashtable_walk_exit(iter); } struct sctp_transport *sctp_transport_get_next(struct net *net, struct rhashtable_iter *iter) { struct sctp_transport *t; t = rhashtable_walk_next(iter); for (; t; t = rhashtable_walk_next(iter)) { if (IS_ERR(t)) { if (PTR_ERR(t) == -EAGAIN) continue; break; } if (!sctp_transport_hold(t)) continue; if (net_eq(t->asoc->base.net, net) && t->asoc->peer.primary_path == t) break; sctp_transport_put(t); } return t; } struct sctp_transport *sctp_transport_get_idx(struct net *net, struct rhashtable_iter *iter, int pos) { struct sctp_transport *t; if (!pos) return SEQ_START_TOKEN; while ((t = sctp_transport_get_next(net, iter)) && !IS_ERR(t)) { if (!--pos) break; sctp_transport_put(t); } return t; } int sctp_for_each_endpoint(int (*cb)(struct sctp_endpoint *, void *), void *p) { int err = 0; int hash = 0; struct sctp_endpoint *ep; struct sctp_hashbucket *head; for (head = sctp_ep_hashtable; hash < sctp_ep_hashsize; hash++, head++) { read_lock_bh(&head->lock); sctp_for_each_hentry(ep, &head->chain) { err = cb(ep, p); if (err) break; } read_unlock_bh(&head->lock); } return err; } EXPORT_SYMBOL_GPL(sctp_for_each_endpoint); int sctp_transport_lookup_process(sctp_callback_t cb, struct net *net, const union sctp_addr *laddr, const union sctp_addr *paddr, void *p, int dif) { struct sctp_transport *transport; struct sctp_endpoint *ep; int err = -ENOENT; rcu_read_lock(); transport = sctp_addrs_lookup_transport(net, laddr, paddr, dif, dif); if (!transport) { rcu_read_unlock(); return err; } ep = transport->asoc->ep; if (!sctp_endpoint_hold(ep)) { /* asoc can be peeled off */ sctp_transport_put(transport); rcu_read_unlock(); return err; } rcu_read_unlock(); err = cb(ep, transport, p); sctp_endpoint_put(ep); sctp_transport_put(transport); return err; } EXPORT_SYMBOL_GPL(sctp_transport_lookup_process); int sctp_transport_traverse_process(sctp_callback_t cb, sctp_callback_t cb_done, struct net *net, int *pos, void *p) { struct rhashtable_iter hti; struct sctp_transport *tsp; struct sctp_endpoint *ep; int ret; again: ret = 0; sctp_transport_walk_start(&hti); tsp = sctp_transport_get_idx(net, &hti, *pos + 1); for (; !IS_ERR_OR_NULL(tsp); tsp = sctp_transport_get_next(net, &hti)) { ep = tsp->asoc->ep; if (sctp_endpoint_hold(ep)) { /* asoc can be peeled off */ ret = cb(ep, tsp, p); if (ret) break; sctp_endpoint_put(ep); } (*pos)++; sctp_transport_put(tsp); } sctp_transport_walk_stop(&hti); if (ret) { if (cb_done && !cb_done(ep, tsp, p)) { (*pos)++; sctp_endpoint_put(ep); sctp_transport_put(tsp); goto again; } sctp_endpoint_put(ep); sctp_transport_put(tsp); } return ret; } EXPORT_SYMBOL_GPL(sctp_transport_traverse_process); /* 7.2.1 Association Status (SCTP_STATUS) * Applications can retrieve current status information about an * association, including association state, peer receiver window size, * number of unacked data chunks, and number of data chunks pending * receipt. This information is read-only. */ static int sctp_getsockopt_sctp_status(struct sock *sk, int len, char __user *optval, int __user *optlen) { struct sctp_status status; struct sctp_association *asoc = NULL; struct sctp_transport *transport; sctp_assoc_t associd; int retval = 0; if (len < sizeof(status)) { retval = -EINVAL; goto out; } len = sizeof(status); if (copy_from_user(&status, optval, len)) { retval = -EFAULT; goto out; } associd = status.sstat_assoc_id; asoc = sctp_id2assoc(sk, associd); if (!asoc) { retval = -EINVAL; goto out; } transport = asoc->peer.primary_path; status.sstat_assoc_id = sctp_assoc2id(asoc); status.sstat_state = sctp_assoc_to_state(asoc); status.sstat_rwnd = asoc->peer.rwnd; status.sstat_unackdata = asoc->unack_data; status.sstat_penddata = sctp_tsnmap_pending(&asoc->peer.tsn_map); status.sstat_instrms = asoc->stream.incnt; status.sstat_outstrms = asoc->stream.outcnt; status.sstat_fragmentation_point = asoc->frag_point; status.sstat_primary.spinfo_assoc_id = sctp_assoc2id(transport->asoc); memcpy(&status.sstat_primary.spinfo_address, &transport->ipaddr, transport->af_specific->sockaddr_len); /* Map ipv4 address into v4-mapped-on-v6 address. */ sctp_get_pf_specific(sk->sk_family)->addr_to_user(sctp_sk(sk), (union sctp_addr *)&status.sstat_primary.spinfo_address); status.sstat_primary.spinfo_state = transport->state; status.sstat_primary.spinfo_cwnd = transport->cwnd; status.sstat_primary.spinfo_srtt = transport->srtt; status.sstat_primary.spinfo_rto = jiffies_to_msecs(transport->rto); status.sstat_primary.spinfo_mtu = transport->pathmtu; if (status.sstat_primary.spinfo_state == SCTP_UNKNOWN) status.sstat_primary.spinfo_state = SCTP_ACTIVE; if (put_user(len, optlen)) { retval = -EFAULT; goto out; } pr_debug("%s: len:%d, state:%d, rwnd:%d, assoc_id:%d\n", __func__, len, status.sstat_state, status.sstat_rwnd, status.sstat_assoc_id); if (copy_to_user(optval, &status, len)) { retval = -EFAULT; goto out; } out: return retval; } /* 7.2.2 Peer Address Information (SCTP_GET_PEER_ADDR_INFO) * * Applications can retrieve information about a specific peer address * of an association, including its reachability state, congestion * window, and retransmission timer values. This information is * read-only. */ static int sctp_getsockopt_peer_addr_info(struct sock *sk, int len, char __user *optval, int __user *optlen) { struct sctp_paddrinfo pinfo; struct sctp_transport *transport; int retval = 0; if (len < sizeof(pinfo)) { retval = -EINVAL; goto out; } len = sizeof(pinfo); if (copy_from_user(&pinfo, optval, len)) { retval = -EFAULT; goto out; } transport = sctp_addr_id2transport(sk, &pinfo.spinfo_address, pinfo.spinfo_assoc_id); if (!transport) { retval = -EINVAL; goto out; } if (transport->state == SCTP_PF && transport->asoc->pf_expose == SCTP_PF_EXPOSE_DISABLE) { retval = -EACCES; goto out; } pinfo.spinfo_assoc_id = sctp_assoc2id(transport->asoc); pinfo.spinfo_state = transport->state; pinfo.spinfo_cwnd = transport->cwnd; pinfo.spinfo_srtt = transport->srtt; pinfo.spinfo_rto = jiffies_to_msecs(transport->rto); pinfo.spinfo_mtu = transport->pathmtu; if (pinfo.spinfo_state == SCTP_UNKNOWN) pinfo.spinfo_state = SCTP_ACTIVE; if (put_user(len, optlen)) { retval = -EFAULT; goto out; } if (copy_to_user(optval, &pinfo, len)) { retval = -EFAULT; goto out; } out: return retval; } /* 7.1.12 Enable/Disable message fragmentation (SCTP_DISABLE_FRAGMENTS) * * This option is a on/off flag. If enabled no SCTP message * fragmentation will be performed. Instead if a message being sent * exceeds the current PMTU size, the message will NOT be sent and * instead a error will be indicated to the user. */ static int sctp_getsockopt_disable_fragments(struct sock *sk, int len, char __user *optval, int __user *optlen) { int val; if (len < sizeof(int)) return -EINVAL; len = sizeof(int); val = (sctp_sk(sk)->disable_fragments == 1); if (put_user(len, optlen)) return -EFAULT; if (copy_to_user(optval, &val, len)) return -EFAULT; return 0; } /* 7.1.15 Set notification and ancillary events (SCTP_EVENTS) * * This socket option is used to specify various notifications and * ancillary data the user wishes to receive. */ static int sctp_getsockopt_events(struct sock *sk, int len, char __user *optval, int __user *optlen) { struct sctp_event_subscribe subscribe; __u8 *sn_type = (__u8 *)&subscribe; int i; if (len == 0) return -EINVAL; if (len > sizeof(struct sctp_event_subscribe)) len = sizeof(struct sctp_event_subscribe); if (put_user(len, optlen)) return -EFAULT; for (i = 0; i < len; i++) sn_type[i] = sctp_ulpevent_type_enabled(sctp_sk(sk)->subscribe, SCTP_SN_TYPE_BASE + i); if (copy_to_user(optval, &subscribe, len)) return -EFAULT; return 0; } /* 7.1.8 Automatic Close of associations (SCTP_AUTOCLOSE) * * This socket option is applicable to the UDP-style socket only. When * set it will cause associations that are idle for more than the * specified number of seconds to automatically close. An association * being idle is defined an association that has NOT sent or received * user data. The special value of '0' indicates that no automatic * close of any associations should be performed. The option expects an * integer defining the number of seconds of idle time before an * association is closed. */ static int sctp_getsockopt_autoclose(struct sock *sk, int len, char __user *optval, int __user *optlen) { /* Applicable to UDP-style socket only */ if (sctp_style(sk, TCP)) return -EOPNOTSUPP; if (len < sizeof(int)) return -EINVAL; len = sizeof(int); if (put_user(len, optlen)) return -EFAULT; if (put_user(sctp_sk(sk)->autoclose, (int __user *)optval)) return -EFAULT; return 0; } /* Helper routine to branch off an association to a new socket. */ int sctp_do_peeloff(struct sock *sk, sctp_assoc_t id, struct socket **sockp) { struct sctp_association *asoc = sctp_id2assoc(sk, id); struct sctp_sock *sp = sctp_sk(sk); struct socket *sock; int err = 0; /* Do not peel off from one netns to another one. */ if (!net_eq(current->nsproxy->net_ns, sock_net(sk))) return -EINVAL; if (!asoc) return -EINVAL; /* An association cannot be branched off from an already peeled-off * socket, nor is this supported for tcp style sockets. */ if (!sctp_style(sk, UDP)) return -EINVAL; /* Create a new socket. */ err = sock_create(sk->sk_family, SOCK_SEQPACKET, IPPROTO_SCTP, &sock); if (err < 0) return err; sctp_copy_sock(sock->sk, sk, asoc); /* Make peeled-off sockets more like 1-1 accepted sockets. * Set the daddr and initialize id to something more random and also * copy over any ip options. */ sp->pf->to_sk_daddr(&asoc->peer.primary_addr, sock->sk); sp->pf->copy_ip_options(sk, sock->sk); /* Populate the fields of the newsk from the oldsk and migrate the * asoc to the newsk. */ err = sctp_sock_migrate(sk, sock->sk, asoc, SCTP_SOCKET_UDP_HIGH_BANDWIDTH); if (err) { sock_release(sock); sock = NULL; } *sockp = sock; return err; } EXPORT_SYMBOL(sctp_do_peeloff); static int sctp_getsockopt_peeloff_common(struct sock *sk, sctp_peeloff_arg_t *peeloff, struct file **newfile, unsigned flags) { struct socket *newsock; int retval; retval = sctp_do_peeloff(sk, peeloff->associd, &newsock); if (retval < 0) goto out; /* Map the socket to an unused fd that can be returned to the user. */ retval = get_unused_fd_flags(flags & SOCK_CLOEXEC); if (retval < 0) { sock_release(newsock); goto out; } *newfile = sock_alloc_file(newsock, 0, NULL); if (IS_ERR(*newfile)) { put_unused_fd(retval); retval = PTR_ERR(*newfile); *newfile = NULL; return retval; } pr_debug("%s: sk:%p, newsk:%p, sd:%d\n", __func__, sk, newsock->sk, retval); peeloff->sd = retval; if (flags & SOCK_NONBLOCK) (*newfile)->f_flags |= O_NONBLOCK; out: return retval; } static int sctp_getsockopt_peeloff(struct sock *sk, int len, char __user *optval, int __user *optlen) { sctp_peeloff_arg_t peeloff; struct file *newfile = NULL; int retval = 0; if (len < sizeof(sctp_peeloff_arg_t)) return -EINVAL; len = sizeof(sctp_peeloff_arg_t); if (copy_from_user(&peeloff, optval, len)) return -EFAULT; retval = sctp_getsockopt_peeloff_common(sk, &peeloff, &newfile, 0); if (retval < 0) goto out; /* Return the fd mapped to the new socket. */ if (put_user(len, optlen)) { fput(newfile); put_unused_fd(retval); return -EFAULT; } if (copy_to_user(optval, &peeloff, len)) { fput(newfile); put_unused_fd(retval); return -EFAULT; } fd_install(retval, newfile); out: return retval; } static int sctp_getsockopt_peeloff_flags(struct sock *sk, int len, char __user *optval, int __user *optlen) { sctp_peeloff_flags_arg_t peeloff; struct file *newfile = NULL; int retval = 0; if (len < sizeof(sctp_peeloff_flags_arg_t)) return -EINVAL; len = sizeof(sctp_peeloff_flags_arg_t); if (copy_from_user(&peeloff, optval, len)) return -EFAULT; retval = sctp_getsockopt_peeloff_common(sk, &peeloff.p_arg, &newfile, peeloff.flags); if (retval < 0) goto out; /* Return the fd mapped to the new socket. */ if (put_user(len, optlen)) { fput(newfile); put_unused_fd(retval); return -EFAULT; } if (copy_to_user(optval, &peeloff, len)) { fput(newfile); put_unused_fd(retval); return -EFAULT; } fd_install(retval, newfile); out: return retval; } /* 7.1.13 Peer Address Parameters (SCTP_PEER_ADDR_PARAMS) * * Applications can enable or disable heartbeats for any peer address of * an association, modify an address's heartbeat interval, force a * heartbeat to be sent immediately, and adjust the address's maximum * number of retransmissions sent before an address is considered * unreachable. The following structure is used to access and modify an * address's parameters: * * struct sctp_paddrparams { * sctp_assoc_t spp_assoc_id; * struct sockaddr_storage spp_address; * uint32_t spp_hbinterval; * uint16_t spp_pathmaxrxt; * uint32_t spp_pathmtu; * uint32_t spp_sackdelay; * uint32_t spp_flags; * }; * * spp_assoc_id - (one-to-many style socket) This is filled in the * application, and identifies the association for * this query. * spp_address - This specifies which address is of interest. * spp_hbinterval - This contains the value of the heartbeat interval, * in milliseconds. If a value of zero * is present in this field then no changes are to * be made to this parameter. * spp_pathmaxrxt - This contains the maximum number of * retransmissions before this address shall be * considered unreachable. If a value of zero * is present in this field then no changes are to * be made to this parameter. * spp_pathmtu - When Path MTU discovery is disabled the value * specified here will be the "fixed" path mtu. * Note that if the spp_address field is empty * then all associations on this address will * have this fixed path mtu set upon them. * * spp_sackdelay - When delayed sack is enabled, this value specifies * the number of milliseconds that sacks will be delayed * for. This value will apply to all addresses of an * association if the spp_address field is empty. Note * also, that if delayed sack is enabled and this * value is set to 0, no change is made to the last * recorded delayed sack timer value. * * spp_flags - These flags are used to control various features * on an association. The flag field may contain * zero or more of the following options. * * SPP_HB_ENABLE - Enable heartbeats on the * specified address. Note that if the address * field is empty all addresses for the association * have heartbeats enabled upon them. * * SPP_HB_DISABLE - Disable heartbeats on the * speicifed address. Note that if the address * field is empty all addresses for the association * will have their heartbeats disabled. Note also * that SPP_HB_ENABLE and SPP_HB_DISABLE are * mutually exclusive, only one of these two should * be specified. Enabling both fields will have * undetermined results. * * SPP_HB_DEMAND - Request a user initiated heartbeat * to be made immediately. * * SPP_PMTUD_ENABLE - This field will enable PMTU * discovery upon the specified address. Note that * if the address feild is empty then all addresses * on the association are effected. * * SPP_PMTUD_DISABLE - This field will disable PMTU * discovery upon the specified address. Note that * if the address feild is empty then all addresses * on the association are effected. Not also that * SPP_PMTUD_ENABLE and SPP_PMTUD_DISABLE are mutually * exclusive. Enabling both will have undetermined * results. * * SPP_SACKDELAY_ENABLE - Setting this flag turns * on delayed sack. The time specified in spp_sackdelay * is used to specify the sack delay for this address. Note * that if spp_address is empty then all addresses will * enable delayed sack and take on the sack delay * value specified in spp_sackdelay. * SPP_SACKDELAY_DISABLE - Setting this flag turns * off delayed sack. If the spp_address field is blank then * delayed sack is disabled for the entire association. Note * also that this field is mutually exclusive to * SPP_SACKDELAY_ENABLE, setting both will have undefined * results. * * SPP_IPV6_FLOWLABEL: Setting this flag enables the * setting of the IPV6 flow label value. The value is * contained in the spp_ipv6_flowlabel field. * Upon retrieval, this flag will be set to indicate that * the spp_ipv6_flowlabel field has a valid value returned. * If a specific destination address is set (in the * spp_address field), then the value returned is that of * the address. If just an association is specified (and * no address), then the association's default flow label * is returned. If neither an association nor a destination * is specified, then the socket's default flow label is * returned. For non-IPv6 sockets, this flag will be left * cleared. * * SPP_DSCP: Setting this flag enables the setting of the * Differentiated Services Code Point (DSCP) value * associated with either the association or a specific * address. The value is obtained in the spp_dscp field. * Upon retrieval, this flag will be set to indicate that * the spp_dscp field has a valid value returned. If a * specific destination address is set when called (in the * spp_address field), then that specific destination * address's DSCP value is returned. If just an association * is specified, then the association's default DSCP is * returned. If neither an association nor a destination is * specified, then the socket's default DSCP is returned. * * spp_ipv6_flowlabel * - This field is used in conjunction with the * SPP_IPV6_FLOWLABEL flag and contains the IPv6 flow label. * The 20 least significant bits are used for the flow * label. This setting has precedence over any IPv6-layer * setting. * * spp_dscp - This field is used in conjunction with the SPP_DSCP flag * and contains the DSCP. The 6 most significant bits are * used for the DSCP. This setting has precedence over any * IPv4- or IPv6- layer setting. */ static int sctp_getsockopt_peer_addr_params(struct sock *sk, int len, char __user *optval, int __user *optlen) { struct sctp_paddrparams params; struct sctp_transport *trans = NULL; struct sctp_association *asoc = NULL; struct sctp_sock *sp = sctp_sk(sk); if (len >= sizeof(params)) len = sizeof(params); else if (len >= ALIGN(offsetof(struct sctp_paddrparams, spp_ipv6_flowlabel), 4)) len = ALIGN(offsetof(struct sctp_paddrparams, spp_ipv6_flowlabel), 4); else return -EINVAL; if (copy_from_user(&params, optval, len)) return -EFAULT; /* If an address other than INADDR_ANY is specified, and * no transport is found, then the request is invalid. */ if (!sctp_is_any(sk, (union sctp_addr *)&params.spp_address)) { trans = sctp_addr_id2transport(sk, &params.spp_address, params.spp_assoc_id); if (!trans) { pr_debug("%s: failed no transport\n", __func__); return -EINVAL; } } /* Get association, if assoc_id != SCTP_FUTURE_ASSOC and the * socket is a one to many style socket, and an association * was not found, then the id was invalid. */ asoc = sctp_id2assoc(sk, params.spp_assoc_id); if (!asoc && params.spp_assoc_id != SCTP_FUTURE_ASSOC && sctp_style(sk, UDP)) { pr_debug("%s: failed no association\n", __func__); return -EINVAL; } if (trans) { /* Fetch transport values. */ params.spp_hbinterval = jiffies_to_msecs(trans->hbinterval); params.spp_pathmtu = trans->pathmtu; params.spp_pathmaxrxt = trans->pathmaxrxt; params.spp_sackdelay = jiffies_to_msecs(trans->sackdelay); /*draft-11 doesn't say what to return in spp_flags*/ params.spp_flags = trans->param_flags; if (trans->flowlabel & SCTP_FLOWLABEL_SET_MASK) { params.spp_ipv6_flowlabel = trans->flowlabel & SCTP_FLOWLABEL_VAL_MASK; params.spp_flags |= SPP_IPV6_FLOWLABEL; } if (trans->dscp & SCTP_DSCP_SET_MASK) { params.spp_dscp = trans->dscp & SCTP_DSCP_VAL_MASK; params.spp_flags |= SPP_DSCP; } } else if (asoc) { /* Fetch association values. */ params.spp_hbinterval = jiffies_to_msecs(asoc->hbinterval); params.spp_pathmtu = asoc->pathmtu; params.spp_pathmaxrxt = asoc->pathmaxrxt; params.spp_sackdelay = jiffies_to_msecs(asoc->sackdelay); /*draft-11 doesn't say what to return in spp_flags*/ params.spp_flags = asoc->param_flags; if (asoc->flowlabel & SCTP_FLOWLABEL_SET_MASK) { params.spp_ipv6_flowlabel = asoc->flowlabel & SCTP_FLOWLABEL_VAL_MASK; params.spp_flags |= SPP_IPV6_FLOWLABEL; } if (asoc->dscp & SCTP_DSCP_SET_MASK) { params.spp_dscp = asoc->dscp & SCTP_DSCP_VAL_MASK; params.spp_flags |= SPP_DSCP; } } else { /* Fetch socket values. */ params.spp_hbinterval = sp->hbinterval; params.spp_pathmtu = sp->pathmtu; params.spp_sackdelay = sp->sackdelay; params.spp_pathmaxrxt = sp->pathmaxrxt; /*draft-11 doesn't say what to return in spp_flags*/ params.spp_flags = sp->param_flags; if (sp->flowlabel & SCTP_FLOWLABEL_SET_MASK) { params.spp_ipv6_flowlabel = sp->flowlabel & SCTP_FLOWLABEL_VAL_MASK; params.spp_flags |= SPP_IPV6_FLOWLABEL; } if (sp->dscp & SCTP_DSCP_SET_MASK) { params.spp_dscp = sp->dscp & SCTP_DSCP_VAL_MASK; params.spp_flags |= SPP_DSCP; } } if (copy_to_user(optval, &params, len)) return -EFAULT; if (put_user(len, optlen)) return -EFAULT; return 0; } /* * 7.1.23. Get or set delayed ack timer (SCTP_DELAYED_SACK) * * This option will effect the way delayed acks are performed. This * option allows you to get or set the delayed ack time, in * milliseconds. It also allows changing the delayed ack frequency. * Changing the frequency to 1 disables the delayed sack algorithm. If * the assoc_id is 0, then this sets or gets the endpoints default * values. If the assoc_id field is non-zero, then the set or get * effects the specified association for the one to many model (the * assoc_id field is ignored by the one to one model). Note that if * sack_delay or sack_freq are 0 when setting this option, then the * current values will remain unchanged. * * struct sctp_sack_info { * sctp_assoc_t sack_assoc_id; * uint32_t sack_delay; * uint32_t sack_freq; * }; * * sack_assoc_id - This parameter, indicates which association the user * is performing an action upon. Note that if this field's value is * zero then the endpoints default value is changed (effecting future * associations only). * * sack_delay - This parameter contains the number of milliseconds that * the user is requesting the delayed ACK timer be set to. Note that * this value is defined in the standard to be between 200 and 500 * milliseconds. * * sack_freq - This parameter contains the number of packets that must * be received before a sack is sent without waiting for the delay * timer to expire. The default value for this is 2, setting this * value to 1 will disable the delayed sack algorithm. */ static int sctp_getsockopt_delayed_ack(struct sock *sk, int len, char __user *optval, int __user *optlen) { struct sctp_sack_info params; struct sctp_association *asoc = NULL; struct sctp_sock *sp = sctp_sk(sk); if (len >= sizeof(struct sctp_sack_info)) { len = sizeof(struct sctp_sack_info); if (copy_from_user(&params, optval, len)) return -EFAULT; } else if (len == sizeof(struct sctp_assoc_value)) { pr_warn_ratelimited(DEPRECATED "%s (pid %d) " "Use of struct sctp_assoc_value in delayed_ack socket option.\n" "Use struct sctp_sack_info instead\n", current->comm, task_pid_nr(current)); if (copy_from_user(&params, optval, len)) return -EFAULT; } else return -EINVAL; /* Get association, if sack_assoc_id != SCTP_FUTURE_ASSOC and the * socket is a one to many style socket, and an association * was not found, then the id was invalid. */ asoc = sctp_id2assoc(sk, params.sack_assoc_id); if (!asoc && params.sack_assoc_id != SCTP_FUTURE_ASSOC && sctp_style(sk, UDP)) return -EINVAL; if (asoc) { /* Fetch association values. */ if (asoc->param_flags & SPP_SACKDELAY_ENABLE) { params.sack_delay = jiffies_to_msecs(asoc->sackdelay); params.sack_freq = asoc->sackfreq; } else { params.sack_delay = 0; params.sack_freq = 1; } } else { /* Fetch socket values. */ if (sp->param_flags & SPP_SACKDELAY_ENABLE) { params.sack_delay = sp->sackdelay; params.sack_freq = sp->sackfreq; } else { params.sack_delay = 0; params.sack_freq = 1; } } if (copy_to_user(optval, &params, len)) return -EFAULT; if (put_user(len, optlen)) return -EFAULT; return 0; } /* 7.1.3 Initialization Parameters (SCTP_INITMSG) * * Applications can specify protocol parameters for the default association * initialization. The option name argument to setsockopt() and getsockopt() * is SCTP_INITMSG. * * Setting initialization parameters is effective only on an unconnected * socket (for UDP-style sockets only future associations are effected * by the change). With TCP-style sockets, this option is inherited by * sockets derived from a listener socket. */ static int sctp_getsockopt_initmsg(struct sock *sk, int len, char __user *optval, int __user *optlen) { if (len < sizeof(struct sctp_initmsg)) return -EINVAL; len = sizeof(struct sctp_initmsg); if (put_user(len, optlen)) return -EFAULT; if (copy_to_user(optval, &sctp_sk(sk)->initmsg, len)) return -EFAULT; return 0; } static int sctp_getsockopt_peer_addrs(struct sock *sk, int len, char __user *optval, int __user *optlen) { struct sctp_association *asoc; int cnt = 0; struct sctp_getaddrs getaddrs; struct sctp_transport *from; void __user *to; union sctp_addr temp; struct sctp_sock *sp = sctp_sk(sk); int addrlen; size_t space_left; int bytes_copied; if (len < sizeof(struct sctp_getaddrs)) return -EINVAL; if (copy_from_user(&getaddrs, optval, sizeof(struct sctp_getaddrs))) return -EFAULT; /* For UDP-style sockets, id specifies the association to query. */ asoc = sctp_id2assoc(sk, getaddrs.assoc_id); if (!asoc) return -EINVAL; to = optval + offsetof(struct sctp_getaddrs, addrs); space_left = len - offsetof(struct sctp_getaddrs, addrs); list_for_each_entry(from, &asoc->peer.transport_addr_list, transports) { memcpy(&temp, &from->ipaddr, sizeof(temp)); addrlen = sctp_get_pf_specific(sk->sk_family) ->addr_to_user(sp, &temp); if (space_left < addrlen) return -ENOMEM; if (copy_to_user(to, &temp, addrlen)) return -EFAULT; to += addrlen; cnt++; space_left -= addrlen; } if (put_user(cnt, &((struct sctp_getaddrs __user *)optval)->addr_num)) return -EFAULT; bytes_copied = ((char __user *)to) - optval; if (put_user(bytes_copied, optlen)) return -EFAULT; return 0; } static int sctp_copy_laddrs(struct sock *sk, __u16 port, void *to, size_t space_left, int *bytes_copied) { struct sctp_sockaddr_entry *addr; union sctp_addr temp; int cnt = 0; int addrlen; struct net *net = sock_net(sk); rcu_read_lock(); list_for_each_entry_rcu(addr, &net->sctp.local_addr_list, list) { if (!addr->valid) continue; if ((PF_INET == sk->sk_family) && (AF_INET6 == addr->a.sa.sa_family)) continue; if ((PF_INET6 == sk->sk_family) && inet_v6_ipv6only(sk) && (AF_INET == addr->a.sa.sa_family)) continue; memcpy(&temp, &addr->a, sizeof(temp)); if (!temp.v4.sin_port) temp.v4.sin_port = htons(port); addrlen = sctp_get_pf_specific(sk->sk_family) ->addr_to_user(sctp_sk(sk), &temp); if (space_left < addrlen) { cnt = -ENOMEM; break; } memcpy(to, &temp, addrlen); to += addrlen; cnt++; space_left -= addrlen; *bytes_copied += addrlen; } rcu_read_unlock(); return cnt; } static int sctp_getsockopt_local_addrs(struct sock *sk, int len, char __user *optval, int __user *optlen) { struct sctp_bind_addr *bp; struct sctp_association *asoc; int cnt = 0; struct sctp_getaddrs getaddrs; struct sctp_sockaddr_entry *addr; void __user *to; union sctp_addr temp; struct sctp_sock *sp = sctp_sk(sk); int addrlen; int err = 0; size_t space_left; int bytes_copied = 0; void *addrs; void *buf; if (len < sizeof(struct sctp_getaddrs)) return -EINVAL; if (copy_from_user(&getaddrs, optval, sizeof(struct sctp_getaddrs))) return -EFAULT; /* * For UDP-style sockets, id specifies the association to query. * If the id field is set to the value '0' then the locally bound * addresses are returned without regard to any particular * association. */ if (0 == getaddrs.assoc_id) { bp = &sctp_sk(sk)->ep->base.bind_addr; } else { asoc = sctp_id2assoc(sk, getaddrs.assoc_id); if (!asoc) return -EINVAL; bp = &asoc->base.bind_addr; } to = optval + offsetof(struct sctp_getaddrs, addrs); space_left = len - offsetof(struct sctp_getaddrs, addrs); addrs = kmalloc(space_left, GFP_USER | __GFP_NOWARN); if (!addrs) return -ENOMEM; /* If the endpoint is bound to 0.0.0.0 or ::0, get the valid * addresses from the global local address list. */ if (sctp_list_single_entry(&bp->address_list)) { addr = list_entry(bp->address_list.next, struct sctp_sockaddr_entry, list); if (sctp_is_any(sk, &addr->a)) { cnt = sctp_copy_laddrs(sk, bp->port, addrs, space_left, &bytes_copied); if (cnt < 0) { err = cnt; goto out; } goto copy_getaddrs; } } buf = addrs; /* Protection on the bound address list is not needed since * in the socket option context we hold a socket lock and * thus the bound address list can't change. */ list_for_each_entry(addr, &bp->address_list, list) { memcpy(&temp, &addr->a, sizeof(temp)); addrlen = sctp_get_pf_specific(sk->sk_family) ->addr_to_user(sp, &temp); if (space_left < addrlen) { err = -ENOMEM; /*fixme: right error?*/ goto out; } memcpy(buf, &temp, addrlen); buf += addrlen; bytes_copied += addrlen; cnt++; space_left -= addrlen; } copy_getaddrs: if (copy_to_user(to, addrs, bytes_copied)) { err = -EFAULT; goto out; } if (put_user(cnt, &((struct sctp_getaddrs __user *)optval)->addr_num)) { err = -EFAULT; goto out; } /* XXX: We should have accounted for sizeof(struct sctp_getaddrs) too, * but we can't change it anymore. */ if (put_user(bytes_copied, optlen)) err = -EFAULT; out: kfree(addrs); return err; } /* 7.1.10 Set Primary Address (SCTP_PRIMARY_ADDR) * * Requests that the local SCTP stack use the enclosed peer address as * the association primary. The enclosed address must be one of the * association peer's addresses. */ static int sctp_getsockopt_primary_addr(struct sock *sk, int len, char __user *optval, int __user *optlen) { struct sctp_prim prim; struct sctp_association *asoc; struct sctp_sock *sp = sctp_sk(sk); if (len < sizeof(struct sctp_prim)) return -EINVAL; len = sizeof(struct sctp_prim); if (copy_from_user(&prim, optval, len)) return -EFAULT; asoc = sctp_id2assoc(sk, prim.ssp_assoc_id); if (!asoc) return -EINVAL; if (!asoc->peer.primary_path) return -ENOTCONN; memcpy(&prim.ssp_addr, &asoc->peer.primary_path->ipaddr, asoc->peer.primary_path->af_specific->sockaddr_len); sctp_get_pf_specific(sk->sk_family)->addr_to_user(sp, (union sctp_addr *)&prim.ssp_addr); if (put_user(len, optlen)) return -EFAULT; if (copy_to_user(optval, &prim, len)) return -EFAULT; return 0; } /* * 7.1.11 Set Adaptation Layer Indicator (SCTP_ADAPTATION_LAYER) * * Requests that the local endpoint set the specified Adaptation Layer * Indication parameter for all future INIT and INIT-ACK exchanges. */ static int sctp_getsockopt_adaptation_layer(struct sock *sk, int len, char __user *optval, int __user *optlen) { struct sctp_setadaptation adaptation; if (len < sizeof(struct sctp_setadaptation)) return -EINVAL; len = sizeof(struct sctp_setadaptation); adaptation.ssb_adaptation_ind = sctp_sk(sk)->adaptation_ind; if (put_user(len, optlen)) return -EFAULT; if (copy_to_user(optval, &adaptation, len)) return -EFAULT; return 0; } /* * * 7.1.14 Set default send parameters (SCTP_DEFAULT_SEND_PARAM) * * Applications that wish to use the sendto() system call may wish to * specify a default set of parameters that would normally be supplied * through the inclusion of ancillary data. This socket option allows * such an application to set the default sctp_sndrcvinfo structure. * The application that wishes to use this socket option simply passes * in to this call the sctp_sndrcvinfo structure defined in Section * 5.2.2) The input parameters accepted by this call include * sinfo_stream, sinfo_flags, sinfo_ppid, sinfo_context, * sinfo_timetolive. The user must provide the sinfo_assoc_id field in * to this call if the caller is using the UDP model. * * For getsockopt, it get the default sctp_sndrcvinfo structure. */ static int sctp_getsockopt_default_send_param(struct sock *sk, int len, char __user *optval, int __user *optlen) { struct sctp_sock *sp = sctp_sk(sk); struct sctp_association *asoc; struct sctp_sndrcvinfo info; if (len < sizeof(info)) return -EINVAL; len = sizeof(info); if (copy_from_user(&info, optval, len)) return -EFAULT; asoc = sctp_id2assoc(sk, info.sinfo_assoc_id); if (!asoc && info.sinfo_assoc_id != SCTP_FUTURE_ASSOC && sctp_style(sk, UDP)) return -EINVAL; if (asoc) { info.sinfo_stream = asoc->default_stream; info.sinfo_flags = asoc->default_flags; info.sinfo_ppid = asoc->default_ppid; info.sinfo_context = asoc->default_context; info.sinfo_timetolive = asoc->default_timetolive; } else { info.sinfo_stream = sp->default_stream; info.sinfo_flags = sp->default_flags; info.sinfo_ppid = sp->default_ppid; info.sinfo_context = sp->default_context; info.sinfo_timetolive = sp->default_timetolive; } if (put_user(len, optlen)) return -EFAULT; if (copy_to_user(optval, &info, len)) return -EFAULT; return 0; } /* RFC6458, Section 8.1.31. Set/get Default Send Parameters * (SCTP_DEFAULT_SNDINFO) */ static int sctp_getsockopt_default_sndinfo(struct sock *sk, int len, char __user *optval, int __user *optlen) { struct sctp_sock *sp = sctp_sk(sk); struct sctp_association *asoc; struct sctp_sndinfo info; if (len < sizeof(info)) return -EINVAL; len = sizeof(info); if (copy_from_user(&info, optval, len)) return -EFAULT; asoc = sctp_id2assoc(sk, info.snd_assoc_id); if (!asoc && info.snd_assoc_id != SCTP_FUTURE_ASSOC && sctp_style(sk, UDP)) return -EINVAL; if (asoc) { info.snd_sid = asoc->default_stream; info.snd_flags = asoc->default_flags; info.snd_ppid = asoc->default_ppid; info.snd_context = asoc->default_context; } else { info.snd_sid = sp->default_stream; info.snd_flags = sp->default_flags; info.snd_ppid = sp->default_ppid; info.snd_context = sp->default_context; } if (put_user(len, optlen)) return -EFAULT; if (copy_to_user(optval, &info, len)) return -EFAULT; return 0; } /* * * 7.1.5 SCTP_NODELAY * * Turn on/off any Nagle-like algorithm. This means that packets are * generally sent as soon as possible and no unnecessary delays are * introduced, at the cost of more packets in the network. Expects an * integer boolean flag. */ static int sctp_getsockopt_nodelay(struct sock *sk, int len, char __user *optval, int __user *optlen) { int val; if (len < sizeof(int)) return -EINVAL; len = sizeof(int); val = (sctp_sk(sk)->nodelay == 1); if (put_user(len, optlen)) return -EFAULT; if (copy_to_user(optval, &val, len)) return -EFAULT; return 0; } /* * * 7.1.1 SCTP_RTOINFO * * The protocol parameters used to initialize and bound retransmission * timeout (RTO) are tunable. sctp_rtoinfo structure is used to access * and modify these parameters. * All parameters are time values, in milliseconds. A value of 0, when * modifying the parameters, indicates that the current value should not * be changed. * */ static int sctp_getsockopt_rtoinfo(struct sock *sk, int len, char __user *optval, int __user *optlen) { struct sctp_rtoinfo rtoinfo; struct sctp_association *asoc; if (len < sizeof (struct sctp_rtoinfo)) return -EINVAL; len = sizeof(struct sctp_rtoinfo); if (copy_from_user(&rtoinfo, optval, len)) return -EFAULT; asoc = sctp_id2assoc(sk, rtoinfo.srto_assoc_id); if (!asoc && rtoinfo.srto_assoc_id != SCTP_FUTURE_ASSOC && sctp_style(sk, UDP)) return -EINVAL; /* Values corresponding to the specific association. */ if (asoc) { rtoinfo.srto_initial = jiffies_to_msecs(asoc->rto_initial); rtoinfo.srto_max = jiffies_to_msecs(asoc->rto_max); rtoinfo.srto_min = jiffies_to_msecs(asoc->rto_min); } else { /* Values corresponding to the endpoint. */ struct sctp_sock *sp = sctp_sk(sk); rtoinfo.srto_initial = sp->rtoinfo.srto_initial; rtoinfo.srto_max = sp->rtoinfo.srto_max; rtoinfo.srto_min = sp->rtoinfo.srto_min; } if (put_user(len, optlen)) return -EFAULT; if (copy_to_user(optval, &rtoinfo, len)) return -EFAULT; return 0; } /* * * 7.1.2 SCTP_ASSOCINFO * * This option is used to tune the maximum retransmission attempts * of the association. * Returns an error if the new association retransmission value is * greater than the sum of the retransmission value of the peer. * See [SCTP] for more information. * */ static int sctp_getsockopt_associnfo(struct sock *sk, int len, char __user *optval, int __user *optlen) { struct sctp_assocparams assocparams; struct sctp_association *asoc; struct list_head *pos; int cnt = 0; if (len < sizeof (struct sctp_assocparams)) return -EINVAL; len = sizeof(struct sctp_assocparams); if (copy_from_user(&assocparams, optval, len)) return -EFAULT; asoc = sctp_id2assoc(sk, assocparams.sasoc_assoc_id); if (!asoc && assocparams.sasoc_assoc_id != SCTP_FUTURE_ASSOC && sctp_style(sk, UDP)) return -EINVAL; /* Values correspoinding to the specific association */ if (asoc) { assocparams.sasoc_asocmaxrxt = asoc->max_retrans; assocparams.sasoc_peer_rwnd = asoc->peer.rwnd; assocparams.sasoc_local_rwnd = asoc->a_rwnd; assocparams.sasoc_cookie_life = ktime_to_ms(asoc->cookie_life); list_for_each(pos, &asoc->peer.transport_addr_list) { cnt++; } assocparams.sasoc_number_peer_destinations = cnt; } else { /* Values corresponding to the endpoint */ struct sctp_sock *sp = sctp_sk(sk); assocparams.sasoc_asocmaxrxt = sp->assocparams.sasoc_asocmaxrxt; assocparams.sasoc_peer_rwnd = sp->assocparams.sasoc_peer_rwnd; assocparams.sasoc_local_rwnd = sp->assocparams.sasoc_local_rwnd; assocparams.sasoc_cookie_life = sp->assocparams.sasoc_cookie_life; assocparams.sasoc_number_peer_destinations = sp->assocparams. sasoc_number_peer_destinations; } if (put_user(len, optlen)) return -EFAULT; if (copy_to_user(optval, &assocparams, len)) return -EFAULT; return 0; } /* * 7.1.16 Set/clear IPv4 mapped addresses (SCTP_I_WANT_MAPPED_V4_ADDR) * * This socket option is a boolean flag which turns on or off mapped V4 * addresses. If this option is turned on and the socket is type * PF_INET6, then IPv4 addresses will be mapped to V6 representation. * If this option is turned off, then no mapping will be done of V4 * addresses and a user will receive both PF_INET6 and PF_INET type * addresses on the socket. */ static int sctp_getsockopt_mappedv4(struct sock *sk, int len, char __user *optval, int __user *optlen) { int val; struct sctp_sock *sp = sctp_sk(sk); if (len < sizeof(int)) return -EINVAL; len = sizeof(int); val = sp->v4mapped; if (put_user(len, optlen)) return -EFAULT; if (copy_to_user(optval, &val, len)) return -EFAULT; return 0; } /* * 7.1.29. Set or Get the default context (SCTP_CONTEXT) * (chapter and verse is quoted at sctp_setsockopt_context()) */ static int sctp_getsockopt_context(struct sock *sk, int len, char __user *optval, int __user *optlen) { struct sctp_assoc_value params; struct sctp_association *asoc; if (len < sizeof(struct sctp_assoc_value)) return -EINVAL; len = sizeof(struct sctp_assoc_value); if (copy_from_user(&params, optval, len)) return -EFAULT; asoc = sctp_id2assoc(sk, params.assoc_id); if (!asoc && params.assoc_id != SCTP_FUTURE_ASSOC && sctp_style(sk, UDP)) return -EINVAL; params.assoc_value = asoc ? asoc->default_rcv_context : sctp_sk(sk)->default_rcv_context; if (put_user(len, optlen)) return -EFAULT; if (copy_to_user(optval, &params, len)) return -EFAULT; return 0; } /* * 8.1.16. Get or Set the Maximum Fragmentation Size (SCTP_MAXSEG) * This option will get or set the maximum size to put in any outgoing * SCTP DATA chunk. If a message is larger than this size it will be * fragmented by SCTP into the specified size. Note that the underlying * SCTP implementation may fragment into smaller sized chunks when the * PMTU of the underlying association is smaller than the value set by * the user. The default value for this option is '0' which indicates * the user is NOT limiting fragmentation and only the PMTU will effect * SCTP's choice of DATA chunk size. Note also that values set larger * than the maximum size of an IP datagram will effectively let SCTP * control fragmentation (i.e. the same as setting this option to 0). * * The following structure is used to access and modify this parameter: * * struct sctp_assoc_value { * sctp_assoc_t assoc_id; * uint32_t assoc_value; * }; * * assoc_id: This parameter is ignored for one-to-one style sockets. * For one-to-many style sockets this parameter indicates which * association the user is performing an action upon. Note that if * this field's value is zero then the endpoints default value is * changed (effecting future associations only). * assoc_value: This parameter specifies the maximum size in bytes. */ static int sctp_getsockopt_maxseg(struct sock *sk, int len, char __user *optval, int __user *optlen) { struct sctp_assoc_value params; struct sctp_association *asoc; if (len == sizeof(int)) { pr_warn_ratelimited(DEPRECATED "%s (pid %d) " "Use of int in maxseg socket option.\n" "Use struct sctp_assoc_value instead\n", current->comm, task_pid_nr(current)); params.assoc_id = SCTP_FUTURE_ASSOC; } else if (len >= sizeof(struct sctp_assoc_value)) { len = sizeof(struct sctp_assoc_value); if (copy_from_user(&params, optval, len)) return -EFAULT; } else return -EINVAL; asoc = sctp_id2assoc(sk, params.assoc_id); if (!asoc && params.assoc_id != SCTP_FUTURE_ASSOC && sctp_style(sk, UDP)) return -EINVAL; if (asoc) params.assoc_value = asoc->frag_point; else params.assoc_value = sctp_sk(sk)->user_frag; if (put_user(len, optlen)) return -EFAULT; if (len == sizeof(int)) { if (copy_to_user(optval, &params.assoc_value, len)) return -EFAULT; } else { if (copy_to_user(optval, &params, len)) return -EFAULT; } return 0; } /* * 7.1.24. Get or set fragmented interleave (SCTP_FRAGMENT_INTERLEAVE) * (chapter and verse is quoted at sctp_setsockopt_fragment_interleave()) */ static int sctp_getsockopt_fragment_interleave(struct sock *sk, int len, char __user *optval, int __user *optlen) { int val; if (len < sizeof(int)) return -EINVAL; len = sizeof(int); val = sctp_sk(sk)->frag_interleave; if (put_user(len, optlen)) return -EFAULT; if (copy_to_user(optval, &val, len)) return -EFAULT; return 0; } /* * 7.1.25. Set or Get the sctp partial delivery point * (chapter and verse is quoted at sctp_setsockopt_partial_delivery_point()) */ static int sctp_getsockopt_partial_delivery_point(struct sock *sk, int len, char __user *optval, int __user *optlen) { u32 val; if (len < sizeof(u32)) return -EINVAL; len = sizeof(u32); val = sctp_sk(sk)->pd_point; if (put_user(len, optlen)) return -EFAULT; if (copy_to_user(optval, &val, len)) return -EFAULT; return 0; } /* * 7.1.28. Set or Get the maximum burst (SCTP_MAX_BURST) * (chapter and verse is quoted at sctp_setsockopt_maxburst()) */ static int sctp_getsockopt_maxburst(struct sock *sk, int len, char __user *optval, int __user *optlen) { struct sctp_assoc_value params; struct sctp_association *asoc; if (len == sizeof(int)) { pr_warn_ratelimited(DEPRECATED "%s (pid %d) " "Use of int in max_burst socket option.\n" "Use struct sctp_assoc_value instead\n", current->comm, task_pid_nr(current)); params.assoc_id = SCTP_FUTURE_ASSOC; } else if (len >= sizeof(struct sctp_assoc_value)) { len = sizeof(struct sctp_assoc_value); if (copy_from_user(&params, optval, len)) return -EFAULT; } else return -EINVAL; asoc = sctp_id2assoc(sk, params.assoc_id); if (!asoc && params.assoc_id != SCTP_FUTURE_ASSOC && sctp_style(sk, UDP)) return -EINVAL; params.assoc_value = asoc ? asoc->max_burst : sctp_sk(sk)->max_burst; if (len == sizeof(int)) { if (copy_to_user(optval, &params.assoc_value, len)) return -EFAULT; } else { if (copy_to_user(optval, &params, len)) return -EFAULT; } return 0; } static int sctp_getsockopt_hmac_ident(struct sock *sk, int len, char __user *optval, int __user *optlen) { struct sctp_endpoint *ep = sctp_sk(sk)->ep; struct sctp_hmacalgo __user *p = (void __user *)optval; struct sctp_hmac_algo_param *hmacs; __u16 data_len = 0; u32 num_idents; int i; if (!ep->auth_enable) return -EACCES; hmacs = ep->auth_hmacs_list; data_len = ntohs(hmacs->param_hdr.length) - sizeof(struct sctp_paramhdr); if (len < sizeof(struct sctp_hmacalgo) + data_len) return -EINVAL; len = sizeof(struct sctp_hmacalgo) + data_len; num_idents = data_len / sizeof(u16); if (put_user(len, optlen)) return -EFAULT; if (put_user(num_idents, &p->shmac_num_idents)) return -EFAULT; for (i = 0; i < num_idents; i++) { __u16 hmacid = ntohs(hmacs->hmac_ids[i]); if (copy_to_user(&p->shmac_idents[i], &hmacid, sizeof(__u16))) return -EFAULT; } return 0; } static int sctp_getsockopt_active_key(struct sock *sk, int len, char __user *optval, int __user *optlen) { struct sctp_endpoint *ep = sctp_sk(sk)->ep; struct sctp_authkeyid val; struct sctp_association *asoc; if (len < sizeof(struct sctp_authkeyid)) return -EINVAL; len = sizeof(struct sctp_authkeyid); if (copy_from_user(&val, optval, len)) return -EFAULT; asoc = sctp_id2assoc(sk, val.scact_assoc_id); if (!asoc && val.scact_assoc_id && sctp_style(sk, UDP)) return -EINVAL; if (asoc) { if (!asoc->peer.auth_capable) return -EACCES; val.scact_keynumber = asoc->active_key_id; } else { if (!ep->auth_enable) return -EACCES; val.scact_keynumber = ep->active_key_id; } if (put_user(len, optlen)) return -EFAULT; if (copy_to_user(optval, &val, len)) return -EFAULT; return 0; } static int sctp_getsockopt_peer_auth_chunks(struct sock *sk, int len, char __user *optval, int __user *optlen) { struct sctp_authchunks __user *p = (void __user *)optval; struct sctp_authchunks val; struct sctp_association *asoc; struct sctp_chunks_param *ch; u32 num_chunks = 0; char __user *to; if (len < sizeof(struct sctp_authchunks)) return -EINVAL; if (copy_from_user(&val, optval, sizeof(val))) return -EFAULT; to = p->gauth_chunks; asoc = sctp_id2assoc(sk, val.gauth_assoc_id); if (!asoc) return -EINVAL; if (!asoc->peer.auth_capable) return -EACCES; ch = asoc->peer.peer_chunks; if (!ch) goto num; /* See if the user provided enough room for all the data */ num_chunks = ntohs(ch->param_hdr.length) - sizeof(struct sctp_paramhdr); if (len < num_chunks) return -EINVAL; if (copy_to_user(to, ch->chunks, num_chunks)) return -EFAULT; num: len = sizeof(struct sctp_authchunks) + num_chunks; if (put_user(len, optlen)) return -EFAULT; if (put_user(num_chunks, &p->gauth_number_of_chunks)) return -EFAULT; return 0; } static int sctp_getsockopt_local_auth_chunks(struct sock *sk, int len, char __user *optval, int __user *optlen) { struct sctp_endpoint *ep = sctp_sk(sk)->ep; struct sctp_authchunks __user *p = (void __user *)optval; struct sctp_authchunks val; struct sctp_association *asoc; struct sctp_chunks_param *ch; u32 num_chunks = 0; char __user *to; if (len < sizeof(struct sctp_authchunks)) return -EINVAL; if (copy_from_user(&val, optval, sizeof(val))) return -EFAULT; to = p->gauth_chunks; asoc = sctp_id2assoc(sk, val.gauth_assoc_id); if (!asoc && val.gauth_assoc_id != SCTP_FUTURE_ASSOC && sctp_style(sk, UDP)) return -EINVAL; if (asoc) { if (!asoc->peer.auth_capable) return -EACCES; ch = (struct sctp_chunks_param *)asoc->c.auth_chunks; } else { if (!ep->auth_enable) return -EACCES; ch = ep->auth_chunk_list; } if (!ch) goto num; num_chunks = ntohs(ch->param_hdr.length) - sizeof(struct sctp_paramhdr); if (len < sizeof(struct sctp_authchunks) + num_chunks) return -EINVAL; if (copy_to_user(to, ch->chunks, num_chunks)) return -EFAULT; num: len = sizeof(struct sctp_authchunks) + num_chunks; if (put_user(len, optlen)) return -EFAULT; if (put_user(num_chunks, &p->gauth_number_of_chunks)) return -EFAULT; return 0; } /* * 8.2.5. Get the Current Number of Associations (SCTP_GET_ASSOC_NUMBER) * This option gets the current number of associations that are attached * to a one-to-many style socket. The option value is an uint32_t. */ static int sctp_getsockopt_assoc_number(struct sock *sk, int len, char __user *optval, int __user *optlen) { struct sctp_sock *sp = sctp_sk(sk); struct sctp_association *asoc; u32 val = 0; if (sctp_style(sk, TCP)) return -EOPNOTSUPP; if (len < sizeof(u32)) return -EINVAL; len = sizeof(u32); list_for_each_entry(asoc, &(sp->ep->asocs), asocs) { val++; } if (put_user(len, optlen)) return -EFAULT; if (copy_to_user(optval, &val, len)) return -EFAULT; return 0; } /* * 8.1.23 SCTP_AUTO_ASCONF * See the corresponding setsockopt entry as description */ static int sctp_getsockopt_auto_asconf(struct sock *sk, int len, char __user *optval, int __user *optlen) { int val = 0; if (len < sizeof(int)) return -EINVAL; len = sizeof(int); if (sctp_sk(sk)->do_auto_asconf && sctp_is_ep_boundall(sk)) val = 1; if (put_user(len, optlen)) return -EFAULT; if (copy_to_user(optval, &val, len)) return -EFAULT; return 0; } /* * 8.2.6. Get the Current Identifiers of Associations * (SCTP_GET_ASSOC_ID_LIST) * * This option gets the current list of SCTP association identifiers of * the SCTP associations handled by a one-to-many style socket. */ static int sctp_getsockopt_assoc_ids(struct sock *sk, int len, char __user *optval, int __user *optlen) { struct sctp_sock *sp = sctp_sk(sk); struct sctp_association *asoc; struct sctp_assoc_ids *ids; size_t ids_size; u32 num = 0; if (sctp_style(sk, TCP)) return -EOPNOTSUPP; if (len < sizeof(struct sctp_assoc_ids)) return -EINVAL; list_for_each_entry(asoc, &(sp->ep->asocs), asocs) { num++; } ids_size = struct_size(ids, gaids_assoc_id, num); if (len < ids_size) return -EINVAL; len = ids_size; ids = kmalloc(len, GFP_USER | __GFP_NOWARN); if (unlikely(!ids)) return -ENOMEM; ids->gaids_number_of_ids = num; num = 0; list_for_each_entry(asoc, &(sp->ep->asocs), asocs) { ids->gaids_assoc_id[num++] = asoc->assoc_id; } if (put_user(len, optlen) || copy_to_user(optval, ids, len)) { kfree(ids); return -EFAULT; } kfree(ids); return 0; } /* * SCTP_PEER_ADDR_THLDS * * This option allows us to fetch the partially failed threshold for one or all * transports in an association. See Section 6.1 of: * http://www.ietf.org/id/draft-nishida-tsvwg-sctp-failover-05.txt */ static int sctp_getsockopt_paddr_thresholds(struct sock *sk, char __user *optval, int len, int __user *optlen, bool v2) { struct sctp_paddrthlds_v2 val; struct sctp_transport *trans; struct sctp_association *asoc; int min; min = v2 ? sizeof(val) : sizeof(struct sctp_paddrthlds); if (len < min) return -EINVAL; len = min; if (copy_from_user(&val, optval, len)) return -EFAULT; if (!sctp_is_any(sk, (const union sctp_addr *)&val.spt_address)) { trans = sctp_addr_id2transport(sk, &val.spt_address, val.spt_assoc_id); if (!trans) return -ENOENT; val.spt_pathmaxrxt = trans->pathmaxrxt; val.spt_pathpfthld = trans->pf_retrans; val.spt_pathcpthld = trans->ps_retrans; goto out; } asoc = sctp_id2assoc(sk, val.spt_assoc_id); if (!asoc && val.spt_assoc_id != SCTP_FUTURE_ASSOC && sctp_style(sk, UDP)) return -EINVAL; if (asoc) { val.spt_pathpfthld = asoc->pf_retrans; val.spt_pathmaxrxt = asoc->pathmaxrxt; val.spt_pathcpthld = asoc->ps_retrans; } else { struct sctp_sock *sp = sctp_sk(sk); val.spt_pathpfthld = sp->pf_retrans; val.spt_pathmaxrxt = sp->pathmaxrxt; val.spt_pathcpthld = sp->ps_retrans; } out: if (put_user(len, optlen) || copy_to_user(optval, &val, len)) return -EFAULT; return 0; } /* * SCTP_GET_ASSOC_STATS * * This option retrieves local per endpoint statistics. It is modeled * after OpenSolaris' implementation */ static int sctp_getsockopt_assoc_stats(struct sock *sk, int len, char __user *optval, int __user *optlen) { struct sctp_assoc_stats sas; struct sctp_association *asoc = NULL; /* User must provide at least the assoc id */ if (len < sizeof(sctp_assoc_t)) return -EINVAL; /* Allow the struct to grow and fill in as much as possible */ len = min_t(size_t, len, sizeof(sas)); if (copy_from_user(&sas, optval, len)) return -EFAULT; asoc = sctp_id2assoc(sk, sas.sas_assoc_id); if (!asoc) return -EINVAL; sas.sas_rtxchunks = asoc->stats.rtxchunks; sas.sas_gapcnt = asoc->stats.gapcnt; sas.sas_outofseqtsns = asoc->stats.outofseqtsns; sas.sas_osacks = asoc->stats.osacks; sas.sas_isacks = asoc->stats.isacks; sas.sas_octrlchunks = asoc->stats.octrlchunks; sas.sas_ictrlchunks = asoc->stats.ictrlchunks; sas.sas_oodchunks = asoc->stats.oodchunks; sas.sas_iodchunks = asoc->stats.iodchunks; sas.sas_ouodchunks = asoc->stats.ouodchunks; sas.sas_iuodchunks = asoc->stats.iuodchunks; sas.sas_idupchunks = asoc->stats.idupchunks; sas.sas_opackets = asoc->stats.opackets; sas.sas_ipackets = asoc->stats.ipackets; /* New high max rto observed, will return 0 if not a single * RTO update took place. obs_rto_ipaddr will be bogus * in such a case */ sas.sas_maxrto = asoc->stats.max_obs_rto; memcpy(&sas.sas_obs_rto_ipaddr, &asoc->stats.obs_rto_ipaddr, sizeof(struct sockaddr_storage)); /* Mark beginning of a new observation period */ asoc->stats.max_obs_rto = asoc->rto_min; if (put_user(len, optlen)) return -EFAULT; pr_debug("%s: len:%d, assoc_id:%d\n", __func__, len, sas.sas_assoc_id); if (copy_to_user(optval, &sas, len)) return -EFAULT; return 0; } static int sctp_getsockopt_recvrcvinfo(struct sock *sk, int len, char __user *optval, int __user *optlen) { int val = 0; if (len < sizeof(int)) return -EINVAL; len = sizeof(int); if (sctp_sk(sk)->recvrcvinfo) val = 1; if (put_user(len, optlen)) return -EFAULT; if (copy_to_user(optval, &val, len)) return -EFAULT; return 0; } static int sctp_getsockopt_recvnxtinfo(struct sock *sk, int len, char __user *optval, int __user *optlen) { int val = 0; if (len < sizeof(int)) return -EINVAL; len = sizeof(int); if (sctp_sk(sk)->recvnxtinfo) val = 1; if (put_user(len, optlen)) return -EFAULT; if (copy_to_user(optval, &val, len)) return -EFAULT; return 0; } static int sctp_getsockopt_pr_supported(struct sock *sk, int len, char __user *optval, int __user *optlen) { struct sctp_assoc_value params; struct sctp_association *asoc; int retval = -EFAULT; if (len < sizeof(params)) { retval = -EINVAL; goto out; } len = sizeof(params); if (copy_from_user(&params, optval, len)) goto out; asoc = sctp_id2assoc(sk, params.assoc_id); if (!asoc && params.assoc_id != SCTP_FUTURE_ASSOC && sctp_style(sk, UDP)) { retval = -EINVAL; goto out; } params.assoc_value = asoc ? asoc->peer.prsctp_capable : sctp_sk(sk)->ep->prsctp_enable; if (put_user(len, optlen)) goto out; if (copy_to_user(optval, &params, len)) goto out; retval = 0; out: return retval; } static int sctp_getsockopt_default_prinfo(struct sock *sk, int len, char __user *optval, int __user *optlen) { struct sctp_default_prinfo info; struct sctp_association *asoc; int retval = -EFAULT; if (len < sizeof(info)) { retval = -EINVAL; goto out; } len = sizeof(info); if (copy_from_user(&info, optval, len)) goto out; asoc = sctp_id2assoc(sk, info.pr_assoc_id); if (!asoc && info.pr_assoc_id != SCTP_FUTURE_ASSOC && sctp_style(sk, UDP)) { retval = -EINVAL; goto out; } if (asoc) { info.pr_policy = SCTP_PR_POLICY(asoc->default_flags); info.pr_value = asoc->default_timetolive; } else { struct sctp_sock *sp = sctp_sk(sk); info.pr_policy = SCTP_PR_POLICY(sp->default_flags); info.pr_value = sp->default_timetolive; } if (put_user(len, optlen)) goto out; if (copy_to_user(optval, &info, len)) goto out; retval = 0; out: return retval; } static int sctp_getsockopt_pr_assocstatus(struct sock *sk, int len, char __user *optval, int __user *optlen) { struct sctp_prstatus params; struct sctp_association *asoc; int policy; int retval = -EINVAL; if (len < sizeof(params)) goto out; len = sizeof(params); if (copy_from_user(&params, optval, len)) { retval = -EFAULT; goto out; } policy = params.sprstat_policy; if (!policy || (policy & ~(SCTP_PR_SCTP_MASK | SCTP_PR_SCTP_ALL)) || ((policy & SCTP_PR_SCTP_ALL) && (policy & SCTP_PR_SCTP_MASK))) goto out; asoc = sctp_id2assoc(sk, params.sprstat_assoc_id); if (!asoc) goto out; if (policy == SCTP_PR_SCTP_ALL) { params.sprstat_abandoned_unsent = 0; params.sprstat_abandoned_sent = 0; for (policy = 0; policy <= SCTP_PR_INDEX(MAX); policy++) { params.sprstat_abandoned_unsent += asoc->abandoned_unsent[policy]; params.sprstat_abandoned_sent += asoc->abandoned_sent[policy]; } } else { params.sprstat_abandoned_unsent = asoc->abandoned_unsent[__SCTP_PR_INDEX(policy)]; params.sprstat_abandoned_sent = asoc->abandoned_sent[__SCTP_PR_INDEX(policy)]; } if (put_user(len, optlen)) { retval = -EFAULT; goto out; } if (copy_to_user(optval, &params, len)) { retval = -EFAULT; goto out; } retval = 0; out: return retval; } static int sctp_getsockopt_pr_streamstatus(struct sock *sk, int len, char __user *optval, int __user *optlen) { struct sctp_stream_out_ext *streamoute; struct sctp_association *asoc; struct sctp_prstatus params; int retval = -EINVAL; int policy; if (len < sizeof(params)) goto out; len = sizeof(params); if (copy_from_user(&params, optval, len)) { retval = -EFAULT; goto out; } policy = params.sprstat_policy; if (!policy || (policy & ~(SCTP_PR_SCTP_MASK | SCTP_PR_SCTP_ALL)) || ((policy & SCTP_PR_SCTP_ALL) && (policy & SCTP_PR_SCTP_MASK))) goto out; asoc = sctp_id2assoc(sk, params.sprstat_assoc_id); if (!asoc || params.sprstat_sid >= asoc->stream.outcnt) goto out; streamoute = SCTP_SO(&asoc->stream, params.sprstat_sid)->ext; if (!streamoute) { /* Not allocated yet, means all stats are 0 */ params.sprstat_abandoned_unsent = 0; params.sprstat_abandoned_sent = 0; retval = 0; goto out; } if (policy == SCTP_PR_SCTP_ALL) { params.sprstat_abandoned_unsent = 0; params.sprstat_abandoned_sent = 0; for (policy = 0; policy <= SCTP_PR_INDEX(MAX); policy++) { params.sprstat_abandoned_unsent += streamoute->abandoned_unsent[policy]; params.sprstat_abandoned_sent += streamoute->abandoned_sent[policy]; } } else { params.sprstat_abandoned_unsent = streamoute->abandoned_unsent[__SCTP_PR_INDEX(policy)]; params.sprstat_abandoned_sent = streamoute->abandoned_sent[__SCTP_PR_INDEX(policy)]; } if (put_user(len, optlen) || copy_to_user(optval, &params, len)) { retval = -EFAULT; goto out; } retval = 0; out: return retval; } static int sctp_getsockopt_reconfig_supported(struct sock *sk, int len, char __user *optval, int __user *optlen) { struct sctp_assoc_value params; struct sctp_association *asoc; int retval = -EFAULT; if (len < sizeof(params)) { retval = -EINVAL; goto out; } len = sizeof(params); if (copy_from_user(&params, optval, len)) goto out; asoc = sctp_id2assoc(sk, params.assoc_id); if (!asoc && params.assoc_id != SCTP_FUTURE_ASSOC && sctp_style(sk, UDP)) { retval = -EINVAL; goto out; } params.assoc_value = asoc ? asoc->peer.reconf_capable : sctp_sk(sk)->ep->reconf_enable; if (put_user(len, optlen)) goto out; if (copy_to_user(optval, &params, len)) goto out; retval = 0; out: return retval; } static int sctp_getsockopt_enable_strreset(struct sock *sk, int len, char __user *optval, int __user *optlen) { struct sctp_assoc_value params; struct sctp_association *asoc; int retval = -EFAULT; if (len < sizeof(params)) { retval = -EINVAL; goto out; } len = sizeof(params); if (copy_from_user(&params, optval, len)) goto out; asoc = sctp_id2assoc(sk, params.assoc_id); if (!asoc && params.assoc_id != SCTP_FUTURE_ASSOC && sctp_style(sk, UDP)) { retval = -EINVAL; goto out; } params.assoc_value = asoc ? asoc->strreset_enable : sctp_sk(sk)->ep->strreset_enable; if (put_user(len, optlen)) goto out; if (copy_to_user(optval, &params, len)) goto out; retval = 0; out: return retval; } static int sctp_getsockopt_scheduler(struct sock *sk, int len, char __user *optval, int __user *optlen) { struct sctp_assoc_value params; struct sctp_association *asoc; int retval = -EFAULT; if (len < sizeof(params)) { retval = -EINVAL; goto out; } len = sizeof(params); if (copy_from_user(&params, optval, len)) goto out; asoc = sctp_id2assoc(sk, params.assoc_id); if (!asoc && params.assoc_id != SCTP_FUTURE_ASSOC && sctp_style(sk, UDP)) { retval = -EINVAL; goto out; } params.assoc_value = asoc ? sctp_sched_get_sched(asoc) : sctp_sk(sk)->default_ss; if (put_user(len, optlen)) goto out; if (copy_to_user(optval, &params, len)) goto out; retval = 0; out: return retval; } static int sctp_getsockopt_scheduler_value(struct sock *sk, int len, char __user *optval, int __user *optlen) { struct sctp_stream_value params; struct sctp_association *asoc; int retval = -EFAULT; if (len < sizeof(params)) { retval = -EINVAL; goto out; } len = sizeof(params); if (copy_from_user(&params, optval, len)) goto out; asoc = sctp_id2assoc(sk, params.assoc_id); if (!asoc) { retval = -EINVAL; goto out; } retval = sctp_sched_get_value(asoc, params.stream_id, &params.stream_value); if (retval) goto out; if (put_user(len, optlen)) { retval = -EFAULT; goto out; } if (copy_to_user(optval, &params, len)) { retval = -EFAULT; goto out; } out: return retval; } static int sctp_getsockopt_interleaving_supported(struct sock *sk, int len, char __user *optval, int __user *optlen) { struct sctp_assoc_value params; struct sctp_association *asoc; int retval = -EFAULT; if (len < sizeof(params)) { retval = -EINVAL; goto out; } len = sizeof(params); if (copy_from_user(&params, optval, len)) goto out; asoc = sctp_id2assoc(sk, params.assoc_id); if (!asoc && params.assoc_id != SCTP_FUTURE_ASSOC && sctp_style(sk, UDP)) { retval = -EINVAL; goto out; } params.assoc_value = asoc ? asoc->peer.intl_capable : sctp_sk(sk)->ep->intl_enable; if (put_user(len, optlen)) goto out; if (copy_to_user(optval, &params, len)) goto out; retval = 0; out: return retval; } static int sctp_getsockopt_reuse_port(struct sock *sk, int len, char __user *optval, int __user *optlen) { int val; if (len < sizeof(int)) return -EINVAL; len = sizeof(int); val = sctp_sk(sk)->reuse; if (put_user(len, optlen)) return -EFAULT; if (copy_to_user(optval, &val, len)) return -EFAULT; return 0; } static int sctp_getsockopt_event(struct sock *sk, int len, char __user *optval, int __user *optlen) { struct sctp_association *asoc; struct sctp_event param; __u16 subscribe; if (len < sizeof(param)) return -EINVAL; len = sizeof(param); if (copy_from_user(&param, optval, len)) return -EFAULT; if (param.se_type < SCTP_SN_TYPE_BASE || param.se_type > SCTP_SN_TYPE_MAX) return -EINVAL; asoc = sctp_id2assoc(sk, param.se_assoc_id); if (!asoc && param.se_assoc_id != SCTP_FUTURE_ASSOC && sctp_style(sk, UDP)) return -EINVAL; subscribe = asoc ? asoc->subscribe : sctp_sk(sk)->subscribe; param.se_on = sctp_ulpevent_type_enabled(subscribe, param.se_type); if (put_user(len, optlen)) return -EFAULT; if (copy_to_user(optval, &param, len)) return -EFAULT; return 0; } static int sctp_getsockopt_asconf_supported(struct sock *sk, int len, char __user *optval, int __user *optlen) { struct sctp_assoc_value params; struct sctp_association *asoc; int retval = -EFAULT; if (len < sizeof(params)) { retval = -EINVAL; goto out; } len = sizeof(params); if (copy_from_user(&params, optval, len)) goto out; asoc = sctp_id2assoc(sk, params.assoc_id); if (!asoc && params.assoc_id != SCTP_FUTURE_ASSOC && sctp_style(sk, UDP)) { retval = -EINVAL; goto out; } params.assoc_value = asoc ? asoc->peer.asconf_capable : sctp_sk(sk)->ep->asconf_enable; if (put_user(len, optlen)) goto out; if (copy_to_user(optval, &params, len)) goto out; retval = 0; out: return retval; } static int sctp_getsockopt_auth_supported(struct sock *sk, int len, char __user *optval, int __user *optlen) { struct sctp_assoc_value params; struct sctp_association *asoc; int retval = -EFAULT; if (len < sizeof(params)) { retval = -EINVAL; goto out; } len = sizeof(params); if (copy_from_user(&params, optval, len)) goto out; asoc = sctp_id2assoc(sk, params.assoc_id); if (!asoc && params.assoc_id != SCTP_FUTURE_ASSOC && sctp_style(sk, UDP)) { retval = -EINVAL; goto out; } params.assoc_value = asoc ? asoc->peer.auth_capable : sctp_sk(sk)->ep->auth_enable; if (put_user(len, optlen)) goto out; if (copy_to_user(optval, &params, len)) goto out; retval = 0; out: return retval; } static int sctp_getsockopt_ecn_supported(struct sock *sk, int len, char __user *optval, int __user *optlen) { struct sctp_assoc_value params; struct sctp_association *asoc; int retval = -EFAULT; if (len < sizeof(params)) { retval = -EINVAL; goto out; } len = sizeof(params); if (copy_from_user(&params, optval, len)) goto out; asoc = sctp_id2assoc(sk, params.assoc_id); if (!asoc && params.assoc_id != SCTP_FUTURE_ASSOC && sctp_style(sk, UDP)) { retval = -EINVAL; goto out; } params.assoc_value = asoc ? asoc->peer.ecn_capable : sctp_sk(sk)->ep->ecn_enable; if (put_user(len, optlen)) goto out; if (copy_to_user(optval, &params, len)) goto out; retval = 0; out: return retval; } static int sctp_getsockopt_pf_expose(struct sock *sk, int len, char __user *optval, int __user *optlen) { struct sctp_assoc_value params; struct sctp_association *asoc; int retval = -EFAULT; if (len < sizeof(params)) { retval = -EINVAL; goto out; } len = sizeof(params); if (copy_from_user(&params, optval, len)) goto out; asoc = sctp_id2assoc(sk, params.assoc_id); if (!asoc && params.assoc_id != SCTP_FUTURE_ASSOC && sctp_style(sk, UDP)) { retval = -EINVAL; goto out; } params.assoc_value = asoc ? asoc->pf_expose : sctp_sk(sk)->pf_expose; if (put_user(len, optlen)) goto out; if (copy_to_user(optval, &params, len)) goto out; retval = 0; out: return retval; } static int sctp_getsockopt_encap_port(struct sock *sk, int len, char __user *optval, int __user *optlen) { struct sctp_association *asoc; struct sctp_udpencaps encap; struct sctp_transport *t; __be16 encap_port; if (len < sizeof(encap)) return -EINVAL; len = sizeof(encap); if (copy_from_user(&encap, optval, len)) return -EFAULT; /* If an address other than INADDR_ANY is specified, and * no transport is found, then the request is invalid. */ if (!sctp_is_any(sk, (union sctp_addr *)&encap.sue_address)) { t = sctp_addr_id2transport(sk, &encap.sue_address, encap.sue_assoc_id); if (!t) { pr_debug("%s: failed no transport\n", __func__); return -EINVAL; } encap_port = t->encap_port; goto out; } /* Get association, if assoc_id != SCTP_FUTURE_ASSOC and the * socket is a one to many style socket, and an association * was not found, then the id was invalid. */ asoc = sctp_id2assoc(sk, encap.sue_assoc_id); if (!asoc && encap.sue_assoc_id != SCTP_FUTURE_ASSOC && sctp_style(sk, UDP)) { pr_debug("%s: failed no association\n", __func__); return -EINVAL; } if (asoc) { encap_port = asoc->encap_port; goto out; } encap_port = sctp_sk(sk)->encap_port; out: encap.sue_port = (__force uint16_t)encap_port; if (copy_to_user(optval, &encap, len)) return -EFAULT; if (put_user(len, optlen)) return -EFAULT; return 0; } static int sctp_getsockopt_probe_interval(struct sock *sk, int len, char __user *optval, int __user *optlen) { struct sctp_probeinterval params; struct sctp_association *asoc; struct sctp_transport *t; __u32 probe_interval; if (len < sizeof(params)) return -EINVAL; len = sizeof(params); if (copy_from_user(&params, optval, len)) return -EFAULT; /* If an address other than INADDR_ANY is specified, and * no transport is found, then the request is invalid. */ if (!sctp_is_any(sk, (union sctp_addr *)&params.spi_address)) { t = sctp_addr_id2transport(sk, &params.spi_address, params.spi_assoc_id); if (!t) { pr_debug("%s: failed no transport\n", __func__); return -EINVAL; } probe_interval = jiffies_to_msecs(t->probe_interval); goto out; } /* Get association, if assoc_id != SCTP_FUTURE_ASSOC and the * socket is a one to many style socket, and an association * was not found, then the id was invalid. */ asoc = sctp_id2assoc(sk, params.spi_assoc_id); if (!asoc && params.spi_assoc_id != SCTP_FUTURE_ASSOC && sctp_style(sk, UDP)) { pr_debug("%s: failed no association\n", __func__); return -EINVAL; } if (asoc) { probe_interval = jiffies_to_msecs(asoc->probe_interval); goto out; } probe_interval = sctp_sk(sk)->probe_interval; out: params.spi_interval = probe_interval; if (copy_to_user(optval, &params, len)) return -EFAULT; if (put_user(len, optlen)) return -EFAULT; return 0; } static int sctp_getsockopt(struct sock *sk, int level, int optname, char __user *optval, int __user *optlen) { int retval = 0; int len; pr_debug("%s: sk:%p, optname:%d\n", __func__, sk, optname); /* I can hardly begin to describe how wrong this is. This is * so broken as to be worse than useless. The API draft * REALLY is NOT helpful here... I am not convinced that the * semantics of getsockopt() with a level OTHER THAN SOL_SCTP * are at all well-founded. */ if (level != SOL_SCTP) { struct sctp_af *af = sctp_sk(sk)->pf->af; retval = af->getsockopt(sk, level, optname, optval, optlen); return retval; } if (get_user(len, optlen)) return -EFAULT; if (len < 0) return -EINVAL; lock_sock(sk); switch (optname) { case SCTP_STATUS: retval = sctp_getsockopt_sctp_status(sk, len, optval, optlen); break; case SCTP_DISABLE_FRAGMENTS: retval = sctp_getsockopt_disable_fragments(sk, len, optval, optlen); break; case SCTP_EVENTS: retval = sctp_getsockopt_events(sk, len, optval, optlen); break; case SCTP_AUTOCLOSE: retval = sctp_getsockopt_autoclose(sk, len, optval, optlen); break; case SCTP_SOCKOPT_PEELOFF: retval = sctp_getsockopt_peeloff(sk, len, optval, optlen); break; case SCTP_SOCKOPT_PEELOFF_FLAGS: retval = sctp_getsockopt_peeloff_flags(sk, len, optval, optlen); break; case SCTP_PEER_ADDR_PARAMS: retval = sctp_getsockopt_peer_addr_params(sk, len, optval, optlen); break; case SCTP_DELAYED_SACK: retval = sctp_getsockopt_delayed_ack(sk, len, optval, optlen); break; case SCTP_INITMSG: retval = sctp_getsockopt_initmsg(sk, len, optval, optlen); break; case SCTP_GET_PEER_ADDRS: retval = sctp_getsockopt_peer_addrs(sk, len, optval, optlen); break; case SCTP_GET_LOCAL_ADDRS: retval = sctp_getsockopt_local_addrs(sk, len, optval, optlen); break; case SCTP_SOCKOPT_CONNECTX3: retval = sctp_getsockopt_connectx3(sk, len, optval, optlen); break; case SCTP_DEFAULT_SEND_PARAM: retval = sctp_getsockopt_default_send_param(sk, len, optval, optlen); break; case SCTP_DEFAULT_SNDINFO: retval = sctp_getsockopt_default_sndinfo(sk, len, optval, optlen); break; case SCTP_PRIMARY_ADDR: retval = sctp_getsockopt_primary_addr(sk, len, optval, optlen); break; case SCTP_NODELAY: retval = sctp_getsockopt_nodelay(sk, len, optval, optlen); break; case SCTP_RTOINFO: retval = sctp_getsockopt_rtoinfo(sk, len, optval, optlen); break; case SCTP_ASSOCINFO: retval = sctp_getsockopt_associnfo(sk, len, optval, optlen); break; case SCTP_I_WANT_MAPPED_V4_ADDR: retval = sctp_getsockopt_mappedv4(sk, len, optval, optlen); break; case SCTP_MAXSEG: retval = sctp_getsockopt_maxseg(sk, len, optval, optlen); break; case SCTP_GET_PEER_ADDR_INFO: retval = sctp_getsockopt_peer_addr_info(sk, len, optval, optlen); break; case SCTP_ADAPTATION_LAYER: retval = sctp_getsockopt_adaptation_layer(sk, len, optval, optlen); break; case SCTP_CONTEXT: retval = sctp_getsockopt_context(sk, len, optval, optlen); break; case SCTP_FRAGMENT_INTERLEAVE: retval = sctp_getsockopt_fragment_interleave(sk, len, optval, optlen); break; case SCTP_PARTIAL_DELIVERY_POINT: retval = sctp_getsockopt_partial_delivery_point(sk, len, optval, optlen); break; case SCTP_MAX_BURST: retval = sctp_getsockopt_maxburst(sk, len, optval, optlen); break; case SCTP_AUTH_KEY: case SCTP_AUTH_CHUNK: case SCTP_AUTH_DELETE_KEY: case SCTP_AUTH_DEACTIVATE_KEY: retval = -EOPNOTSUPP; break; case SCTP_HMAC_IDENT: retval = sctp_getsockopt_hmac_ident(sk, len, optval, optlen); break; case SCTP_AUTH_ACTIVE_KEY: retval = sctp_getsockopt_active_key(sk, len, optval, optlen); break; case SCTP_PEER_AUTH_CHUNKS: retval = sctp_getsockopt_peer_auth_chunks(sk, len, optval, optlen); break; case SCTP_LOCAL_AUTH_CHUNKS: retval = sctp_getsockopt_local_auth_chunks(sk, len, optval, optlen); break; case SCTP_GET_ASSOC_NUMBER: retval = sctp_getsockopt_assoc_number(sk, len, optval, optlen); break; case SCTP_GET_ASSOC_ID_LIST: retval = sctp_getsockopt_assoc_ids(sk, len, optval, optlen); break; case SCTP_AUTO_ASCONF: retval = sctp_getsockopt_auto_asconf(sk, len, optval, optlen); break; case SCTP_PEER_ADDR_THLDS: retval = sctp_getsockopt_paddr_thresholds(sk, optval, len, optlen, false); break; case SCTP_PEER_ADDR_THLDS_V2: retval = sctp_getsockopt_paddr_thresholds(sk, optval, len, optlen, true); break; case SCTP_GET_ASSOC_STATS: retval = sctp_getsockopt_assoc_stats(sk, len, optval, optlen); break; case SCTP_RECVRCVINFO: retval = sctp_getsockopt_recvrcvinfo(sk, len, optval, optlen); break; case SCTP_RECVNXTINFO: retval = sctp_getsockopt_recvnxtinfo(sk, len, optval, optlen); break; case SCTP_PR_SUPPORTED: retval = sctp_getsockopt_pr_supported(sk, len, optval, optlen); break; case SCTP_DEFAULT_PRINFO: retval = sctp_getsockopt_default_prinfo(sk, len, optval, optlen); break; case SCTP_PR_ASSOC_STATUS: retval = sctp_getsockopt_pr_assocstatus(sk, len, optval, optlen); break; case SCTP_PR_STREAM_STATUS: retval = sctp_getsockopt_pr_streamstatus(sk, len, optval, optlen); break; case SCTP_RECONFIG_SUPPORTED: retval = sctp_getsockopt_reconfig_supported(sk, len, optval, optlen); break; case SCTP_ENABLE_STREAM_RESET: retval = sctp_getsockopt_enable_strreset(sk, len, optval, optlen); break; case SCTP_STREAM_SCHEDULER: retval = sctp_getsockopt_scheduler(sk, len, optval, optlen); break; case SCTP_STREAM_SCHEDULER_VALUE: retval = sctp_getsockopt_scheduler_value(sk, len, optval, optlen); break; case SCTP_INTERLEAVING_SUPPORTED: retval = sctp_getsockopt_interleaving_supported(sk, len, optval, optlen); break; case SCTP_REUSE_PORT: retval = sctp_getsockopt_reuse_port(sk, len, optval, optlen); break; case SCTP_EVENT: retval = sctp_getsockopt_event(sk, len, optval, optlen); break; case SCTP_ASCONF_SUPPORTED: retval = sctp_getsockopt_asconf_supported(sk, len, optval, optlen); break; case SCTP_AUTH_SUPPORTED: retval = sctp_getsockopt_auth_supported(sk, len, optval, optlen); break; case SCTP_ECN_SUPPORTED: retval = sctp_getsockopt_ecn_supported(sk, len, optval, optlen); break; case SCTP_EXPOSE_POTENTIALLY_FAILED_STATE: retval = sctp_getsockopt_pf_expose(sk, len, optval, optlen); break; case SCTP_REMOTE_UDP_ENCAPS_PORT: retval = sctp_getsockopt_encap_port(sk, len, optval, optlen); break; case SCTP_PLPMTUD_PROBE_INTERVAL: retval = sctp_getsockopt_probe_interval(sk, len, optval, optlen); break; default: retval = -ENOPROTOOPT; break; } release_sock(sk); return retval; } static bool sctp_bpf_bypass_getsockopt(int level, int optname) { if (level == SOL_SCTP) { switch (optname) { case SCTP_SOCKOPT_PEELOFF: case SCTP_SOCKOPT_PEELOFF_FLAGS: case SCTP_SOCKOPT_CONNECTX3: return true; default: return false; } } return false; } static int sctp_hash(struct sock *sk) { /* STUB */ return 0; } static void sctp_unhash(struct sock *sk) { /* STUB */ } /* Check if port is acceptable. Possibly find first available port. * * The port hash table (contained in the 'global' SCTP protocol storage * returned by struct sctp_protocol *sctp_get_protocol()). The hash * table is an array of 4096 lists (sctp_bind_hashbucket). Each * list (the list number is the port number hashed out, so as you * would expect from a hash function, all the ports in a given list have * such a number that hashes out to the same list number; you were * expecting that, right?); so each list has a set of ports, with a * link to the socket (struct sock) that uses it, the port number and * a fastreuse flag (FIXME: NPI ipg). */ static struct sctp_bind_bucket *sctp_bucket_create( struct sctp_bind_hashbucket *head, struct net *, unsigned short snum); static int sctp_get_port_local(struct sock *sk, union sctp_addr *addr) { struct sctp_sock *sp = sctp_sk(sk); bool reuse = (sk->sk_reuse || sp->reuse); struct sctp_bind_hashbucket *head; /* hash list */ struct net *net = sock_net(sk); kuid_t uid = sock_i_uid(sk); struct sctp_bind_bucket *pp; unsigned short snum; int ret; snum = ntohs(addr->v4.sin_port); pr_debug("%s: begins, snum:%d\n", __func__, snum); if (snum == 0) { /* Search for an available port. */ int low, high, remaining, index; unsigned int rover; inet_sk_get_local_port_range(sk, &low, &high); remaining = (high - low) + 1; rover = get_random_u32_below(remaining) + low; do { rover++; if ((rover < low) || (rover > high)) rover = low; if (inet_is_local_reserved_port(net, rover)) continue; index = sctp_phashfn(net, rover); head = &sctp_port_hashtable[index]; spin_lock_bh(&head->lock); sctp_for_each_hentry(pp, &head->chain) if ((pp->port == rover) && net_eq(net, pp->net)) goto next; break; next: spin_unlock_bh(&head->lock); cond_resched(); } while (--remaining > 0); /* Exhausted local port range during search? */ ret = 1; if (remaining <= 0) return ret; /* OK, here is the one we will use. HEAD (the port * hash table list entry) is non-NULL and we hold it's * mutex. */ snum = rover; } else { /* We are given an specific port number; we verify * that it is not being used. If it is used, we will * exahust the search in the hash list corresponding * to the port number (snum) - we detect that with the * port iterator, pp being NULL. */ head = &sctp_port_hashtable[sctp_phashfn(net, snum)]; spin_lock_bh(&head->lock); sctp_for_each_hentry(pp, &head->chain) { if ((pp->port == snum) && net_eq(pp->net, net)) goto pp_found; } } pp = NULL; goto pp_not_found; pp_found: if (!hlist_empty(&pp->owner)) { /* We had a port hash table hit - there is an * available port (pp != NULL) and it is being * used by other socket (pp->owner not empty); that other * socket is going to be sk2. */ struct sock *sk2; pr_debug("%s: found a possible match\n", __func__); if ((pp->fastreuse && reuse && sk->sk_state != SCTP_SS_LISTENING) || (pp->fastreuseport && sk->sk_reuseport && uid_eq(pp->fastuid, uid))) goto success; /* Run through the list of sockets bound to the port * (pp->port) [via the pointers bind_next and * bind_pprev in the struct sock *sk2 (pp->sk)]. On each one, * we get the endpoint they describe and run through * the endpoint's list of IP (v4 or v6) addresses, * comparing each of the addresses with the address of * the socket sk. If we find a match, then that means * that this port/socket (sk) combination are already * in an endpoint. */ sk_for_each_bound(sk2, &pp->owner) { int bound_dev_if2 = READ_ONCE(sk2->sk_bound_dev_if); struct sctp_sock *sp2 = sctp_sk(sk2); struct sctp_endpoint *ep2 = sp2->ep; if (sk == sk2 || (reuse && (sk2->sk_reuse || sp2->reuse) && sk2->sk_state != SCTP_SS_LISTENING) || (sk->sk_reuseport && sk2->sk_reuseport && uid_eq(uid, sock_i_uid(sk2)))) continue; if ((!sk->sk_bound_dev_if || !bound_dev_if2 || sk->sk_bound_dev_if == bound_dev_if2) && sctp_bind_addr_conflict(&ep2->base.bind_addr, addr, sp2, sp)) { ret = 1; goto fail_unlock; } } pr_debug("%s: found a match\n", __func__); } pp_not_found: /* If there was a hash table miss, create a new port. */ ret = 1; if (!pp && !(pp = sctp_bucket_create(head, net, snum))) goto fail_unlock; /* In either case (hit or miss), make sure fastreuse is 1 only * if sk->sk_reuse is too (that is, if the caller requested * SO_REUSEADDR on this socket -sk-). */ if (hlist_empty(&pp->owner)) { if (reuse && sk->sk_state != SCTP_SS_LISTENING) pp->fastreuse = 1; else pp->fastreuse = 0; if (sk->sk_reuseport) { pp->fastreuseport = 1; pp->fastuid = uid; } else { pp->fastreuseport = 0; } } else { if (pp->fastreuse && (!reuse || sk->sk_state == SCTP_SS_LISTENING)) pp->fastreuse = 0; if (pp->fastreuseport && (!sk->sk_reuseport || !uid_eq(pp->fastuid, uid))) pp->fastreuseport = 0; } /* We are set, so fill up all the data in the hash table * entry, tie the socket list information with the rest of the * sockets FIXME: Blurry, NPI (ipg). */ success: if (!sp->bind_hash) { inet_sk(sk)->inet_num = snum; sk_add_bind_node(sk, &pp->owner); sp->bind_hash = pp; } ret = 0; fail_unlock: spin_unlock_bh(&head->lock); return ret; } /* Assign a 'snum' port to the socket. If snum == 0, an ephemeral * port is requested. */ static int sctp_get_port(struct sock *sk, unsigned short snum) { union sctp_addr addr; struct sctp_af *af = sctp_sk(sk)->pf->af; /* Set up a dummy address struct from the sk. */ af->from_sk(&addr, sk); addr.v4.sin_port = htons(snum); /* Note: sk->sk_num gets filled in if ephemeral port request. */ return sctp_get_port_local(sk, &addr); } /* * Move a socket to LISTENING state. */ static int sctp_listen_start(struct sock *sk, int backlog) { struct sctp_sock *sp = sctp_sk(sk); struct sctp_endpoint *ep = sp->ep; struct crypto_shash *tfm = NULL; char alg[32]; int err; /* Allocate HMAC for generating cookie. */ if (!sp->hmac && sp->sctp_hmac_alg) { sprintf(alg, "hmac(%s)", sp->sctp_hmac_alg); tfm = crypto_alloc_shash(alg, 0, 0); if (IS_ERR(tfm)) { net_info_ratelimited("failed to load transform for %s: %ld\n", sp->sctp_hmac_alg, PTR_ERR(tfm)); return -ENOSYS; } sctp_sk(sk)->hmac = tfm; } /* * If a bind() or sctp_bindx() is not called prior to a listen() * call that allows new associations to be accepted, the system * picks an ephemeral port and will choose an address set equivalent * to binding with a wildcard address. * * This is not currently spelled out in the SCTP sockets * extensions draft, but follows the practice as seen in TCP * sockets. * */ inet_sk_set_state(sk, SCTP_SS_LISTENING); if (!ep->base.bind_addr.port) { if (sctp_autobind(sk)) { err = -EAGAIN; goto err; } } else { if (sctp_get_port(sk, inet_sk(sk)->inet_num)) { err = -EADDRINUSE; goto err; } } WRITE_ONCE(sk->sk_max_ack_backlog, backlog); err = sctp_hash_endpoint(ep); if (err) goto err; return 0; err: inet_sk_set_state(sk, SCTP_SS_CLOSED); return err; } /* * 4.1.3 / 5.1.3 listen() * * By default, new associations are not accepted for UDP style sockets. * An application uses listen() to mark a socket as being able to * accept new associations. * * On TCP style sockets, applications use listen() to ready the SCTP * endpoint for accepting inbound associations. * * On both types of endpoints a backlog of '0' disables listening. * * Move a socket to LISTENING state. */ int sctp_inet_listen(struct socket *sock, int backlog) { struct sock *sk = sock->sk; struct sctp_endpoint *ep = sctp_sk(sk)->ep; int err = -EINVAL; if (unlikely(backlog < 0)) return err; lock_sock(sk); /* Peeled-off sockets are not allowed to listen(). */ if (sctp_style(sk, UDP_HIGH_BANDWIDTH)) goto out; if (sock->state != SS_UNCONNECTED) goto out; if (!sctp_sstate(sk, LISTENING) && !sctp_sstate(sk, CLOSED)) goto out; /* If backlog is zero, disable listening. */ if (!backlog) { if (sctp_sstate(sk, CLOSED)) goto out; err = 0; sctp_unhash_endpoint(ep); sk->sk_state = SCTP_SS_CLOSED; if (sk->sk_reuse || sctp_sk(sk)->reuse) sctp_sk(sk)->bind_hash->fastreuse = 1; goto out; } /* If we are already listening, just update the backlog */ if (sctp_sstate(sk, LISTENING)) WRITE_ONCE(sk->sk_max_ack_backlog, backlog); else { err = sctp_listen_start(sk, backlog); if (err) goto out; } err = 0; out: release_sock(sk); return err; } /* * This function is done by modeling the current datagram_poll() and the * tcp_poll(). Note that, based on these implementations, we don't * lock the socket in this function, even though it seems that, * ideally, locking or some other mechanisms can be used to ensure * the integrity of the counters (sndbuf and wmem_alloc) used * in this place. We assume that we don't need locks either until proven * otherwise. * * Another thing to note is that we include the Async I/O support * here, again, by modeling the current TCP/UDP code. We don't have * a good way to test with it yet. */ __poll_t sctp_poll(struct file *file, struct socket *sock, poll_table *wait) { struct sock *sk = sock->sk; struct sctp_sock *sp = sctp_sk(sk); __poll_t mask; poll_wait(file, sk_sleep(sk), wait); sock_rps_record_flow(sk); /* A TCP-style listening socket becomes readable when the accept queue * is not empty. */ if (sctp_style(sk, TCP) && sctp_sstate(sk, LISTENING)) return (!list_empty(&sp->ep->asocs)) ? (EPOLLIN | EPOLLRDNORM) : 0; mask = 0; /* Is there any exceptional events? */ if (sk->sk_err || !skb_queue_empty_lockless(&sk->sk_error_queue)) mask |= EPOLLERR | (sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? EPOLLPRI : 0); if (sk->sk_shutdown & RCV_SHUTDOWN) mask |= EPOLLRDHUP | EPOLLIN | EPOLLRDNORM; if (sk->sk_shutdown == SHUTDOWN_MASK) mask |= EPOLLHUP; /* Is it readable? Reconsider this code with TCP-style support. */ if (!skb_queue_empty_lockless(&sk->sk_receive_queue)) mask |= EPOLLIN | EPOLLRDNORM; /* The association is either gone or not ready. */ if (!sctp_style(sk, UDP) && sctp_sstate(sk, CLOSED)) return mask; /* Is it writable? */ if (sctp_writeable(sk)) { mask |= EPOLLOUT | EPOLLWRNORM; } else { sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk); /* * Since the socket is not locked, the buffer * might be made available after the writeable check and * before the bit is set. This could cause a lost I/O * signal. tcp_poll() has a race breaker for this race * condition. Based on their implementation, we put * in the following code to cover it as well. */ if (sctp_writeable(sk)) mask |= EPOLLOUT | EPOLLWRNORM; } return mask; } /******************************************************************** * 2nd Level Abstractions ********************************************************************/ static struct sctp_bind_bucket *sctp_bucket_create( struct sctp_bind_hashbucket *head, struct net *net, unsigned short snum) { struct sctp_bind_bucket *pp; pp = kmem_cache_alloc(sctp_bucket_cachep, GFP_ATOMIC); if (pp) { SCTP_DBG_OBJCNT_INC(bind_bucket); pp->port = snum; pp->fastreuse = 0; INIT_HLIST_HEAD(&pp->owner); pp->net = net; hlist_add_head(&pp->node, &head->chain); } return pp; } /* Caller must hold hashbucket lock for this tb with local BH disabled */ static void sctp_bucket_destroy(struct sctp_bind_bucket *pp) { if (pp && hlist_empty(&pp->owner)) { __hlist_del(&pp->node); kmem_cache_free(sctp_bucket_cachep, pp); SCTP_DBG_OBJCNT_DEC(bind_bucket); } } /* Release this socket's reference to a local port. */ static inline void __sctp_put_port(struct sock *sk) { struct sctp_bind_hashbucket *head = &sctp_port_hashtable[sctp_phashfn(sock_net(sk), inet_sk(sk)->inet_num)]; struct sctp_bind_bucket *pp; spin_lock(&head->lock); pp = sctp_sk(sk)->bind_hash; __sk_del_bind_node(sk); sctp_sk(sk)->bind_hash = NULL; inet_sk(sk)->inet_num = 0; sctp_bucket_destroy(pp); spin_unlock(&head->lock); } void sctp_put_port(struct sock *sk) { local_bh_disable(); __sctp_put_port(sk); local_bh_enable(); } /* * The system picks an ephemeral port and choose an address set equivalent * to binding with a wildcard address. * One of those addresses will be the primary address for the association. * This automatically enables the multihoming capability of SCTP. */ static int sctp_autobind(struct sock *sk) { union sctp_addr autoaddr; struct sctp_af *af; __be16 port; /* Initialize a local sockaddr structure to INADDR_ANY. */ af = sctp_sk(sk)->pf->af; port = htons(inet_sk(sk)->inet_num); af->inaddr_any(&autoaddr, port); return sctp_do_bind(sk, &autoaddr, af->sockaddr_len); } /* Parse out IPPROTO_SCTP CMSG headers. Perform only minimal validation. * * From RFC 2292 * 4.2 The cmsghdr Structure * * * When ancillary data is sent or received, any number of ancillary data * objects can be specified by the msg_control and msg_controllen members of * the msghdr structure, because each object is preceded by * a cmsghdr structure defining the object's length (the cmsg_len member). * Historically Berkeley-derived implementations have passed only one object * at a time, but this API allows multiple objects to be * passed in a single call to sendmsg() or recvmsg(). The following example * shows two ancillary data objects in a control buffer. * * |<--------------------------- msg_controllen -------------------------->| * | | * * |<----- ancillary data object ----->|<----- ancillary data object ----->| * * |<---------- CMSG_SPACE() --------->|<---------- CMSG_SPACE() --------->| * | | | * * |<---------- cmsg_len ---------->| |<--------- cmsg_len ----------->| | * * |<--------- CMSG_LEN() --------->| |<-------- CMSG_LEN() ---------->| | * | | | | | * * +-----+-----+-----+--+-----------+--+-----+-----+-----+--+-----------+--+ * |cmsg_|cmsg_|cmsg_|XX| |XX|cmsg_|cmsg_|cmsg_|XX| |XX| * * |len |level|type |XX|cmsg_data[]|XX|len |level|type |XX|cmsg_data[]|XX| * * +-----+-----+-----+--+-----------+--+-----+-----+-----+--+-----------+--+ * ^ * | * * msg_control * points here */ static int sctp_msghdr_parse(const struct msghdr *msg, struct sctp_cmsgs *cmsgs) { struct msghdr *my_msg = (struct msghdr *)msg; struct cmsghdr *cmsg; for_each_cmsghdr(cmsg, my_msg) { if (!CMSG_OK(my_msg, cmsg)) return -EINVAL; /* Should we parse this header or ignore? */ if (cmsg->cmsg_level != IPPROTO_SCTP) continue; /* Strictly check lengths following example in SCM code. */ switch (cmsg->cmsg_type) { case SCTP_INIT: /* SCTP Socket API Extension * 5.3.1 SCTP Initiation Structure (SCTP_INIT) * * This cmsghdr structure provides information for * initializing new SCTP associations with sendmsg(). * The SCTP_INITMSG socket option uses this same data * structure. This structure is not used for * recvmsg(). * * cmsg_level cmsg_type cmsg_data[] * ------------ ------------ ---------------------- * IPPROTO_SCTP SCTP_INIT struct sctp_initmsg */ if (cmsg->cmsg_len != CMSG_LEN(sizeof(struct sctp_initmsg))) return -EINVAL; cmsgs->init = CMSG_DATA(cmsg); break; case SCTP_SNDRCV: /* SCTP Socket API Extension * 5.3.2 SCTP Header Information Structure(SCTP_SNDRCV) * * This cmsghdr structure specifies SCTP options for * sendmsg() and describes SCTP header information * about a received message through recvmsg(). * * cmsg_level cmsg_type cmsg_data[] * ------------ ------------ ---------------------- * IPPROTO_SCTP SCTP_SNDRCV struct sctp_sndrcvinfo */ if (cmsg->cmsg_len != CMSG_LEN(sizeof(struct sctp_sndrcvinfo))) return -EINVAL; cmsgs->srinfo = CMSG_DATA(cmsg); if (cmsgs->srinfo->sinfo_flags & ~(SCTP_UNORDERED | SCTP_ADDR_OVER | SCTP_SACK_IMMEDIATELY | SCTP_SENDALL | SCTP_PR_SCTP_MASK | SCTP_ABORT | SCTP_EOF)) return -EINVAL; break; case SCTP_SNDINFO: /* SCTP Socket API Extension * 5.3.4 SCTP Send Information Structure (SCTP_SNDINFO) * * This cmsghdr structure specifies SCTP options for * sendmsg(). This structure and SCTP_RCVINFO replaces * SCTP_SNDRCV which has been deprecated. * * cmsg_level cmsg_type cmsg_data[] * ------------ ------------ --------------------- * IPPROTO_SCTP SCTP_SNDINFO struct sctp_sndinfo */ if (cmsg->cmsg_len != CMSG_LEN(sizeof(struct sctp_sndinfo))) return -EINVAL; cmsgs->sinfo = CMSG_DATA(cmsg); if (cmsgs->sinfo->snd_flags & ~(SCTP_UNORDERED | SCTP_ADDR_OVER | SCTP_SACK_IMMEDIATELY | SCTP_SENDALL | SCTP_PR_SCTP_MASK | SCTP_ABORT | SCTP_EOF)) return -EINVAL; break; case SCTP_PRINFO: /* SCTP Socket API Extension * 5.3.7 SCTP PR-SCTP Information Structure (SCTP_PRINFO) * * This cmsghdr structure specifies SCTP options for sendmsg(). * * cmsg_level cmsg_type cmsg_data[] * ------------ ------------ --------------------- * IPPROTO_SCTP SCTP_PRINFO struct sctp_prinfo */ if (cmsg->cmsg_len != CMSG_LEN(sizeof(struct sctp_prinfo))) return -EINVAL; cmsgs->prinfo = CMSG_DATA(cmsg); if (cmsgs->prinfo->pr_policy & ~SCTP_PR_SCTP_MASK) return -EINVAL; if (cmsgs->prinfo->pr_policy == SCTP_PR_SCTP_NONE) cmsgs->prinfo->pr_value = 0; break; case SCTP_AUTHINFO: /* SCTP Socket API Extension * 5.3.8 SCTP AUTH Information Structure (SCTP_AUTHINFO) * * This cmsghdr structure specifies SCTP options for sendmsg(). * * cmsg_level cmsg_type cmsg_data[] * ------------ ------------ --------------------- * IPPROTO_SCTP SCTP_AUTHINFO struct sctp_authinfo */ if (cmsg->cmsg_len != CMSG_LEN(sizeof(struct sctp_authinfo))) return -EINVAL; cmsgs->authinfo = CMSG_DATA(cmsg); break; case SCTP_DSTADDRV4: case SCTP_DSTADDRV6: /* SCTP Socket API Extension * 5.3.9/10 SCTP Destination IPv4/6 Address Structure (SCTP_DSTADDRV4/6) * * This cmsghdr structure specifies SCTP options for sendmsg(). * * cmsg_level cmsg_type cmsg_data[] * ------------ ------------ --------------------- * IPPROTO_SCTP SCTP_DSTADDRV4 struct in_addr * ------------ ------------ --------------------- * IPPROTO_SCTP SCTP_DSTADDRV6 struct in6_addr */ cmsgs->addrs_msg = my_msg; break; default: return -EINVAL; } } return 0; } /* * Wait for a packet.. * Note: This function is the same function as in core/datagram.c * with a few modifications to make lksctp work. */ static int sctp_wait_for_packet(struct sock *sk, int *err, long *timeo_p) { int error; DEFINE_WAIT(wait); prepare_to_wait_exclusive(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); /* Socket errors? */ error = sock_error(sk); if (error) goto out; if (!skb_queue_empty(&sk->sk_receive_queue)) goto ready; /* Socket shut down? */ if (sk->sk_shutdown & RCV_SHUTDOWN) goto out; /* Sequenced packets can come disconnected. If so we report the * problem. */ error = -ENOTCONN; /* Is there a good reason to think that we may receive some data? */ if (list_empty(&sctp_sk(sk)->ep->asocs) && !sctp_sstate(sk, LISTENING)) goto out; /* Handle signals. */ if (signal_pending(current)) goto interrupted; /* Let another process have a go. Since we are going to sleep * anyway. Note: This may cause odd behaviors if the message * does not fit in the user's buffer, but this seems to be the * only way to honor MSG_DONTWAIT realistically. */ release_sock(sk); *timeo_p = schedule_timeout(*timeo_p); lock_sock(sk); ready: finish_wait(sk_sleep(sk), &wait); return 0; interrupted: error = sock_intr_errno(*timeo_p); out: finish_wait(sk_sleep(sk), &wait); *err = error; return error; } /* Receive a datagram. * Note: This is pretty much the same routine as in core/datagram.c * with a few changes to make lksctp work. */ struct sk_buff *sctp_skb_recv_datagram(struct sock *sk, int flags, int *err) { int error; struct sk_buff *skb; long timeo; timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT); pr_debug("%s: timeo:%ld, max:%ld\n", __func__, timeo, MAX_SCHEDULE_TIMEOUT); do { /* Again only user level code calls this function, * so nothing interrupt level * will suddenly eat the receive_queue. * * Look at current nfs client by the way... * However, this function was correct in any case. 8) */ if (flags & MSG_PEEK) { skb = skb_peek(&sk->sk_receive_queue); if (skb) refcount_inc(&skb->users); } else { skb = __skb_dequeue(&sk->sk_receive_queue); } if (skb) return skb; /* Caller is allowed not to check sk->sk_err before calling. */ error = sock_error(sk); if (error) goto no_packet; if (sk->sk_shutdown & RCV_SHUTDOWN) break; /* User doesn't want to wait. */ error = -EAGAIN; if (!timeo) goto no_packet; } while (sctp_wait_for_packet(sk, err, &timeo) == 0); return NULL; no_packet: *err = error; return NULL; } /* If sndbuf has changed, wake up per association sndbuf waiters. */ static void __sctp_write_space(struct sctp_association *asoc) { struct sock *sk = asoc->base.sk; if (sctp_wspace(asoc) <= 0) return; if (waitqueue_active(&asoc->wait)) wake_up_interruptible(&asoc->wait); if (sctp_writeable(sk)) { struct socket_wq *wq; rcu_read_lock(); wq = rcu_dereference(sk->sk_wq); if (wq) { if (waitqueue_active(&wq->wait)) wake_up_interruptible(&wq->wait); /* Note that we try to include the Async I/O support * here by modeling from the current TCP/UDP code. * We have not tested with it yet. */ if (!(sk->sk_shutdown & SEND_SHUTDOWN)) sock_wake_async(wq, SOCK_WAKE_SPACE, POLL_OUT); } rcu_read_unlock(); } } static void sctp_wake_up_waiters(struct sock *sk, struct sctp_association *asoc) { struct sctp_association *tmp = asoc; /* We do accounting for the sndbuf space per association, * so we only need to wake our own association. */ if (asoc->ep->sndbuf_policy) return __sctp_write_space(asoc); /* If association goes down and is just flushing its * outq, then just normally notify others. */ if (asoc->base.dead) return sctp_write_space(sk); /* Accounting for the sndbuf space is per socket, so we * need to wake up others, try to be fair and in case of * other associations, let them have a go first instead * of just doing a sctp_write_space() call. * * Note that we reach sctp_wake_up_waiters() only when * associations free up queued chunks, thus we are under * lock and the list of associations on a socket is * guaranteed not to change. */ for (tmp = list_next_entry(tmp, asocs); 1; tmp = list_next_entry(tmp, asocs)) { /* Manually skip the head element. */ if (&tmp->asocs == &((sctp_sk(sk))->ep->asocs)) continue; /* Wake up association. */ __sctp_write_space(tmp); /* We've reached the end. */ if (tmp == asoc) break; } } /* Do accounting for the sndbuf space. * Decrement the used sndbuf space of the corresponding association by the * data size which was just transmitted(freed). */ static void sctp_wfree(struct sk_buff *skb) { struct sctp_chunk *chunk = skb_shinfo(skb)->destructor_arg; struct sctp_association *asoc = chunk->asoc; struct sock *sk = asoc->base.sk; sk_mem_uncharge(sk, skb->truesize); sk_wmem_queued_add(sk, -(skb->truesize + sizeof(struct sctp_chunk))); asoc->sndbuf_used -= skb->truesize + sizeof(struct sctp_chunk); WARN_ON(refcount_sub_and_test(sizeof(struct sctp_chunk), &sk->sk_wmem_alloc)); if (chunk->shkey) { struct sctp_shared_key *shkey = chunk->shkey; /* refcnt == 2 and !list_empty mean after this release, it's * not being used anywhere, and it's time to notify userland * that this shkey can be freed if it's been deactivated. */ if (shkey->deactivated && !list_empty(&shkey->key_list) && refcount_read(&shkey->refcnt) == 2) { struct sctp_ulpevent *ev; ev = sctp_ulpevent_make_authkey(asoc, shkey->key_id, SCTP_AUTH_FREE_KEY, GFP_KERNEL); if (ev) asoc->stream.si->enqueue_event(&asoc->ulpq, ev); } sctp_auth_shkey_release(chunk->shkey); } sock_wfree(skb); sctp_wake_up_waiters(sk, asoc); sctp_association_put(asoc); } /* Do accounting for the receive space on the socket. * Accounting for the association is done in ulpevent.c * We set this as a destructor for the cloned data skbs so that * accounting is done at the correct time. */ void sctp_sock_rfree(struct sk_buff *skb) { struct sock *sk = skb->sk; struct sctp_ulpevent *event = sctp_skb2event(skb); atomic_sub(event->rmem_len, &sk->sk_rmem_alloc); /* * Mimic the behavior of sock_rfree */ sk_mem_uncharge(sk, event->rmem_len); } /* Helper function to wait for space in the sndbuf. */ static int sctp_wait_for_sndbuf(struct sctp_association *asoc, long *timeo_p, size_t msg_len) { struct sock *sk = asoc->base.sk; long current_timeo = *timeo_p; DEFINE_WAIT(wait); int err = 0; pr_debug("%s: asoc:%p, timeo:%ld, msg_len:%zu\n", __func__, asoc, *timeo_p, msg_len); /* Increment the association's refcnt. */ sctp_association_hold(asoc); /* Wait on the association specific sndbuf space. */ for (;;) { prepare_to_wait_exclusive(&asoc->wait, &wait, TASK_INTERRUPTIBLE); if (asoc->base.dead) goto do_dead; if (!*timeo_p) goto do_nonblock; if (sk->sk_err || asoc->state >= SCTP_STATE_SHUTDOWN_PENDING) goto do_error; if (signal_pending(current)) goto do_interrupted; if ((int)msg_len <= sctp_wspace(asoc) && sk_wmem_schedule(sk, msg_len)) break; /* Let another process have a go. Since we are going * to sleep anyway. */ release_sock(sk); current_timeo = schedule_timeout(current_timeo); lock_sock(sk); if (sk != asoc->base.sk) goto do_error; *timeo_p = current_timeo; } out: finish_wait(&asoc->wait, &wait); /* Release the association's refcnt. */ sctp_association_put(asoc); return err; do_dead: err = -ESRCH; goto out; do_error: err = -EPIPE; goto out; do_interrupted: err = sock_intr_errno(*timeo_p); goto out; do_nonblock: err = -EAGAIN; goto out; } void sctp_data_ready(struct sock *sk) { struct socket_wq *wq; trace_sk_data_ready(sk); rcu_read_lock(); wq = rcu_dereference(sk->sk_wq); if (skwq_has_sleeper(wq)) wake_up_interruptible_sync_poll(&wq->wait, EPOLLIN | EPOLLRDNORM | EPOLLRDBAND); sk_wake_async_rcu(sk, SOCK_WAKE_WAITD, POLL_IN); rcu_read_unlock(); } /* If socket sndbuf has changed, wake up all per association waiters. */ void sctp_write_space(struct sock *sk) { struct sctp_association *asoc; /* Wake up the tasks in each wait queue. */ list_for_each_entry(asoc, &((sctp_sk(sk))->ep->asocs), asocs) { __sctp_write_space(asoc); } } /* Is there any sndbuf space available on the socket? * * Note that sk_wmem_alloc is the sum of the send buffers on all of the * associations on the same socket. For a UDP-style socket with * multiple associations, it is possible for it to be "unwriteable" * prematurely. I assume that this is acceptable because * a premature "unwriteable" is better than an accidental "writeable" which * would cause an unwanted block under certain circumstances. For the 1-1 * UDP-style sockets or TCP-style sockets, this code should work. * - Daisy */ static bool sctp_writeable(const struct sock *sk) { return READ_ONCE(sk->sk_sndbuf) > READ_ONCE(sk->sk_wmem_queued); } /* Wait for an association to go into ESTABLISHED state. If timeout is 0, * returns immediately with EINPROGRESS. */ static int sctp_wait_for_connect(struct sctp_association *asoc, long *timeo_p) { struct sock *sk = asoc->base.sk; int err = 0; long current_timeo = *timeo_p; DEFINE_WAIT(wait); pr_debug("%s: asoc:%p, timeo:%ld\n", __func__, asoc, *timeo_p); /* Increment the association's refcnt. */ sctp_association_hold(asoc); for (;;) { prepare_to_wait_exclusive(&asoc->wait, &wait, TASK_INTERRUPTIBLE); if (!*timeo_p) goto do_nonblock; if (sk->sk_shutdown & RCV_SHUTDOWN) break; if (sk->sk_err || asoc->state >= SCTP_STATE_SHUTDOWN_PENDING || asoc->base.dead) goto do_error; if (signal_pending(current)) goto do_interrupted; if (sctp_state(asoc, ESTABLISHED)) break; /* Let another process have a go. Since we are going * to sleep anyway. */ release_sock(sk); current_timeo = schedule_timeout(current_timeo); lock_sock(sk); *timeo_p = current_timeo; } out: finish_wait(&asoc->wait, &wait); /* Release the association's refcnt. */ sctp_association_put(asoc); return err; do_error: if (asoc->init_err_counter + 1 > asoc->max_init_attempts) err = -ETIMEDOUT; else err = -ECONNREFUSED; goto out; do_interrupted: err = sock_intr_errno(*timeo_p); goto out; do_nonblock: err = -EINPROGRESS; goto out; } static int sctp_wait_for_accept(struct sock *sk, long timeo) { struct sctp_endpoint *ep; int err = 0; DEFINE_WAIT(wait); ep = sctp_sk(sk)->ep; for (;;) { prepare_to_wait_exclusive(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); if (list_empty(&ep->asocs)) { release_sock(sk); timeo = schedule_timeout(timeo); lock_sock(sk); } err = -EINVAL; if (!sctp_sstate(sk, LISTENING) || (sk->sk_shutdown & RCV_SHUTDOWN)) break; err = 0; if (!list_empty(&ep->asocs)) break; err = sock_intr_errno(timeo); if (signal_pending(current)) break; err = -EAGAIN; if (!timeo) break; } finish_wait(sk_sleep(sk), &wait); return err; } static void sctp_wait_for_close(struct sock *sk, long timeout) { DEFINE_WAIT(wait); do { prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); if (list_empty(&sctp_sk(sk)->ep->asocs)) break; release_sock(sk); timeout = schedule_timeout(timeout); lock_sock(sk); } while (!signal_pending(current) && timeout); finish_wait(sk_sleep(sk), &wait); } static void sctp_skb_set_owner_r_frag(struct sk_buff *skb, struct sock *sk) { struct sk_buff *frag; if (!skb->data_len) goto done; /* Don't forget the fragments. */ skb_walk_frags(skb, frag) sctp_skb_set_owner_r_frag(frag, sk); done: sctp_skb_set_owner_r(skb, sk); } void sctp_copy_sock(struct sock *newsk, struct sock *sk, struct sctp_association *asoc) { struct inet_sock *inet = inet_sk(sk); struct inet_sock *newinet; struct sctp_sock *sp = sctp_sk(sk); newsk->sk_type = sk->sk_type; newsk->sk_bound_dev_if = sk->sk_bound_dev_if; newsk->sk_flags = sk->sk_flags; newsk->sk_tsflags = sk->sk_tsflags; newsk->sk_no_check_tx = sk->sk_no_check_tx; newsk->sk_no_check_rx = sk->sk_no_check_rx; newsk->sk_reuse = sk->sk_reuse; sctp_sk(newsk)->reuse = sp->reuse; newsk->sk_shutdown = sk->sk_shutdown; newsk->sk_destruct = sk->sk_destruct; newsk->sk_family = sk->sk_family; newsk->sk_protocol = IPPROTO_SCTP; newsk->sk_backlog_rcv = sk->sk_prot->backlog_rcv; newsk->sk_sndbuf = sk->sk_sndbuf; newsk->sk_rcvbuf = sk->sk_rcvbuf; newsk->sk_lingertime = sk->sk_lingertime; newsk->sk_rcvtimeo = sk->sk_rcvtimeo; newsk->sk_sndtimeo = sk->sk_sndtimeo; newsk->sk_rxhash = sk->sk_rxhash; newinet = inet_sk(newsk); /* Initialize sk's sport, dport, rcv_saddr and daddr for * getsockname() and getpeername() */ newinet->inet_sport = inet->inet_sport; newinet->inet_saddr = inet->inet_saddr; newinet->inet_rcv_saddr = inet->inet_rcv_saddr; newinet->inet_dport = htons(asoc->peer.port); newinet->pmtudisc = inet->pmtudisc; atomic_set(&newinet->inet_id, get_random_u16()); newinet->uc_ttl = inet->uc_ttl; inet_set_bit(MC_LOOP, newsk); newinet->mc_ttl = 1; newinet->mc_index = 0; newinet->mc_list = NULL; if (newsk->sk_flags & SK_FLAGS_TIMESTAMP) net_enable_timestamp(); /* Set newsk security attributes from original sk and connection * security attribute from asoc. */ security_sctp_sk_clone(asoc, sk, newsk); } static inline void sctp_copy_descendant(struct sock *sk_to, const struct sock *sk_from) { size_t ancestor_size = sizeof(struct inet_sock); ancestor_size += sk_from->sk_prot->obj_size; ancestor_size -= offsetof(struct sctp_sock, pd_lobby); __inet_sk_copy_descendant(sk_to, sk_from, ancestor_size); } /* Populate the fields of the newsk from the oldsk and migrate the assoc * and its messages to the newsk. */ static int sctp_sock_migrate(struct sock *oldsk, struct sock *newsk, struct sctp_association *assoc, enum sctp_socket_type type) { struct sctp_sock *oldsp = sctp_sk(oldsk); struct sctp_sock *newsp = sctp_sk(newsk); struct sctp_bind_bucket *pp; /* hash list port iterator */ struct sctp_endpoint *newep = newsp->ep; struct sk_buff *skb, *tmp; struct sctp_ulpevent *event; struct sctp_bind_hashbucket *head; int err; /* Migrate socket buffer sizes and all the socket level options to the * new socket. */ newsk->sk_sndbuf = oldsk->sk_sndbuf; newsk->sk_rcvbuf = oldsk->sk_rcvbuf; /* Brute force copy old sctp opt. */ sctp_copy_descendant(newsk, oldsk); /* Restore the ep value that was overwritten with the above structure * copy. */ newsp->ep = newep; newsp->hmac = NULL; /* Hook this new socket in to the bind_hash list. */ head = &sctp_port_hashtable[sctp_phashfn(sock_net(oldsk), inet_sk(oldsk)->inet_num)]; spin_lock_bh(&head->lock); pp = sctp_sk(oldsk)->bind_hash; sk_add_bind_node(newsk, &pp->owner); sctp_sk(newsk)->bind_hash = pp; inet_sk(newsk)->inet_num = inet_sk(oldsk)->inet_num; spin_unlock_bh(&head->lock); /* Copy the bind_addr list from the original endpoint to the new * endpoint so that we can handle restarts properly */ err = sctp_bind_addr_dup(&newsp->ep->base.bind_addr, &oldsp->ep->base.bind_addr, GFP_KERNEL); if (err) return err; /* New ep's auth_hmacs should be set if old ep's is set, in case * that net->sctp.auth_enable has been changed to 0 by users and * new ep's auth_hmacs couldn't be set in sctp_endpoint_init(). */ if (oldsp->ep->auth_hmacs) { err = sctp_auth_init_hmacs(newsp->ep, GFP_KERNEL); if (err) return err; } sctp_auto_asconf_init(newsp); /* Move any messages in the old socket's receive queue that are for the * peeled off association to the new socket's receive queue. */ sctp_skb_for_each(skb, &oldsk->sk_receive_queue, tmp) { event = sctp_skb2event(skb); if (event->asoc == assoc) { __skb_unlink(skb, &oldsk->sk_receive_queue); __skb_queue_tail(&newsk->sk_receive_queue, skb); sctp_skb_set_owner_r_frag(skb, newsk); } } /* Clean up any messages pending delivery due to partial * delivery. Three cases: * 1) No partial deliver; no work. * 2) Peeling off partial delivery; keep pd_lobby in new pd_lobby. * 3) Peeling off non-partial delivery; move pd_lobby to receive_queue. */ atomic_set(&sctp_sk(newsk)->pd_mode, assoc->ulpq.pd_mode); if (atomic_read(&sctp_sk(oldsk)->pd_mode)) { struct sk_buff_head *queue; /* Decide which queue to move pd_lobby skbs to. */ if (assoc->ulpq.pd_mode) { queue = &newsp->pd_lobby; } else queue = &newsk->sk_receive_queue; /* Walk through the pd_lobby, looking for skbs that * need moved to the new socket. */ sctp_skb_for_each(skb, &oldsp->pd_lobby, tmp) { event = sctp_skb2event(skb); if (event->asoc == assoc) { __skb_unlink(skb, &oldsp->pd_lobby); __skb_queue_tail(queue, skb); sctp_skb_set_owner_r_frag(skb, newsk); } } /* Clear up any skbs waiting for the partial * delivery to finish. */ if (assoc->ulpq.pd_mode) sctp_clear_pd(oldsk, NULL); } sctp_for_each_rx_skb(assoc, newsk, sctp_skb_set_owner_r_frag); /* Set the type of socket to indicate that it is peeled off from the * original UDP-style socket or created with the accept() call on a * TCP-style socket.. */ newsp->type = type; /* Mark the new socket "in-use" by the user so that any packets * that may arrive on the association after we've moved it are * queued to the backlog. This prevents a potential race between * backlog processing on the old socket and new-packet processing * on the new socket. * * The caller has just allocated newsk so we can guarantee that other * paths won't try to lock it and then oldsk. */ lock_sock_nested(newsk, SINGLE_DEPTH_NESTING); sctp_for_each_tx_datachunk(assoc, true, sctp_clear_owner_w); sctp_assoc_migrate(assoc, newsk); sctp_for_each_tx_datachunk(assoc, false, sctp_set_owner_w); /* If the association on the newsk is already closed before accept() * is called, set RCV_SHUTDOWN flag. */ if (sctp_state(assoc, CLOSED) && sctp_style(newsk, TCP)) { inet_sk_set_state(newsk, SCTP_SS_CLOSED); newsk->sk_shutdown |= RCV_SHUTDOWN; } else { inet_sk_set_state(newsk, SCTP_SS_ESTABLISHED); } release_sock(newsk); return 0; } /* This proto struct describes the ULP interface for SCTP. */ struct proto sctp_prot = { .name = "SCTP", .owner = THIS_MODULE, .close = sctp_close, .disconnect = sctp_disconnect, .accept = sctp_accept, .ioctl = sctp_ioctl, .init = sctp_init_sock, .destroy = sctp_destroy_sock, .shutdown = sctp_shutdown, .setsockopt = sctp_setsockopt, .getsockopt = sctp_getsockopt, .bpf_bypass_getsockopt = sctp_bpf_bypass_getsockopt, .sendmsg = sctp_sendmsg, .recvmsg = sctp_recvmsg, .bind = sctp_bind, .bind_add = sctp_bind_add, .backlog_rcv = sctp_backlog_rcv, .hash = sctp_hash, .unhash = sctp_unhash, .no_autobind = true, .obj_size = sizeof(struct sctp_sock), .useroffset = offsetof(struct sctp_sock, subscribe), .usersize = offsetof(struct sctp_sock, initmsg) - offsetof(struct sctp_sock, subscribe) + sizeof_field(struct sctp_sock, initmsg), .sysctl_mem = sysctl_sctp_mem, .sysctl_rmem = sysctl_sctp_rmem, .sysctl_wmem = sysctl_sctp_wmem, .memory_pressure = &sctp_memory_pressure, .enter_memory_pressure = sctp_enter_memory_pressure, .memory_allocated = &sctp_memory_allocated, .per_cpu_fw_alloc = &sctp_memory_per_cpu_fw_alloc, .sockets_allocated = &sctp_sockets_allocated, }; #if IS_ENABLED(CONFIG_IPV6) static void sctp_v6_destruct_sock(struct sock *sk) { sctp_destruct_common(sk); inet6_sock_destruct(sk); } static int sctp_v6_init_sock(struct sock *sk) { int ret = sctp_init_sock(sk); if (!ret) sk->sk_destruct = sctp_v6_destruct_sock; return ret; } struct proto sctpv6_prot = { .name = "SCTPv6", .owner = THIS_MODULE, .close = sctp_close, .disconnect = sctp_disconnect, .accept = sctp_accept, .ioctl = sctp_ioctl, .init = sctp_v6_init_sock, .destroy = sctp_destroy_sock, .shutdown = sctp_shutdown, .setsockopt = sctp_setsockopt, .getsockopt = sctp_getsockopt, .bpf_bypass_getsockopt = sctp_bpf_bypass_getsockopt, .sendmsg = sctp_sendmsg, .recvmsg = sctp_recvmsg, .bind = sctp_bind, .bind_add = sctp_bind_add, .backlog_rcv = sctp_backlog_rcv, .hash = sctp_hash, .unhash = sctp_unhash, .no_autobind = true, .obj_size = sizeof(struct sctp6_sock), .ipv6_pinfo_offset = offsetof(struct sctp6_sock, inet6), .useroffset = offsetof(struct sctp6_sock, sctp.subscribe), .usersize = offsetof(struct sctp6_sock, sctp.initmsg) - offsetof(struct sctp6_sock, sctp.subscribe) + sizeof_field(struct sctp6_sock, sctp.initmsg), .sysctl_mem = sysctl_sctp_mem, .sysctl_rmem = sysctl_sctp_rmem, .sysctl_wmem = sysctl_sctp_wmem, .memory_pressure = &sctp_memory_pressure, .enter_memory_pressure = sctp_enter_memory_pressure, .memory_allocated = &sctp_memory_allocated, .per_cpu_fw_alloc = &sctp_memory_per_cpu_fw_alloc, .sockets_allocated = &sctp_sockets_allocated, }; #endif /* IS_ENABLED(CONFIG_IPV6) */
9851 9849 10 1 9 1 8 4 8 1 7 5 5 3 4 4 4 4 1 4 3 3 3 3 3 3 3 3 10 4 2 2 1 1 1 4 1740 1106 1102 1740 10 10 1 634 37 1739 1742 190 634 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 // SPDX-License-Identifier: GPL-2.0-or-later /* * xfrm_device.c - IPsec device offloading code. * * Copyright (c) 2015 secunet Security Networks AG * * Author: * Steffen Klassert <steffen.klassert@secunet.com> */ #include <linux/errno.h> #include <linux/module.h> #include <linux/netdevice.h> #include <linux/skbuff.h> #include <linux/slab.h> #include <linux/spinlock.h> #include <net/dst.h> #include <net/gso.h> #include <net/xfrm.h> #include <linux/notifier.h> #ifdef CONFIG_XFRM_OFFLOAD static void __xfrm_transport_prep(struct xfrm_state *x, struct sk_buff *skb, unsigned int hsize) { struct xfrm_offload *xo = xfrm_offload(skb); skb_reset_mac_len(skb); if (xo->flags & XFRM_GSO_SEGMENT) skb->transport_header -= x->props.header_len; pskb_pull(skb, skb_transport_offset(skb) + x->props.header_len); } static void __xfrm_mode_tunnel_prep(struct xfrm_state *x, struct sk_buff *skb, unsigned int hsize) { struct xfrm_offload *xo = xfrm_offload(skb); if (xo->flags & XFRM_GSO_SEGMENT) skb->transport_header = skb->network_header + hsize; skb_reset_mac_len(skb); pskb_pull(skb, skb->mac_len + x->props.header_len); } static void __xfrm_mode_beet_prep(struct xfrm_state *x, struct sk_buff *skb, unsigned int hsize) { struct xfrm_offload *xo = xfrm_offload(skb); int phlen = 0; if (xo->flags & XFRM_GSO_SEGMENT) skb->transport_header = skb->network_header + hsize; skb_reset_mac_len(skb); if (x->sel.family != AF_INET6) { phlen = IPV4_BEET_PHMAXLEN; if (x->outer_mode.family == AF_INET6) phlen += sizeof(struct ipv6hdr) - sizeof(struct iphdr); } pskb_pull(skb, skb->mac_len + hsize + (x->props.header_len - phlen)); } /* Adjust pointers into the packet when IPsec is done at layer2 */ static void xfrm_outer_mode_prep(struct xfrm_state *x, struct sk_buff *skb) { switch (x->outer_mode.encap) { case XFRM_MODE_TUNNEL: if (x->outer_mode.family == AF_INET) return __xfrm_mode_tunnel_prep(x, skb, sizeof(struct iphdr)); if (x->outer_mode.family == AF_INET6) return __xfrm_mode_tunnel_prep(x, skb, sizeof(struct ipv6hdr)); break; case XFRM_MODE_TRANSPORT: if (x->outer_mode.family == AF_INET) return __xfrm_transport_prep(x, skb, sizeof(struct iphdr)); if (x->outer_mode.family == AF_INET6) return __xfrm_transport_prep(x, skb, sizeof(struct ipv6hdr)); break; case XFRM_MODE_BEET: if (x->outer_mode.family == AF_INET) return __xfrm_mode_beet_prep(x, skb, sizeof(struct iphdr)); if (x->outer_mode.family == AF_INET6) return __xfrm_mode_beet_prep(x, skb, sizeof(struct ipv6hdr)); break; case XFRM_MODE_ROUTEOPTIMIZATION: case XFRM_MODE_IN_TRIGGER: break; } } static inline bool xmit_xfrm_check_overflow(struct sk_buff *skb) { struct xfrm_offload *xo = xfrm_offload(skb); __u32 seq = xo->seq.low; seq += skb_shinfo(skb)->gso_segs; if (unlikely(seq < xo->seq.low)) return true; return false; } struct sk_buff *validate_xmit_xfrm(struct sk_buff *skb, netdev_features_t features, bool *again) { int err; unsigned long flags; struct xfrm_state *x; struct softnet_data *sd; struct sk_buff *skb2, *nskb, *pskb = NULL; netdev_features_t esp_features = features; struct xfrm_offload *xo = xfrm_offload(skb); struct net_device *dev = skb->dev; struct sec_path *sp; if (!xo || (xo->flags & XFRM_XMIT)) return skb; if (!(features & NETIF_F_HW_ESP)) esp_features = features & ~(NETIF_F_SG | NETIF_F_CSUM_MASK); sp = skb_sec_path(skb); x = sp->xvec[sp->len - 1]; if (xo->flags & XFRM_GRO || x->xso.dir == XFRM_DEV_OFFLOAD_IN) return skb; /* The packet was sent to HW IPsec packet offload engine, * but to wrong device. Drop the packet, so it won't skip * XFRM stack. */ if (x->xso.type == XFRM_DEV_OFFLOAD_PACKET && x->xso.dev != dev) { kfree_skb(skb); dev_core_stats_tx_dropped_inc(dev); return NULL; } /* This skb was already validated on the upper/virtual dev */ if ((x->xso.dev != dev) && (x->xso.real_dev == dev)) return skb; local_irq_save(flags); sd = this_cpu_ptr(&softnet_data); err = !skb_queue_empty(&sd->xfrm_backlog); local_irq_restore(flags); if (err) { *again = true; return skb; } if (skb_is_gso(skb) && (unlikely(x->xso.dev != dev) || unlikely(xmit_xfrm_check_overflow(skb)))) { struct sk_buff *segs; /* Packet got rerouted, fixup features and segment it. */ esp_features = esp_features & ~(NETIF_F_HW_ESP | NETIF_F_GSO_ESP); segs = skb_gso_segment(skb, esp_features); if (IS_ERR(segs)) { kfree_skb(skb); dev_core_stats_tx_dropped_inc(dev); return NULL; } else { consume_skb(skb); skb = segs; } } if (!skb->next) { esp_features |= skb->dev->gso_partial_features; xfrm_outer_mode_prep(x, skb); xo->flags |= XFRM_DEV_RESUME; err = x->type_offload->xmit(x, skb, esp_features); if (err) { if (err == -EINPROGRESS) return NULL; XFRM_INC_STATS(xs_net(x), LINUX_MIB_XFRMOUTSTATEPROTOERROR); kfree_skb(skb); return NULL; } skb_push(skb, skb->data - skb_mac_header(skb)); return skb; } skb_list_walk_safe(skb, skb2, nskb) { esp_features |= skb->dev->gso_partial_features; skb_mark_not_on_list(skb2); xo = xfrm_offload(skb2); xo->flags |= XFRM_DEV_RESUME; xfrm_outer_mode_prep(x, skb2); err = x->type_offload->xmit(x, skb2, esp_features); if (!err) { skb2->next = nskb; } else if (err != -EINPROGRESS) { XFRM_INC_STATS(xs_net(x), LINUX_MIB_XFRMOUTSTATEPROTOERROR); skb2->next = nskb; kfree_skb_list(skb2); return NULL; } else { if (skb == skb2) skb = nskb; else pskb->next = nskb; continue; } skb_push(skb2, skb2->data - skb_mac_header(skb2)); pskb = skb2; } return skb; } EXPORT_SYMBOL_GPL(validate_xmit_xfrm); int xfrm_dev_state_add(struct net *net, struct xfrm_state *x, struct xfrm_user_offload *xuo, struct netlink_ext_ack *extack) { int err; struct dst_entry *dst; struct net_device *dev; struct xfrm_dev_offload *xso = &x->xso; xfrm_address_t *saddr; xfrm_address_t *daddr; bool is_packet_offload; if (!x->type_offload) { NL_SET_ERR_MSG(extack, "Type doesn't support offload"); return -EINVAL; } if (xuo->flags & ~(XFRM_OFFLOAD_IPV6 | XFRM_OFFLOAD_INBOUND | XFRM_OFFLOAD_PACKET)) { NL_SET_ERR_MSG(extack, "Unrecognized flags in offload request"); return -EINVAL; } if ((xuo->flags & XFRM_OFFLOAD_INBOUND && x->dir == XFRM_SA_DIR_OUT) || (!(xuo->flags & XFRM_OFFLOAD_INBOUND) && x->dir == XFRM_SA_DIR_IN)) { NL_SET_ERR_MSG(extack, "Mismatched SA and offload direction"); return -EINVAL; } is_packet_offload = xuo->flags & XFRM_OFFLOAD_PACKET; /* We don't yet support TFC padding. */ if (x->tfcpad) { NL_SET_ERR_MSG(extack, "TFC padding can't be offloaded"); return -EINVAL; } dev = dev_get_by_index(net, xuo->ifindex); if (!dev) { struct xfrm_dst_lookup_params params; if (!(xuo->flags & XFRM_OFFLOAD_INBOUND)) { saddr = &x->props.saddr; daddr = &x->id.daddr; } else { saddr = &x->id.daddr; daddr = &x->props.saddr; } memset(&params, 0, sizeof(params)); params.net = net; params.saddr = saddr; params.daddr = daddr; params.mark = xfrm_smark_get(0, x); dst = __xfrm_dst_lookup(x->props.family, &params); if (IS_ERR(dst)) return (is_packet_offload) ? -EINVAL : 0; dev = dst->dev; dev_hold(dev); dst_release(dst); } if (!dev->xfrmdev_ops || !dev->xfrmdev_ops->xdo_dev_state_add) { xso->dev = NULL; dev_put(dev); return (is_packet_offload) ? -EINVAL : 0; } if (!is_packet_offload && x->props.flags & XFRM_STATE_ESN && !dev->xfrmdev_ops->xdo_dev_state_advance_esn) { NL_SET_ERR_MSG(extack, "Device doesn't support offload with ESN"); xso->dev = NULL; dev_put(dev); return -EINVAL; } xso->dev = dev; netdev_tracker_alloc(dev, &xso->dev_tracker, GFP_ATOMIC); xso->real_dev = dev; if (xuo->flags & XFRM_OFFLOAD_INBOUND) xso->dir = XFRM_DEV_OFFLOAD_IN; else xso->dir = XFRM_DEV_OFFLOAD_OUT; if (is_packet_offload) xso->type = XFRM_DEV_OFFLOAD_PACKET; else xso->type = XFRM_DEV_OFFLOAD_CRYPTO; err = dev->xfrmdev_ops->xdo_dev_state_add(x, extack); if (err) { xso->dev = NULL; xso->dir = 0; xso->real_dev = NULL; netdev_put(dev, &xso->dev_tracker); xso->type = XFRM_DEV_OFFLOAD_UNSPECIFIED; /* User explicitly requested packet offload mode and configured * policy in addition to the XFRM state. So be civil to users, * and return an error instead of taking fallback path. */ if ((err != -EOPNOTSUPP && !is_packet_offload) || is_packet_offload) { NL_SET_ERR_MSG_WEAK(extack, "Device failed to offload this state"); return err; } } return 0; } EXPORT_SYMBOL_GPL(xfrm_dev_state_add); int xfrm_dev_policy_add(struct net *net, struct xfrm_policy *xp, struct xfrm_user_offload *xuo, u8 dir, struct netlink_ext_ack *extack) { struct xfrm_dev_offload *xdo = &xp->xdo; struct net_device *dev; int err; if (!xuo->flags || xuo->flags & ~XFRM_OFFLOAD_PACKET) { /* We support only packet offload mode and it means * that user must set XFRM_OFFLOAD_PACKET bit. */ NL_SET_ERR_MSG(extack, "Unrecognized flags in offload request"); return -EINVAL; } dev = dev_get_by_index(net, xuo->ifindex); if (!dev) return -EINVAL; if (!dev->xfrmdev_ops || !dev->xfrmdev_ops->xdo_dev_policy_add) { xdo->dev = NULL; dev_put(dev); NL_SET_ERR_MSG(extack, "Policy offload is not supported"); return -EINVAL; } xdo->dev = dev; netdev_tracker_alloc(dev, &xdo->dev_tracker, GFP_ATOMIC); xdo->real_dev = dev; xdo->type = XFRM_DEV_OFFLOAD_PACKET; switch (dir) { case XFRM_POLICY_IN: xdo->dir = XFRM_DEV_OFFLOAD_IN; break; case XFRM_POLICY_OUT: xdo->dir = XFRM_DEV_OFFLOAD_OUT; break; case XFRM_POLICY_FWD: xdo->dir = XFRM_DEV_OFFLOAD_FWD; break; default: xdo->dev = NULL; netdev_put(dev, &xdo->dev_tracker); NL_SET_ERR_MSG(extack, "Unrecognized offload direction"); return -EINVAL; } err = dev->xfrmdev_ops->xdo_dev_policy_add(xp, extack); if (err) { xdo->dev = NULL; xdo->real_dev = NULL; xdo->type = XFRM_DEV_OFFLOAD_UNSPECIFIED; xdo->dir = 0; netdev_put(dev, &xdo->dev_tracker); NL_SET_ERR_MSG_WEAK(extack, "Device failed to offload this policy"); return err; } return 0; } EXPORT_SYMBOL_GPL(xfrm_dev_policy_add); bool xfrm_dev_offload_ok(struct sk_buff *skb, struct xfrm_state *x) { int mtu; struct dst_entry *dst = skb_dst(skb); struct xfrm_dst *xdst = (struct xfrm_dst *)dst; struct net_device *dev = x->xso.dev; if (!x->type_offload || (x->xso.type == XFRM_DEV_OFFLOAD_UNSPECIFIED && x->encap)) return false; if (x->xso.type == XFRM_DEV_OFFLOAD_PACKET || ((!dev || (dev == xfrm_dst_path(dst)->dev)) && !xdst->child->xfrm)) { mtu = xfrm_state_mtu(x, xdst->child_mtu_cached); if (skb->len <= mtu) goto ok; if (skb_is_gso(skb) && skb_gso_validate_network_len(skb, mtu)) goto ok; } return false; ok: if (dev && dev->xfrmdev_ops && dev->xfrmdev_ops->xdo_dev_offload_ok) return x->xso.dev->xfrmdev_ops->xdo_dev_offload_ok(skb, x); return true; } EXPORT_SYMBOL_GPL(xfrm_dev_offload_ok); void xfrm_dev_resume(struct sk_buff *skb) { struct net_device *dev = skb->dev; int ret = NETDEV_TX_BUSY; struct netdev_queue *txq; struct softnet_data *sd; unsigned long flags; rcu_read_lock(); txq = netdev_core_pick_tx(dev, skb, NULL); HARD_TX_LOCK(dev, txq, smp_processor_id()); if (!netif_xmit_frozen_or_stopped(txq)) skb = dev_hard_start_xmit(skb, dev, txq, &ret); HARD_TX_UNLOCK(dev, txq); if (!dev_xmit_complete(ret)) { local_irq_save(flags); sd = this_cpu_ptr(&softnet_data); skb_queue_tail(&sd->xfrm_backlog, skb); raise_softirq_irqoff(NET_TX_SOFTIRQ); local_irq_restore(flags); } rcu_read_unlock(); } EXPORT_SYMBOL_GPL(xfrm_dev_resume); void xfrm_dev_backlog(struct softnet_data *sd) { struct sk_buff_head *xfrm_backlog = &sd->xfrm_backlog; struct sk_buff_head list; struct sk_buff *skb; if (skb_queue_empty(xfrm_backlog)) return; __skb_queue_head_init(&list); spin_lock(&xfrm_backlog->lock); skb_queue_splice_init(xfrm_backlog, &list); spin_unlock(&xfrm_backlog->lock); while (!skb_queue_empty(&list)) { skb = __skb_dequeue(&list); xfrm_dev_resume(skb); } } #endif static int xfrm_api_check(struct net_device *dev) { #ifdef CONFIG_XFRM_OFFLOAD if ((dev->features & NETIF_F_HW_ESP_TX_CSUM) && !(dev->features & NETIF_F_HW_ESP)) return NOTIFY_BAD; if ((dev->features & NETIF_F_HW_ESP) && (!(dev->xfrmdev_ops && dev->xfrmdev_ops->xdo_dev_state_add && dev->xfrmdev_ops->xdo_dev_state_delete))) return NOTIFY_BAD; #else if (dev->features & (NETIF_F_HW_ESP | NETIF_F_HW_ESP_TX_CSUM)) return NOTIFY_BAD; #endif return NOTIFY_DONE; } static int xfrm_dev_down(struct net_device *dev) { if (dev->features & NETIF_F_HW_ESP) { xfrm_dev_state_flush(dev_net(dev), dev, true); xfrm_dev_policy_flush(dev_net(dev), dev, true); } return NOTIFY_DONE; } static int xfrm_dev_event(struct notifier_block *this, unsigned long event, void *ptr) { struct net_device *dev = netdev_notifier_info_to_dev(ptr); switch (event) { case NETDEV_REGISTER: return xfrm_api_check(dev); case NETDEV_FEAT_CHANGE: return xfrm_api_check(dev); case NETDEV_DOWN: case NETDEV_UNREGISTER: return xfrm_dev_down(dev); } return NOTIFY_DONE; } static struct notifier_block xfrm_dev_notifier = { .notifier_call = xfrm_dev_event, }; void __init xfrm_dev_init(void) { register_netdevice_notifier(&xfrm_dev_notifier); }
52 32 5 57 17 98 75 150 95 80 253 264 261 261 106 78 58 36 179 174 98 83 102 102 15 6 5 50 36 50 50 50 4 26 26 30 86 86 3 13 1 3 3 1 1 1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 /* SPDX-License-Identifier: GPL-1.0+ */ /* * Bond several ethernet interfaces into a Cisco, running 'Etherchannel'. * * Portions are (c) Copyright 1995 Simon "Guru Aleph-Null" Janes * NCM: Network and Communications Management, Inc. * * BUT, I'm the one who modified it for ethernet, so: * (c) Copyright 1999, Thomas Davis, tadavis@lbl.gov * */ #ifndef _NET_BONDING_H #define _NET_BONDING_H #include <linux/timer.h> #include <linux/proc_fs.h> #include <linux/if_bonding.h> #include <linux/cpumask.h> #include <linux/in6.h> #include <linux/netpoll.h> #include <linux/inetdevice.h> #include <linux/etherdevice.h> #include <linux/reciprocal_div.h> #include <linux/if_link.h> #include <net/bond_3ad.h> #include <net/bond_alb.h> #include <net/bond_options.h> #include <net/ipv6.h> #include <net/addrconf.h> #define BOND_MAX_ARP_TARGETS 16 #define BOND_MAX_NS_TARGETS BOND_MAX_ARP_TARGETS #define BOND_DEFAULT_MIIMON 100 #ifndef __long_aligned #define __long_aligned __attribute__((aligned((sizeof(long))))) #endif #define slave_info(bond_dev, slave_dev, fmt, ...) \ netdev_info(bond_dev, "(slave %s): " fmt, (slave_dev)->name, ##__VA_ARGS__) #define slave_warn(bond_dev, slave_dev, fmt, ...) \ netdev_warn(bond_dev, "(slave %s): " fmt, (slave_dev)->name, ##__VA_ARGS__) #define slave_dbg(bond_dev, slave_dev, fmt, ...) \ netdev_dbg(bond_dev, "(slave %s): " fmt, (slave_dev)->name, ##__VA_ARGS__) #define slave_err(bond_dev, slave_dev, fmt, ...) \ netdev_err(bond_dev, "(slave %s): " fmt, (slave_dev)->name, ##__VA_ARGS__) #define BOND_MODE(bond) ((bond)->params.mode) /* slave list primitives */ #define bond_slave_list(bond) (&(bond)->dev->adj_list.lower) #define bond_has_slaves(bond) !list_empty(bond_slave_list(bond)) /* IMPORTANT: bond_first/last_slave can return NULL in case of an empty list */ #define bond_first_slave(bond) \ (bond_has_slaves(bond) ? \ netdev_adjacent_get_private(bond_slave_list(bond)->next) : \ NULL) #define bond_last_slave(bond) \ (bond_has_slaves(bond) ? \ netdev_adjacent_get_private(bond_slave_list(bond)->prev) : \ NULL) /* Caller must have rcu_read_lock */ #define bond_first_slave_rcu(bond) \ netdev_lower_get_first_private_rcu(bond->dev) #define bond_is_first_slave(bond, pos) (pos == bond_first_slave(bond)) #define bond_is_last_slave(bond, pos) (pos == bond_last_slave(bond)) /** * bond_for_each_slave - iterate over all slaves * @bond: the bond holding this list * @pos: current slave * @iter: list_head * iterator * * Caller must hold RTNL */ #define bond_for_each_slave(bond, pos, iter) \ netdev_for_each_lower_private((bond)->dev, pos, iter) /* Caller must have rcu_read_lock */ #define bond_for_each_slave_rcu(bond, pos, iter) \ netdev_for_each_lower_private_rcu((bond)->dev, pos, iter) #define BOND_XFRM_FEATURES (NETIF_F_HW_ESP | NETIF_F_HW_ESP_TX_CSUM | \ NETIF_F_GSO_ESP) #ifdef CONFIG_NET_POLL_CONTROLLER extern atomic_t netpoll_block_tx; static inline void block_netpoll_tx(void) { atomic_inc(&netpoll_block_tx); } static inline void unblock_netpoll_tx(void) { atomic_dec(&netpoll_block_tx); } static inline int is_netpoll_tx_blocked(struct net_device *dev) { if (unlikely(netpoll_tx_running(dev))) return atomic_read(&netpoll_block_tx); return 0; } #else #define block_netpoll_tx() #define unblock_netpoll_tx() #define is_netpoll_tx_blocked(dev) (0) #endif struct bond_params { int mode; int xmit_policy; int miimon; u8 num_peer_notif; u8 missed_max; int arp_interval; int arp_validate; int arp_all_targets; int use_carrier; int fail_over_mac; int updelay; int downdelay; int peer_notif_delay; int lacp_active; int lacp_fast; unsigned int min_links; int ad_select; char primary[IFNAMSIZ]; int primary_reselect; __be32 arp_targets[BOND_MAX_ARP_TARGETS]; int tx_queues; int all_slaves_active; int resend_igmp; int lp_interval; int packets_per_slave; int tlb_dynamic_lb; struct reciprocal_value reciprocal_packets_per_slave; u16 ad_actor_sys_prio; u16 ad_user_port_key; #if IS_ENABLED(CONFIG_IPV6) struct in6_addr ns_targets[BOND_MAX_NS_TARGETS]; #endif int coupled_control; /* 2 bytes of padding : see ether_addr_equal_64bits() */ u8 ad_actor_system[ETH_ALEN + 2]; }; struct slave { struct net_device *dev; /* first - useful for panic debug */ struct bonding *bond; /* our master */ int delay; /* all 4 in jiffies */ unsigned long last_link_up; unsigned long last_tx; unsigned long last_rx; unsigned long target_last_arp_rx[BOND_MAX_ARP_TARGETS]; s8 link; /* one of BOND_LINK_XXXX */ s8 link_new_state; /* one of BOND_LINK_XXXX */ u8 backup:1, /* indicates backup slave. Value corresponds with BOND_STATE_ACTIVE and BOND_STATE_BACKUP */ inactive:1, /* indicates inactive slave */ rx_disabled:1, /* indicates whether slave's Rx is disabled */ should_notify:1, /* indicates whether the state changed */ should_notify_link:1; /* indicates whether the link changed */ u8 duplex; u32 original_mtu; u32 link_failure_count; u32 speed; u16 queue_id; u8 perm_hwaddr[MAX_ADDR_LEN]; int prio; struct ad_slave_info *ad_info; struct tlb_slave_info tlb_info; #ifdef CONFIG_NET_POLL_CONTROLLER struct netpoll *np; #endif struct delayed_work notify_work; struct kobject kobj; struct rtnl_link_stats64 slave_stats; }; static inline struct slave *to_slave(struct kobject *kobj) { return container_of(kobj, struct slave, kobj); } struct bond_up_slave { unsigned int count; struct rcu_head rcu; struct slave *arr[]; }; /* * Link pseudo-state only used internally by monitors */ #define BOND_LINK_NOCHANGE -1 struct bond_ipsec { struct list_head list; struct xfrm_state *xs; }; /* * Here are the locking policies for the two bonding locks: * Get rcu_read_lock when reading or RTNL when writing slave list. */ struct bonding { struct net_device *dev; /* first - useful for panic debug */ struct slave __rcu *curr_active_slave; struct slave __rcu *current_arp_slave; struct slave __rcu *primary_slave; struct bond_up_slave __rcu *usable_slaves; struct bond_up_slave __rcu *all_slaves; bool force_primary; bool notifier_ctx; s32 slave_cnt; /* never change this value outside the attach/detach wrappers */ int (*recv_probe)(const struct sk_buff *, struct bonding *, struct slave *); /* mode_lock is used for mode-specific locking needs, currently used by: * 3ad mode (4) - protect against running bond_3ad_unbind_slave() and * bond_3ad_state_machine_handler() concurrently and also * the access to the state machine shared variables. * TLB mode (5) - to sync the use and modifications of its hash table * ALB mode (6) - to sync the use and modifications of its hash table */ spinlock_t mode_lock; spinlock_t stats_lock; u32 send_peer_notif; u8 igmp_retrans; #ifdef CONFIG_PROC_FS struct proc_dir_entry *proc_entry; char proc_file_name[IFNAMSIZ]; #endif /* CONFIG_PROC_FS */ struct list_head bond_list; u32 __percpu *rr_tx_counter; struct ad_bond_info ad_info; struct alb_bond_info alb_info; struct bond_params params; struct workqueue_struct *wq; struct delayed_work mii_work; struct delayed_work arp_work; struct delayed_work alb_work; struct delayed_work ad_work; struct delayed_work mcast_work; struct delayed_work slave_arr_work; #ifdef CONFIG_DEBUG_FS /* debugging support via debugfs */ struct dentry *debug_dir; #endif /* CONFIG_DEBUG_FS */ struct rtnl_link_stats64 bond_stats; #ifdef CONFIG_XFRM_OFFLOAD struct list_head ipsec_list; /* protecting ipsec_list */ struct mutex ipsec_lock; #endif /* CONFIG_XFRM_OFFLOAD */ struct bpf_prog *xdp_prog; }; #define bond_slave_get_rcu(dev) \ ((struct slave *) rcu_dereference(dev->rx_handler_data)) #define bond_slave_get_rtnl(dev) \ ((struct slave *) rtnl_dereference(dev->rx_handler_data)) void bond_queue_slave_event(struct slave *slave); void bond_lower_state_changed(struct slave *slave); struct bond_vlan_tag { __be16 vlan_proto; unsigned short vlan_id; }; /* * Returns NULL if the net_device does not belong to any of the bond's slaves * * Caller must hold bond lock for read */ static inline struct slave *bond_get_slave_by_dev(struct bonding *bond, struct net_device *slave_dev) { return netdev_lower_dev_get_private(bond->dev, slave_dev); } static inline struct bonding *bond_get_bond_by_slave(struct slave *slave) { return slave->bond; } static inline bool bond_should_override_tx_queue(struct bonding *bond) { return BOND_MODE(bond) == BOND_MODE_ACTIVEBACKUP || BOND_MODE(bond) == BOND_MODE_ROUNDROBIN; } static inline bool bond_is_lb(const struct bonding *bond) { return BOND_MODE(bond) == BOND_MODE_TLB || BOND_MODE(bond) == BOND_MODE_ALB; } static inline bool bond_needs_speed_duplex(const struct bonding *bond) { return BOND_MODE(bond) == BOND_MODE_8023AD || bond_is_lb(bond); } static inline bool bond_is_nondyn_tlb(const struct bonding *bond) { return (bond_is_lb(bond) && bond->params.tlb_dynamic_lb == 0); } static inline bool bond_mode_can_use_xmit_hash(const struct bonding *bond) { return (BOND_MODE(bond) == BOND_MODE_8023AD || BOND_MODE(bond) == BOND_MODE_XOR || BOND_MODE(bond) == BOND_MODE_TLB || BOND_MODE(bond) == BOND_MODE_ALB); } static inline bool bond_mode_uses_xmit_hash(const struct bonding *bond) { return (BOND_MODE(bond) == BOND_MODE_8023AD || BOND_MODE(bond) == BOND_MODE_XOR || bond_is_nondyn_tlb(bond)); } static inline bool bond_mode_uses_arp(int mode) { return mode != BOND_MODE_8023AD && mode != BOND_MODE_TLB && mode != BOND_MODE_ALB; } static inline bool bond_mode_uses_primary(int mode) { return mode == BOND_MODE_ACTIVEBACKUP || mode == BOND_MODE_TLB || mode == BOND_MODE_ALB; } static inline bool bond_uses_primary(struct bonding *bond) { return bond_mode_uses_primary(BOND_MODE(bond)); } static inline struct net_device *bond_option_active_slave_get_rcu(struct bonding *bond) { struct slave *slave = rcu_dereference_rtnl(bond->curr_active_slave); return bond_uses_primary(bond) && slave ? slave->dev : NULL; } static inline bool bond_slave_is_up(struct slave *slave) { return netif_running(slave->dev) && netif_carrier_ok(slave->dev); } static inline void bond_set_active_slave(struct slave *slave) { if (slave->backup) { slave->backup = 0; bond_queue_slave_event(slave); bond_lower_state_changed(slave); } } static inline void bond_set_backup_slave(struct slave *slave) { if (!slave->backup) { slave->backup = 1; bond_queue_slave_event(slave); bond_lower_state_changed(slave); } } static inline void bond_set_slave_state(struct slave *slave, int slave_state, bool notify) { if (slave->backup == slave_state) return; slave->backup = slave_state; if (notify) { bond_lower_state_changed(slave); bond_queue_slave_event(slave); slave->should_notify = 0; } else { if (slave->should_notify) slave->should_notify = 0; else slave->should_notify = 1; } } static inline void bond_slave_state_change(struct bonding *bond) { struct list_head *iter; struct slave *tmp; bond_for_each_slave(bond, tmp, iter) { if (tmp->link == BOND_LINK_UP) bond_set_active_slave(tmp); else if (tmp->link == BOND_LINK_DOWN) bond_set_backup_slave(tmp); } } static inline void bond_slave_state_notify(struct bonding *bond) { struct list_head *iter; struct slave *tmp; bond_for_each_slave(bond, tmp, iter) { if (tmp->should_notify) { bond_lower_state_changed(tmp); tmp->should_notify = 0; } } } static inline int bond_slave_state(struct slave *slave) { return slave->backup; } static inline bool bond_is_active_slave(struct slave *slave) { return !bond_slave_state(slave); } static inline bool bond_slave_can_tx(struct slave *slave) { return bond_slave_is_up(slave) && slave->link == BOND_LINK_UP && bond_is_active_slave(slave); } static inline bool bond_is_active_slave_dev(const struct net_device *slave_dev) { struct slave *slave; bool active; rcu_read_lock(); slave = bond_slave_get_rcu(slave_dev); active = bond_is_active_slave(slave); rcu_read_unlock(); return active; } static inline void bond_hw_addr_copy(u8 *dst, const u8 *src, unsigned int len) { if (len == ETH_ALEN) { ether_addr_copy(dst, src); return; } memcpy(dst, src, len); } #define BOND_PRI_RESELECT_ALWAYS 0 #define BOND_PRI_RESELECT_BETTER 1 #define BOND_PRI_RESELECT_FAILURE 2 #define BOND_FOM_NONE 0 #define BOND_FOM_ACTIVE 1 #define BOND_FOM_FOLLOW 2 #define BOND_ARP_TARGETS_ANY 0 #define BOND_ARP_TARGETS_ALL 1 #define BOND_ARP_VALIDATE_NONE 0 #define BOND_ARP_VALIDATE_ACTIVE (1 << BOND_STATE_ACTIVE) #define BOND_ARP_VALIDATE_BACKUP (1 << BOND_STATE_BACKUP) #define BOND_ARP_VALIDATE_ALL (BOND_ARP_VALIDATE_ACTIVE | \ BOND_ARP_VALIDATE_BACKUP) #define BOND_ARP_FILTER (BOND_ARP_VALIDATE_ALL + 1) #define BOND_ARP_FILTER_ACTIVE (BOND_ARP_VALIDATE_ACTIVE | \ BOND_ARP_FILTER) #define BOND_ARP_FILTER_BACKUP (BOND_ARP_VALIDATE_BACKUP | \ BOND_ARP_FILTER) #define BOND_SLAVE_NOTIFY_NOW true #define BOND_SLAVE_NOTIFY_LATER false static inline int slave_do_arp_validate(struct bonding *bond, struct slave *slave) { return bond->params.arp_validate & (1 << bond_slave_state(slave)); } static inline int slave_do_arp_validate_only(struct bonding *bond) { return bond->params.arp_validate & BOND_ARP_FILTER; } static inline int bond_is_ip_target_ok(__be32 addr) { return !ipv4_is_lbcast(addr) && !ipv4_is_zeronet(addr); } #if IS_ENABLED(CONFIG_IPV6) static inline int bond_is_ip6_target_ok(struct in6_addr *addr) { return !ipv6_addr_any(addr) && !ipv6_addr_loopback(addr) && !ipv6_addr_is_multicast(addr); } #endif /* Get the oldest arp which we've received on this slave for bond's * arp_targets. */ static inline unsigned long slave_oldest_target_arp_rx(struct bonding *bond, struct slave *slave) { int i = 1; unsigned long ret = slave->target_last_arp_rx[0]; for (; (i < BOND_MAX_ARP_TARGETS) && bond->params.arp_targets[i]; i++) if (time_before(slave->target_last_arp_rx[i], ret)) ret = slave->target_last_arp_rx[i]; return ret; } static inline unsigned long slave_last_rx(struct bonding *bond, struct slave *slave) { if (bond->params.arp_all_targets == BOND_ARP_TARGETS_ALL) return slave_oldest_target_arp_rx(bond, slave); return slave->last_rx; } static inline void slave_update_last_tx(struct slave *slave) { WRITE_ONCE(slave->last_tx, jiffies); } static inline unsigned long slave_last_tx(struct slave *slave) { return READ_ONCE(slave->last_tx); } #ifdef CONFIG_NET_POLL_CONTROLLER static inline netdev_tx_t bond_netpoll_send_skb(const struct slave *slave, struct sk_buff *skb) { return netpoll_send_skb(slave->np, skb); } #else static inline netdev_tx_t bond_netpoll_send_skb(const struct slave *slave, struct sk_buff *skb) { BUG(); return NETDEV_TX_OK; } #endif static inline void bond_set_slave_inactive_flags(struct slave *slave, bool notify) { if (!bond_is_lb(slave->bond)) bond_set_slave_state(slave, BOND_STATE_BACKUP, notify); if (!slave->bond->params.all_slaves_active) slave->inactive = 1; if (BOND_MODE(slave->bond) == BOND_MODE_8023AD) slave->rx_disabled = 1; } static inline void bond_set_slave_tx_disabled_flags(struct slave *slave, bool notify) { bond_set_slave_state(slave, BOND_STATE_BACKUP, notify); } static inline void bond_set_slave_active_flags(struct slave *slave, bool notify) { bond_set_slave_state(slave, BOND_STATE_ACTIVE, notify); slave->inactive = 0; if (BOND_MODE(slave->bond) == BOND_MODE_8023AD) slave->rx_disabled = 0; } static inline void bond_set_slave_rx_enabled_flags(struct slave *slave, bool notify) { slave->rx_disabled = 0; } static inline bool bond_is_slave_inactive(struct slave *slave) { return slave->inactive; } static inline bool bond_is_slave_rx_disabled(struct slave *slave) { return slave->rx_disabled; } static inline void bond_propose_link_state(struct slave *slave, int state) { slave->link_new_state = state; } static inline void bond_commit_link_state(struct slave *slave, bool notify) { if (slave->link_new_state == BOND_LINK_NOCHANGE) return; slave->link = slave->link_new_state; if (notify) { bond_queue_slave_event(slave); bond_lower_state_changed(slave); slave->should_notify_link = 0; } else { if (slave->should_notify_link) slave->should_notify_link = 0; else slave->should_notify_link = 1; } } static inline void bond_set_slave_link_state(struct slave *slave, int state, bool notify) { bond_propose_link_state(slave, state); bond_commit_link_state(slave, notify); } static inline void bond_slave_link_notify(struct bonding *bond) { struct list_head *iter; struct slave *tmp; bond_for_each_slave(bond, tmp, iter) { if (tmp->should_notify_link) { bond_queue_slave_event(tmp); bond_lower_state_changed(tmp); tmp->should_notify_link = 0; } } } static inline __be32 bond_confirm_addr(struct net_device *dev, __be32 dst, __be32 local) { struct in_device *in_dev; __be32 addr = 0; rcu_read_lock(); in_dev = __in_dev_get_rcu(dev); if (in_dev) addr = inet_confirm_addr(dev_net(dev), in_dev, dst, local, RT_SCOPE_HOST); rcu_read_unlock(); return addr; } struct bond_net { struct net *net; /* Associated network namespace */ struct list_head dev_list; #ifdef CONFIG_PROC_FS struct proc_dir_entry *proc_dir; #endif struct class_attribute class_attr_bonding_masters; }; int bond_rcv_validate(const struct sk_buff *skb, struct bonding *bond, struct slave *slave); netdev_tx_t bond_dev_queue_xmit(struct bonding *bond, struct sk_buff *skb, struct net_device *slave_dev); int bond_create(struct net *net, const char *name); int bond_create_sysfs(struct bond_net *net); void bond_destroy_sysfs(struct bond_net *net); void bond_prepare_sysfs_group(struct bonding *bond); int bond_sysfs_slave_add(struct slave *slave); void bond_sysfs_slave_del(struct slave *slave); void bond_xdp_set_features(struct net_device *bond_dev); int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev, struct netlink_ext_ack *extack); int bond_release(struct net_device *bond_dev, struct net_device *slave_dev); u32 bond_xmit_hash(struct bonding *bond, struct sk_buff *skb); int bond_set_carrier(struct bonding *bond); void bond_select_active_slave(struct bonding *bond); void bond_change_active_slave(struct bonding *bond, struct slave *new_active); void bond_create_debugfs(void); void bond_destroy_debugfs(void); void bond_debug_register(struct bonding *bond); void bond_debug_unregister(struct bonding *bond); void bond_debug_reregister(struct bonding *bond); const char *bond_mode_name(int mode); void bond_setup(struct net_device *bond_dev); unsigned int bond_get_num_tx_queues(void); int bond_netlink_init(void); void bond_netlink_fini(void); struct net_device *bond_option_active_slave_get_rcu(struct bonding *bond); const char *bond_slave_link_status(s8 link); struct bond_vlan_tag *bond_verify_device_path(struct net_device *start_dev, struct net_device *end_dev, int level); int bond_update_slave_arr(struct bonding *bond, struct slave *skipslave); void bond_slave_arr_work_rearm(struct bonding *bond, unsigned long delay); void bond_work_init_all(struct bonding *bond); #ifdef CONFIG_PROC_FS void bond_create_proc_entry(struct bonding *bond); void bond_remove_proc_entry(struct bonding *bond); void bond_create_proc_dir(struct bond_net *bn); void bond_destroy_proc_dir(struct bond_net *bn); #else static inline void bond_create_proc_entry(struct bonding *bond) { } static inline void bond_remove_proc_entry(struct bonding *bond) { } static inline void bond_create_proc_dir(struct bond_net *bn) { } static inline void bond_destroy_proc_dir(struct bond_net *bn) { } #endif static inline struct slave *bond_slave_has_mac(struct bonding *bond, const u8 *mac) { struct list_head *iter; struct slave *tmp; bond_for_each_slave(bond, tmp, iter) if (ether_addr_equal_64bits(mac, tmp->dev->dev_addr)) return tmp; return NULL; } /* Caller must hold rcu_read_lock() for read */ static inline bool bond_slave_has_mac_rcu(struct bonding *bond, const u8 *mac) { struct list_head *iter; struct slave *tmp; bond_for_each_slave_rcu(bond, tmp, iter) if (ether_addr_equal_64bits(mac, tmp->dev->dev_addr)) return true; return false; } /* Check if the ip is present in arp ip list, or first free slot if ip == 0 * Returns -1 if not found, index if found */ static inline int bond_get_targets_ip(__be32 *targets, __be32 ip) { int i; for (i = 0; i < BOND_MAX_ARP_TARGETS; i++) if (targets[i] == ip) return i; else if (targets[i] == 0) break; return -1; } #if IS_ENABLED(CONFIG_IPV6) static inline int bond_get_targets_ip6(struct in6_addr *targets, struct in6_addr *ip) { struct in6_addr mcaddr; int i; for (i = 0; i < BOND_MAX_NS_TARGETS; i++) { addrconf_addr_solict_mult(&targets[i], &mcaddr); if ((ipv6_addr_equal(&targets[i], ip)) || (ipv6_addr_equal(&mcaddr, ip))) return i; else if (ipv6_addr_any(&targets[i])) break; } return -1; } #endif /* exported from bond_main.c */ extern unsigned int bond_net_id; /* exported from bond_netlink.c */ extern struct rtnl_link_ops bond_link_ops; /* exported from bond_sysfs_slave.c */ extern const struct sysfs_ops slave_sysfs_ops; /* exported from bond_3ad.c */ extern const u8 lacpdu_mcast_addr[]; static inline netdev_tx_t bond_tx_drop(struct net_device *dev, struct sk_buff *skb) { dev_core_stats_tx_dropped_inc(dev); dev_kfree_skb_any(skb); return NET_XMIT_DROP; } #endif /* _NET_BONDING_H */
13 435 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 /* SPDX-License-Identifier: GPL-2.0 */ /* * Dynamic queue limits (dql) - Definitions * * Copyright (c) 2011, Tom Herbert <therbert@google.com> * * This header file contains the definitions for dynamic queue limits (dql). * dql would be used in conjunction with a producer/consumer type queue * (possibly a HW queue). Such a queue would have these general properties: * * 1) Objects are queued up to some limit specified as number of objects. * 2) Periodically a completion process executes which retires consumed * objects. * 3) Starvation occurs when limit has been reached, all queued data has * actually been consumed, but completion processing has not yet run * so queuing new data is blocked. * 4) Minimizing the amount of queued data is desirable. * * The goal of dql is to calculate the limit as the minimum number of objects * needed to prevent starvation. * * The primary functions of dql are: * dql_queued - called when objects are enqueued to record number of objects * dql_avail - returns how many objects are available to be queued based * on the object limit and how many objects are already enqueued * dql_completed - called at completion time to indicate how many objects * were retired from the queue * * The dql implementation does not implement any locking for the dql data * structures, the higher layer should provide this. dql_queued should * be serialized to prevent concurrent execution of the function; this * is also true for dql_completed. However, dql_queued and dlq_completed can * be executed concurrently (i.e. they can be protected by different locks). */ #ifndef _LINUX_DQL_H #define _LINUX_DQL_H #ifdef __KERNEL__ #include <linux/bitops.h> #include <asm/bug.h> #define DQL_HIST_LEN 4 #define DQL_HIST_ENT(dql, idx) ((dql)->history[(idx) % DQL_HIST_LEN]) struct dql { /* Fields accessed in enqueue path (dql_queued) */ unsigned int num_queued; /* Total ever queued */ unsigned int adj_limit; /* limit + num_completed */ unsigned int last_obj_cnt; /* Count at last queuing */ /* Stall threshold (in jiffies), defined by user */ unsigned short stall_thrs; unsigned long history_head; /* top 58 bits of jiffies */ /* stall entries, a bit per entry */ unsigned long history[DQL_HIST_LEN]; /* Fields accessed only by completion path (dql_completed) */ unsigned int limit ____cacheline_aligned_in_smp; /* Current limit */ unsigned int num_completed; /* Total ever completed */ unsigned int prev_ovlimit; /* Previous over limit */ unsigned int prev_num_queued; /* Previous queue total */ unsigned int prev_last_obj_cnt; /* Previous queuing cnt */ unsigned int lowest_slack; /* Lowest slack found */ unsigned long slack_start_time; /* Time slacks seen */ /* Configuration */ unsigned int max_limit; /* Max limit */ unsigned int min_limit; /* Minimum limit */ unsigned int slack_hold_time; /* Time to measure slack */ /* Longest stall detected, reported to user */ unsigned short stall_max; unsigned long last_reap; /* Last reap (in jiffies) */ unsigned long stall_cnt; /* Number of stalls */ }; /* Set some static maximums */ #define DQL_MAX_OBJECT (UINT_MAX / 16) #define DQL_MAX_LIMIT ((UINT_MAX / 2) - DQL_MAX_OBJECT) /* Populate the bitmap to be processed later in dql_check_stall() */ static inline void dql_queue_stall(struct dql *dql) { unsigned long map, now, now_hi, i; now = jiffies; now_hi = now / BITS_PER_LONG; /* The following code set a bit in the ring buffer, where each * bit trackes time the packet was queued. The dql->history buffer * tracks DQL_HIST_LEN * BITS_PER_LONG time (jiffies) slot */ if (unlikely(now_hi != dql->history_head)) { /* About to reuse slots, clear them */ for (i = 0; i < DQL_HIST_LEN; i++) { /* Multiplication masks high bits */ if (now_hi * BITS_PER_LONG == (dql->history_head + i) * BITS_PER_LONG) break; DQL_HIST_ENT(dql, dql->history_head + i + 1) = 0; } /* pairs with smp_rmb() in dql_check_stall() */ smp_wmb(); WRITE_ONCE(dql->history_head, now_hi); } /* __set_bit() does not guarantee WRITE_ONCE() semantics */ map = DQL_HIST_ENT(dql, now_hi); /* Populate the history with an entry (bit) per queued */ if (!(map & BIT_MASK(now))) WRITE_ONCE(DQL_HIST_ENT(dql, now_hi), map | BIT_MASK(now)); } /* * Record number of objects queued. Assumes that caller has already checked * availability in the queue with dql_avail. */ static inline void dql_queued(struct dql *dql, unsigned int count) { if (WARN_ON_ONCE(count > DQL_MAX_OBJECT)) return; WRITE_ONCE(dql->last_obj_cnt, count); /* We want to force a write first, so that cpu do not attempt * to get cache line containing last_obj_cnt, num_queued, adj_limit * in Shared state, but directly does a Request For Ownership * It is only a hint, we use barrier() only. */ barrier(); dql->num_queued += count; /* Only populate stall information if the threshold is set */ if (READ_ONCE(dql->stall_thrs)) dql_queue_stall(dql); } /* Returns how many objects can be queued, < 0 indicates over limit. */ static inline int dql_avail(const struct dql *dql) { return READ_ONCE(dql->adj_limit) - READ_ONCE(dql->num_queued); } /* Record number of completed objects and recalculate the limit. */ void dql_completed(struct dql *dql, unsigned int count); /* Reset dql state */ void dql_reset(struct dql *dql); /* Initialize dql state */ void dql_init(struct dql *dql, unsigned int hold_time); #endif /* _KERNEL_ */ #endif /* _LINUX_DQL_H */
4 4 3 4 4 4 4 4 4 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 // SPDX-License-Identifier: GPL-2.0-or-later /* RomFS storage access routines * * Copyright © 2007 Red Hat, Inc. All Rights Reserved. * Written by David Howells (dhowells@redhat.com) */ #include <linux/fs.h> #include <linux/mtd/super.h> #include <linux/buffer_head.h> #include "internal.h" #if !defined(CONFIG_ROMFS_ON_MTD) && !defined(CONFIG_ROMFS_ON_BLOCK) #error no ROMFS backing store interface configured #endif #ifdef CONFIG_ROMFS_ON_MTD #define ROMFS_MTD_READ(sb, ...) mtd_read((sb)->s_mtd, ##__VA_ARGS__) /* * read data from an romfs image on an MTD device */ static int romfs_mtd_read(struct super_block *sb, unsigned long pos, void *buf, size_t buflen) { size_t rlen; int ret; ret = ROMFS_MTD_READ(sb, pos, buflen, &rlen, buf); return (ret < 0 || rlen != buflen) ? -EIO : 0; } /* * determine the length of a string in a romfs image on an MTD device */ static ssize_t romfs_mtd_strnlen(struct super_block *sb, unsigned long pos, size_t maxlen) { ssize_t n = 0; size_t segment; u_char buf[16], *p; size_t len; int ret; /* scan the string up to 16 bytes at a time */ while (maxlen > 0) { segment = min_t(size_t, maxlen, 16); ret = ROMFS_MTD_READ(sb, pos, segment, &len, buf); if (ret < 0) return ret; p = memchr(buf, 0, len); if (p) return n + (p - buf); maxlen -= len; pos += len; n += len; } return n; } /* * compare a string to one in a romfs image on MTD * - return 1 if matched, 0 if differ, -ve if error */ static int romfs_mtd_strcmp(struct super_block *sb, unsigned long pos, const char *str, size_t size) { u_char buf[17]; size_t len, segment; int ret; /* scan the string up to 16 bytes at a time, and attempt to grab the * trailing NUL whilst we're at it */ buf[0] = 0xff; while (size > 0) { segment = min_t(size_t, size + 1, 17); ret = ROMFS_MTD_READ(sb, pos, segment, &len, buf); if (ret < 0) return ret; len--; if (memcmp(buf, str, len) != 0) return 0; buf[0] = buf[len]; size -= len; pos += len; str += len; } /* check the trailing NUL was */ if (buf[0]) return 0; return 1; } #endif /* CONFIG_ROMFS_ON_MTD */ #ifdef CONFIG_ROMFS_ON_BLOCK /* * read data from an romfs image on a block device */ static int romfs_blk_read(struct super_block *sb, unsigned long pos, void *buf, size_t buflen) { struct buffer_head *bh; unsigned long offset; size_t segment; /* copy the string up to blocksize bytes at a time */ while (buflen > 0) { offset = pos & (ROMBSIZE - 1); segment = min_t(size_t, buflen, ROMBSIZE - offset); bh = sb_bread(sb, pos >> ROMBSBITS); if (!bh) return -EIO; memcpy(buf, bh->b_data + offset, segment); brelse(bh); buf += segment; buflen -= segment; pos += segment; } return 0; } /* * determine the length of a string in romfs on a block device */ static ssize_t romfs_blk_strnlen(struct super_block *sb, unsigned long pos, size_t limit) { struct buffer_head *bh; unsigned long offset; ssize_t n = 0; size_t segment; u_char *buf, *p; /* scan the string up to blocksize bytes at a time */ while (limit > 0) { offset = pos & (ROMBSIZE - 1); segment = min_t(size_t, limit, ROMBSIZE - offset); bh = sb_bread(sb, pos >> ROMBSBITS); if (!bh) return -EIO; buf = bh->b_data + offset; p = memchr(buf, 0, segment); brelse(bh); if (p) return n + (p - buf); limit -= segment; pos += segment; n += segment; } return n; } /* * compare a string to one in a romfs image on a block device * - return 1 if matched, 0 if differ, -ve if error */ static int romfs_blk_strcmp(struct super_block *sb, unsigned long pos, const char *str, size_t size) { struct buffer_head *bh; unsigned long offset; size_t segment; bool matched, terminated = false; /* compare string up to a block at a time */ while (size > 0) { offset = pos & (ROMBSIZE - 1); segment = min_t(size_t, size, ROMBSIZE - offset); bh = sb_bread(sb, pos >> ROMBSBITS); if (!bh) return -EIO; matched = (memcmp(bh->b_data + offset, str, segment) == 0); size -= segment; pos += segment; str += segment; if (matched && size == 0 && offset + segment < ROMBSIZE) { if (!bh->b_data[offset + segment]) terminated = true; else matched = false; } brelse(bh); if (!matched) return 0; } if (!terminated) { /* the terminating NUL must be on the first byte of the next * block */ BUG_ON((pos & (ROMBSIZE - 1)) != 0); bh = sb_bread(sb, pos >> ROMBSBITS); if (!bh) return -EIO; matched = !bh->b_data[0]; brelse(bh); if (!matched) return 0; } return 1; } #endif /* CONFIG_ROMFS_ON_BLOCK */ /* * read data from the romfs image */ int romfs_dev_read(struct super_block *sb, unsigned long pos, void *buf, size_t buflen) { size_t limit; limit = romfs_maxsize(sb); if (pos >= limit || buflen > limit - pos) return -EIO; #ifdef CONFIG_ROMFS_ON_MTD if (sb->s_mtd) return romfs_mtd_read(sb, pos, buf, buflen); #endif #ifdef CONFIG_ROMFS_ON_BLOCK if (sb->s_bdev) return romfs_blk_read(sb, pos, buf, buflen); #endif return -EIO; } /* * determine the length of a string in romfs */ ssize_t romfs_dev_strnlen(struct super_block *sb, unsigned long pos, size_t maxlen) { size_t limit; limit = romfs_maxsize(sb); if (pos >= limit) return -EIO; if (maxlen > limit - pos) maxlen = limit - pos; #ifdef CONFIG_ROMFS_ON_MTD if (sb->s_mtd) return romfs_mtd_strnlen(sb, pos, maxlen); #endif #ifdef CONFIG_ROMFS_ON_BLOCK if (sb->s_bdev) return romfs_blk_strnlen(sb, pos, maxlen); #endif return -EIO; } /* * compare a string to one in romfs * - the string to be compared to, str, may not be NUL-terminated; instead the * string is of the specified size * - return 1 if matched, 0 if differ, -ve if error */ int romfs_dev_strcmp(struct super_block *sb, unsigned long pos, const char *str, size_t size) { size_t limit; limit = romfs_maxsize(sb); if (pos >= limit) return -EIO; if (size > ROMFS_MAXFN) return -ENAMETOOLONG; if (size + 1 > limit - pos) return -EIO; #ifdef CONFIG_ROMFS_ON_MTD if (sb->s_mtd) return romfs_mtd_strcmp(sb, pos, str, size); #endif #ifdef CONFIG_ROMFS_ON_BLOCK if (sb->s_bdev) return romfs_blk_strcmp(sb, pos, str, size); #endif return -EIO; }
6 6 2 2 1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 // SPDX-License-Identifier: GPL-2.0-only #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/types.h> #include <linux/module.h> #include <net/ip.h> #include <linux/ipv6.h> #include <linux/icmp.h> #include <net/ipv6.h> #include <net/tcp.h> #include <net/udp.h> #include <linux/netfilter/x_tables.h> #include <linux/netfilter/xt_tcpudp.h> #include <linux/netfilter_ipv4/ip_tables.h> #include <linux/netfilter_ipv6/ip6_tables.h> MODULE_DESCRIPTION("Xtables: TCP, UDP and UDP-Lite match"); MODULE_LICENSE("GPL"); MODULE_ALIAS("xt_tcp"); MODULE_ALIAS("xt_udp"); MODULE_ALIAS("ipt_udp"); MODULE_ALIAS("ipt_tcp"); MODULE_ALIAS("ip6t_udp"); MODULE_ALIAS("ip6t_tcp"); MODULE_ALIAS("ipt_icmp"); MODULE_ALIAS("ip6t_icmp6"); /* Returns 1 if the port is matched by the range, 0 otherwise */ static inline bool port_match(u_int16_t min, u_int16_t max, u_int16_t port, bool invert) { return (port >= min && port <= max) ^ invert; } static bool tcp_find_option(u_int8_t option, const struct sk_buff *skb, unsigned int protoff, unsigned int optlen, bool invert, bool *hotdrop) { /* tcp.doff is only 4 bits, ie. max 15 * 4 bytes */ const u_int8_t *op; u_int8_t _opt[60 - sizeof(struct tcphdr)]; unsigned int i; pr_debug("finding option\n"); if (!optlen) return invert; /* If we don't have the whole header, drop packet. */ op = skb_header_pointer(skb, protoff + sizeof(struct tcphdr), optlen, _opt); if (op == NULL) { *hotdrop = true; return false; } for (i = 0; i < optlen; ) { if (op[i] == option) return !invert; if (op[i] < 2) i++; else i += op[i+1]?:1; } return invert; } static bool tcp_mt(const struct sk_buff *skb, struct xt_action_param *par) { const struct tcphdr *th; struct tcphdr _tcph; const struct xt_tcp *tcpinfo = par->matchinfo; if (par->fragoff != 0) { /* To quote Alan: Don't allow a fragment of TCP 8 bytes in. Nobody normal causes this. Its a cracker trying to break in by doing a flag overwrite to pass the direction checks. */ if (par->fragoff == 1) { pr_debug("Dropping evil TCP offset=1 frag.\n"); par->hotdrop = true; } /* Must not be a fragment. */ return false; } th = skb_header_pointer(skb, par->thoff, sizeof(_tcph), &_tcph); if (th == NULL) { /* We've been asked to examine this packet, and we can't. Hence, no choice but to drop. */ pr_debug("Dropping evil TCP offset=0 tinygram.\n"); par->hotdrop = true; return false; } if (!port_match(tcpinfo->spts[0], tcpinfo->spts[1], ntohs(th->source), !!(tcpinfo->invflags & XT_TCP_INV_SRCPT))) return false; if (!port_match(tcpinfo->dpts[0], tcpinfo->dpts[1], ntohs(th->dest), !!(tcpinfo->invflags & XT_TCP_INV_DSTPT))) return false; if (!NF_INVF(tcpinfo, XT_TCP_INV_FLAGS, (((unsigned char *)th)[13] & tcpinfo->flg_mask) == tcpinfo->flg_cmp)) return false; if (tcpinfo->option) { if (th->doff * 4 < sizeof(_tcph)) { par->hotdrop = true; return false; } if (!tcp_find_option(tcpinfo->option, skb, par->thoff, th->doff*4 - sizeof(_tcph), tcpinfo->invflags & XT_TCP_INV_OPTION, &par->hotdrop)) return false; } return true; } static int tcp_mt_check(const struct xt_mtchk_param *par) { const struct xt_tcp *tcpinfo = par->matchinfo; /* Must specify no unknown invflags */ return (tcpinfo->invflags & ~XT_TCP_INV_MASK) ? -EINVAL : 0; } static bool udp_mt(const struct sk_buff *skb, struct xt_action_param *par) { const struct udphdr *uh; struct udphdr _udph; const struct xt_udp *udpinfo = par->matchinfo; /* Must not be a fragment. */ if (par->fragoff != 0) return false; uh = skb_header_pointer(skb, par->thoff, sizeof(_udph), &_udph); if (uh == NULL) { /* We've been asked to examine this packet, and we can't. Hence, no choice but to drop. */ pr_debug("Dropping evil UDP tinygram.\n"); par->hotdrop = true; return false; } return port_match(udpinfo->spts[0], udpinfo->spts[1], ntohs(uh->source), !!(udpinfo->invflags & XT_UDP_INV_SRCPT)) && port_match(udpinfo->dpts[0], udpinfo->dpts[1], ntohs(uh->dest), !!(udpinfo->invflags & XT_UDP_INV_DSTPT)); } static int udp_mt_check(const struct xt_mtchk_param *par) { const struct xt_udp *udpinfo = par->matchinfo; /* Must specify no unknown invflags */ return (udpinfo->invflags & ~XT_UDP_INV_MASK) ? -EINVAL : 0; } /* Returns 1 if the type and code is matched by the range, 0 otherwise */ static bool type_code_in_range(u8 test_type, u8 min_code, u8 max_code, u8 type, u8 code) { return type == test_type && code >= min_code && code <= max_code; } static bool icmp_type_code_match(u8 test_type, u8 min_code, u8 max_code, u8 type, u8 code, bool invert) { return (test_type == 0xFF || type_code_in_range(test_type, min_code, max_code, type, code)) ^ invert; } static bool icmp6_type_code_match(u8 test_type, u8 min_code, u8 max_code, u8 type, u8 code, bool invert) { return type_code_in_range(test_type, min_code, max_code, type, code) ^ invert; } static bool icmp_match(const struct sk_buff *skb, struct xt_action_param *par) { const struct icmphdr *ic; struct icmphdr _icmph; const struct ipt_icmp *icmpinfo = par->matchinfo; /* Must not be a fragment. */ if (par->fragoff != 0) return false; ic = skb_header_pointer(skb, par->thoff, sizeof(_icmph), &_icmph); if (!ic) { /* We've been asked to examine this packet, and we * can't. Hence, no choice but to drop. */ par->hotdrop = true; return false; } return icmp_type_code_match(icmpinfo->type, icmpinfo->code[0], icmpinfo->code[1], ic->type, ic->code, !!(icmpinfo->invflags & IPT_ICMP_INV)); } static bool icmp6_match(const struct sk_buff *skb, struct xt_action_param *par) { const struct icmp6hdr *ic; struct icmp6hdr _icmph; const struct ip6t_icmp *icmpinfo = par->matchinfo; /* Must not be a fragment. */ if (par->fragoff != 0) return false; ic = skb_header_pointer(skb, par->thoff, sizeof(_icmph), &_icmph); if (!ic) { /* We've been asked to examine this packet, and we * can't. Hence, no choice but to drop. */ par->hotdrop = true; return false; } return icmp6_type_code_match(icmpinfo->type, icmpinfo->code[0], icmpinfo->code[1], ic->icmp6_type, ic->icmp6_code, !!(icmpinfo->invflags & IP6T_ICMP_INV)); } static int icmp_checkentry(const struct xt_mtchk_param *par) { const struct ipt_icmp *icmpinfo = par->matchinfo; return (icmpinfo->invflags & ~IPT_ICMP_INV) ? -EINVAL : 0; } static int icmp6_checkentry(const struct xt_mtchk_param *par) { const struct ip6t_icmp *icmpinfo = par->matchinfo; return (icmpinfo->invflags & ~IP6T_ICMP_INV) ? -EINVAL : 0; } static struct xt_match tcpudp_mt_reg[] __read_mostly = { { .name = "tcp", .family = NFPROTO_IPV4, .checkentry = tcp_mt_check, .match = tcp_mt, .matchsize = sizeof(struct xt_tcp), .proto = IPPROTO_TCP, .me = THIS_MODULE, }, { .name = "tcp", .family = NFPROTO_IPV6, .checkentry = tcp_mt_check, .match = tcp_mt, .matchsize = sizeof(struct xt_tcp), .proto = IPPROTO_TCP, .me = THIS_MODULE, }, { .name = "udp", .family = NFPROTO_IPV4, .checkentry = udp_mt_check, .match = udp_mt, .matchsize = sizeof(struct xt_udp), .proto = IPPROTO_UDP, .me = THIS_MODULE, }, { .name = "udp", .family = NFPROTO_IPV6, .checkentry = udp_mt_check, .match = udp_mt, .matchsize = sizeof(struct xt_udp), .proto = IPPROTO_UDP, .me = THIS_MODULE, }, { .name = "udplite", .family = NFPROTO_IPV4, .checkentry = udp_mt_check, .match = udp_mt, .matchsize = sizeof(struct xt_udp), .proto = IPPROTO_UDPLITE, .me = THIS_MODULE, }, { .name = "udplite", .family = NFPROTO_IPV6, .checkentry = udp_mt_check, .match = udp_mt, .matchsize = sizeof(struct xt_udp), .proto = IPPROTO_UDPLITE, .me = THIS_MODULE, }, { .name = "icmp", .match = icmp_match, .matchsize = sizeof(struct ipt_icmp), .checkentry = icmp_checkentry, .proto = IPPROTO_ICMP, .family = NFPROTO_IPV4, .me = THIS_MODULE, }, { .name = "icmp6", .match = icmp6_match, .matchsize = sizeof(struct ip6t_icmp), .checkentry = icmp6_checkentry, .proto = IPPROTO_ICMPV6, .family = NFPROTO_IPV6, .me = THIS_MODULE, }, }; static int __init tcpudp_mt_init(void) { return xt_register_matches(tcpudp_mt_reg, ARRAY_SIZE(tcpudp_mt_reg)); } static void __exit tcpudp_mt_exit(void) { xt_unregister_matches(tcpudp_mt_reg, ARRAY_SIZE(tcpudp_mt_reg)); } module_init(tcpudp_mt_init); module_exit(tcpudp_mt_exit);
3 138 43 60 2 2 108 41 8 36 134 79 8 67 47 40 40 33 2 1 18 1 2 31 3 40 358 153 310 287 2 95 80 383 48 305 283 486 311 19 781 70 62 21 726 723 709 1 533 215 355 1352 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _NET_NF_TABLES_H #define _NET_NF_TABLES_H #include <linux/unaligned.h> #include <linux/list.h> #include <linux/netfilter.h> #include <linux/netfilter/nfnetlink.h> #include <linux/netfilter/x_tables.h> #include <linux/netfilter/nf_tables.h> #include <linux/u64_stats_sync.h> #include <linux/rhashtable.h> #include <net/netfilter/nf_flow_table.h> #include <net/netlink.h> #include <net/flow_offload.h> #include <net/netns/generic.h> #define NFT_MAX_HOOKS (NF_INET_INGRESS + 1) struct module; #define NFT_JUMP_STACK_SIZE 16 enum { NFT_PKTINFO_L4PROTO = (1 << 0), NFT_PKTINFO_INNER = (1 << 1), NFT_PKTINFO_INNER_FULL = (1 << 2), }; struct nft_pktinfo { struct sk_buff *skb; const struct nf_hook_state *state; u8 flags; u8 tprot; u16 fragoff; u16 thoff; u16 inneroff; }; static inline struct sock *nft_sk(const struct nft_pktinfo *pkt) { return pkt->state->sk; } static inline unsigned int nft_thoff(const struct nft_pktinfo *pkt) { return pkt->thoff; } static inline struct net *nft_net(const struct nft_pktinfo *pkt) { return pkt->state->net; } static inline unsigned int nft_hook(const struct nft_pktinfo *pkt) { return pkt->state->hook; } static inline u8 nft_pf(const struct nft_pktinfo *pkt) { return pkt->state->pf; } static inline const struct net_device *nft_in(const struct nft_pktinfo *pkt) { return pkt->state->in; } static inline const struct net_device *nft_out(const struct nft_pktinfo *pkt) { return pkt->state->out; } static inline void nft_set_pktinfo(struct nft_pktinfo *pkt, struct sk_buff *skb, const struct nf_hook_state *state) { pkt->skb = skb; pkt->state = state; } static inline void nft_set_pktinfo_unspec(struct nft_pktinfo *pkt) { pkt->flags = 0; pkt->tprot = 0; pkt->thoff = 0; pkt->fragoff = 0; } /** * struct nft_verdict - nf_tables verdict * * @code: nf_tables/netfilter verdict code * @chain: destination chain for NFT_JUMP/NFT_GOTO */ struct nft_verdict { u32 code; struct nft_chain *chain; }; struct nft_data { union { u32 data[4]; struct nft_verdict verdict; }; } __attribute__((aligned(__alignof__(u64)))); #define NFT_REG32_NUM 20 /** * struct nft_regs - nf_tables register set * * @data: data registers * @verdict: verdict register * * The first four data registers alias to the verdict register. */ struct nft_regs { union { u32 data[NFT_REG32_NUM]; struct nft_verdict verdict; }; }; struct nft_regs_track { struct { const struct nft_expr *selector; const struct nft_expr *bitwise; u8 num_reg; } regs[NFT_REG32_NUM]; const struct nft_expr *cur; const struct nft_expr *last; }; /* Store/load an u8, u16 or u64 integer to/from the u32 data register. * * Note, when using concatenations, register allocation happens at 32-bit * level. So for store instruction, pad the rest part with zero to avoid * garbage values. */ static inline void nft_reg_store8(u32 *dreg, u8 val) { *dreg = 0; *(u8 *)dreg = val; } static inline u8 nft_reg_load8(const u32 *sreg) { return *(u8 *)sreg; } static inline void nft_reg_store16(u32 *dreg, u16 val) { *dreg = 0; *(u16 *)dreg = val; } static inline void nft_reg_store_be16(u32 *dreg, __be16 val) { nft_reg_store16(dreg, (__force __u16)val); } static inline u16 nft_reg_load16(const u32 *sreg) { return *(u16 *)sreg; } static inline __be16 nft_reg_load_be16(const u32 *sreg) { return (__force __be16)nft_reg_load16(sreg); } static inline __be32 nft_reg_load_be32(const u32 *sreg) { return *(__force __be32 *)sreg; } static inline void nft_reg_store64(u64 *dreg, u64 val) { put_unaligned(val, dreg); } static inline u64 nft_reg_load64(const u32 *sreg) { return get_unaligned((u64 *)sreg); } static inline void nft_data_copy(u32 *dst, const struct nft_data *src, unsigned int len) { if (len % NFT_REG32_SIZE) dst[len / NFT_REG32_SIZE] = 0; memcpy(dst, src, len); } /** * struct nft_ctx - nf_tables rule/set context * * @net: net namespace * @table: the table the chain is contained in * @chain: the chain the rule is contained in * @nla: netlink attributes * @portid: netlink portID of the original message * @seq: netlink sequence number * @flags: modifiers to new request * @family: protocol family * @level: depth of the chains * @report: notify via unicast netlink message * @reg_inited: bitmap of initialised registers */ struct nft_ctx { struct net *net; struct nft_table *table; struct nft_chain *chain; const struct nlattr * const *nla; u32 portid; u32 seq; u16 flags; u8 family; u8 level; bool report; DECLARE_BITMAP(reg_inited, NFT_REG32_NUM); }; enum nft_data_desc_flags { NFT_DATA_DESC_SETELEM = (1 << 0), }; struct nft_data_desc { enum nft_data_types type; unsigned int size; unsigned int len; unsigned int flags; }; int nft_data_init(const struct nft_ctx *ctx, struct nft_data *data, struct nft_data_desc *desc, const struct nlattr *nla); void nft_data_hold(const struct nft_data *data, enum nft_data_types type); void nft_data_release(const struct nft_data *data, enum nft_data_types type); int nft_data_dump(struct sk_buff *skb, int attr, const struct nft_data *data, enum nft_data_types type, unsigned int len); static inline enum nft_data_types nft_dreg_to_type(enum nft_registers reg) { return reg == NFT_REG_VERDICT ? NFT_DATA_VERDICT : NFT_DATA_VALUE; } static inline enum nft_registers nft_type_to_reg(enum nft_data_types type) { return type == NFT_DATA_VERDICT ? NFT_REG_VERDICT : NFT_REG_1 * NFT_REG_SIZE / NFT_REG32_SIZE; } int nft_parse_u32_check(const struct nlattr *attr, int max, u32 *dest); int nft_dump_register(struct sk_buff *skb, unsigned int attr, unsigned int reg); int nft_parse_register_load(const struct nft_ctx *ctx, const struct nlattr *attr, u8 *sreg, u32 len); int nft_parse_register_store(const struct nft_ctx *ctx, const struct nlattr *attr, u8 *dreg, const struct nft_data *data, enum nft_data_types type, unsigned int len); /** * struct nft_userdata - user defined data associated with an object * * @len: length of the data * @data: content * * The presence of user data is indicated in an object specific fashion, * so a length of zero can't occur and the value "len" indicates data * of length len + 1. */ struct nft_userdata { u8 len; unsigned char data[]; }; /* placeholder structure for opaque set element backend representation. */ struct nft_elem_priv { }; /** * struct nft_set_elem - generic representation of set elements * * @key: element key * @key_end: closing element key * @data: element data * @priv: element private data and extensions */ struct nft_set_elem { union { u32 buf[NFT_DATA_VALUE_MAXLEN / sizeof(u32)]; struct nft_data val; } key; union { u32 buf[NFT_DATA_VALUE_MAXLEN / sizeof(u32)]; struct nft_data val; } key_end; union { u32 buf[NFT_DATA_VALUE_MAXLEN / sizeof(u32)]; struct nft_data val; } data; struct nft_elem_priv *priv; }; static inline void *nft_elem_priv_cast(const struct nft_elem_priv *priv) { return (void *)priv; } /** * enum nft_iter_type - nftables set iterator type * * @NFT_ITER_UNSPEC: unspecified, to catch errors * @NFT_ITER_READ: read-only iteration over set elements * @NFT_ITER_UPDATE: iteration under mutex to update set element state */ enum nft_iter_type { NFT_ITER_UNSPEC, NFT_ITER_READ, NFT_ITER_UPDATE, }; struct nft_set; struct nft_set_iter { u8 genmask; enum nft_iter_type type:8; unsigned int count; unsigned int skip; int err; int (*fn)(const struct nft_ctx *ctx, struct nft_set *set, const struct nft_set_iter *iter, struct nft_elem_priv *elem_priv); }; /** * struct nft_set_desc - description of set elements * * @ktype: key type * @klen: key length * @dtype: data type * @dlen: data length * @objtype: object type * @size: number of set elements * @policy: set policy * @gc_int: garbage collector interval * @timeout: element timeout * @field_len: length of each field in concatenation, bytes * @field_count: number of concatenated fields in element * @expr: set must support for expressions */ struct nft_set_desc { u32 ktype; unsigned int klen; u32 dtype; unsigned int dlen; u32 objtype; unsigned int size; u32 policy; u32 gc_int; u64 timeout; u8 field_len[NFT_REG32_COUNT]; u8 field_count; bool expr; }; /** * enum nft_set_class - performance class * * @NFT_SET_CLASS_O_1: constant, O(1) * @NFT_SET_CLASS_O_LOG_N: logarithmic, O(log N) * @NFT_SET_CLASS_O_N: linear, O(N) */ enum nft_set_class { NFT_SET_CLASS_O_1, NFT_SET_CLASS_O_LOG_N, NFT_SET_CLASS_O_N, }; /** * struct nft_set_estimate - estimation of memory and performance * characteristics * * @size: required memory * @lookup: lookup performance class * @space: memory class */ struct nft_set_estimate { u64 size; enum nft_set_class lookup; enum nft_set_class space; }; #define NFT_EXPR_MAXATTR 16 #define NFT_EXPR_SIZE(size) (sizeof(struct nft_expr) + \ ALIGN(size, __alignof__(struct nft_expr))) /** * struct nft_expr - nf_tables expression * * @ops: expression ops * @data: expression private data */ struct nft_expr { const struct nft_expr_ops *ops; unsigned char data[] __attribute__((aligned(__alignof__(u64)))); }; static inline void *nft_expr_priv(const struct nft_expr *expr) { return (void *)expr->data; } struct nft_expr_info; int nft_expr_inner_parse(const struct nft_ctx *ctx, const struct nlattr *nla, struct nft_expr_info *info); int nft_expr_clone(struct nft_expr *dst, struct nft_expr *src, gfp_t gfp); void nft_expr_destroy(const struct nft_ctx *ctx, struct nft_expr *expr); int nft_expr_dump(struct sk_buff *skb, unsigned int attr, const struct nft_expr *expr, bool reset); bool nft_expr_reduce_bitwise(struct nft_regs_track *track, const struct nft_expr *expr); struct nft_set_ext; /** * struct nft_set_ops - nf_tables set operations * * @lookup: look up an element within the set * @update: update an element if exists, add it if doesn't exist * @delete: delete an element * @insert: insert new element into set * @activate: activate new element in the next generation * @deactivate: lookup for element and deactivate it in the next generation * @flush: deactivate element in the next generation * @remove: remove element from set * @walk: iterate over all set elements * @get: get set elements * @commit: commit set elements * @abort: abort set elements * @privsize: function to return size of set private data * @estimate: estimate the required memory size and the lookup complexity class * @init: initialize private data of new set instance * @destroy: destroy private data of set instance * @gc_init: initialize garbage collection * @elemsize: element private size * * Operations lookup, update and delete have simpler interfaces, are faster * and currently only used in the packet path. All the rest are slower, * control plane functions. */ struct nft_set_ops { bool (*lookup)(const struct net *net, const struct nft_set *set, const u32 *key, const struct nft_set_ext **ext); bool (*update)(struct nft_set *set, const u32 *key, struct nft_elem_priv * (*new)(struct nft_set *, const struct nft_expr *, struct nft_regs *), const struct nft_expr *expr, struct nft_regs *regs, const struct nft_set_ext **ext); bool (*delete)(const struct nft_set *set, const u32 *key); int (*insert)(const struct net *net, const struct nft_set *set, const struct nft_set_elem *elem, struct nft_elem_priv **priv); void (*activate)(const struct net *net, const struct nft_set *set, struct nft_elem_priv *elem_priv); struct nft_elem_priv * (*deactivate)(const struct net *net, const struct nft_set *set, const struct nft_set_elem *elem); void (*flush)(const struct net *net, const struct nft_set *set, struct nft_elem_priv *priv); void (*remove)(const struct net *net, const struct nft_set *set, struct nft_elem_priv *elem_priv); void (*walk)(const struct nft_ctx *ctx, struct nft_set *set, struct nft_set_iter *iter); struct nft_elem_priv * (*get)(const struct net *net, const struct nft_set *set, const struct nft_set_elem *elem, unsigned int flags); void (*commit)(struct nft_set *set); void (*abort)(const struct nft_set *set); u64 (*privsize)(const struct nlattr * const nla[], const struct nft_set_desc *desc); bool (*estimate)(const struct nft_set_desc *desc, u32 features, struct nft_set_estimate *est); int (*init)(const struct nft_set *set, const struct nft_set_desc *desc, const struct nlattr * const nla[]); void (*destroy)(const struct nft_ctx *ctx, const struct nft_set *set); void (*gc_init)(const struct nft_set *set); unsigned int elemsize; }; /** * struct nft_set_type - nf_tables set type * * @ops: set ops for this type * @features: features supported by the implementation */ struct nft_set_type { const struct nft_set_ops ops; u32 features; }; #define to_set_type(o) container_of(o, struct nft_set_type, ops) struct nft_set_elem_expr { u8 size; unsigned char data[] __attribute__((aligned(__alignof__(struct nft_expr)))); }; #define nft_setelem_expr_at(__elem_expr, __offset) \ ((struct nft_expr *)&__elem_expr->data[__offset]) #define nft_setelem_expr_foreach(__expr, __elem_expr, __size) \ for (__expr = nft_setelem_expr_at(__elem_expr, 0), __size = 0; \ __size < (__elem_expr)->size; \ __size += (__expr)->ops->size, __expr = ((void *)(__expr)) + (__expr)->ops->size) #define NFT_SET_EXPR_MAX 2 /** * struct nft_set - nf_tables set instance * * @list: table set list node * @bindings: list of set bindings * @refs: internal refcounting for async set destruction * @table: table this set belongs to * @net: netnamespace this set belongs to * @name: name of the set * @handle: unique handle of the set * @ktype: key type (numeric type defined by userspace, not used in the kernel) * @dtype: data type (verdict or numeric type defined by userspace) * @objtype: object type (see NFT_OBJECT_* definitions) * @size: maximum set size * @field_len: length of each field in concatenation, bytes * @field_count: number of concatenated fields in element * @use: number of rules references to this set * @nelems: number of elements * @ndeact: number of deactivated elements queued for removal * @timeout: default timeout value in jiffies * @gc_int: garbage collection interval in msecs * @policy: set parameterization (see enum nft_set_policies) * @udlen: user data length * @udata: user data * @pending_update: list of pending update set element * @ops: set ops * @flags: set flags * @dead: set will be freed, never cleared * @genmask: generation mask * @klen: key length * @dlen: data length * @num_exprs: numbers of exprs * @exprs: stateful expression * @catchall_list: list of catch-all set element * @data: private set data */ struct nft_set { struct list_head list; struct list_head bindings; refcount_t refs; struct nft_table *table; possible_net_t net; char *name; u64 handle; u32 ktype; u32 dtype; u32 objtype; u32 size; u8 field_len[NFT_REG32_COUNT]; u8 field_count; u32 use; atomic_t nelems; u32 ndeact; u64 timeout; u32 gc_int; u16 policy; u16 udlen; unsigned char *udata; struct list_head pending_update; /* runtime data below here */ const struct nft_set_ops *ops ____cacheline_aligned; u16 flags:13, dead:1, genmask:2; u8 klen; u8 dlen; u8 num_exprs; struct nft_expr *exprs[NFT_SET_EXPR_MAX]; struct list_head catchall_list; unsigned char data[] __attribute__((aligned(__alignof__(u64)))); }; static inline bool nft_set_is_anonymous(const struct nft_set *set) { return set->flags & NFT_SET_ANONYMOUS; } static inline void *nft_set_priv(const struct nft_set *set) { return (void *)set->data; } static inline enum nft_data_types nft_set_datatype(const struct nft_set *set) { return set->dtype == NFT_DATA_VERDICT ? NFT_DATA_VERDICT : NFT_DATA_VALUE; } static inline bool nft_set_gc_is_pending(const struct nft_set *s) { return refcount_read(&s->refs) != 1; } static inline struct nft_set *nft_set_container_of(const void *priv) { return (void *)priv - offsetof(struct nft_set, data); } struct nft_set *nft_set_lookup_global(const struct net *net, const struct nft_table *table, const struct nlattr *nla_set_name, const struct nlattr *nla_set_id, u8 genmask); struct nft_set_ext *nft_set_catchall_lookup(const struct net *net, const struct nft_set *set); static inline unsigned long nft_set_gc_interval(const struct nft_set *set) { u32 gc_int = READ_ONCE(set->gc_int); return gc_int ? msecs_to_jiffies(gc_int) : HZ; } /** * struct nft_set_binding - nf_tables set binding * * @list: set bindings list node * @chain: chain containing the rule bound to the set * @flags: set action flags * * A set binding contains all information necessary for validation * of new elements added to a bound set. */ struct nft_set_binding { struct list_head list; const struct nft_chain *chain; u32 flags; }; enum nft_trans_phase; void nf_tables_activate_set(const struct nft_ctx *ctx, struct nft_set *set); void nf_tables_deactivate_set(const struct nft_ctx *ctx, struct nft_set *set, struct nft_set_binding *binding, enum nft_trans_phase phase); int nf_tables_bind_set(const struct nft_ctx *ctx, struct nft_set *set, struct nft_set_binding *binding); void nf_tables_destroy_set(const struct nft_ctx *ctx, struct nft_set *set); /** * enum nft_set_extensions - set extension type IDs * * @NFT_SET_EXT_KEY: element key * @NFT_SET_EXT_KEY_END: upper bound element key, for ranges * @NFT_SET_EXT_DATA: mapping data * @NFT_SET_EXT_FLAGS: element flags * @NFT_SET_EXT_TIMEOUT: element timeout * @NFT_SET_EXT_USERDATA: user data associated with the element * @NFT_SET_EXT_EXPRESSIONS: expressions associated with the element * @NFT_SET_EXT_OBJREF: stateful object reference associated with element * @NFT_SET_EXT_NUM: number of extension types */ enum nft_set_extensions { NFT_SET_EXT_KEY, NFT_SET_EXT_KEY_END, NFT_SET_EXT_DATA, NFT_SET_EXT_FLAGS, NFT_SET_EXT_TIMEOUT, NFT_SET_EXT_USERDATA, NFT_SET_EXT_EXPRESSIONS, NFT_SET_EXT_OBJREF, NFT_SET_EXT_NUM }; /** * struct nft_set_ext_type - set extension type * * @len: fixed part length of the extension * @align: alignment requirements of the extension */ struct nft_set_ext_type { u8 len; u8 align; }; extern const struct nft_set_ext_type nft_set_ext_types[]; /** * struct nft_set_ext_tmpl - set extension template * * @len: length of extension area * @offset: offsets of individual extension types * @ext_len: length of the expected extension(used to sanity check) */ struct nft_set_ext_tmpl { u16 len; u8 offset[NFT_SET_EXT_NUM]; u8 ext_len[NFT_SET_EXT_NUM]; }; /** * struct nft_set_ext - set extensions * * @genmask: generation mask * @offset: offsets of individual extension types * @data: beginning of extension data */ struct nft_set_ext { u8 genmask; u8 offset[NFT_SET_EXT_NUM]; char data[]; }; static inline void nft_set_ext_prepare(struct nft_set_ext_tmpl *tmpl) { memset(tmpl, 0, sizeof(*tmpl)); tmpl->len = sizeof(struct nft_set_ext); } static inline int nft_set_ext_add_length(struct nft_set_ext_tmpl *tmpl, u8 id, unsigned int len) { tmpl->len = ALIGN(tmpl->len, nft_set_ext_types[id].align); if (tmpl->len > U8_MAX) return -EINVAL; tmpl->offset[id] = tmpl->len; tmpl->ext_len[id] = nft_set_ext_types[id].len + len; tmpl->len += tmpl->ext_len[id]; return 0; } static inline int nft_set_ext_add(struct nft_set_ext_tmpl *tmpl, u8 id) { return nft_set_ext_add_length(tmpl, id, 0); } static inline void nft_set_ext_init(struct nft_set_ext *ext, const struct nft_set_ext_tmpl *tmpl) { memcpy(ext->offset, tmpl->offset, sizeof(ext->offset)); } static inline bool __nft_set_ext_exists(const struct nft_set_ext *ext, u8 id) { return !!ext->offset[id]; } static inline bool nft_set_ext_exists(const struct nft_set_ext *ext, u8 id) { return ext && __nft_set_ext_exists(ext, id); } static inline void *nft_set_ext(const struct nft_set_ext *ext, u8 id) { return (void *)ext + ext->offset[id]; } static inline struct nft_data *nft_set_ext_key(const struct nft_set_ext *ext) { return nft_set_ext(ext, NFT_SET_EXT_KEY); } static inline struct nft_data *nft_set_ext_key_end(const struct nft_set_ext *ext) { return nft_set_ext(ext, NFT_SET_EXT_KEY_END); } static inline struct nft_data *nft_set_ext_data(const struct nft_set_ext *ext) { return nft_set_ext(ext, NFT_SET_EXT_DATA); } static inline u8 *nft_set_ext_flags(const struct nft_set_ext *ext) { return nft_set_ext(ext, NFT_SET_EXT_FLAGS); } struct nft_timeout { u64 timeout; u64 expiration; }; static inline struct nft_timeout *nft_set_ext_timeout(const struct nft_set_ext *ext) { return nft_set_ext(ext, NFT_SET_EXT_TIMEOUT); } static inline struct nft_userdata *nft_set_ext_userdata(const struct nft_set_ext *ext) { return nft_set_ext(ext, NFT_SET_EXT_USERDATA); } static inline struct nft_set_elem_expr *nft_set_ext_expr(const struct nft_set_ext *ext) { return nft_set_ext(ext, NFT_SET_EXT_EXPRESSIONS); } static inline bool __nft_set_elem_expired(const struct nft_set_ext *ext, u64 tstamp) { if (!nft_set_ext_exists(ext, NFT_SET_EXT_TIMEOUT) || READ_ONCE(nft_set_ext_timeout(ext)->timeout) == 0) return false; return time_after_eq64(tstamp, READ_ONCE(nft_set_ext_timeout(ext)->expiration)); } static inline bool nft_set_elem_expired(const struct nft_set_ext *ext) { return __nft_set_elem_expired(ext, get_jiffies_64()); } static inline struct nft_set_ext *nft_set_elem_ext(const struct nft_set *set, const struct nft_elem_priv *elem_priv) { return (void *)elem_priv + set->ops->elemsize; } static inline struct nft_object **nft_set_ext_obj(const struct nft_set_ext *ext) { return nft_set_ext(ext, NFT_SET_EXT_OBJREF); } struct nft_expr *nft_set_elem_expr_alloc(const struct nft_ctx *ctx, const struct nft_set *set, const struct nlattr *attr); struct nft_elem_priv *nft_set_elem_init(const struct nft_set *set, const struct nft_set_ext_tmpl *tmpl, const u32 *key, const u32 *key_end, const u32 *data, u64 timeout, u64 expiration, gfp_t gfp); int nft_set_elem_expr_clone(const struct nft_ctx *ctx, struct nft_set *set, struct nft_expr *expr_array[]); void nft_set_elem_destroy(const struct nft_set *set, const struct nft_elem_priv *elem_priv, bool destroy_expr); void nf_tables_set_elem_destroy(const struct nft_ctx *ctx, const struct nft_set *set, const struct nft_elem_priv *elem_priv); struct nft_expr_ops; /** * struct nft_expr_type - nf_tables expression type * * @select_ops: function to select nft_expr_ops * @release_ops: release nft_expr_ops * @ops: default ops, used when no select_ops functions is present * @inner_ops: inner ops, used for inner packet operation * @list: used internally * @name: Identifier * @owner: module reference * @policy: netlink attribute policy * @maxattr: highest netlink attribute number * @family: address family for AF-specific types * @flags: expression type flags */ struct nft_expr_type { const struct nft_expr_ops *(*select_ops)(const struct nft_ctx *, const struct nlattr * const tb[]); void (*release_ops)(const struct nft_expr_ops *ops); const struct nft_expr_ops *ops; const struct nft_expr_ops *inner_ops; struct list_head list; const char *name; struct module *owner; const struct nla_policy *policy; unsigned int maxattr; u8 family; u8 flags; }; #define NFT_EXPR_STATEFUL 0x1 #define NFT_EXPR_GC 0x2 enum nft_trans_phase { NFT_TRANS_PREPARE, NFT_TRANS_PREPARE_ERROR, NFT_TRANS_ABORT, NFT_TRANS_COMMIT, NFT_TRANS_RELEASE }; struct nft_flow_rule; struct nft_offload_ctx; /** * struct nft_expr_ops - nf_tables expression operations * * @eval: Expression evaluation function * @clone: Expression clone function * @size: full expression size, including private data size * @init: initialization function * @activate: activate expression in the next generation * @deactivate: deactivate expression in next generation * @destroy: destruction function, called after synchronize_rcu * @destroy_clone: destruction clone function * @dump: function to dump parameters * @validate: validate expression, called during loop detection * @reduce: reduce expression * @gc: garbage collection expression * @offload: hardware offload expression * @offload_action: function to report true/false to allocate one slot or not in the flow * offload array * @offload_stats: function to synchronize hardware stats via updating the counter expression * @type: expression type * @data: extra data to attach to this expression operation */ struct nft_expr_ops { void (*eval)(const struct nft_expr *expr, struct nft_regs *regs, const struct nft_pktinfo *pkt); int (*clone)(struct nft_expr *dst, const struct nft_expr *src, gfp_t gfp); unsigned int size; int (*init)(const struct nft_ctx *ctx, const struct nft_expr *expr, const struct nlattr * const tb[]); void (*activate)(const struct nft_ctx *ctx, const struct nft_expr *expr); void (*deactivate)(const struct nft_ctx *ctx, const struct nft_expr *expr, enum nft_trans_phase phase); void (*destroy)(const struct nft_ctx *ctx, const struct nft_expr *expr); void (*destroy_clone)(const struct nft_ctx *ctx, const struct nft_expr *expr); int (*dump)(struct sk_buff *skb, const struct nft_expr *expr, bool reset); int (*validate)(const struct nft_ctx *ctx, const struct nft_expr *expr); bool (*reduce)(struct nft_regs_track *track, const struct nft_expr *expr); bool (*gc)(struct net *net, const struct nft_expr *expr); int (*offload)(struct nft_offload_ctx *ctx, struct nft_flow_rule *flow, const struct nft_expr *expr); bool (*offload_action)(const struct nft_expr *expr); void (*offload_stats)(struct nft_expr *expr, const struct flow_stats *stats); const struct nft_expr_type *type; void *data; }; /** * struct nft_rule - nf_tables rule * * @list: used internally * @handle: rule handle * @genmask: generation mask * @dlen: length of expression data * @udata: user data is appended to the rule * @data: expression data */ struct nft_rule { struct list_head list; u64 handle:42, genmask:2, dlen:12, udata:1; unsigned char data[] __attribute__((aligned(__alignof__(struct nft_expr)))); }; static inline struct nft_expr *nft_expr_first(const struct nft_rule *rule) { return (struct nft_expr *)&rule->data[0]; } static inline struct nft_expr *nft_expr_next(const struct nft_expr *expr) { return ((void *)expr) + expr->ops->size; } static inline struct nft_expr *nft_expr_last(const struct nft_rule *rule) { return (struct nft_expr *)&rule->data[rule->dlen]; } static inline bool nft_expr_more(const struct nft_rule *rule, const struct nft_expr *expr) { return expr != nft_expr_last(rule) && expr->ops; } static inline struct nft_userdata *nft_userdata(const struct nft_rule *rule) { return (void *)&rule->data[rule->dlen]; } void nft_rule_expr_activate(const struct nft_ctx *ctx, struct nft_rule *rule); void nft_rule_expr_deactivate(const struct nft_ctx *ctx, struct nft_rule *rule, enum nft_trans_phase phase); void nf_tables_rule_destroy(const struct nft_ctx *ctx, struct nft_rule *rule); static inline void nft_set_elem_update_expr(const struct nft_set_ext *ext, struct nft_regs *regs, const struct nft_pktinfo *pkt) { struct nft_set_elem_expr *elem_expr; struct nft_expr *expr; u32 size; if (__nft_set_ext_exists(ext, NFT_SET_EXT_EXPRESSIONS)) { elem_expr = nft_set_ext_expr(ext); nft_setelem_expr_foreach(expr, elem_expr, size) { expr->ops->eval(expr, regs, pkt); if (regs->verdict.code == NFT_BREAK) return; } } } /* * The last pointer isn't really necessary, but the compiler isn't able to * determine that the result of nft_expr_last() is always the same since it * can't assume that the dlen value wasn't changed within calls in the loop. */ #define nft_rule_for_each_expr(expr, last, rule) \ for ((expr) = nft_expr_first(rule), (last) = nft_expr_last(rule); \ (expr) != (last); \ (expr) = nft_expr_next(expr)) #define NFT_CHAIN_POLICY_UNSET U8_MAX struct nft_rule_dp { u64 is_last:1, dlen:12, handle:42; /* for tracing */ unsigned char data[] __attribute__((aligned(__alignof__(struct nft_expr)))); }; struct nft_rule_dp_last { struct nft_rule_dp end; /* end of nft_rule_blob marker */ struct rcu_head h; /* call_rcu head */ struct nft_rule_blob *blob; /* ptr to free via call_rcu */ const struct nft_chain *chain; /* for nftables tracing */ }; static inline const struct nft_rule_dp *nft_rule_next(const struct nft_rule_dp *rule) { return (void *)rule + sizeof(*rule) + rule->dlen; } struct nft_rule_blob { unsigned long size; unsigned char data[] __attribute__((aligned(__alignof__(struct nft_rule_dp)))); }; /** * struct nft_chain - nf_tables chain * * @blob_gen_0: rule blob pointer to the current generation * @blob_gen_1: rule blob pointer to the future generation * @rules: list of rules in the chain * @list: used internally * @rhlhead: used internally * @table: table that this chain belongs to * @handle: chain handle * @use: number of jump references to this chain * @flags: bitmask of enum NFTA_CHAIN_FLAGS * @bound: bind or not * @genmask: generation mask * @name: name of the chain * @udlen: user data length * @udata: user data in the chain * @blob_next: rule blob pointer to the next in the chain */ struct nft_chain { struct nft_rule_blob __rcu *blob_gen_0; struct nft_rule_blob __rcu *blob_gen_1; struct list_head rules; struct list_head list; struct rhlist_head rhlhead; struct nft_table *table; u64 handle; u32 use; u8 flags:5, bound:1, genmask:2; char *name; u16 udlen; u8 *udata; /* Only used during control plane commit phase: */ struct nft_rule_blob *blob_next; }; int nft_chain_validate(const struct nft_ctx *ctx, const struct nft_chain *chain); int nft_setelem_validate(const struct nft_ctx *ctx, struct nft_set *set, const struct nft_set_iter *iter, struct nft_elem_priv *elem_priv); int nft_set_catchall_validate(const struct nft_ctx *ctx, struct nft_set *set); int nf_tables_bind_chain(const struct nft_ctx *ctx, struct nft_chain *chain); void nf_tables_unbind_chain(const struct nft_ctx *ctx, struct nft_chain *chain); enum nft_chain_types { NFT_CHAIN_T_DEFAULT = 0, NFT_CHAIN_T_ROUTE, NFT_CHAIN_T_NAT, NFT_CHAIN_T_MAX }; /** * struct nft_chain_type - nf_tables chain type info * * @name: name of the type * @type: numeric identifier * @family: address family * @owner: module owner * @hook_mask: mask of valid hooks * @hooks: array of hook functions * @ops_register: base chain register function * @ops_unregister: base chain unregister function */ struct nft_chain_type { const char *name; enum nft_chain_types type; int family; struct module *owner; unsigned int hook_mask; nf_hookfn *hooks[NFT_MAX_HOOKS]; int (*ops_register)(struct net *net, const struct nf_hook_ops *ops); void (*ops_unregister)(struct net *net, const struct nf_hook_ops *ops); }; int nft_chain_validate_dependency(const struct nft_chain *chain, enum nft_chain_types type); int nft_chain_validate_hooks(const struct nft_chain *chain, unsigned int hook_flags); static inline bool nft_chain_binding(const struct nft_chain *chain) { return chain->flags & NFT_CHAIN_BINDING; } static inline bool nft_chain_is_bound(struct nft_chain *chain) { return (chain->flags & NFT_CHAIN_BINDING) && chain->bound; } int nft_chain_add(struct nft_table *table, struct nft_chain *chain); void nft_chain_del(struct nft_chain *chain); void nf_tables_chain_destroy(struct nft_chain *chain); struct nft_stats { u64 bytes; u64 pkts; struct u64_stats_sync syncp; }; struct nft_hook { struct list_head list; struct nf_hook_ops ops; struct rcu_head rcu; }; /** * struct nft_base_chain - nf_tables base chain * * @ops: netfilter hook ops * @hook_list: list of netfilter hooks (for NFPROTO_NETDEV family) * @type: chain type * @policy: default policy * @flags: indicate the base chain disabled or not * @stats: per-cpu chain stats * @chain: the chain * @flow_block: flow block (for hardware offload) */ struct nft_base_chain { struct nf_hook_ops ops; struct list_head hook_list; const struct nft_chain_type *type; u8 policy; u8 flags; struct nft_stats __percpu *stats; struct nft_chain chain; struct flow_block flow_block; }; static inline struct nft_base_chain *nft_base_chain(const struct nft_chain *chain) { return container_of(chain, struct nft_base_chain, chain); } static inline bool nft_is_base_chain(const struct nft_chain *chain) { return chain->flags & NFT_CHAIN_BASE; } int __nft_release_basechain(struct nft_ctx *ctx); unsigned int nft_do_chain(struct nft_pktinfo *pkt, void *priv); static inline bool nft_use_inc(u32 *use) { if (*use == UINT_MAX) return false; (*use)++; return true; } static inline void nft_use_dec(u32 *use) { WARN_ON_ONCE((*use)-- == 0); } /* For error and abort path: restore use counter to previous state. */ static inline void nft_use_inc_restore(u32 *use) { WARN_ON_ONCE(!nft_use_inc(use)); } #define nft_use_dec_restore nft_use_dec /** * struct nft_table - nf_tables table * * @list: used internally * @chains_ht: chains in the table * @chains: same, for stable walks * @sets: sets in the table * @objects: stateful objects in the table * @flowtables: flow tables in the table * @hgenerator: handle generator state * @handle: table handle * @use: number of chain references to this table * @family:address family * @flags: table flag (see enum nft_table_flags) * @genmask: generation mask * @nlpid: netlink port ID * @name: name of the table * @udlen: length of the user data * @udata: user data * @validate_state: internal, set when transaction adds jumps */ struct nft_table { struct list_head list; struct rhltable chains_ht; struct list_head chains; struct list_head sets; struct list_head objects; struct list_head flowtables; u64 hgenerator; u64 handle; u32 use; u16 family:6, flags:8, genmask:2; u32 nlpid; char *name; u16 udlen; u8 *udata; u8 validate_state; }; static inline bool nft_table_has_owner(const struct nft_table *table) { return table->flags & NFT_TABLE_F_OWNER; } static inline bool nft_table_is_orphan(const struct nft_table *table) { return (table->flags & (NFT_TABLE_F_OWNER | NFT_TABLE_F_PERSIST)) == NFT_TABLE_F_PERSIST; } static inline bool nft_base_chain_netdev(int family, u32 hooknum) { return family == NFPROTO_NETDEV || (family == NFPROTO_INET && hooknum == NF_INET_INGRESS); } void nft_register_chain_type(const struct nft_chain_type *); void nft_unregister_chain_type(const struct nft_chain_type *); int nft_register_expr(struct nft_expr_type *); void nft_unregister_expr(struct nft_expr_type *); int nft_verdict_dump(struct sk_buff *skb, int type, const struct nft_verdict *v); /** * struct nft_object_hash_key - key to lookup nft_object * * @name: name of the stateful object to look up * @table: table the object belongs to */ struct nft_object_hash_key { const char *name; const struct nft_table *table; }; /** * struct nft_object - nf_tables stateful object * * @list: table stateful object list node * @rhlhead: nft_objname_ht node * @key: keys that identify this object * @genmask: generation mask * @use: number of references to this stateful object * @handle: unique object handle * @udlen: length of user data * @udata: user data * @ops: object operations * @data: object data, layout depends on type */ struct nft_object { struct list_head list; struct rhlist_head rhlhead; struct nft_object_hash_key key; u32 genmask:2; u32 use; u64 handle; u16 udlen; u8 *udata; /* runtime data below here */ const struct nft_object_ops *ops ____cacheline_aligned; unsigned char data[] __attribute__((aligned(__alignof__(u64)))); }; static inline void *nft_obj_data(const struct nft_object *obj) { return (void *)obj->data; } #define nft_expr_obj(expr) *((struct nft_object **)nft_expr_priv(expr)) struct nft_object *nft_obj_lookup(const struct net *net, const struct nft_table *table, const struct nlattr *nla, u32 objtype, u8 genmask); void nft_obj_notify(struct net *net, const struct nft_table *table, struct nft_object *obj, u32 portid, u32 seq, int event, u16 flags, int family, int report, gfp_t gfp); /** * struct nft_object_type - stateful object type * * @select_ops: function to select nft_object_ops * @ops: default ops, used when no select_ops functions is present * @list: list node in list of object types * @type: stateful object numeric type * @owner: module owner * @maxattr: maximum netlink attribute * @family: address family for AF-specific object types * @policy: netlink attribute policy */ struct nft_object_type { const struct nft_object_ops *(*select_ops)(const struct nft_ctx *, const struct nlattr * const tb[]); const struct nft_object_ops *ops; struct list_head list; u32 type; unsigned int maxattr; u8 family; struct module *owner; const struct nla_policy *policy; }; /** * struct nft_object_ops - stateful object operations * * @eval: stateful object evaluation function * @size: stateful object size * @init: initialize object from netlink attributes * @destroy: release existing stateful object * @dump: netlink dump stateful object * @update: update stateful object * @type: pointer to object type */ struct nft_object_ops { void (*eval)(struct nft_object *obj, struct nft_regs *regs, const struct nft_pktinfo *pkt); unsigned int size; int (*init)(const struct nft_ctx *ctx, const struct nlattr *const tb[], struct nft_object *obj); void (*destroy)(const struct nft_ctx *ctx, struct nft_object *obj); int (*dump)(struct sk_buff *skb, struct nft_object *obj, bool reset); void (*update)(struct nft_object *obj, struct nft_object *newobj); const struct nft_object_type *type; }; int nft_register_obj(struct nft_object_type *obj_type); void nft_unregister_obj(struct nft_object_type *obj_type); #define NFT_NETDEVICE_MAX 256 /** * struct nft_flowtable - nf_tables flow table * * @list: flow table list node in table list * @table: the table the flow table is contained in * @name: name of this flow table * @hooknum: hook number * @ops_len: number of hooks in array * @genmask: generation mask * @use: number of references to this flow table * @handle: unique object handle * @hook_list: hook list for hooks per net_device in flowtables * @data: rhashtable and garbage collector */ struct nft_flowtable { struct list_head list; struct nft_table *table; char *name; int hooknum; int ops_len; u32 genmask:2; u32 use; u64 handle; /* runtime data below here */ struct list_head hook_list ____cacheline_aligned; struct nf_flowtable data; }; struct nft_flowtable *nft_flowtable_lookup(const struct net *net, const struct nft_table *table, const struct nlattr *nla, u8 genmask); void nf_tables_deactivate_flowtable(const struct nft_ctx *ctx, struct nft_flowtable *flowtable, enum nft_trans_phase phase); void nft_register_flowtable_type(struct nf_flowtable_type *type); void nft_unregister_flowtable_type(struct nf_flowtable_type *type); /** * struct nft_traceinfo - nft tracing information and state * * @trace: other struct members are initialised * @nf_trace: copy of skb->nf_trace before rule evaluation * @type: event type (enum nft_trace_types) * @skbid: hash of skb to be used as trace id * @packet_dumped: packet headers sent in a previous traceinfo message * @basechain: base chain currently processed */ struct nft_traceinfo { bool trace; bool nf_trace; bool packet_dumped; enum nft_trace_types type:8; u32 skbid; const struct nft_base_chain *basechain; }; void nft_trace_init(struct nft_traceinfo *info, const struct nft_pktinfo *pkt, const struct nft_chain *basechain); void nft_trace_notify(const struct nft_pktinfo *pkt, const struct nft_verdict *verdict, const struct nft_rule_dp *rule, struct nft_traceinfo *info); #define MODULE_ALIAS_NFT_CHAIN(family, name) \ MODULE_ALIAS("nft-chain-" __stringify(family) "-" name) #define MODULE_ALIAS_NFT_AF_EXPR(family, name) \ MODULE_ALIAS("nft-expr-" __stringify(family) "-" name) #define MODULE_ALIAS_NFT_EXPR(name) \ MODULE_ALIAS("nft-expr-" name) #define MODULE_ALIAS_NFT_OBJ(type) \ MODULE_ALIAS("nft-obj-" __stringify(type)) #if IS_ENABLED(CONFIG_NF_TABLES) /* * The gencursor defines two generations, the currently active and the * next one. Objects contain a bitmask of 2 bits specifying the generations * they're active in. A set bit means they're inactive in the generation * represented by that bit. * * New objects start out as inactive in the current and active in the * next generation. When committing the ruleset the bitmask is cleared, * meaning they're active in all generations. When removing an object, * it is set inactive in the next generation. After committing the ruleset, * the objects are removed. */ static inline unsigned int nft_gencursor_next(const struct net *net) { return net->nft.gencursor + 1 == 1 ? 1 : 0; } static inline u8 nft_genmask_next(const struct net *net) { return 1 << nft_gencursor_next(net); } static inline u8 nft_genmask_cur(const struct net *net) { /* Use READ_ONCE() to prevent refetching the value for atomicity */ return 1 << READ_ONCE(net->nft.gencursor); } #define NFT_GENMASK_ANY ((1 << 0) | (1 << 1)) /* * Generic transaction helpers */ /* Check if this object is currently active. */ #define nft_is_active(__net, __obj) \ (((__obj)->genmask & nft_genmask_cur(__net)) == 0) /* Check if this object is active in the next generation. */ #define nft_is_active_next(__net, __obj) \ (((__obj)->genmask & nft_genmask_next(__net)) == 0) /* This object becomes active in the next generation. */ #define nft_activate_next(__net, __obj) \ (__obj)->genmask = nft_genmask_cur(__net) /* This object becomes inactive in the next generation. */ #define nft_deactivate_next(__net, __obj) \ (__obj)->genmask = nft_genmask_next(__net) /* After committing the ruleset, clear the stale generation bit. */ #define nft_clear(__net, __obj) \ (__obj)->genmask &= ~nft_genmask_next(__net) #define nft_active_genmask(__obj, __genmask) \ !((__obj)->genmask & __genmask) /* * Set element transaction helpers */ static inline bool nft_set_elem_active(const struct nft_set_ext *ext, u8 genmask) { return !(ext->genmask & genmask); } static inline void nft_set_elem_change_active(const struct net *net, const struct nft_set *set, struct nft_set_ext *ext) { ext->genmask ^= nft_genmask_next(net); } #endif /* IS_ENABLED(CONFIG_NF_TABLES) */ #define NFT_SET_ELEM_DEAD_MASK (1 << 2) #if defined(__LITTLE_ENDIAN_BITFIELD) #define NFT_SET_ELEM_DEAD_BIT 2 #elif defined(__BIG_ENDIAN_BITFIELD) #define NFT_SET_ELEM_DEAD_BIT (BITS_PER_LONG - BITS_PER_BYTE + 2) #else #error #endif static inline void nft_set_elem_dead(struct nft_set_ext *ext) { unsigned long *word = (unsigned long *)ext; BUILD_BUG_ON(offsetof(struct nft_set_ext, genmask) != 0); set_bit(NFT_SET_ELEM_DEAD_BIT, word); } static inline int nft_set_elem_is_dead(const struct nft_set_ext *ext) { unsigned long *word = (unsigned long *)ext; BUILD_BUG_ON(offsetof(struct nft_set_ext, genmask) != 0); return test_bit(NFT_SET_ELEM_DEAD_BIT, word); } /** * struct nft_trans - nf_tables object update in transaction * * @list: used internally * @net: struct net * @table: struct nft_table the object resides in * @msg_type: message type * @seq: netlink sequence number * @flags: modifiers to new request * @report: notify via unicast netlink message * @put_net: net needs to be put * * This is the information common to all objects in the transaction, * this must always be the first member of derived sub-types. */ struct nft_trans { struct list_head list; struct net *net; struct nft_table *table; int msg_type; u32 seq; u16 flags; u8 report:1; u8 put_net:1; }; /** * struct nft_trans_binding - nf_tables object with binding support in transaction * @nft_trans: base structure, MUST be first member * @binding_list: list of objects with possible bindings * * This is the base type used by objects that can be bound to a chain. */ struct nft_trans_binding { struct nft_trans nft_trans; struct list_head binding_list; }; struct nft_trans_rule { struct nft_trans nft_trans; struct nft_rule *rule; struct nft_chain *chain; struct nft_flow_rule *flow; u32 rule_id; bool bound; }; #define nft_trans_container_rule(trans) \ container_of(trans, struct nft_trans_rule, nft_trans) #define nft_trans_rule(trans) \ nft_trans_container_rule(trans)->rule #define nft_trans_flow_rule(trans) \ nft_trans_container_rule(trans)->flow #define nft_trans_rule_id(trans) \ nft_trans_container_rule(trans)->rule_id #define nft_trans_rule_bound(trans) \ nft_trans_container_rule(trans)->bound #define nft_trans_rule_chain(trans) \ nft_trans_container_rule(trans)->chain struct nft_trans_set { struct nft_trans_binding nft_trans_binding; struct list_head list_trans_newset; struct nft_set *set; u32 set_id; u32 gc_int; u64 timeout; bool update; bool bound; u32 size; }; #define nft_trans_container_set(t) \ container_of(t, struct nft_trans_set, nft_trans_binding.nft_trans) #define nft_trans_set(trans) \ nft_trans_container_set(trans)->set #define nft_trans_set_id(trans) \ nft_trans_container_set(trans)->set_id #define nft_trans_set_bound(trans) \ nft_trans_container_set(trans)->bound #define nft_trans_set_update(trans) \ nft_trans_container_set(trans)->update #define nft_trans_set_timeout(trans) \ nft_trans_container_set(trans)->timeout #define nft_trans_set_gc_int(trans) \ nft_trans_container_set(trans)->gc_int #define nft_trans_set_size(trans) \ nft_trans_container_set(trans)->size struct nft_trans_chain { struct nft_trans_binding nft_trans_binding; struct nft_chain *chain; char *name; struct nft_stats __percpu *stats; u8 policy; bool update; bool bound; u32 chain_id; struct nft_base_chain *basechain; struct list_head hook_list; }; #define nft_trans_container_chain(t) \ container_of(t, struct nft_trans_chain, nft_trans_binding.nft_trans) #define nft_trans_chain(trans) \ nft_trans_container_chain(trans)->chain #define nft_trans_chain_update(trans) \ nft_trans_container_chain(trans)->update #define nft_trans_chain_name(trans) \ nft_trans_container_chain(trans)->name #define nft_trans_chain_stats(trans) \ nft_trans_container_chain(trans)->stats #define nft_trans_chain_policy(trans) \ nft_trans_container_chain(trans)->policy #define nft_trans_chain_bound(trans) \ nft_trans_container_chain(trans)->bound #define nft_trans_chain_id(trans) \ nft_trans_container_chain(trans)->chain_id #define nft_trans_basechain(trans) \ nft_trans_container_chain(trans)->basechain #define nft_trans_chain_hooks(trans) \ nft_trans_container_chain(trans)->hook_list struct nft_trans_table { struct nft_trans nft_trans; bool update; }; #define nft_trans_container_table(trans) \ container_of(trans, struct nft_trans_table, nft_trans) #define nft_trans_table_update(trans) \ nft_trans_container_table(trans)->update enum nft_trans_elem_flags { NFT_TRANS_UPD_TIMEOUT = (1 << 0), NFT_TRANS_UPD_EXPIRATION = (1 << 1), }; struct nft_elem_update { u64 timeout; u64 expiration; u8 flags; }; struct nft_trans_one_elem { struct nft_elem_priv *priv; struct nft_elem_update *update; }; struct nft_trans_elem { struct nft_trans nft_trans; struct nft_set *set; bool bound; unsigned int nelems; struct nft_trans_one_elem elems[] __counted_by(nelems); }; #define nft_trans_container_elem(t) \ container_of(t, struct nft_trans_elem, nft_trans) #define nft_trans_elem_set(trans) \ nft_trans_container_elem(trans)->set #define nft_trans_elem_set_bound(trans) \ nft_trans_container_elem(trans)->bound struct nft_trans_obj { struct nft_trans nft_trans; struct nft_object *obj; struct nft_object *newobj; bool update; }; #define nft_trans_container_obj(t) \ container_of(t, struct nft_trans_obj, nft_trans) #define nft_trans_obj(trans) \ nft_trans_container_obj(trans)->obj #define nft_trans_obj_newobj(trans) \ nft_trans_container_obj(trans)->newobj #define nft_trans_obj_update(trans) \ nft_trans_container_obj(trans)->update struct nft_trans_flowtable { struct nft_trans nft_trans; struct nft_flowtable *flowtable; struct list_head hook_list; u32 flags; bool update; }; #define nft_trans_container_flowtable(t) \ container_of(t, struct nft_trans_flowtable, nft_trans) #define nft_trans_flowtable(trans) \ nft_trans_container_flowtable(trans)->flowtable #define nft_trans_flowtable_update(trans) \ nft_trans_container_flowtable(trans)->update #define nft_trans_flowtable_hooks(trans) \ nft_trans_container_flowtable(trans)->hook_list #define nft_trans_flowtable_flags(trans) \ nft_trans_container_flowtable(trans)->flags #define NFT_TRANS_GC_BATCHCOUNT 256 struct nft_trans_gc { struct list_head list; struct net *net; struct nft_set *set; u32 seq; u16 count; struct nft_elem_priv *priv[NFT_TRANS_GC_BATCHCOUNT]; struct rcu_head rcu; }; static inline void nft_ctx_update(struct nft_ctx *ctx, const struct nft_trans *trans) { switch (trans->msg_type) { case NFT_MSG_NEWRULE: case NFT_MSG_DELRULE: case NFT_MSG_DESTROYRULE: ctx->chain = nft_trans_rule_chain(trans); break; case NFT_MSG_NEWCHAIN: case NFT_MSG_DELCHAIN: case NFT_MSG_DESTROYCHAIN: ctx->chain = nft_trans_chain(trans); break; default: ctx->chain = NULL; break; } ctx->net = trans->net; ctx->table = trans->table; ctx->family = trans->table->family; ctx->report = trans->report; ctx->flags = trans->flags; ctx->seq = trans->seq; } struct nft_trans_gc *nft_trans_gc_alloc(struct nft_set *set, unsigned int gc_seq, gfp_t gfp); void nft_trans_gc_destroy(struct nft_trans_gc *trans); struct nft_trans_gc *nft_trans_gc_queue_async(struct nft_trans_gc *gc, unsigned int gc_seq, gfp_t gfp); void nft_trans_gc_queue_async_done(struct nft_trans_gc *gc); struct nft_trans_gc *nft_trans_gc_queue_sync(struct nft_trans_gc *gc, gfp_t gfp); void nft_trans_gc_queue_sync_done(struct nft_trans_gc *trans); void nft_trans_gc_elem_add(struct nft_trans_gc *gc, void *priv); struct nft_trans_gc *nft_trans_gc_catchall_async(struct nft_trans_gc *gc, unsigned int gc_seq); struct nft_trans_gc *nft_trans_gc_catchall_sync(struct nft_trans_gc *gc); void nft_setelem_data_deactivate(const struct net *net, const struct nft_set *set, struct nft_elem_priv *elem_priv); int __init nft_chain_filter_init(void); void nft_chain_filter_fini(void); void __init nft_chain_route_init(void); void nft_chain_route_fini(void); void nf_tables_trans_destroy_flush_work(void); int nf_msecs_to_jiffies64(const struct nlattr *nla, u64 *result); __be64 nf_jiffies64_to_msecs(u64 input); #ifdef CONFIG_MODULES __printf(2, 3) int nft_request_module(struct net *net, const char *fmt, ...); #else static inline int nft_request_module(struct net *net, const char *fmt, ...) { return -ENOENT; } #endif struct nftables_pernet { struct list_head tables; struct list_head commit_list; struct list_head commit_set_list; struct list_head binding_list; struct list_head module_list; struct list_head notify_list; struct mutex commit_mutex; u64 table_handle; u64 tstamp; unsigned int base_seq; unsigned int gc_seq; u8 validate_state; }; extern unsigned int nf_tables_net_id; static inline struct nftables_pernet *nft_pernet(const struct net *net) { return net_generic(net, nf_tables_net_id); } static inline u64 nft_net_tstamp(const struct net *net) { return nft_pernet(net)->tstamp; } #define __NFT_REDUCE_READONLY 1UL #define NFT_REDUCE_READONLY (void *)__NFT_REDUCE_READONLY static inline bool nft_reduce_is_readonly(const struct nft_expr *expr) { return expr->ops->reduce == NFT_REDUCE_READONLY; } void nft_reg_track_update(struct nft_regs_track *track, const struct nft_expr *expr, u8 dreg, u8 len); void nft_reg_track_cancel(struct nft_regs_track *track, u8 dreg, u8 len); void __nft_reg_track_cancel(struct nft_regs_track *track, u8 dreg); static inline bool nft_reg_track_cmp(struct nft_regs_track *track, const struct nft_expr *expr, u8 dreg) { return track->regs[dreg].selector && track->regs[dreg].selector->ops == expr->ops && track->regs[dreg].num_reg == 0; } #endif /* _NET_NF_TABLES_H */
222 186 664 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 /* SPDX-License-Identifier: (GPL-2.0-only OR BSD-3-Clause) */ /* Copyright (C) 2016-2022 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. * * SipHash: a fast short-input PRF * https://131002.net/siphash/ * * This implementation is specifically for SipHash2-4 for a secure PRF * and HalfSipHash1-3/SipHash1-3 for an insecure PRF only suitable for * hashtables. */ #ifndef _LINUX_SIPHASH_H #define _LINUX_SIPHASH_H #include <linux/types.h> #include <linux/kernel.h> #define SIPHASH_ALIGNMENT __alignof__(u64) typedef struct { u64 key[2]; } siphash_key_t; #define siphash_aligned_key_t siphash_key_t __aligned(16) static inline bool siphash_key_is_zero(const siphash_key_t *key) { return !(key->key[0] | key->key[1]); } u64 __siphash_aligned(const void *data, size_t len, const siphash_key_t *key); u64 __siphash_unaligned(const void *data, size_t len, const siphash_key_t *key); u64 siphash_1u64(const u64 a, const siphash_key_t *key); u64 siphash_2u64(const u64 a, const u64 b, const siphash_key_t *key); u64 siphash_3u64(const u64 a, const u64 b, const u64 c, const siphash_key_t *key); u64 siphash_4u64(const u64 a, const u64 b, const u64 c, const u64 d, const siphash_key_t *key); u64 siphash_1u32(const u32 a, const siphash_key_t *key); u64 siphash_3u32(const u32 a, const u32 b, const u32 c, const siphash_key_t *key); static inline u64 siphash_2u32(const u32 a, const u32 b, const siphash_key_t *key) { return siphash_1u64((u64)b << 32 | a, key); } static inline u64 siphash_4u32(const u32 a, const u32 b, const u32 c, const u32 d, const siphash_key_t *key) { return siphash_2u64((u64)b << 32 | a, (u64)d << 32 | c, key); } static inline u64 ___siphash_aligned(const __le64 *data, size_t len, const siphash_key_t *key) { if (__builtin_constant_p(len) && len == 4) return siphash_1u32(le32_to_cpup((const __le32 *)data), key); if (__builtin_constant_p(len) && len == 8) return siphash_1u64(le64_to_cpu(data[0]), key); if (__builtin_constant_p(len) && len == 16) return siphash_2u64(le64_to_cpu(data[0]), le64_to_cpu(data[1]), key); if (__builtin_constant_p(len) && len == 24) return siphash_3u64(le64_to_cpu(data[0]), le64_to_cpu(data[1]), le64_to_cpu(data[2]), key); if (__builtin_constant_p(len) && len == 32) return siphash_4u64(le64_to_cpu(data[0]), le64_to_cpu(data[1]), le64_to_cpu(data[2]), le64_to_cpu(data[3]), key); return __siphash_aligned(data, len, key); } /** * siphash - compute 64-bit siphash PRF value * @data: buffer to hash * @size: size of @data * @key: the siphash key */ static inline u64 siphash(const void *data, size_t len, const siphash_key_t *key) { if (IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) || !IS_ALIGNED((unsigned long)data, SIPHASH_ALIGNMENT)) return __siphash_unaligned(data, len, key); return ___siphash_aligned(data, len, key); } #define HSIPHASH_ALIGNMENT __alignof__(unsigned long) typedef struct { unsigned long key[2]; } hsiphash_key_t; u32 __hsiphash_aligned(const void *data, size_t len, const hsiphash_key_t *key); u32 __hsiphash_unaligned(const void *data, size_t len, const hsiphash_key_t *key); u32 hsiphash_1u32(const u32 a, const hsiphash_key_t *key); u32 hsiphash_2u32(const u32 a, const u32 b, const hsiphash_key_t *key); u32 hsiphash_3u32(const u32 a, const u32 b, const u32 c, const hsiphash_key_t *key); u32 hsiphash_4u32(const u32 a, const u32 b, const u32 c, const u32 d, const hsiphash_key_t *key); static inline u32 ___hsiphash_aligned(const __le32 *data, size_t len, const hsiphash_key_t *key) { if (__builtin_constant_p(len) && len == 4) return hsiphash_1u32(le32_to_cpu(data[0]), key); if (__builtin_constant_p(len) && len == 8) return hsiphash_2u32(le32_to_cpu(data[0]), le32_to_cpu(data[1]), key); if (__builtin_constant_p(len) && len == 12) return hsiphash_3u32(le32_to_cpu(data[0]), le32_to_cpu(data[1]), le32_to_cpu(data[2]), key); if (__builtin_constant_p(len) && len == 16) return hsiphash_4u32(le32_to_cpu(data[0]), le32_to_cpu(data[1]), le32_to_cpu(data[2]), le32_to_cpu(data[3]), key); return __hsiphash_aligned(data, len, key); } /** * hsiphash - compute 32-bit hsiphash PRF value * @data: buffer to hash * @size: size of @data * @key: the hsiphash key */ static inline u32 hsiphash(const void *data, size_t len, const hsiphash_key_t *key) { if (IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) || !IS_ALIGNED((unsigned long)data, HSIPHASH_ALIGNMENT)) return __hsiphash_unaligned(data, len, key); return ___hsiphash_aligned(data, len, key); } /* * These macros expose the raw SipHash and HalfSipHash permutations. * Do not use them directly! If you think you have a use for them, * be sure to CC the maintainer of this file explaining why. */ #define SIPHASH_PERMUTATION(a, b, c, d) ( \ (a) += (b), (b) = rol64((b), 13), (b) ^= (a), (a) = rol64((a), 32), \ (c) += (d), (d) = rol64((d), 16), (d) ^= (c), \ (a) += (d), (d) = rol64((d), 21), (d) ^= (a), \ (c) += (b), (b) = rol64((b), 17), (b) ^= (c), (c) = rol64((c), 32)) #define SIPHASH_CONST_0 0x736f6d6570736575ULL #define SIPHASH_CONST_1 0x646f72616e646f6dULL #define SIPHASH_CONST_2 0x6c7967656e657261ULL #define SIPHASH_CONST_3 0x7465646279746573ULL #define HSIPHASH_PERMUTATION(a, b, c, d) ( \ (a) += (b), (b) = rol32((b), 5), (b) ^= (a), (a) = rol32((a), 16), \ (c) += (d), (d) = rol32((d), 8), (d) ^= (c), \ (a) += (d), (d) = rol32((d), 7), (d) ^= (a), \ (c) += (b), (b) = rol32((b), 13), (b) ^= (c), (c) = rol32((c), 16)) #define HSIPHASH_CONST_0 0U #define HSIPHASH_CONST_1 0U #define HSIPHASH_CONST_2 0x6c796765U #define HSIPHASH_CONST_3 0x74656462U #endif /* _LINUX_SIPHASH_H */
20 20 18 20 39 39 58 31 58 36 58 56 58 90 73 90 78 78 78 78 77 78 78 66 66 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 // SPDX-License-Identifier: GPL-2.0-only /* * This file contians vfs address (mmap) ops for 9P2000. * * Copyright (C) 2005 by Eric Van Hensbergen <ericvh@gmail.com> * Copyright (C) 2002 by Ron Minnich <rminnich@lanl.gov> */ #include <linux/module.h> #include <linux/errno.h> #include <linux/fs.h> #include <linux/file.h> #include <linux/stat.h> #include <linux/string.h> #include <linux/pagemap.h> #include <linux/sched.h> #include <linux/swap.h> #include <linux/uio.h> #include <linux/netfs.h> #include <net/9p/9p.h> #include <net/9p/client.h> #include <trace/events/netfs.h> #include "v9fs.h" #include "v9fs_vfs.h" #include "cache.h" #include "fid.h" /* * Writeback calls this when it finds a folio that needs uploading. This isn't * called if writeback only has copy-to-cache to deal with. */ static void v9fs_begin_writeback(struct netfs_io_request *wreq) { struct p9_fid *fid; fid = v9fs_fid_find_inode(wreq->inode, true, INVALID_UID, true); if (!fid) { WARN_ONCE(1, "folio expected an open fid inode->i_ino=%lx\n", wreq->inode->i_ino); return; } wreq->wsize = fid->clnt->msize - P9_IOHDRSZ; if (fid->iounit) wreq->wsize = min(wreq->wsize, fid->iounit); wreq->netfs_priv = fid; wreq->io_streams[0].avail = true; } /* * Issue a subrequest to write to the server. */ static void v9fs_issue_write(struct netfs_io_subrequest *subreq) { struct p9_fid *fid = subreq->rreq->netfs_priv; int err, len; len = p9_client_write(fid, subreq->start, &subreq->io_iter, &err); netfs_write_subrequest_terminated(subreq, len ?: err, false); } /** * v9fs_issue_read - Issue a read from 9P * @subreq: The read to make */ static void v9fs_issue_read(struct netfs_io_subrequest *subreq) { struct netfs_io_request *rreq = subreq->rreq; struct p9_fid *fid = rreq->netfs_priv; unsigned long long pos = subreq->start + subreq->transferred; int total, err; total = p9_client_read(fid, pos, &subreq->io_iter, &err); /* if we just extended the file size, any portion not in * cache won't be on server and is zeroes */ if (subreq->rreq->origin != NETFS_DIO_READ) __set_bit(NETFS_SREQ_CLEAR_TAIL, &subreq->flags); if (pos + total >= i_size_read(rreq->inode)) __set_bit(NETFS_SREQ_HIT_EOF, &subreq->flags); if (!err) subreq->transferred += total; netfs_read_subreq_terminated(subreq, err, false); } /** * v9fs_init_request - Initialise a request * @rreq: The read request * @file: The file being read from */ static int v9fs_init_request(struct netfs_io_request *rreq, struct file *file) { struct p9_fid *fid; bool writing = (rreq->origin == NETFS_READ_FOR_WRITE || rreq->origin == NETFS_WRITETHROUGH || rreq->origin == NETFS_UNBUFFERED_WRITE || rreq->origin == NETFS_DIO_WRITE); if (rreq->origin == NETFS_WRITEBACK) return 0; /* We don't get the write handle until we find we * have actually dirty data and not just * copy-to-cache data. */ if (file) { fid = file->private_data; if (!fid) goto no_fid; p9_fid_get(fid); } else { fid = v9fs_fid_find_inode(rreq->inode, writing, INVALID_UID, true); if (!fid) goto no_fid; } rreq->wsize = fid->clnt->msize - P9_IOHDRSZ; if (fid->iounit) rreq->wsize = min(rreq->wsize, fid->iounit); /* we might need to read from a fid that was opened write-only * for read-modify-write of page cache, use the writeback fid * for that */ WARN_ON(rreq->origin == NETFS_READ_FOR_WRITE && !(fid->mode & P9_ORDWR)); rreq->netfs_priv = fid; return 0; no_fid: WARN_ONCE(1, "folio expected an open fid inode->i_ino=%lx\n", rreq->inode->i_ino); return -EINVAL; } /** * v9fs_free_request - Cleanup request initialized by v9fs_init_rreq * @rreq: The I/O request to clean up */ static void v9fs_free_request(struct netfs_io_request *rreq) { struct p9_fid *fid = rreq->netfs_priv; p9_fid_put(fid); } const struct netfs_request_ops v9fs_req_ops = { .init_request = v9fs_init_request, .free_request = v9fs_free_request, .issue_read = v9fs_issue_read, .begin_writeback = v9fs_begin_writeback, .issue_write = v9fs_issue_write, }; const struct address_space_operations v9fs_addr_operations = { .read_folio = netfs_read_folio, .readahead = netfs_readahead, .dirty_folio = netfs_dirty_folio, .release_folio = netfs_release_folio, .invalidate_folio = netfs_invalidate_folio, .direct_IO = noop_direct_IO, .writepages = netfs_writepages, };
11 134 17 116 24 87 11 1 1 1 1 1 11 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 /* SPDX-License-Identifier: GPL-2.0-or-later */ /* Copyright (c) 2016 Mellanox Technologies. All rights reserved. * Copyright (c) 2016 Jiri Pirko <jiri@mellanox.com> */ #include <linux/device.h> #include <linux/etherdevice.h> #include <linux/mutex.h> #include <linux/netdevice.h> #include <linux/notifier.h> #include <linux/types.h> #include <linux/workqueue.h> #include <linux/xarray.h> #include <net/devlink.h> #include <net/net_namespace.h> #include <net/rtnetlink.h> #include <rdma/ib_verbs.h> #include "netlink_gen.h" struct devlink_rel; #define DEVLINK_REGISTERED XA_MARK_1 #define DEVLINK_RELOAD_STATS_ARRAY_SIZE \ (__DEVLINK_RELOAD_LIMIT_MAX * __DEVLINK_RELOAD_ACTION_MAX) struct devlink_dev_stats { u32 reload_stats[DEVLINK_RELOAD_STATS_ARRAY_SIZE]; u32 remote_reload_stats[DEVLINK_RELOAD_STATS_ARRAY_SIZE]; }; struct devlink { u32 index; struct xarray ports; struct list_head rate_list; struct list_head sb_list; struct list_head dpipe_table_list; struct list_head resource_list; struct xarray params; struct list_head region_list; struct list_head reporter_list; struct devlink_dpipe_headers *dpipe_headers; struct list_head trap_list; struct list_head trap_group_list; struct list_head trap_policer_list; struct list_head linecard_list; const struct devlink_ops *ops; struct xarray snapshot_ids; struct devlink_dev_stats stats; struct device *dev; possible_net_t _net; /* Serializes access to devlink instance specific objects such as * port, sb, dpipe, resource, params, region, traps and more. */ struct mutex lock; struct lock_class_key lock_key; u8 reload_failed:1; refcount_t refcount; struct rcu_work rwork; struct devlink_rel *rel; struct xarray nested_rels; char priv[] __aligned(NETDEV_ALIGN); }; extern struct xarray devlinks; extern struct genl_family devlink_nl_family; /* devlink instances are open to the access from the user space after * devlink_register() call. Such logical barrier allows us to have certain * expectations related to locking. * * Before *_register() - we are in initialization stage and no parallel * access possible to the devlink instance. All drivers perform that phase * by implicitly holding device_lock. * * After *_register() - users and driver can access devlink instance at * the same time. */ #define ASSERT_DEVLINK_REGISTERED(d) \ WARN_ON_ONCE(!xa_get_mark(&devlinks, (d)->index, DEVLINK_REGISTERED)) #define ASSERT_DEVLINK_NOT_REGISTERED(d) \ WARN_ON_ONCE(xa_get_mark(&devlinks, (d)->index, DEVLINK_REGISTERED)) /* Iterate over devlink pointers which were possible to get reference to. * devlink_put() needs to be called for each iterated devlink pointer * in loop body in order to release the reference. */ #define devlinks_xa_for_each_registered_get(net, index, devlink) \ for (index = 0; (devlink = devlinks_xa_find_get(net, &index)); index++) struct devlink *devlinks_xa_find_get(struct net *net, unsigned long *indexp); static inline bool __devl_is_registered(struct devlink *devlink) { return xa_get_mark(&devlinks, devlink->index, DEVLINK_REGISTERED); } static inline bool devl_is_registered(struct devlink *devlink) { devl_assert_locked(devlink); return __devl_is_registered(devlink); } static inline void devl_dev_lock(struct devlink *devlink, bool dev_lock) { if (dev_lock) device_lock(devlink->dev); devl_lock(devlink); } static inline void devl_dev_unlock(struct devlink *devlink, bool dev_lock) { devl_unlock(devlink); if (dev_lock) device_unlock(devlink->dev); } typedef void devlink_rel_notify_cb_t(struct devlink *devlink, u32 obj_index); typedef void devlink_rel_cleanup_cb_t(struct devlink *devlink, u32 obj_index, u32 rel_index); void devlink_rel_nested_in_clear(u32 rel_index); int devlink_rel_nested_in_add(u32 *rel_index, u32 devlink_index, u32 obj_index, devlink_rel_notify_cb_t *notify_cb, devlink_rel_cleanup_cb_t *cleanup_cb, struct devlink *devlink); void devlink_rel_nested_in_notify(struct devlink *devlink); int devlink_rel_devlink_handle_put(struct sk_buff *msg, struct devlink *devlink, u32 rel_index, int attrtype, bool *msg_updated); /* Netlink */ enum devlink_multicast_groups { DEVLINK_MCGRP_CONFIG, }; /* state held across netlink dumps */ struct devlink_nl_dump_state { unsigned long instance; int idx; union { /* DEVLINK_CMD_REGION_READ */ struct { u64 start_offset; }; /* DEVLINK_CMD_HEALTH_REPORTER_DUMP_GET */ struct { u64 dump_ts; }; }; }; typedef int devlink_nl_dump_one_func_t(struct sk_buff *msg, struct devlink *devlink, struct netlink_callback *cb, int flags); struct devlink * devlink_get_from_attrs_lock(struct net *net, struct nlattr **attrs, bool dev_lock); int devlink_nl_dumpit(struct sk_buff *msg, struct netlink_callback *cb, devlink_nl_dump_one_func_t *dump_one); static inline struct devlink_nl_dump_state * devlink_dump_state(struct netlink_callback *cb) { NL_ASSERT_CTX_FITS(struct devlink_nl_dump_state); return (struct devlink_nl_dump_state *)cb->ctx; } static inline int devlink_nl_put_handle(struct sk_buff *msg, struct devlink *devlink) { if (nla_put_string(msg, DEVLINK_ATTR_BUS_NAME, devlink->dev->bus->name)) return -EMSGSIZE; if (nla_put_string(msg, DEVLINK_ATTR_DEV_NAME, dev_name(devlink->dev))) return -EMSGSIZE; return 0; } static inline int devlink_nl_put_u64(struct sk_buff *msg, int attrtype, u64 val) { return nla_put_u64_64bit(msg, attrtype, val, DEVLINK_ATTR_PAD); } int devlink_nl_put_nested_handle(struct sk_buff *msg, struct net *net, struct devlink *devlink, int attrtype); int devlink_nl_msg_reply_and_new(struct sk_buff **msg, struct genl_info *info); static inline bool devlink_nl_notify_need(struct devlink *devlink) { return genl_has_listeners(&devlink_nl_family, devlink_net(devlink), DEVLINK_MCGRP_CONFIG); } struct devlink_obj_desc { struct rcu_head rcu; const char *bus_name; const char *dev_name; unsigned int port_index; bool port_index_valid; long data[]; }; static inline void devlink_nl_obj_desc_init(struct devlink_obj_desc *desc, struct devlink *devlink) { memset(desc, 0, sizeof(*desc)); desc->bus_name = devlink->dev->bus->name; desc->dev_name = dev_name(devlink->dev); } static inline void devlink_nl_obj_desc_port_set(struct devlink_obj_desc *desc, struct devlink_port *devlink_port) { desc->port_index = devlink_port->index; desc->port_index_valid = true; } int devlink_nl_notify_filter(struct sock *dsk, struct sk_buff *skb, void *data); static inline void devlink_nl_notify_send_desc(struct devlink *devlink, struct sk_buff *msg, struct devlink_obj_desc *desc) { genlmsg_multicast_netns_filtered(&devlink_nl_family, devlink_net(devlink), msg, 0, DEVLINK_MCGRP_CONFIG, GFP_KERNEL, devlink_nl_notify_filter, desc); } static inline void devlink_nl_notify_send(struct devlink *devlink, struct sk_buff *msg) { struct devlink_obj_desc desc; devlink_nl_obj_desc_init(&desc, devlink); devlink_nl_notify_send_desc(devlink, msg, &desc); } /* Notify */ void devlink_notify_register(struct devlink *devlink); void devlink_notify_unregister(struct devlink *devlink); void devlink_ports_notify_register(struct devlink *devlink); void devlink_ports_notify_unregister(struct devlink *devlink); void devlink_params_notify_register(struct devlink *devlink); void devlink_params_notify_unregister(struct devlink *devlink); void devlink_regions_notify_register(struct devlink *devlink); void devlink_regions_notify_unregister(struct devlink *devlink); void devlink_trap_policers_notify_register(struct devlink *devlink); void devlink_trap_policers_notify_unregister(struct devlink *devlink); void devlink_trap_groups_notify_register(struct devlink *devlink); void devlink_trap_groups_notify_unregister(struct devlink *devlink); void devlink_traps_notify_register(struct devlink *devlink); void devlink_traps_notify_unregister(struct devlink *devlink); void devlink_rates_notify_register(struct devlink *devlink); void devlink_rates_notify_unregister(struct devlink *devlink); void devlink_linecards_notify_register(struct devlink *devlink); void devlink_linecards_notify_unregister(struct devlink *devlink); /* Ports */ #define ASSERT_DEVLINK_PORT_INITIALIZED(devlink_port) \ WARN_ON_ONCE(!(devlink_port)->initialized) struct devlink_port *devlink_port_get_by_index(struct devlink *devlink, unsigned int port_index); int devlink_port_netdevice_event(struct notifier_block *nb, unsigned long event, void *ptr); struct devlink_port * devlink_port_get_from_info(struct devlink *devlink, struct genl_info *info); struct devlink_port *devlink_port_get_from_attrs(struct devlink *devlink, struct nlattr **attrs); /* Reload */ bool devlink_reload_actions_valid(const struct devlink_ops *ops); int devlink_reload(struct devlink *devlink, struct net *dest_net, enum devlink_reload_action action, enum devlink_reload_limit limit, u32 *actions_performed, struct netlink_ext_ack *extack); static inline bool devlink_reload_supported(const struct devlink_ops *ops) { return ops->reload_down && ops->reload_up; } /* Params */ void devlink_params_driverinit_load_new(struct devlink *devlink); /* Resources */ struct devlink_resource; int devlink_resources_validate(struct devlink *devlink, struct devlink_resource *resource, struct genl_info *info); /* Rates */ int devlink_rate_nodes_check(struct devlink *devlink, u16 mode, struct netlink_ext_ack *extack); /* Linecards */ unsigned int devlink_linecard_index(struct devlink_linecard *linecard);
1284 11 728 206 15 4071 54 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _LINUX_BITOPS_H #define _LINUX_BITOPS_H #include <asm/types.h> #include <linux/bits.h> #include <linux/typecheck.h> #include <uapi/linux/kernel.h> #define BITS_PER_TYPE(type) (sizeof(type) * BITS_PER_BYTE) #define BITS_TO_LONGS(nr) __KERNEL_DIV_ROUND_UP(nr, BITS_PER_TYPE(long)) #define BITS_TO_U64(nr) __KERNEL_DIV_ROUND_UP(nr, BITS_PER_TYPE(u64)) #define BITS_TO_U32(nr) __KERNEL_DIV_ROUND_UP(nr, BITS_PER_TYPE(u32)) #define BITS_TO_BYTES(nr) __KERNEL_DIV_ROUND_UP(nr, BITS_PER_TYPE(char)) #define BYTES_TO_BITS(nb) ((nb) * BITS_PER_BYTE) extern unsigned int __sw_hweight8(unsigned int w); extern unsigned int __sw_hweight16(unsigned int w); extern unsigned int __sw_hweight32(unsigned int w); extern unsigned long __sw_hweight64(__u64 w); /* * Defined here because those may be needed by architecture-specific static * inlines. */ #include <asm-generic/bitops/generic-non-atomic.h> /* * Many architecture-specific non-atomic bitops contain inline asm code and due * to that the compiler can't optimize them to compile-time expressions or * constants. In contrary, generic_*() helpers are defined in pure C and * compilers optimize them just well. * Therefore, to make `unsigned long foo = 0; __set_bit(BAR, &foo)` effectively * equal to `unsigned long foo = BIT(BAR)`, pick the generic C alternative when * the arguments can be resolved at compile time. That expression itself is a * constant and doesn't bring any functional changes to the rest of cases. * The casts to `uintptr_t` are needed to mitigate `-Waddress` warnings when * passing a bitmap from .bss or .data (-> `!!addr` is always true). */ #define bitop(op, nr, addr) \ ((__builtin_constant_p(nr) && \ __builtin_constant_p((uintptr_t)(addr) != (uintptr_t)NULL) && \ (uintptr_t)(addr) != (uintptr_t)NULL && \ __builtin_constant_p(*(const unsigned long *)(addr))) ? \ const##op(nr, addr) : op(nr, addr)) /* * The following macros are non-atomic versions of their non-underscored * counterparts. */ #define __set_bit(nr, addr) bitop(___set_bit, nr, addr) #define __clear_bit(nr, addr) bitop(___clear_bit, nr, addr) #define __change_bit(nr, addr) bitop(___change_bit, nr, addr) #define __test_and_set_bit(nr, addr) bitop(___test_and_set_bit, nr, addr) #define __test_and_clear_bit(nr, addr) bitop(___test_and_clear_bit, nr, addr) #define __test_and_change_bit(nr, addr) bitop(___test_and_change_bit, nr, addr) #define test_bit(nr, addr) bitop(_test_bit, nr, addr) #define test_bit_acquire(nr, addr) bitop(_test_bit_acquire, nr, addr) /* * Include this here because some architectures need generic_ffs/fls in * scope */ #include <asm/bitops.h> /* Check that the bitops prototypes are sane */ #define __check_bitop_pr(name) \ static_assert(__same_type(arch_##name, generic_##name) && \ __same_type(const_##name, generic_##name) && \ __same_type(_##name, generic_##name)) __check_bitop_pr(__set_bit); __check_bitop_pr(__clear_bit); __check_bitop_pr(__change_bit); __check_bitop_pr(__test_and_set_bit); __check_bitop_pr(__test_and_clear_bit); __check_bitop_pr(__test_and_change_bit); __check_bitop_pr(test_bit); __check_bitop_pr(test_bit_acquire); #undef __check_bitop_pr static inline int get_bitmask_order(unsigned int count) { int order; order = fls(count); return order; /* We could be slightly more clever with -1 here... */ } static __always_inline unsigned long hweight_long(unsigned long w) { return sizeof(w) == 4 ? hweight32(w) : hweight64((__u64)w); } /** * rol64 - rotate a 64-bit value left * @word: value to rotate * @shift: bits to roll */ static inline __u64 rol64(__u64 word, unsigned int shift) { return (word << (shift & 63)) | (word >> ((-shift) & 63)); } /** * ror64 - rotate a 64-bit value right * @word: value to rotate * @shift: bits to roll */ static inline __u64 ror64(__u64 word, unsigned int shift) { return (word >> (shift & 63)) | (word << ((-shift) & 63)); } /** * rol32 - rotate a 32-bit value left * @word: value to rotate * @shift: bits to roll */ static inline __u32 rol32(__u32 word, unsigned int shift) { return (word << (shift & 31)) | (word >> ((-shift) & 31)); } /** * ror32 - rotate a 32-bit value right * @word: value to rotate * @shift: bits to roll */ static inline __u32 ror32(__u32 word, unsigned int shift) { return (word >> (shift & 31)) | (word << ((-shift) & 31)); } /** * rol16 - rotate a 16-bit value left * @word: value to rotate * @shift: bits to roll */ static inline __u16 rol16(__u16 word, unsigned int shift) { return (word << (shift & 15)) | (word >> ((-shift) & 15)); } /** * ror16 - rotate a 16-bit value right * @word: value to rotate * @shift: bits to roll */ static inline __u16 ror16(__u16 word, unsigned int shift) { return (word >> (shift & 15)) | (word << ((-shift) & 15)); } /** * rol8 - rotate an 8-bit value left * @word: value to rotate * @shift: bits to roll */ static inline __u8 rol8(__u8 word, unsigned int shift) { return (word << (shift & 7)) | (word >> ((-shift) & 7)); } /** * ror8 - rotate an 8-bit value right * @word: value to rotate * @shift: bits to roll */ static inline __u8 ror8(__u8 word, unsigned int shift) { return (word >> (shift & 7)) | (word << ((-shift) & 7)); } /** * sign_extend32 - sign extend a 32-bit value using specified bit as sign-bit * @value: value to sign extend * @index: 0 based bit index (0<=index<32) to sign bit * * This is safe to use for 16- and 8-bit types as well. */ static __always_inline __s32 sign_extend32(__u32 value, int index) { __u8 shift = 31 - index; return (__s32)(value << shift) >> shift; } /** * sign_extend64 - sign extend a 64-bit value using specified bit as sign-bit * @value: value to sign extend * @index: 0 based bit index (0<=index<64) to sign bit */ static __always_inline __s64 sign_extend64(__u64 value, int index) { __u8 shift = 63 - index; return (__s64)(value << shift) >> shift; } static inline unsigned int fls_long(unsigned long l) { if (sizeof(l) == 4) return fls(l); return fls64(l); } static inline int get_count_order(unsigned int count) { if (count == 0) return -1; return fls(--count); } /** * get_count_order_long - get order after rounding @l up to power of 2 * @l: parameter * * it is same as get_count_order() but with long type parameter */ static inline int get_count_order_long(unsigned long l) { if (l == 0UL) return -1; return (int)fls_long(--l); } /** * __ffs64 - find first set bit in a 64 bit word * @word: The 64 bit word * * On 64 bit arches this is a synonym for __ffs * The result is not defined if no bits are set, so check that @word * is non-zero before calling this. */ static inline unsigned int __ffs64(u64 word) { #if BITS_PER_LONG == 32 if (((u32)word) == 0UL) return __ffs((u32)(word >> 32)) + 32; #elif BITS_PER_LONG != 64 #error BITS_PER_LONG not 32 or 64 #endif return __ffs((unsigned long)word); } /** * fns - find N'th set bit in a word * @word: The word to search * @n: Bit to find */ static inline unsigned int fns(unsigned long word, unsigned int n) { while (word && n--) word &= word - 1; return word ? __ffs(word) : BITS_PER_LONG; } /** * assign_bit - Assign value to a bit in memory * @nr: the bit to set * @addr: the address to start counting from * @value: the value to assign */ #define assign_bit(nr, addr, value) \ ((value) ? set_bit((nr), (addr)) : clear_bit((nr), (addr))) #define __assign_bit(nr, addr, value) \ ((value) ? __set_bit((nr), (addr)) : __clear_bit((nr), (addr))) /** * __ptr_set_bit - Set bit in a pointer's value * @nr: the bit to set * @addr: the address of the pointer variable * * Example: * void *p = foo(); * __ptr_set_bit(bit, &p); */ #define __ptr_set_bit(nr, addr) \ ({ \ typecheck_pointer(*(addr)); \ __set_bit(nr, (unsigned long *)(addr)); \ }) /** * __ptr_clear_bit - Clear bit in a pointer's value * @nr: the bit to clear * @addr: the address of the pointer variable * * Example: * void *p = foo(); * __ptr_clear_bit(bit, &p); */ #define __ptr_clear_bit(nr, addr) \ ({ \ typecheck_pointer(*(addr)); \ __clear_bit(nr, (unsigned long *)(addr)); \ }) /** * __ptr_test_bit - Test bit in a pointer's value * @nr: the bit to test * @addr: the address of the pointer variable * * Example: * void *p = foo(); * if (__ptr_test_bit(bit, &p)) { * ... * } else { * ... * } */ #define __ptr_test_bit(nr, addr) \ ({ \ typecheck_pointer(*(addr)); \ test_bit(nr, (unsigned long *)(addr)); \ }) #ifdef __KERNEL__ #ifndef set_mask_bits #define set_mask_bits(ptr, mask, bits) \ ({ \ const typeof(*(ptr)) mask__ = (mask), bits__ = (bits); \ typeof(*(ptr)) old__, new__; \ \ old__ = READ_ONCE(*(ptr)); \ do { \ new__ = (old__ & ~mask__) | bits__; \ } while (!try_cmpxchg(ptr, &old__, new__)); \ \ old__; \ }) #endif #ifndef bit_clear_unless #define bit_clear_unless(ptr, clear, test) \ ({ \ const typeof(*(ptr)) clear__ = (clear), test__ = (test);\ typeof(*(ptr)) old__, new__; \ \ old__ = READ_ONCE(*(ptr)); \ do { \ if (old__ & test__) \ break; \ new__ = old__ & ~clear__; \ } while (!try_cmpxchg(ptr, &old__, new__)); \ \ !(old__ & test__); \ }) #endif #endif /* __KERNEL__ */ #endif
17 17 17 17 17 17 17 17 17 17 17 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 // SPDX-License-Identifier: GPL-2.0 #include <linux/cred.h> #include <linux/init.h> #include <linux/kernel.h> #include <linux/quotaops.h> #include <linux/sched.h> #include <linux/slab.h> #include <net/netlink.h> #include <net/genetlink.h> static const struct genl_multicast_group quota_mcgrps[] = { { .name = "events", }, }; /* Netlink family structure for quota */ static struct genl_family quota_genl_family __ro_after_init = { .module = THIS_MODULE, .hdrsize = 0, .name = "VFS_DQUOT", .version = 1, .maxattr = QUOTA_NL_A_MAX, .mcgrps = quota_mcgrps, .n_mcgrps = ARRAY_SIZE(quota_mcgrps), }; /** * quota_send_warning - Send warning to userspace about exceeded quota * @qid: The kernel internal quota identifier. * @dev: The device on which the fs is mounted (sb->s_dev) * @warntype: The type of the warning: QUOTA_NL_... * * This can be used by filesystems (including those which don't use * dquot) to send a message to userspace relating to quota limits. * */ void quota_send_warning(struct kqid qid, dev_t dev, const char warntype) { static atomic_t seq; struct sk_buff *skb; void *msg_head; int ret; int msg_size = 4 * nla_total_size(sizeof(u32)) + 2 * nla_total_size_64bit(sizeof(u64)); /* We have to allocate using GFP_NOFS as we are called from a * filesystem performing write and thus further recursion into * the fs to free some data could cause deadlocks. */ skb = genlmsg_new(msg_size, GFP_NOFS); if (!skb) { printk(KERN_ERR "VFS: Not enough memory to send quota warning.\n"); return; } msg_head = genlmsg_put(skb, 0, atomic_add_return(1, &seq), &quota_genl_family, 0, QUOTA_NL_C_WARNING); if (!msg_head) { printk(KERN_ERR "VFS: Cannot store netlink header in quota warning.\n"); goto err_out; } ret = nla_put_u32(skb, QUOTA_NL_A_QTYPE, qid.type); if (ret) goto attr_err_out; ret = nla_put_u64_64bit(skb, QUOTA_NL_A_EXCESS_ID, from_kqid_munged(&init_user_ns, qid), QUOTA_NL_A_PAD); if (ret) goto attr_err_out; ret = nla_put_u32(skb, QUOTA_NL_A_WARNING, warntype); if (ret) goto attr_err_out; ret = nla_put_u32(skb, QUOTA_NL_A_DEV_MAJOR, MAJOR(dev)); if (ret) goto attr_err_out; ret = nla_put_u32(skb, QUOTA_NL_A_DEV_MINOR, MINOR(dev)); if (ret) goto attr_err_out; ret = nla_put_u64_64bit(skb, QUOTA_NL_A_CAUSED_ID, from_kuid_munged(&init_user_ns, current_uid()), QUOTA_NL_A_PAD); if (ret) goto attr_err_out; genlmsg_end(skb, msg_head); genlmsg_multicast(&quota_genl_family, skb, 0, 0, GFP_NOFS); return; attr_err_out: printk(KERN_ERR "VFS: Not enough space to compose quota message!\n"); err_out: kfree_skb(skb); } EXPORT_SYMBOL(quota_send_warning); static int __init quota_init(void) { if (genl_register_family(&quota_genl_family) != 0) printk(KERN_ERR "VFS: Failed to create quota netlink interface.\n"); return 0; }; fs_initcall(quota_init);
3 1 5 1 10 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 /* SPDX-License-Identifier: GPL-2.0 */ /* Based on net/wireless/trace.h */ #undef TRACE_SYSTEM #define TRACE_SYSTEM cfg802154 #if !defined(__RDEV_CFG802154_OPS_TRACE) || defined(TRACE_HEADER_MULTI_READ) #define __RDEV_CFG802154_OPS_TRACE #include <linux/tracepoint.h> #include <net/cfg802154.h> #define MAXNAME 32 #define WPAN_PHY_ENTRY __array(char, wpan_phy_name, MAXNAME) #define WPAN_PHY_ASSIGN strscpy(__entry->wpan_phy_name, \ wpan_phy_name(wpan_phy), \ MAXNAME) #define WPAN_PHY_PR_FMT "%s" #define WPAN_PHY_PR_ARG __entry->wpan_phy_name #define WPAN_DEV_ENTRY __field(u32, identifier) #define WPAN_DEV_ASSIGN (__entry->identifier) = (!IS_ERR_OR_NULL(wpan_dev) \ ? wpan_dev->identifier : 0) #define WPAN_DEV_PR_FMT "wpan_dev(%u)" #define WPAN_DEV_PR_ARG (__entry->identifier) #define WPAN_CCA_ENTRY __field(enum nl802154_cca_modes, cca_mode) \ __field(enum nl802154_cca_opts, cca_opt) #define WPAN_CCA_ASSIGN \ do { \ (__entry->cca_mode) = cca->mode; \ (__entry->cca_opt) = cca->opt; \ } while (0) #define WPAN_CCA_PR_FMT "cca_mode: %d, cca_opt: %d" #define WPAN_CCA_PR_ARG __entry->cca_mode, __entry->cca_opt #define BOOL_TO_STR(bo) (bo) ? "true" : "false" /************************************************************* * rdev->ops traces * *************************************************************/ DECLARE_EVENT_CLASS(wpan_phy_only_evt, TP_PROTO(struct wpan_phy *wpan_phy), TP_ARGS(wpan_phy), TP_STRUCT__entry( WPAN_PHY_ENTRY ), TP_fast_assign( WPAN_PHY_ASSIGN; ), TP_printk(WPAN_PHY_PR_FMT, WPAN_PHY_PR_ARG) ); DEFINE_EVENT(wpan_phy_only_evt, 802154_rdev_suspend, TP_PROTO(struct wpan_phy *wpan_phy), TP_ARGS(wpan_phy) ); DEFINE_EVENT(wpan_phy_only_evt, 802154_rdev_resume, TP_PROTO(struct wpan_phy *wpan_phy), TP_ARGS(wpan_phy) ); TRACE_EVENT(802154_rdev_add_virtual_intf, TP_PROTO(struct wpan_phy *wpan_phy, char *name, enum nl802154_iftype type, __le64 extended_addr), TP_ARGS(wpan_phy, name, type, extended_addr), TP_STRUCT__entry( WPAN_PHY_ENTRY __string(vir_intf_name, name ? name : "<noname>") __field(enum nl802154_iftype, type) __field(__le64, extended_addr) ), TP_fast_assign( WPAN_PHY_ASSIGN; __assign_str(vir_intf_name); __entry->type = type; __entry->extended_addr = extended_addr; ), TP_printk(WPAN_PHY_PR_FMT ", virtual intf name: %s, type: %d, extended addr: 0x%llx", WPAN_PHY_PR_ARG, __get_str(vir_intf_name), __entry->type, __le64_to_cpu(__entry->extended_addr)) ); TRACE_EVENT(802154_rdev_del_virtual_intf, TP_PROTO(struct wpan_phy *wpan_phy, struct wpan_dev *wpan_dev), TP_ARGS(wpan_phy, wpan_dev), TP_STRUCT__entry( WPAN_PHY_ENTRY WPAN_DEV_ENTRY ), TP_fast_assign( WPAN_PHY_ASSIGN; WPAN_DEV_ASSIGN; ), TP_printk(WPAN_PHY_PR_FMT ", " WPAN_DEV_PR_FMT, WPAN_PHY_PR_ARG, WPAN_DEV_PR_ARG) ); TRACE_EVENT(802154_rdev_set_channel, TP_PROTO(struct wpan_phy *wpan_phy, u8 page, u8 channel), TP_ARGS(wpan_phy, page, channel), TP_STRUCT__entry( WPAN_PHY_ENTRY __field(u8, page) __field(u8, channel) ), TP_fast_assign( WPAN_PHY_ASSIGN; __entry->page = page; __entry->channel = channel; ), TP_printk(WPAN_PHY_PR_FMT ", page: %d, channel: %d", WPAN_PHY_PR_ARG, __entry->page, __entry->channel) ); TRACE_EVENT(802154_rdev_set_tx_power, TP_PROTO(struct wpan_phy *wpan_phy, s32 power), TP_ARGS(wpan_phy, power), TP_STRUCT__entry( WPAN_PHY_ENTRY __field(s32, power) ), TP_fast_assign( WPAN_PHY_ASSIGN; __entry->power = power; ), TP_printk(WPAN_PHY_PR_FMT ", mbm: %d", WPAN_PHY_PR_ARG, __entry->power) ); TRACE_EVENT(802154_rdev_set_cca_mode, TP_PROTO(struct wpan_phy *wpan_phy, const struct wpan_phy_cca *cca), TP_ARGS(wpan_phy, cca), TP_STRUCT__entry( WPAN_PHY_ENTRY WPAN_CCA_ENTRY ), TP_fast_assign( WPAN_PHY_ASSIGN; WPAN_CCA_ASSIGN; ), TP_printk(WPAN_PHY_PR_FMT ", " WPAN_CCA_PR_FMT, WPAN_PHY_PR_ARG, WPAN_CCA_PR_ARG) ); TRACE_EVENT(802154_rdev_set_cca_ed_level, TP_PROTO(struct wpan_phy *wpan_phy, s32 ed_level), TP_ARGS(wpan_phy, ed_level), TP_STRUCT__entry( WPAN_PHY_ENTRY __field(s32, ed_level) ), TP_fast_assign( WPAN_PHY_ASSIGN; __entry->ed_level = ed_level; ), TP_printk(WPAN_PHY_PR_FMT ", ed level: %d", WPAN_PHY_PR_ARG, __entry->ed_level) ); DECLARE_EVENT_CLASS(802154_le16_template, TP_PROTO(struct wpan_phy *wpan_phy, struct wpan_dev *wpan_dev, __le16 le16arg), TP_ARGS(wpan_phy, wpan_dev, le16arg), TP_STRUCT__entry( WPAN_PHY_ENTRY WPAN_DEV_ENTRY __field(__le16, le16arg) ), TP_fast_assign( WPAN_PHY_ASSIGN; WPAN_DEV_ASSIGN; __entry->le16arg = le16arg; ), TP_printk(WPAN_PHY_PR_FMT ", " WPAN_DEV_PR_FMT ", pan id: 0x%04x", WPAN_PHY_PR_ARG, WPAN_DEV_PR_ARG, __le16_to_cpu(__entry->le16arg)) ); DEFINE_EVENT(802154_le16_template, 802154_rdev_set_pan_id, TP_PROTO(struct wpan_phy *wpan_phy, struct wpan_dev *wpan_dev, __le16 le16arg), TP_ARGS(wpan_phy, wpan_dev, le16arg) ); DEFINE_EVENT_PRINT(802154_le16_template, 802154_rdev_set_short_addr, TP_PROTO(struct wpan_phy *wpan_phy, struct wpan_dev *wpan_dev, __le16 le16arg), TP_ARGS(wpan_phy, wpan_dev, le16arg), TP_printk(WPAN_PHY_PR_FMT ", " WPAN_DEV_PR_FMT ", short addr: 0x%04x", WPAN_PHY_PR_ARG, WPAN_DEV_PR_ARG, __le16_to_cpu(__entry->le16arg)) ); TRACE_EVENT(802154_rdev_set_backoff_exponent, TP_PROTO(struct wpan_phy *wpan_phy, struct wpan_dev *wpan_dev, u8 min_be, u8 max_be), TP_ARGS(wpan_phy, wpan_dev, min_be, max_be), TP_STRUCT__entry( WPAN_PHY_ENTRY WPAN_DEV_ENTRY __field(u8, min_be) __field(u8, max_be) ), TP_fast_assign( WPAN_PHY_ASSIGN; WPAN_DEV_ASSIGN; __entry->min_be = min_be; __entry->max_be = max_be; ), TP_printk(WPAN_PHY_PR_FMT ", " WPAN_DEV_PR_FMT ", min be: %d, max be: %d", WPAN_PHY_PR_ARG, WPAN_DEV_PR_ARG, __entry->min_be, __entry->max_be) ); TRACE_EVENT(802154_rdev_set_csma_backoffs, TP_PROTO(struct wpan_phy *wpan_phy, struct wpan_dev *wpan_dev, u8 max_csma_backoffs), TP_ARGS(wpan_phy, wpan_dev, max_csma_backoffs), TP_STRUCT__entry( WPAN_PHY_ENTRY WPAN_DEV_ENTRY __field(u8, max_csma_backoffs) ), TP_fast_assign( WPAN_PHY_ASSIGN; WPAN_DEV_ASSIGN; __entry->max_csma_backoffs = max_csma_backoffs; ), TP_printk(WPAN_PHY_PR_FMT ", " WPAN_DEV_PR_FMT ", max csma backoffs: %d", WPAN_PHY_PR_ARG, WPAN_DEV_PR_ARG, __entry->max_csma_backoffs) ); TRACE_EVENT(802154_rdev_set_max_frame_retries, TP_PROTO(struct wpan_phy *wpan_phy, struct wpan_dev *wpan_dev, s8 max_frame_retries), TP_ARGS(wpan_phy, wpan_dev, max_frame_retries), TP_STRUCT__entry( WPAN_PHY_ENTRY WPAN_DEV_ENTRY __field(s8, max_frame_retries) ), TP_fast_assign( WPAN_PHY_ASSIGN; WPAN_DEV_ASSIGN; __entry->max_frame_retries = max_frame_retries; ), TP_printk(WPAN_PHY_PR_FMT ", " WPAN_DEV_PR_FMT ", max frame retries: %d", WPAN_PHY_PR_ARG, WPAN_DEV_PR_ARG, __entry->max_frame_retries) ); TRACE_EVENT(802154_rdev_set_lbt_mode, TP_PROTO(struct wpan_phy *wpan_phy, struct wpan_dev *wpan_dev, bool mode), TP_ARGS(wpan_phy, wpan_dev, mode), TP_STRUCT__entry( WPAN_PHY_ENTRY WPAN_DEV_ENTRY __field(bool, mode) ), TP_fast_assign( WPAN_PHY_ASSIGN; WPAN_DEV_ASSIGN; __entry->mode = mode; ), TP_printk(WPAN_PHY_PR_FMT ", " WPAN_DEV_PR_FMT ", lbt mode: %s", WPAN_PHY_PR_ARG, WPAN_DEV_PR_ARG, BOOL_TO_STR(__entry->mode)) ); TRACE_EVENT(802154_rdev_set_ackreq_default, TP_PROTO(struct wpan_phy *wpan_phy, struct wpan_dev *wpan_dev, bool ackreq), TP_ARGS(wpan_phy, wpan_dev, ackreq), TP_STRUCT__entry( WPAN_PHY_ENTRY WPAN_DEV_ENTRY __field(bool, ackreq) ), TP_fast_assign( WPAN_PHY_ASSIGN; WPAN_DEV_ASSIGN; __entry->ackreq = ackreq; ), TP_printk(WPAN_PHY_PR_FMT ", " WPAN_DEV_PR_FMT ", ackreq default: %s", WPAN_PHY_PR_ARG, WPAN_DEV_PR_ARG, BOOL_TO_STR(__entry->ackreq)) ); TRACE_EVENT(802154_rdev_trigger_scan, TP_PROTO(struct wpan_phy *wpan_phy, struct cfg802154_scan_request *request), TP_ARGS(wpan_phy, request), TP_STRUCT__entry( WPAN_PHY_ENTRY __field(u8, page) __field(u32, channels) __field(u8, duration) ), TP_fast_assign( WPAN_PHY_ASSIGN; __entry->page = request->page; __entry->channels = request->channels; __entry->duration = request->duration; ), TP_printk(WPAN_PHY_PR_FMT ", scan, page: %d, channels: %x, duration %d", WPAN_PHY_PR_ARG, __entry->page, __entry->channels, __entry->duration) ); TRACE_EVENT(802154_rdev_send_beacons, TP_PROTO(struct wpan_phy *wpan_phy, struct cfg802154_beacon_request *request), TP_ARGS(wpan_phy, request), TP_STRUCT__entry( WPAN_PHY_ENTRY __field(u8, interval) ), TP_fast_assign( WPAN_PHY_ASSIGN; __entry->interval = request->interval; ), TP_printk(WPAN_PHY_PR_FMT ", sending beacons (interval order: %d)", WPAN_PHY_PR_ARG, __entry->interval) ); DECLARE_EVENT_CLASS(802154_wdev_template, TP_PROTO(struct wpan_phy *wpan_phy, struct wpan_dev *wpan_dev), TP_ARGS(wpan_phy, wpan_dev), TP_STRUCT__entry( WPAN_PHY_ENTRY WPAN_DEV_ENTRY ), TP_fast_assign( WPAN_PHY_ASSIGN; WPAN_DEV_ASSIGN; ), TP_printk(WPAN_PHY_PR_FMT ", " WPAN_DEV_PR_FMT, WPAN_PHY_PR_ARG, WPAN_DEV_PR_ARG) ); DEFINE_EVENT(802154_wdev_template, 802154_rdev_abort_scan, TP_PROTO(struct wpan_phy *wpan_phy, struct wpan_dev *wpan_dev), TP_ARGS(wpan_phy, wpan_dev) ); DEFINE_EVENT(802154_wdev_template, 802154_rdev_stop_beacons, TP_PROTO(struct wpan_phy *wpan_phy, struct wpan_dev *wpan_dev), TP_ARGS(wpan_phy, wpan_dev) ); TRACE_EVENT(802154_rdev_associate, TP_PROTO(struct wpan_phy *wpan_phy, struct wpan_dev *wpan_dev, struct ieee802154_addr *coord), TP_ARGS(wpan_phy, wpan_dev, coord), TP_STRUCT__entry( WPAN_PHY_ENTRY WPAN_DEV_ENTRY __field(__le64, addr) ), TP_fast_assign( WPAN_PHY_ASSIGN; WPAN_DEV_ASSIGN; __entry->addr = coord->extended_addr; ), TP_printk(WPAN_PHY_PR_FMT ", " WPAN_DEV_PR_FMT ", associating with: 0x%llx", WPAN_PHY_PR_ARG, WPAN_DEV_PR_ARG, __entry->addr) ); TRACE_EVENT(802154_rdev_disassociate, TP_PROTO(struct wpan_phy *wpan_phy, struct wpan_dev *wpan_dev, struct ieee802154_addr *target), TP_ARGS(wpan_phy, wpan_dev, target), TP_STRUCT__entry( WPAN_PHY_ENTRY WPAN_DEV_ENTRY __field(__le64, addr) ), TP_fast_assign( WPAN_PHY_ASSIGN; WPAN_DEV_ASSIGN; __entry->addr = target->extended_addr; ), TP_printk(WPAN_PHY_PR_FMT ", " WPAN_DEV_PR_FMT ", disassociating with: 0x%llx", WPAN_PHY_PR_ARG, WPAN_DEV_PR_ARG, __entry->addr) ); TRACE_EVENT(802154_rdev_return_int, TP_PROTO(struct wpan_phy *wpan_phy, int ret), TP_ARGS(wpan_phy, ret), TP_STRUCT__entry( WPAN_PHY_ENTRY __field(int, ret) ), TP_fast_assign( WPAN_PHY_ASSIGN; __entry->ret = ret; ), TP_printk(WPAN_PHY_PR_FMT ", returned: %d", WPAN_PHY_PR_ARG, __entry->ret) ); #endif /* !__RDEV_CFG802154_OPS_TRACE || TRACE_HEADER_MULTI_READ */ #undef TRACE_INCLUDE_PATH #define TRACE_INCLUDE_PATH . #undef TRACE_INCLUDE_FILE #define TRACE_INCLUDE_FILE trace #include <trace/define_trace.h>
4 4 4 4 114 114 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 // SPDX-License-Identifier: GPL-2.0-or-later /* * xfrm_proc.c * * Copyright (C)2006-2007 USAGI/WIDE Project * * Authors: Masahide NAKAMURA <nakam@linux-ipv6.org> */ #include <linux/proc_fs.h> #include <linux/seq_file.h> #include <linux/export.h> #include <net/snmp.h> #include <net/xfrm.h> static const struct snmp_mib xfrm_mib_list[] = { SNMP_MIB_ITEM("XfrmInError", LINUX_MIB_XFRMINERROR), SNMP_MIB_ITEM("XfrmInBufferError", LINUX_MIB_XFRMINBUFFERERROR), SNMP_MIB_ITEM("XfrmInHdrError", LINUX_MIB_XFRMINHDRERROR), SNMP_MIB_ITEM("XfrmInNoStates", LINUX_MIB_XFRMINNOSTATES), SNMP_MIB_ITEM("XfrmInStateProtoError", LINUX_MIB_XFRMINSTATEPROTOERROR), SNMP_MIB_ITEM("XfrmInStateModeError", LINUX_MIB_XFRMINSTATEMODEERROR), SNMP_MIB_ITEM("XfrmInStateSeqError", LINUX_MIB_XFRMINSTATESEQERROR), SNMP_MIB_ITEM("XfrmInStateExpired", LINUX_MIB_XFRMINSTATEEXPIRED), SNMP_MIB_ITEM("XfrmInStateMismatch", LINUX_MIB_XFRMINSTATEMISMATCH), SNMP_MIB_ITEM("XfrmInStateInvalid", LINUX_MIB_XFRMINSTATEINVALID), SNMP_MIB_ITEM("XfrmInTmplMismatch", LINUX_MIB_XFRMINTMPLMISMATCH), SNMP_MIB_ITEM("XfrmInNoPols", LINUX_MIB_XFRMINNOPOLS), SNMP_MIB_ITEM("XfrmInPolBlock", LINUX_MIB_XFRMINPOLBLOCK), SNMP_MIB_ITEM("XfrmInPolError", LINUX_MIB_XFRMINPOLERROR), SNMP_MIB_ITEM("XfrmOutError", LINUX_MIB_XFRMOUTERROR), SNMP_MIB_ITEM("XfrmOutBundleGenError", LINUX_MIB_XFRMOUTBUNDLEGENERROR), SNMP_MIB_ITEM("XfrmOutBundleCheckError", LINUX_MIB_XFRMOUTBUNDLECHECKERROR), SNMP_MIB_ITEM("XfrmOutNoStates", LINUX_MIB_XFRMOUTNOSTATES), SNMP_MIB_ITEM("XfrmOutStateProtoError", LINUX_MIB_XFRMOUTSTATEPROTOERROR), SNMP_MIB_ITEM("XfrmOutStateModeError", LINUX_MIB_XFRMOUTSTATEMODEERROR), SNMP_MIB_ITEM("XfrmOutStateSeqError", LINUX_MIB_XFRMOUTSTATESEQERROR), SNMP_MIB_ITEM("XfrmOutStateExpired", LINUX_MIB_XFRMOUTSTATEEXPIRED), SNMP_MIB_ITEM("XfrmOutPolBlock", LINUX_MIB_XFRMOUTPOLBLOCK), SNMP_MIB_ITEM("XfrmOutPolDead", LINUX_MIB_XFRMOUTPOLDEAD), SNMP_MIB_ITEM("XfrmOutPolError", LINUX_MIB_XFRMOUTPOLERROR), SNMP_MIB_ITEM("XfrmFwdHdrError", LINUX_MIB_XFRMFWDHDRERROR), SNMP_MIB_ITEM("XfrmOutStateInvalid", LINUX_MIB_XFRMOUTSTATEINVALID), SNMP_MIB_ITEM("XfrmAcquireError", LINUX_MIB_XFRMACQUIREERROR), SNMP_MIB_ITEM("XfrmOutStateDirError", LINUX_MIB_XFRMOUTSTATEDIRERROR), SNMP_MIB_ITEM("XfrmInStateDirError", LINUX_MIB_XFRMINSTATEDIRERROR), SNMP_MIB_SENTINEL }; static int xfrm_statistics_seq_show(struct seq_file *seq, void *v) { unsigned long buff[LINUX_MIB_XFRMMAX]; struct net *net = seq->private; int i; memset(buff, 0, sizeof(unsigned long) * LINUX_MIB_XFRMMAX); xfrm_state_update_stats(net); snmp_get_cpu_field_batch(buff, xfrm_mib_list, net->mib.xfrm_statistics); for (i = 0; xfrm_mib_list[i].name; i++) seq_printf(seq, "%-24s\t%lu\n", xfrm_mib_list[i].name, buff[i]); return 0; } int __net_init xfrm_proc_init(struct net *net) { if (!proc_create_net_single("xfrm_stat", 0444, net->proc_net, xfrm_statistics_seq_show, NULL)) return -ENOMEM; return 0; } void xfrm_proc_fini(struct net *net) { remove_proc_entry("xfrm_stat", net->proc_net); }
11 1738 23 725 238 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 /* SPDX-License-Identifier: GPL-2.0 */ /* * Statically sized hash table implementation * (C) 2012 Sasha Levin <levinsasha928@gmail.com> */ #ifndef _LINUX_HASHTABLE_H #define _LINUX_HASHTABLE_H #include <linux/list.h> #include <linux/types.h> #include <linux/kernel.h> #include <linux/hash.h> #include <linux/rculist.h> #define DEFINE_HASHTABLE(name, bits) \ struct hlist_head name[1 << (bits)] = \ { [0 ... ((1 << (bits)) - 1)] = HLIST_HEAD_INIT } #define DEFINE_READ_MOSTLY_HASHTABLE(name, bits) \ struct hlist_head name[1 << (bits)] __read_mostly = \ { [0 ... ((1 << (bits)) - 1)] = HLIST_HEAD_INIT } #define DECLARE_HASHTABLE(name, bits) \ struct hlist_head name[1 << (bits)] #define HASH_SIZE(name) (ARRAY_SIZE(name)) #define HASH_BITS(name) ilog2(HASH_SIZE(name)) /* Use hash_32 when possible to allow for fast 32bit hashing in 64bit kernels. */ #define hash_min(val, bits) \ (sizeof(val) <= 4 ? hash_32(val, bits) : hash_long(val, bits)) static inline void __hash_init(struct hlist_head *ht, unsigned int sz) { unsigned int i; for (i = 0; i < sz; i++) INIT_HLIST_HEAD(&ht[i]); } /** * hash_init - initialize a hash table * @hashtable: hashtable to be initialized * * Calculates the size of the hashtable from the given parameter, otherwise * same as hash_init_size. * * This has to be a macro since HASH_BITS() will not work on pointers since * it calculates the size during preprocessing. */ #define hash_init(hashtable) __hash_init(hashtable, HASH_SIZE(hashtable)) /** * hash_add - add an object to a hashtable * @hashtable: hashtable to add to * @node: the &struct hlist_node of the object to be added * @key: the key of the object to be added */ #define hash_add(hashtable, node, key) \ hlist_add_head(node, &hashtable[hash_min(key, HASH_BITS(hashtable))]) /** * hash_add_rcu - add an object to a rcu enabled hashtable * @hashtable: hashtable to add to * @node: the &struct hlist_node of the object to be added * @key: the key of the object to be added */ #define hash_add_rcu(hashtable, node, key) \ hlist_add_head_rcu(node, &hashtable[hash_min(key, HASH_BITS(hashtable))]) /** * hash_hashed - check whether an object is in any hashtable * @node: the &struct hlist_node of the object to be checked */ static inline bool hash_hashed(struct hlist_node *node) { return !hlist_unhashed(node); } static inline bool __hash_empty(struct hlist_head *ht, unsigned int sz) { unsigned int i; for (i = 0; i < sz; i++) if (!hlist_empty(&ht[i])) return false; return true; } /** * hash_empty - check whether a hashtable is empty * @hashtable: hashtable to check * * This has to be a macro since HASH_BITS() will not work on pointers since * it calculates the size during preprocessing. */ #define hash_empty(hashtable) __hash_empty(hashtable, HASH_SIZE(hashtable)) /** * hash_del - remove an object from a hashtable * @node: &struct hlist_node of the object to remove */ static inline void hash_del(struct hlist_node *node) { hlist_del_init(node); } /** * hash_del_rcu - remove an object from a rcu enabled hashtable * @node: &struct hlist_node of the object to remove */ static inline void hash_del_rcu(struct hlist_node *node) { hlist_del_init_rcu(node); } /** * hash_for_each - iterate over a hashtable * @name: hashtable to iterate * @bkt: integer to use as bucket loop cursor * @obj: the type * to use as a loop cursor for each entry * @member: the name of the hlist_node within the struct */ #define hash_for_each(name, bkt, obj, member) \ for ((bkt) = 0, obj = NULL; obj == NULL && (bkt) < HASH_SIZE(name);\ (bkt)++)\ hlist_for_each_entry(obj, &name[bkt], member) /** * hash_for_each_rcu - iterate over a rcu enabled hashtable * @name: hashtable to iterate * @bkt: integer to use as bucket loop cursor * @obj: the type * to use as a loop cursor for each entry * @member: the name of the hlist_node within the struct */ #define hash_for_each_rcu(name, bkt, obj, member) \ for ((bkt) = 0, obj = NULL; obj == NULL && (bkt) < HASH_SIZE(name);\ (bkt)++)\ hlist_for_each_entry_rcu(obj, &name[bkt], member) /** * hash_for_each_safe - iterate over a hashtable safe against removal of * hash entry * @name: hashtable to iterate * @bkt: integer to use as bucket loop cursor * @tmp: a &struct hlist_node used for temporary storage * @obj: the type * to use as a loop cursor for each entry * @member: the name of the hlist_node within the struct */ #define hash_for_each_safe(name, bkt, tmp, obj, member) \ for ((bkt) = 0, obj = NULL; obj == NULL && (bkt) < HASH_SIZE(name);\ (bkt)++)\ hlist_for_each_entry_safe(obj, tmp, &name[bkt], member) /** * hash_for_each_possible - iterate over all possible objects hashing to the * same bucket * @name: hashtable to iterate * @obj: the type * to use as a loop cursor for each entry * @member: the name of the hlist_node within the struct * @key: the key of the objects to iterate over */ #define hash_for_each_possible(name, obj, member, key) \ hlist_for_each_entry(obj, &name[hash_min(key, HASH_BITS(name))], member) /** * hash_for_each_possible_rcu - iterate over all possible objects hashing to the * same bucket in an rcu enabled hashtable * @name: hashtable to iterate * @obj: the type * to use as a loop cursor for each entry * @member: the name of the hlist_node within the struct * @key: the key of the objects to iterate over */ #define hash_for_each_possible_rcu(name, obj, member, key, cond...) \ hlist_for_each_entry_rcu(obj, &name[hash_min(key, HASH_BITS(name))],\ member, ## cond) /** * hash_for_each_possible_rcu_notrace - iterate over all possible objects hashing * to the same bucket in an rcu enabled hashtable in a rcu enabled hashtable * @name: hashtable to iterate * @obj: the type * to use as a loop cursor for each entry * @member: the name of the hlist_node within the struct * @key: the key of the objects to iterate over * * This is the same as hash_for_each_possible_rcu() except that it does * not do any RCU debugging or tracing. */ #define hash_for_each_possible_rcu_notrace(name, obj, member, key) \ hlist_for_each_entry_rcu_notrace(obj, \ &name[hash_min(key, HASH_BITS(name))], member) /** * hash_for_each_possible_safe - iterate over all possible objects hashing to the * same bucket safe against removals * @name: hashtable to iterate * @obj: the type * to use as a loop cursor for each entry * @tmp: a &struct hlist_node used for temporary storage * @member: the name of the hlist_node within the struct * @key: the key of the objects to iterate over */ #define hash_for_each_possible_safe(name, obj, tmp, member, key) \ hlist_for_each_entry_safe(obj, tmp,\ &name[hash_min(key, HASH_BITS(name))], member) #endif
8 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _LINUX_NET_TIMESTAMPING_H_ #define _LINUX_NET_TIMESTAMPING_H_ #include <uapi/linux/net_tstamp.h> #define SOF_TIMESTAMPING_SOFTWARE_MASK (SOF_TIMESTAMPING_RX_SOFTWARE | \ SOF_TIMESTAMPING_TX_SOFTWARE | \ SOF_TIMESTAMPING_SOFTWARE) #define SOF_TIMESTAMPING_HARDWARE_MASK (SOF_TIMESTAMPING_RX_HARDWARE | \ SOF_TIMESTAMPING_TX_HARDWARE | \ SOF_TIMESTAMPING_RAW_HARDWARE) enum hwtstamp_source { HWTSTAMP_SOURCE_UNSPEC, HWTSTAMP_SOURCE_NETDEV, HWTSTAMP_SOURCE_PHYLIB, }; /** * struct kernel_hwtstamp_config - Kernel copy of struct hwtstamp_config * * @flags: see struct hwtstamp_config * @tx_type: see struct hwtstamp_config * @rx_filter: see struct hwtstamp_config * @ifr: pointer to ifreq structure from the original ioctl request, to pass to * a legacy implementation of a lower driver * @copied_to_user: request was passed to a legacy implementation which already * copied the ioctl request back to user space * @source: indication whether timestamps should come from the netdev or from * an attached phylib PHY * * Prefer using this structure for in-kernel processing of hardware * timestamping configuration, over the inextensible struct hwtstamp_config * exposed to the %SIOCGHWTSTAMP and %SIOCSHWTSTAMP ioctl UAPI. */ struct kernel_hwtstamp_config { int flags; int tx_type; int rx_filter; struct ifreq *ifr; bool copied_to_user; enum hwtstamp_source source; }; static inline void hwtstamp_config_to_kernel(struct kernel_hwtstamp_config *kernel_cfg, const struct hwtstamp_config *cfg) { kernel_cfg->flags = cfg->flags; kernel_cfg->tx_type = cfg->tx_type; kernel_cfg->rx_filter = cfg->rx_filter; } static inline void hwtstamp_config_from_kernel(struct hwtstamp_config *cfg, const struct kernel_hwtstamp_config *kernel_cfg) { cfg->flags = kernel_cfg->flags; cfg->tx_type = kernel_cfg->tx_type; cfg->rx_filter = kernel_cfg->rx_filter; } static inline bool kernel_hwtstamp_config_changed(const struct kernel_hwtstamp_config *a, const struct kernel_hwtstamp_config *b) { return a->flags != b->flags || a->tx_type != b->tx_type || a->rx_filter != b->rx_filter; } #endif /* _LINUX_NET_TIMESTAMPING_H_ */
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 /* SPDX-License-Identifier: GPL-2.0-only */ /* * Copyright (C) 2016-2017 Linaro Ltd., Rob Herring <robh@kernel.org> */ #ifndef _LINUX_SERDEV_H #define _LINUX_SERDEV_H #include <linux/types.h> #include <linux/device.h> #include <linux/iopoll.h> #include <linux/uaccess.h> #include <linux/termios.h> #include <linux/delay.h> struct serdev_controller; struct serdev_device; /* * serdev device structures */ /** * struct serdev_device_ops - Callback operations for a serdev device * @receive_buf: Function called with data received from device; * returns number of bytes accepted; may sleep. * @write_wakeup: Function called when ready to transmit more data; must * not sleep. */ struct serdev_device_ops { size_t (*receive_buf)(struct serdev_device *, const u8 *, size_t); void (*write_wakeup)(struct serdev_device *); }; /** * struct serdev_device - Basic representation of an serdev device * @dev: Driver model representation of the device. * @nr: Device number on serdev bus. * @ctrl: serdev controller managing this device. * @ops: Device operations. * @write_comp Completion used by serdev_device_write() internally * @write_lock Lock to serialize access when writing data */ struct serdev_device { struct device dev; int nr; struct serdev_controller *ctrl; const struct serdev_device_ops *ops; struct completion write_comp; struct mutex write_lock; }; static inline struct serdev_device *to_serdev_device(struct device *d) { return container_of(d, struct serdev_device, dev); } /** * struct serdev_device_driver - serdev slave device driver * @driver: serdev device drivers should initialize name field of this * structure. * @probe: binds this driver to a serdev device. * @remove: unbinds this driver from the serdev device. */ struct serdev_device_driver { struct device_driver driver; int (*probe)(struct serdev_device *); void (*remove)(struct serdev_device *); }; static inline struct serdev_device_driver *to_serdev_device_driver(struct device_driver *d) { return container_of(d, struct serdev_device_driver, driver); } enum serdev_parity { SERDEV_PARITY_NONE, SERDEV_PARITY_EVEN, SERDEV_PARITY_ODD, }; /* * serdev controller structures */ struct serdev_controller_ops { ssize_t (*write_buf)(struct serdev_controller *, const u8 *, size_t); void (*write_flush)(struct serdev_controller *); int (*write_room)(struct serdev_controller *); int (*open)(struct serdev_controller *); void (*close)(struct serdev_controller *); void (*set_flow_control)(struct serdev_controller *, bool); int (*set_parity)(struct serdev_controller *, enum serdev_parity); unsigned int (*set_baudrate)(struct serdev_controller *, unsigned int); void (*wait_until_sent)(struct serdev_controller *, long); int (*get_tiocm)(struct serdev_controller *); int (*set_tiocm)(struct serdev_controller *, unsigned int, unsigned int); int (*break_ctl)(struct serdev_controller *ctrl, unsigned int break_state); }; /** * struct serdev_controller - interface to the serdev controller * @dev: Driver model representation of the device. * @host: Serial port hardware controller device * @nr: number identifier for this controller/bus. * @serdev: Pointer to slave device for this controller. * @ops: Controller operations. */ struct serdev_controller { struct device dev; struct device *host; unsigned int nr; struct serdev_device *serdev; const struct serdev_controller_ops *ops; }; static inline struct serdev_controller *to_serdev_controller(struct device *d) { return container_of(d, struct serdev_controller, dev); } static inline void *serdev_device_get_drvdata(const struct serdev_device *serdev) { return dev_get_drvdata(&serdev->dev); } static inline void serdev_device_set_drvdata(struct serdev_device *serdev, void *data) { dev_set_drvdata(&serdev->dev, data); } /** * serdev_device_put() - decrement serdev device refcount * @serdev serdev device. */ static inline void serdev_device_put(struct serdev_device *serdev) { if (serdev) put_device(&serdev->dev); } static inline void serdev_device_set_client_ops(struct serdev_device *serdev, const struct serdev_device_ops *ops) { serdev->ops = ops; } static inline void *serdev_controller_get_drvdata(const struct serdev_controller *ctrl) { return ctrl ? dev_get_drvdata(&ctrl->dev) : NULL; } static inline void serdev_controller_set_drvdata(struct serdev_controller *ctrl, void *data) { dev_set_drvdata(&ctrl->dev, data); } /** * serdev_controller_put() - decrement controller refcount * @ctrl serdev controller. */ static inline void serdev_controller_put(struct serdev_controller *ctrl) { if (ctrl) put_device(&ctrl->dev); } struct serdev_device *serdev_device_alloc(struct serdev_controller *); int serdev_device_add(struct serdev_device *); void serdev_device_remove(struct serdev_device *); struct serdev_controller *serdev_controller_alloc(struct device *host, struct device *parent, size_t size); int serdev_controller_add(struct serdev_controller *); void serdev_controller_remove(struct serdev_controller *); static inline void serdev_controller_write_wakeup(struct serdev_controller *ctrl) { struct serdev_device *serdev = ctrl->serdev; if (!serdev || !serdev->ops->write_wakeup) return; serdev->ops->write_wakeup(serdev); } static inline size_t serdev_controller_receive_buf(struct serdev_controller *ctrl, const u8 *data, size_t count) { struct serdev_device *serdev = ctrl->serdev; if (!serdev || !serdev->ops->receive_buf) return 0; return serdev->ops->receive_buf(serdev, data, count); } #if IS_ENABLED(CONFIG_SERIAL_DEV_BUS) int serdev_device_open(struct serdev_device *); void serdev_device_close(struct serdev_device *); int devm_serdev_device_open(struct device *, struct serdev_device *); unsigned int serdev_device_set_baudrate(struct serdev_device *, unsigned int); void serdev_device_set_flow_control(struct serdev_device *, bool); int serdev_device_write_buf(struct serdev_device *, const u8 *, size_t); void serdev_device_wait_until_sent(struct serdev_device *, long); int serdev_device_get_tiocm(struct serdev_device *); int serdev_device_set_tiocm(struct serdev_device *, int, int); int serdev_device_break_ctl(struct serdev_device *serdev, int break_state); void serdev_device_write_wakeup(struct serdev_device *); ssize_t serdev_device_write(struct serdev_device *, const u8 *, size_t, long); void serdev_device_write_flush(struct serdev_device *); int serdev_device_write_room(struct serdev_device *); /* * serdev device driver functions */ int __serdev_device_driver_register(struct serdev_device_driver *, struct module *); #define serdev_device_driver_register(sdrv) \ __serdev_device_driver_register(sdrv, THIS_MODULE) /** * serdev_device_driver_unregister() - unregister an serdev client driver * @sdrv: the driver to unregister */ static inline void serdev_device_driver_unregister(struct serdev_device_driver *sdrv) { if (sdrv) driver_unregister(&sdrv->driver); } #define module_serdev_device_driver(__serdev_device_driver) \ module_driver(__serdev_device_driver, serdev_device_driver_register, \ serdev_device_driver_unregister) #else static inline int serdev_device_open(struct serdev_device *sdev) { return -ENODEV; } static inline void serdev_device_close(struct serdev_device *sdev) {} static inline unsigned int serdev_device_set_baudrate(struct serdev_device *sdev, unsigned int baudrate) { return 0; } static inline void serdev_device_set_flow_control(struct serdev_device *sdev, bool enable) {} static inline int serdev_device_write_buf(struct serdev_device *serdev, const u8 *buf, size_t count) { return -ENODEV; } static inline void serdev_device_wait_until_sent(struct serdev_device *sdev, long timeout) {} static inline int serdev_device_get_tiocm(struct serdev_device *serdev) { return -EOPNOTSUPP; } static inline int serdev_device_set_tiocm(struct serdev_device *serdev, int set, int clear) { return -EOPNOTSUPP; } static inline int serdev_device_break_ctl(struct serdev_device *serdev, int break_state) { return -EOPNOTSUPP; } static inline ssize_t serdev_device_write(struct serdev_device *sdev, const u8 *buf, size_t count, unsigned long timeout) { return -ENODEV; } static inline void serdev_device_write_flush(struct serdev_device *sdev) {} static inline int serdev_device_write_room(struct serdev_device *sdev) { return 0; } #define serdev_device_driver_register(x) #define serdev_device_driver_unregister(x) #endif /* CONFIG_SERIAL_DEV_BUS */ static inline bool serdev_device_get_cts(struct serdev_device *serdev) { int status = serdev_device_get_tiocm(serdev); return !!(status & TIOCM_CTS); } static inline int serdev_device_wait_for_cts(struct serdev_device *serdev, bool state, int timeout_ms) { bool signal; return readx_poll_timeout(serdev_device_get_cts, serdev, signal, signal == state, 2000, timeout_ms * 1000); } static inline int serdev_device_set_rts(struct serdev_device *serdev, bool enable) { if (enable) return serdev_device_set_tiocm(serdev, TIOCM_RTS, 0); else return serdev_device_set_tiocm(serdev, 0, TIOCM_RTS); } int serdev_device_set_parity(struct serdev_device *serdev, enum serdev_parity parity); /* * serdev hooks into TTY core */ struct tty_port; struct tty_driver; #ifdef CONFIG_SERIAL_DEV_CTRL_TTYPORT struct device *serdev_tty_port_register(struct tty_port *port, struct device *host, struct device *parent, struct tty_driver *drv, int idx); int serdev_tty_port_unregister(struct tty_port *port); #else static inline struct device *serdev_tty_port_register(struct tty_port *port, struct device *host, struct device *parent, struct tty_driver *drv, int idx) { return ERR_PTR(-ENODEV); } static inline int serdev_tty_port_unregister(struct tty_port *port) { return -ENODEV; } #endif /* CONFIG_SERIAL_DEV_CTRL_TTYPORT */ struct acpi_resource; struct acpi_resource_uart_serialbus; #ifdef CONFIG_ACPI bool serdev_acpi_get_uart_resource(struct acpi_resource *ares, struct acpi_resource_uart_serialbus **uart); #else static inline bool serdev_acpi_get_uart_resource(struct acpi_resource *ares, struct acpi_resource_uart_serialbus **uart) { return false; } #endif /* CONFIG_ACPI */ #endif /*_LINUX_SERDEV_H */
8 8 7 8 7 2 1 2 2 2 2 2 1 2 2 1 2 4 2 2 4 4 8 8 8 8 8 8 8 8 7 3 1 2 2 1 1 3 3 1 3 1 1 1 1 1 13 12 11 11 3 2 10 10 1 1 7 17 18 3 2 2 2 13 13 7 18 10 10 10 11 11 11 11 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 /* * Copyright (C) 2017 Netronome Systems, Inc. * * This software is licensed under the GNU General License Version 2, * June 1991 as shown in the file COPYING in the top-level directory of this * source tree. * * THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" * WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS * FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE * OF THE PROGRAM IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME * THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION. */ #include <linux/bpf.h> #include <linux/bpf_verifier.h> #include <linux/debugfs.h> #include <linux/kernel.h> #include <linux/mutex.h> #include <linux/rtnetlink.h> #include <net/pkt_cls.h> #include "netdevsim.h" #define pr_vlog(env, fmt, ...) \ bpf_verifier_log_write(env, "[netdevsim] " fmt, ##__VA_ARGS__) struct nsim_bpf_bound_prog { struct nsim_dev *nsim_dev; struct bpf_prog *prog; struct dentry *ddir; const char *state; bool is_loaded; struct list_head l; }; #define NSIM_BPF_MAX_KEYS 2 struct nsim_bpf_bound_map { struct netdevsim *ns; struct bpf_offloaded_map *map; struct mutex mutex; struct nsim_map_entry { void *key; void *value; } entry[NSIM_BPF_MAX_KEYS]; struct list_head l; }; static int nsim_bpf_string_show(struct seq_file *file, void *data) { const char **str = file->private; if (*str) seq_printf(file, "%s\n", *str); return 0; } DEFINE_SHOW_ATTRIBUTE(nsim_bpf_string); static int nsim_bpf_verify_insn(struct bpf_verifier_env *env, int insn_idx, int prev_insn) { struct nsim_bpf_bound_prog *state; int ret = 0; state = env->prog->aux->offload->dev_priv; if (state->nsim_dev->bpf_bind_verifier_delay && !insn_idx) msleep(state->nsim_dev->bpf_bind_verifier_delay); if (insn_idx == env->prog->len - 1) { pr_vlog(env, "Hello from netdevsim!\n"); if (!state->nsim_dev->bpf_bind_verifier_accept) ret = -EOPNOTSUPP; } return ret; } static int nsim_bpf_finalize(struct bpf_verifier_env *env) { return 0; } static bool nsim_xdp_offload_active(struct netdevsim *ns) { return ns->xdp_hw.prog; } static void nsim_prog_set_loaded(struct bpf_prog *prog, bool loaded) { struct nsim_bpf_bound_prog *state; if (!prog || !bpf_prog_is_offloaded(prog->aux)) return; state = prog->aux->offload->dev_priv; state->is_loaded = loaded; } static int nsim_bpf_offload(struct netdevsim *ns, struct bpf_prog *prog, bool oldprog) { nsim_prog_set_loaded(ns->bpf_offloaded, false); WARN(!!ns->bpf_offloaded != oldprog, "bad offload state, expected offload %sto be active", oldprog ? "" : "not "); ns->bpf_offloaded = prog; ns->bpf_offloaded_id = prog ? prog->aux->id : 0; nsim_prog_set_loaded(prog, true); return 0; } int nsim_bpf_setup_tc_block_cb(enum tc_setup_type type, void *type_data, void *cb_priv) { struct tc_cls_bpf_offload *cls_bpf = type_data; struct bpf_prog *prog = cls_bpf->prog; struct netdevsim *ns = cb_priv; struct bpf_prog *oldprog; if (type != TC_SETUP_CLSBPF) { NSIM_EA(cls_bpf->common.extack, "only offload of BPF classifiers supported"); return -EOPNOTSUPP; } if (!tc_cls_can_offload_and_chain0(ns->netdev, &cls_bpf->common)) return -EOPNOTSUPP; if (cls_bpf->common.protocol != htons(ETH_P_ALL)) { NSIM_EA(cls_bpf->common.extack, "only ETH_P_ALL supported as filter protocol"); return -EOPNOTSUPP; } if (!ns->bpf_tc_accept) { NSIM_EA(cls_bpf->common.extack, "netdevsim configured to reject BPF TC offload"); return -EOPNOTSUPP; } /* Note: progs without skip_sw will probably not be dev bound */ if (prog && !prog->aux->offload && !ns->bpf_tc_non_bound_accept) { NSIM_EA(cls_bpf->common.extack, "netdevsim configured to reject unbound programs"); return -EOPNOTSUPP; } if (cls_bpf->command != TC_CLSBPF_OFFLOAD) return -EOPNOTSUPP; oldprog = cls_bpf->oldprog; /* Don't remove if oldprog doesn't match driver's state */ if (ns->bpf_offloaded != oldprog) { oldprog = NULL; if (!cls_bpf->prog) return 0; if (ns->bpf_offloaded) { NSIM_EA(cls_bpf->common.extack, "driver and netdev offload states mismatch"); return -EBUSY; } } return nsim_bpf_offload(ns, cls_bpf->prog, oldprog); } int nsim_bpf_disable_tc(struct netdevsim *ns) { if (ns->bpf_offloaded && !nsim_xdp_offload_active(ns)) return -EBUSY; return 0; } static int nsim_xdp_offload_prog(struct netdevsim *ns, struct netdev_bpf *bpf) { if (!nsim_xdp_offload_active(ns) && !bpf->prog) return 0; if (!nsim_xdp_offload_active(ns) && bpf->prog && ns->bpf_offloaded) { NSIM_EA(bpf->extack, "TC program is already loaded"); return -EBUSY; } return nsim_bpf_offload(ns, bpf->prog, nsim_xdp_offload_active(ns)); } static int nsim_xdp_set_prog(struct netdevsim *ns, struct netdev_bpf *bpf, struct xdp_attachment_info *xdp) { int err; if (bpf->command == XDP_SETUP_PROG && !ns->bpf_xdpdrv_accept) { NSIM_EA(bpf->extack, "driver XDP disabled in DebugFS"); return -EOPNOTSUPP; } if (bpf->command == XDP_SETUP_PROG_HW && !ns->bpf_xdpoffload_accept) { NSIM_EA(bpf->extack, "XDP offload disabled in DebugFS"); return -EOPNOTSUPP; } if (bpf->command == XDP_SETUP_PROG_HW) { err = nsim_xdp_offload_prog(ns, bpf); if (err) return err; } xdp_attachment_setup(xdp, bpf); return 0; } static int nsim_bpf_create_prog(struct nsim_dev *nsim_dev, struct bpf_prog *prog) { struct nsim_bpf_bound_prog *state; char name[16]; int ret; state = kzalloc(sizeof(*state), GFP_KERNEL); if (!state) return -ENOMEM; state->nsim_dev = nsim_dev; state->prog = prog; state->state = "verify"; /* Program id is not populated yet when we create the state. */ sprintf(name, "%u", nsim_dev->prog_id_gen++); state->ddir = debugfs_create_dir(name, nsim_dev->ddir_bpf_bound_progs); if (IS_ERR(state->ddir)) { ret = PTR_ERR(state->ddir); kfree(state); return ret; } debugfs_create_u32("id", 0400, state->ddir, &prog->aux->id); debugfs_create_file("state", 0400, state->ddir, &state->state, &nsim_bpf_string_fops); debugfs_create_bool("loaded", 0400, state->ddir, &state->is_loaded); list_add_tail(&state->l, &nsim_dev->bpf_bound_progs); prog->aux->offload->dev_priv = state; return 0; } static int nsim_bpf_verifier_prep(struct bpf_prog *prog) { struct nsim_dev *nsim_dev = bpf_offload_dev_priv(prog->aux->offload->offdev); if (!nsim_dev->bpf_bind_accept) return -EOPNOTSUPP; return nsim_bpf_create_prog(nsim_dev, prog); } static int nsim_bpf_translate(struct bpf_prog *prog) { struct nsim_bpf_bound_prog *state = prog->aux->offload->dev_priv; state->state = "xlated"; return 0; } static void nsim_bpf_destroy_prog(struct bpf_prog *prog) { struct nsim_bpf_bound_prog *state; state = prog->aux->offload->dev_priv; WARN(state->is_loaded, "offload state destroyed while program still bound"); debugfs_remove_recursive(state->ddir); list_del(&state->l); kfree(state); } static const struct bpf_prog_offload_ops nsim_bpf_dev_ops = { .insn_hook = nsim_bpf_verify_insn, .finalize = nsim_bpf_finalize, .prepare = nsim_bpf_verifier_prep, .translate = nsim_bpf_translate, .destroy = nsim_bpf_destroy_prog, }; static int nsim_setup_prog_checks(struct netdevsim *ns, struct netdev_bpf *bpf) { if (bpf->prog && bpf->prog->aux->offload) { NSIM_EA(bpf->extack, "attempt to load offloaded prog to drv"); return -EINVAL; } if (ns->netdev->mtu > NSIM_XDP_MAX_MTU) { NSIM_EA(bpf->extack, "MTU too large w/ XDP enabled"); return -EINVAL; } return 0; } static int nsim_setup_prog_hw_checks(struct netdevsim *ns, struct netdev_bpf *bpf) { struct nsim_bpf_bound_prog *state; if (!bpf->prog) return 0; if (!bpf_prog_is_offloaded(bpf->prog->aux)) { NSIM_EA(bpf->extack, "xdpoffload of non-bound program"); return -EINVAL; } state = bpf->prog->aux->offload->dev_priv; if (WARN_ON(strcmp(state->state, "xlated"))) { NSIM_EA(bpf->extack, "offloading program in bad state"); return -EINVAL; } return 0; } static bool nsim_map_key_match(struct bpf_map *map, struct nsim_map_entry *e, void *key) { return e->key && !memcmp(key, e->key, map->key_size); } static int nsim_map_key_find(struct bpf_offloaded_map *offmap, void *key) { struct nsim_bpf_bound_map *nmap = offmap->dev_priv; unsigned int i; for (i = 0; i < ARRAY_SIZE(nmap->entry); i++) if (nsim_map_key_match(&offmap->map, &nmap->entry[i], key)) return i; return -ENOENT; } static int nsim_map_alloc_elem(struct bpf_offloaded_map *offmap, unsigned int idx) { struct nsim_bpf_bound_map *nmap = offmap->dev_priv; nmap->entry[idx].key = kmalloc(offmap->map.key_size, GFP_KERNEL_ACCOUNT | __GFP_NOWARN); if (!nmap->entry[idx].key) return -ENOMEM; nmap->entry[idx].value = kmalloc(offmap->map.value_size, GFP_KERNEL_ACCOUNT | __GFP_NOWARN); if (!nmap->entry[idx].value) { kfree(nmap->entry[idx].key); nmap->entry[idx].key = NULL; return -ENOMEM; } return 0; } static int nsim_map_get_next_key(struct bpf_offloaded_map *offmap, void *key, void *next_key) { struct nsim_bpf_bound_map *nmap = offmap->dev_priv; int idx = -ENOENT; mutex_lock(&nmap->mutex); if (key) idx = nsim_map_key_find(offmap, key); if (idx == -ENOENT) idx = 0; else idx++; for (; idx < ARRAY_SIZE(nmap->entry); idx++) { if (nmap->entry[idx].key) { memcpy(next_key, nmap->entry[idx].key, offmap->map.key_size); break; } } mutex_unlock(&nmap->mutex); if (idx == ARRAY_SIZE(nmap->entry)) return -ENOENT; return 0; } static int nsim_map_lookup_elem(struct bpf_offloaded_map *offmap, void *key, void *value) { struct nsim_bpf_bound_map *nmap = offmap->dev_priv; int idx; mutex_lock(&nmap->mutex); idx = nsim_map_key_find(offmap, key); if (idx >= 0) memcpy(value, nmap->entry[idx].value, offmap->map.value_size); mutex_unlock(&nmap->mutex); return idx < 0 ? idx : 0; } static int nsim_map_update_elem(struct bpf_offloaded_map *offmap, void *key, void *value, u64 flags) { struct nsim_bpf_bound_map *nmap = offmap->dev_priv; int idx, err = 0; mutex_lock(&nmap->mutex); idx = nsim_map_key_find(offmap, key); if (idx < 0 && flags == BPF_EXIST) { err = idx; goto exit_unlock; } if (idx >= 0 && flags == BPF_NOEXIST) { err = -EEXIST; goto exit_unlock; } if (idx < 0) { for (idx = 0; idx < ARRAY_SIZE(nmap->entry); idx++) if (!nmap->entry[idx].key) break; if (idx == ARRAY_SIZE(nmap->entry)) { err = -E2BIG; goto exit_unlock; } err = nsim_map_alloc_elem(offmap, idx); if (err) goto exit_unlock; } memcpy(nmap->entry[idx].key, key, offmap->map.key_size); memcpy(nmap->entry[idx].value, value, offmap->map.value_size); exit_unlock: mutex_unlock(&nmap->mutex); return err; } static int nsim_map_delete_elem(struct bpf_offloaded_map *offmap, void *key) { struct nsim_bpf_bound_map *nmap = offmap->dev_priv; int idx; if (offmap->map.map_type == BPF_MAP_TYPE_ARRAY) return -EINVAL; mutex_lock(&nmap->mutex); idx = nsim_map_key_find(offmap, key); if (idx >= 0) { kfree(nmap->entry[idx].key); kfree(nmap->entry[idx].value); memset(&nmap->entry[idx], 0, sizeof(nmap->entry[idx])); } mutex_unlock(&nmap->mutex); return idx < 0 ? idx : 0; } static const struct bpf_map_dev_ops nsim_bpf_map_ops = { .map_get_next_key = nsim_map_get_next_key, .map_lookup_elem = nsim_map_lookup_elem, .map_update_elem = nsim_map_update_elem, .map_delete_elem = nsim_map_delete_elem, }; static int nsim_bpf_map_alloc(struct netdevsim *ns, struct bpf_offloaded_map *offmap) { struct nsim_bpf_bound_map *nmap; int i, err; if (WARN_ON(offmap->map.map_type != BPF_MAP_TYPE_ARRAY && offmap->map.map_type != BPF_MAP_TYPE_HASH)) return -EINVAL; if (offmap->map.max_entries > NSIM_BPF_MAX_KEYS) return -ENOMEM; if (offmap->map.map_flags) return -EINVAL; nmap = kzalloc(sizeof(*nmap), GFP_KERNEL_ACCOUNT); if (!nmap) return -ENOMEM; offmap->dev_priv = nmap; nmap->ns = ns; nmap->map = offmap; mutex_init(&nmap->mutex); if (offmap->map.map_type == BPF_MAP_TYPE_ARRAY) { for (i = 0; i < ARRAY_SIZE(nmap->entry); i++) { u32 *key; err = nsim_map_alloc_elem(offmap, i); if (err) goto err_free; key = nmap->entry[i].key; *key = i; memset(nmap->entry[i].value, 0, offmap->map.value_size); } } offmap->dev_ops = &nsim_bpf_map_ops; list_add_tail(&nmap->l, &ns->nsim_dev->bpf_bound_maps); return 0; err_free: while (--i >= 0) { kfree(nmap->entry[i].key); kfree(nmap->entry[i].value); } kfree(nmap); return err; } static void nsim_bpf_map_free(struct bpf_offloaded_map *offmap) { struct nsim_bpf_bound_map *nmap = offmap->dev_priv; unsigned int i; for (i = 0; i < ARRAY_SIZE(nmap->entry); i++) { kfree(nmap->entry[i].key); kfree(nmap->entry[i].value); } list_del_init(&nmap->l); mutex_destroy(&nmap->mutex); kfree(nmap); } int nsim_bpf(struct net_device *dev, struct netdev_bpf *bpf) { struct netdevsim *ns = netdev_priv(dev); int err; ASSERT_RTNL(); switch (bpf->command) { case XDP_SETUP_PROG: err = nsim_setup_prog_checks(ns, bpf); if (err) return err; return nsim_xdp_set_prog(ns, bpf, &ns->xdp); case XDP_SETUP_PROG_HW: err = nsim_setup_prog_hw_checks(ns, bpf); if (err) return err; return nsim_xdp_set_prog(ns, bpf, &ns->xdp_hw); case BPF_OFFLOAD_MAP_ALLOC: if (!ns->bpf_map_accept) return -EOPNOTSUPP; return nsim_bpf_map_alloc(ns, bpf->offmap); case BPF_OFFLOAD_MAP_FREE: nsim_bpf_map_free(bpf->offmap); return 0; default: return -EINVAL; } } int nsim_bpf_dev_init(struct nsim_dev *nsim_dev) { int err; INIT_LIST_HEAD(&nsim_dev->bpf_bound_progs); INIT_LIST_HEAD(&nsim_dev->bpf_bound_maps); nsim_dev->ddir_bpf_bound_progs = debugfs_create_dir("bpf_bound_progs", nsim_dev->ddir); if (IS_ERR(nsim_dev->ddir_bpf_bound_progs)) return PTR_ERR(nsim_dev->ddir_bpf_bound_progs); nsim_dev->bpf_dev = bpf_offload_dev_create(&nsim_bpf_dev_ops, nsim_dev); err = PTR_ERR_OR_ZERO(nsim_dev->bpf_dev); if (err) return err; nsim_dev->bpf_bind_accept = true; debugfs_create_bool("bpf_bind_accept", 0600, nsim_dev->ddir, &nsim_dev->bpf_bind_accept); debugfs_create_u32("bpf_bind_verifier_delay", 0600, nsim_dev->ddir, &nsim_dev->bpf_bind_verifier_delay); nsim_dev->bpf_bind_verifier_accept = true; debugfs_create_bool("bpf_bind_verifier_accept", 0600, nsim_dev->ddir, &nsim_dev->bpf_bind_verifier_accept); return 0; } void nsim_bpf_dev_exit(struct nsim_dev *nsim_dev) { WARN_ON(!list_empty(&nsim_dev->bpf_bound_progs)); WARN_ON(!list_empty(&nsim_dev->bpf_bound_maps)); bpf_offload_dev_destroy(nsim_dev->bpf_dev); } int nsim_bpf_init(struct netdevsim *ns) { struct dentry *ddir = ns->nsim_dev_port->ddir; int err; err = bpf_offload_dev_netdev_register(ns->nsim_dev->bpf_dev, ns->netdev); if (err) return err; debugfs_create_u32("bpf_offloaded_id", 0400, ddir, &ns->bpf_offloaded_id); ns->bpf_tc_accept = true; debugfs_create_bool("bpf_tc_accept", 0600, ddir, &ns->bpf_tc_accept); debugfs_create_bool("bpf_tc_non_bound_accept", 0600, ddir, &ns->bpf_tc_non_bound_accept); ns->bpf_xdpdrv_accept = true; debugfs_create_bool("bpf_xdpdrv_accept", 0600, ddir, &ns->bpf_xdpdrv_accept); ns->bpf_xdpoffload_accept = true; debugfs_create_bool("bpf_xdpoffload_accept", 0600, ddir, &ns->bpf_xdpoffload_accept); ns->bpf_map_accept = true; debugfs_create_bool("bpf_map_accept", 0600, ddir, &ns->bpf_map_accept); return 0; } void nsim_bpf_uninit(struct netdevsim *ns) { WARN_ON(ns->xdp.prog); WARN_ON(ns->xdp_hw.prog); WARN_ON(ns->bpf_offloaded); bpf_offload_dev_netdev_unregister(ns->nsim_dev->bpf_dev, ns->netdev); }
370 14 8 8 2 427 41 50 427 50 26 7 3 3 3 3 47 46 45 57 53 53 19 45 45 10 365 6 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef ARCH_X86_KVM_CPUID_H #define ARCH_X86_KVM_CPUID_H #include "reverse_cpuid.h" #include <asm/cpu.h> #include <asm/processor.h> #include <uapi/asm/kvm_para.h> extern u32 kvm_cpu_caps[NR_KVM_CPU_CAPS] __read_mostly; void kvm_set_cpu_caps(void); void kvm_update_cpuid_runtime(struct kvm_vcpu *vcpu); void kvm_update_pv_runtime(struct kvm_vcpu *vcpu); struct kvm_cpuid_entry2 *kvm_find_cpuid_entry_index(struct kvm_vcpu *vcpu, u32 function, u32 index); struct kvm_cpuid_entry2 *kvm_find_cpuid_entry(struct kvm_vcpu *vcpu, u32 function); int kvm_dev_ioctl_get_cpuid(struct kvm_cpuid2 *cpuid, struct kvm_cpuid_entry2 __user *entries, unsigned int type); int kvm_vcpu_ioctl_set_cpuid(struct kvm_vcpu *vcpu, struct kvm_cpuid *cpuid, struct kvm_cpuid_entry __user *entries); int kvm_vcpu_ioctl_set_cpuid2(struct kvm_vcpu *vcpu, struct kvm_cpuid2 *cpuid, struct kvm_cpuid_entry2 __user *entries); int kvm_vcpu_ioctl_get_cpuid2(struct kvm_vcpu *vcpu, struct kvm_cpuid2 *cpuid, struct kvm_cpuid_entry2 __user *entries); bool kvm_cpuid(struct kvm_vcpu *vcpu, u32 *eax, u32 *ebx, u32 *ecx, u32 *edx, bool exact_only); void __init kvm_init_xstate_sizes(void); u32 xstate_required_size(u64 xstate_bv, bool compacted); int cpuid_query_maxphyaddr(struct kvm_vcpu *vcpu); u64 kvm_vcpu_reserved_gpa_bits_raw(struct kvm_vcpu *vcpu); static inline int cpuid_maxphyaddr(struct kvm_vcpu *vcpu) { return vcpu->arch.maxphyaddr; } static inline bool kvm_vcpu_is_legal_gpa(struct kvm_vcpu *vcpu, gpa_t gpa) { return !(gpa & vcpu->arch.reserved_gpa_bits); } static inline bool kvm_vcpu_is_legal_aligned_gpa(struct kvm_vcpu *vcpu, gpa_t gpa, gpa_t alignment) { return IS_ALIGNED(gpa, alignment) && kvm_vcpu_is_legal_gpa(vcpu, gpa); } static inline bool page_address_valid(struct kvm_vcpu *vcpu, gpa_t gpa) { return kvm_vcpu_is_legal_aligned_gpa(vcpu, gpa, PAGE_SIZE); } static __always_inline void cpuid_entry_override(struct kvm_cpuid_entry2 *entry, unsigned int leaf) { u32 *reg = cpuid_entry_get_reg(entry, leaf * 32); BUILD_BUG_ON(leaf >= ARRAY_SIZE(kvm_cpu_caps)); *reg = kvm_cpu_caps[leaf]; } static __always_inline u32 *guest_cpuid_get_register(struct kvm_vcpu *vcpu, unsigned int x86_feature) { const struct cpuid_reg cpuid = x86_feature_cpuid(x86_feature); struct kvm_cpuid_entry2 *entry; entry = kvm_find_cpuid_entry_index(vcpu, cpuid.function, cpuid.index); if (!entry) return NULL; return __cpuid_entry_get_reg(entry, cpuid.reg); } static __always_inline bool guest_cpuid_has(struct kvm_vcpu *vcpu, unsigned int x86_feature) { u32 *reg; reg = guest_cpuid_get_register(vcpu, x86_feature); if (!reg) return false; return *reg & __feature_bit(x86_feature); } static __always_inline void guest_cpuid_clear(struct kvm_vcpu *vcpu, unsigned int x86_feature) { u32 *reg; reg = guest_cpuid_get_register(vcpu, x86_feature); if (reg) *reg &= ~__feature_bit(x86_feature); } static inline bool guest_cpuid_is_amd_compatible(struct kvm_vcpu *vcpu) { return vcpu->arch.is_amd_compatible; } static inline bool guest_cpuid_is_intel_compatible(struct kvm_vcpu *vcpu) { return !guest_cpuid_is_amd_compatible(vcpu); } static inline int guest_cpuid_family(struct kvm_vcpu *vcpu) { struct kvm_cpuid_entry2 *best; best = kvm_find_cpuid_entry(vcpu, 0x1); if (!best) return -1; return x86_family(best->eax); } static inline int guest_cpuid_model(struct kvm_vcpu *vcpu) { struct kvm_cpuid_entry2 *best; best = kvm_find_cpuid_entry(vcpu, 0x1); if (!best) return -1; return x86_model(best->eax); } static inline bool cpuid_model_is_consistent(struct kvm_vcpu *vcpu) { return boot_cpu_data.x86_model == guest_cpuid_model(vcpu); } static inline int guest_cpuid_stepping(struct kvm_vcpu *vcpu) { struct kvm_cpuid_entry2 *best; best = kvm_find_cpuid_entry(vcpu, 0x1); if (!best) return -1; return x86_stepping(best->eax); } static inline bool guest_has_spec_ctrl_msr(struct kvm_vcpu *vcpu) { return (guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL) || guest_cpuid_has(vcpu, X86_FEATURE_AMD_STIBP) || guest_cpuid_has(vcpu, X86_FEATURE_AMD_IBRS) || guest_cpuid_has(vcpu, X86_FEATURE_AMD_SSBD)); } static inline bool guest_has_pred_cmd_msr(struct kvm_vcpu *vcpu) { return (guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL) || guest_cpuid_has(vcpu, X86_FEATURE_AMD_IBPB) || guest_cpuid_has(vcpu, X86_FEATURE_SBPB)); } static inline bool supports_cpuid_fault(struct kvm_vcpu *vcpu) { return vcpu->arch.msr_platform_info & MSR_PLATFORM_INFO_CPUID_FAULT; } static inline bool cpuid_fault_enabled(struct kvm_vcpu *vcpu) { return vcpu->arch.msr_misc_features_enables & MSR_MISC_FEATURES_ENABLES_CPUID_FAULT; } static __always_inline void kvm_cpu_cap_clear(unsigned int x86_feature) { unsigned int x86_leaf = __feature_leaf(x86_feature); reverse_cpuid_check(x86_leaf); kvm_cpu_caps[x86_leaf] &= ~__feature_bit(x86_feature); } static __always_inline void kvm_cpu_cap_set(unsigned int x86_feature) { unsigned int x86_leaf = __feature_leaf(x86_feature); reverse_cpuid_check(x86_leaf); kvm_cpu_caps[x86_leaf] |= __feature_bit(x86_feature); } static __always_inline u32 kvm_cpu_cap_get(unsigned int x86_feature) { unsigned int x86_leaf = __feature_leaf(x86_feature); reverse_cpuid_check(x86_leaf); return kvm_cpu_caps[x86_leaf] & __feature_bit(x86_feature); } static __always_inline bool kvm_cpu_cap_has(unsigned int x86_feature) { return !!kvm_cpu_cap_get(x86_feature); } static __always_inline void kvm_cpu_cap_check_and_set(unsigned int x86_feature) { if (boot_cpu_has(x86_feature)) kvm_cpu_cap_set(x86_feature); } static __always_inline bool guest_pv_has(struct kvm_vcpu *vcpu, unsigned int kvm_feature) { if (!vcpu->arch.pv_cpuid.enforce) return true; return vcpu->arch.pv_cpuid.features & (1u << kvm_feature); } enum kvm_governed_features { #define KVM_GOVERNED_FEATURE(x) KVM_GOVERNED_##x, #include "governed_features.h" KVM_NR_GOVERNED_FEATURES }; static __always_inline int kvm_governed_feature_index(unsigned int x86_feature) { switch (x86_feature) { #define KVM_GOVERNED_FEATURE(x) case x: return KVM_GOVERNED_##x; #include "governed_features.h" default: return -1; } } static __always_inline bool kvm_is_governed_feature(unsigned int x86_feature) { return kvm_governed_feature_index(x86_feature) >= 0; } static __always_inline void kvm_governed_feature_set(struct kvm_vcpu *vcpu, unsigned int x86_feature) { BUILD_BUG_ON(!kvm_is_governed_feature(x86_feature)); __set_bit(kvm_governed_feature_index(x86_feature), vcpu->arch.governed_features.enabled); } static __always_inline void kvm_governed_feature_check_and_set(struct kvm_vcpu *vcpu, unsigned int x86_feature) { if (kvm_cpu_cap_has(x86_feature) && guest_cpuid_has(vcpu, x86_feature)) kvm_governed_feature_set(vcpu, x86_feature); } static __always_inline bool guest_can_use(struct kvm_vcpu *vcpu, unsigned int x86_feature) { BUILD_BUG_ON(!kvm_is_governed_feature(x86_feature)); return test_bit(kvm_governed_feature_index(x86_feature), vcpu->arch.governed_features.enabled); } static inline bool kvm_vcpu_is_legal_cr3(struct kvm_vcpu *vcpu, unsigned long cr3) { if (guest_can_use(vcpu, X86_FEATURE_LAM)) cr3 &= ~(X86_CR3_LAM_U48 | X86_CR3_LAM_U57); return kvm_vcpu_is_legal_gpa(vcpu, cr3); } #endif
248 1 247 248 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (C) 2011 IBM Corporation * * Author: * Mimi Zohar <zohar@us.ibm.com> */ #include <linux/xattr.h> #include <linux/evm.h> int posix_xattr_acl(const char *xattr) { int xattr_len = strlen(xattr); if ((strlen(XATTR_NAME_POSIX_ACL_ACCESS) == xattr_len) && (strncmp(XATTR_NAME_POSIX_ACL_ACCESS, xattr, xattr_len) == 0)) return 1; if ((strlen(XATTR_NAME_POSIX_ACL_DEFAULT) == xattr_len) && (strncmp(XATTR_NAME_POSIX_ACL_DEFAULT, xattr, xattr_len) == 0)) return 1; return 0; }
116 188 189 189 189 45 45 44 44 44 44 44 30 45 14 14 14 7 7 229 229 230 230 229 203 229 203 230 230 229 200 223 230 61 86 31 223 48 40 223 223 1 223 197 40 198 67 9 58 1 1 9 222 41 189 73 128 70 74 11 63 11 63 189 188 42 189 192 192 192 189 26 181 189 15 12 9 9 192 192 192 32 32 30 27 26 9 32 4 3 3 1 3 3 4 140 122 140 136 118 5 5 1 1 1 3 8 6 8 3 114 114 114 28 28 27 7 137 2 137 1 136 131 8 1 7 127 126 1 125 122 122 119 119 2 1 1 2 1 1 142 142 4 4 1 4 1 4 1 4 4 4 4 4 130 131 131 143 143 143 143 143 100 43 43 9 9 43 131 1 1 1 1 51 51 56 51 51 51 7 2 1 50 8 3 2 51 72 57 56 56 55 5 55 7 55 45 10 51 39 3 55 12 55 55 14 55 5 55 6 54 7 48 9 55 13 9 55 12 55 16 55 12 55 3 55 2 55 3 55 4 55 9 55 12 55 1 55 1 55 4 55 1 55 2 55 2 55 1 54 1 55 2 1 1 55 55 56 18 19 73 113 113 113 113 113 3 113 113 131 131 131 131 131 131 131 131 131 18 41 42 42 42 114 115 114 114 4 111 113 113 113 113 113 2 113 113 113 113 113 113 113 113 1 1 1 115 120 119 118 116 115 115 3 113 113 18 1 1 125 126 126 120 126 6 121 117 116 2 1 1 116 126 7 126 126 143 143 9 143 143 20 20 20 1 20 20 20 20 20 48 48 48 37 19 37 5 1 5 93 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 /* FUSE: Filesystem in Userspace Copyright (C) 2001-2008 Miklos Szeredi <miklos@szeredi.hu> This program can be distributed under the terms of the GNU GPL. See the file COPYING. */ #include "fuse_i.h" #include <linux/pagemap.h> #include <linux/slab.h> #include <linux/file.h> #include <linux/seq_file.h> #include <linux/init.h> #include <linux/module.h> #include <linux/moduleparam.h> #include <linux/fs_context.h> #include <linux/fs_parser.h> #include <linux/statfs.h> #include <linux/random.h> #include <linux/sched.h> #include <linux/exportfs.h> #include <linux/posix_acl.h> #include <linux/pid_namespace.h> #include <uapi/linux/magic.h> MODULE_AUTHOR("Miklos Szeredi <miklos@szeredi.hu>"); MODULE_DESCRIPTION("Filesystem in Userspace"); MODULE_LICENSE("GPL"); static struct kmem_cache *fuse_inode_cachep; struct list_head fuse_conn_list; DEFINE_MUTEX(fuse_mutex); static int set_global_limit(const char *val, const struct kernel_param *kp); unsigned int fuse_max_pages_limit = 256; unsigned max_user_bgreq; module_param_call(max_user_bgreq, set_global_limit, param_get_uint, &max_user_bgreq, 0644); __MODULE_PARM_TYPE(max_user_bgreq, "uint"); MODULE_PARM_DESC(max_user_bgreq, "Global limit for the maximum number of backgrounded requests an " "unprivileged user can set"); unsigned max_user_congthresh; module_param_call(max_user_congthresh, set_global_limit, param_get_uint, &max_user_congthresh, 0644); __MODULE_PARM_TYPE(max_user_congthresh, "uint"); MODULE_PARM_DESC(max_user_congthresh, "Global limit for the maximum congestion threshold an " "unprivileged user can set"); #define FUSE_DEFAULT_BLKSIZE 512 /** Maximum number of outstanding background requests */ #define FUSE_DEFAULT_MAX_BACKGROUND 12 /** Congestion starts at 75% of maximum */ #define FUSE_DEFAULT_CONGESTION_THRESHOLD (FUSE_DEFAULT_MAX_BACKGROUND * 3 / 4) #ifdef CONFIG_BLOCK static struct file_system_type fuseblk_fs_type; #endif struct fuse_forget_link *fuse_alloc_forget(void) { return kzalloc(sizeof(struct fuse_forget_link), GFP_KERNEL_ACCOUNT); } static struct fuse_submount_lookup *fuse_alloc_submount_lookup(void) { struct fuse_submount_lookup *sl; sl = kzalloc(sizeof(struct fuse_submount_lookup), GFP_KERNEL_ACCOUNT); if (!sl) return NULL; sl->forget = fuse_alloc_forget(); if (!sl->forget) goto out_free; return sl; out_free: kfree(sl); return NULL; } static struct inode *fuse_alloc_inode(struct super_block *sb) { struct fuse_inode *fi; fi = alloc_inode_sb(sb, fuse_inode_cachep, GFP_KERNEL); if (!fi) return NULL; fi->i_time = 0; fi->inval_mask = ~0; fi->nodeid = 0; fi->nlookup = 0; fi->attr_version = 0; fi->orig_ino = 0; fi->state = 0; fi->submount_lookup = NULL; mutex_init(&fi->mutex); spin_lock_init(&fi->lock); fi->forget = fuse_alloc_forget(); if (!fi->forget) goto out_free; if (IS_ENABLED(CONFIG_FUSE_DAX) && !fuse_dax_inode_alloc(sb, fi)) goto out_free_forget; if (IS_ENABLED(CONFIG_FUSE_PASSTHROUGH)) fuse_inode_backing_set(fi, NULL); return &fi->inode; out_free_forget: kfree(fi->forget); out_free: kmem_cache_free(fuse_inode_cachep, fi); return NULL; } static void fuse_free_inode(struct inode *inode) { struct fuse_inode *fi = get_fuse_inode(inode); mutex_destroy(&fi->mutex); kfree(fi->forget); #ifdef CONFIG_FUSE_DAX kfree(fi->dax); #endif if (IS_ENABLED(CONFIG_FUSE_PASSTHROUGH)) fuse_backing_put(fuse_inode_backing(fi)); kmem_cache_free(fuse_inode_cachep, fi); } static void fuse_cleanup_submount_lookup(struct fuse_conn *fc, struct fuse_submount_lookup *sl) { if (!refcount_dec_and_test(&sl->count)) return; fuse_queue_forget(fc, sl->forget, sl->nodeid, 1); sl->forget = NULL; kfree(sl); } static void fuse_evict_inode(struct inode *inode) { struct fuse_inode *fi = get_fuse_inode(inode); /* Will write inode on close/munmap and in all other dirtiers */ WARN_ON(inode->i_state & I_DIRTY_INODE); truncate_inode_pages_final(&inode->i_data); clear_inode(inode); if (inode->i_sb->s_flags & SB_ACTIVE) { struct fuse_conn *fc = get_fuse_conn(inode); if (FUSE_IS_DAX(inode)) fuse_dax_inode_cleanup(inode); if (fi->nlookup) { fuse_queue_forget(fc, fi->forget, fi->nodeid, fi->nlookup); fi->forget = NULL; } if (fi->submount_lookup) { fuse_cleanup_submount_lookup(fc, fi->submount_lookup); fi->submount_lookup = NULL; } /* * Evict of non-deleted inode may race with outstanding * LOOKUP/READDIRPLUS requests and result in inconsistency when * the request finishes. Deal with that here by bumping a * counter that can be compared to the starting value. */ if (inode->i_nlink > 0) atomic64_inc(&fc->evict_ctr); } if (S_ISREG(inode->i_mode) && !fuse_is_bad(inode)) { WARN_ON(fi->iocachectr != 0); WARN_ON(!list_empty(&fi->write_files)); WARN_ON(!list_empty(&fi->queued_writes)); } } static int fuse_reconfigure(struct fs_context *fsc) { struct super_block *sb = fsc->root->d_sb; sync_filesystem(sb); if (fsc->sb_flags & SB_MANDLOCK) return -EINVAL; return 0; } /* * ino_t is 32-bits on 32-bit arch. We have to squash the 64-bit value down * so that it will fit. */ static ino_t fuse_squash_ino(u64 ino64) { ino_t ino = (ino_t) ino64; if (sizeof(ino_t) < sizeof(u64)) ino ^= ino64 >> (sizeof(u64) - sizeof(ino_t)) * 8; return ino; } void fuse_change_attributes_common(struct inode *inode, struct fuse_attr *attr, struct fuse_statx *sx, u64 attr_valid, u32 cache_mask, u64 evict_ctr) { struct fuse_conn *fc = get_fuse_conn(inode); struct fuse_inode *fi = get_fuse_inode(inode); lockdep_assert_held(&fi->lock); /* * Clear basic stats from invalid mask. * * Don't do this if this is coming from a fuse_iget() call and there * might have been a racing evict which would've invalidated the result * if the attr_version would've been preserved. * * !evict_ctr -> this is create * fi->attr_version != 0 -> this is not a new inode * evict_ctr == fuse_get_evict_ctr() -> no evicts while during request */ if (!evict_ctr || fi->attr_version || evict_ctr == fuse_get_evict_ctr(fc)) set_mask_bits(&fi->inval_mask, STATX_BASIC_STATS, 0); fi->attr_version = atomic64_inc_return(&fc->attr_version); fi->i_time = attr_valid; inode->i_ino = fuse_squash_ino(attr->ino); inode->i_mode = (inode->i_mode & S_IFMT) | (attr->mode & 07777); set_nlink(inode, attr->nlink); inode->i_uid = make_kuid(fc->user_ns, attr->uid); inode->i_gid = make_kgid(fc->user_ns, attr->gid); inode->i_blocks = attr->blocks; /* Sanitize nsecs */ attr->atimensec = min_t(u32, attr->atimensec, NSEC_PER_SEC - 1); attr->mtimensec = min_t(u32, attr->mtimensec, NSEC_PER_SEC - 1); attr->ctimensec = min_t(u32, attr->ctimensec, NSEC_PER_SEC - 1); inode_set_atime(inode, attr->atime, attr->atimensec); /* mtime from server may be stale due to local buffered write */ if (!(cache_mask & STATX_MTIME)) { inode_set_mtime(inode, attr->mtime, attr->mtimensec); } if (!(cache_mask & STATX_CTIME)) { inode_set_ctime(inode, attr->ctime, attr->ctimensec); } if (sx) { /* Sanitize nsecs */ sx->btime.tv_nsec = min_t(u32, sx->btime.tv_nsec, NSEC_PER_SEC - 1); /* * Btime has been queried, cache is valid (whether or not btime * is available or not) so clear STATX_BTIME from inval_mask. * * Availability of the btime attribute is indicated in * FUSE_I_BTIME */ set_mask_bits(&fi->inval_mask, STATX_BTIME, 0); if (sx->mask & STATX_BTIME) { set_bit(FUSE_I_BTIME, &fi->state); fi->i_btime.tv_sec = sx->btime.tv_sec; fi->i_btime.tv_nsec = sx->btime.tv_nsec; } } if (attr->blksize != 0) inode->i_blkbits = ilog2(attr->blksize); else inode->i_blkbits = inode->i_sb->s_blocksize_bits; /* * Don't set the sticky bit in i_mode, unless we want the VFS * to check permissions. This prevents failures due to the * check in may_delete(). */ fi->orig_i_mode = inode->i_mode; if (!fc->default_permissions) inode->i_mode &= ~S_ISVTX; fi->orig_ino = attr->ino; /* * We are refreshing inode data and it is possible that another * client set suid/sgid or security.capability xattr. So clear * S_NOSEC. Ideally, we could have cleared it only if suid/sgid * was set or if security.capability xattr was set. But we don't * know if security.capability has been set or not. So clear it * anyway. Its less efficient but should be safe. */ inode->i_flags &= ~S_NOSEC; } u32 fuse_get_cache_mask(struct inode *inode) { struct fuse_conn *fc = get_fuse_conn(inode); if (!fc->writeback_cache || !S_ISREG(inode->i_mode)) return 0; return STATX_MTIME | STATX_CTIME | STATX_SIZE; } static void fuse_change_attributes_i(struct inode *inode, struct fuse_attr *attr, struct fuse_statx *sx, u64 attr_valid, u64 attr_version, u64 evict_ctr) { struct fuse_conn *fc = get_fuse_conn(inode); struct fuse_inode *fi = get_fuse_inode(inode); u32 cache_mask; loff_t oldsize; struct timespec64 old_mtime; spin_lock(&fi->lock); /* * In case of writeback_cache enabled, writes update mtime, ctime and * may update i_size. In these cases trust the cached value in the * inode. */ cache_mask = fuse_get_cache_mask(inode); if (cache_mask & STATX_SIZE) attr->size = i_size_read(inode); if (cache_mask & STATX_MTIME) { attr->mtime = inode_get_mtime_sec(inode); attr->mtimensec = inode_get_mtime_nsec(inode); } if (cache_mask & STATX_CTIME) { attr->ctime = inode_get_ctime_sec(inode); attr->ctimensec = inode_get_ctime_nsec(inode); } if ((attr_version != 0 && fi->attr_version > attr_version) || test_bit(FUSE_I_SIZE_UNSTABLE, &fi->state)) { spin_unlock(&fi->lock); return; } old_mtime = inode_get_mtime(inode); fuse_change_attributes_common(inode, attr, sx, attr_valid, cache_mask, evict_ctr); oldsize = inode->i_size; /* * In case of writeback_cache enabled, the cached writes beyond EOF * extend local i_size without keeping userspace server in sync. So, * attr->size coming from server can be stale. We cannot trust it. */ if (!(cache_mask & STATX_SIZE)) i_size_write(inode, attr->size); spin_unlock(&fi->lock); if (!cache_mask && S_ISREG(inode->i_mode)) { bool inval = false; if (oldsize != attr->size) { truncate_pagecache(inode, attr->size); if (!fc->explicit_inval_data) inval = true; } else if (fc->auto_inval_data) { struct timespec64 new_mtime = { .tv_sec = attr->mtime, .tv_nsec = attr->mtimensec, }; /* * Auto inval mode also checks and invalidates if mtime * has changed. */ if (!timespec64_equal(&old_mtime, &new_mtime)) inval = true; } if (inval) invalidate_inode_pages2(inode->i_mapping); } if (IS_ENABLED(CONFIG_FUSE_DAX)) fuse_dax_dontcache(inode, attr->flags); } void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr, struct fuse_statx *sx, u64 attr_valid, u64 attr_version) { fuse_change_attributes_i(inode, attr, sx, attr_valid, attr_version, 0); } static void fuse_init_submount_lookup(struct fuse_submount_lookup *sl, u64 nodeid) { sl->nodeid = nodeid; refcount_set(&sl->count, 1); } static void fuse_init_inode(struct inode *inode, struct fuse_attr *attr, struct fuse_conn *fc) { inode->i_mode = attr->mode & S_IFMT; inode->i_size = attr->size; inode_set_mtime(inode, attr->mtime, attr->mtimensec); inode_set_ctime(inode, attr->ctime, attr->ctimensec); if (S_ISREG(inode->i_mode)) { fuse_init_common(inode); fuse_init_file_inode(inode, attr->flags); } else if (S_ISDIR(inode->i_mode)) fuse_init_dir(inode); else if (S_ISLNK(inode->i_mode)) fuse_init_symlink(inode); else if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode) || S_ISFIFO(inode->i_mode) || S_ISSOCK(inode->i_mode)) { fuse_init_common(inode); init_special_inode(inode, inode->i_mode, new_decode_dev(attr->rdev)); } else BUG(); /* * Ensure that we don't cache acls for daemons without FUSE_POSIX_ACL * so they see the exact same behavior as before. */ if (!fc->posix_acl) inode->i_acl = inode->i_default_acl = ACL_DONT_CACHE; } static int fuse_inode_eq(struct inode *inode, void *_nodeidp) { u64 nodeid = *(u64 *) _nodeidp; if (get_node_id(inode) == nodeid) return 1; else return 0; } static int fuse_inode_set(struct inode *inode, void *_nodeidp) { u64 nodeid = *(u64 *) _nodeidp; get_fuse_inode(inode)->nodeid = nodeid; return 0; } struct inode *fuse_iget(struct super_block *sb, u64 nodeid, int generation, struct fuse_attr *attr, u64 attr_valid, u64 attr_version, u64 evict_ctr) { struct inode *inode; struct fuse_inode *fi; struct fuse_conn *fc = get_fuse_conn_super(sb); /* * Auto mount points get their node id from the submount root, which is * not a unique identifier within this filesystem. * * To avoid conflicts, do not place submount points into the inode hash * table. */ if (fc->auto_submounts && (attr->flags & FUSE_ATTR_SUBMOUNT) && S_ISDIR(attr->mode)) { struct fuse_inode *fi; inode = new_inode(sb); if (!inode) return NULL; fuse_init_inode(inode, attr, fc); fi = get_fuse_inode(inode); fi->nodeid = nodeid; fi->submount_lookup = fuse_alloc_submount_lookup(); if (!fi->submount_lookup) { iput(inode); return NULL; } /* Sets nlookup = 1 on fi->submount_lookup->nlookup */ fuse_init_submount_lookup(fi->submount_lookup, nodeid); inode->i_flags |= S_AUTOMOUNT; goto done; } retry: inode = iget5_locked(sb, nodeid, fuse_inode_eq, fuse_inode_set, &nodeid); if (!inode) return NULL; if ((inode->i_state & I_NEW)) { inode->i_flags |= S_NOATIME; if (!fc->writeback_cache || !S_ISREG(attr->mode)) inode->i_flags |= S_NOCMTIME; inode->i_generation = generation; fuse_init_inode(inode, attr, fc); unlock_new_inode(inode); } else if (fuse_stale_inode(inode, generation, attr)) { /* nodeid was reused, any I/O on the old inode should fail */ fuse_make_bad(inode); if (inode != d_inode(sb->s_root)) { remove_inode_hash(inode); iput(inode); goto retry; } } fi = get_fuse_inode(inode); spin_lock(&fi->lock); fi->nlookup++; spin_unlock(&fi->lock); done: fuse_change_attributes_i(inode, attr, NULL, attr_valid, attr_version, evict_ctr); return inode; } struct inode *fuse_ilookup(struct fuse_conn *fc, u64 nodeid, struct fuse_mount **fm) { struct fuse_mount *fm_iter; struct inode *inode; WARN_ON(!rwsem_is_locked(&fc->killsb)); list_for_each_entry(fm_iter, &fc->mounts, fc_entry) { if (!fm_iter->sb) continue; inode = ilookup5(fm_iter->sb, nodeid, fuse_inode_eq, &nodeid); if (inode) { if (fm) *fm = fm_iter; return inode; } } return NULL; } int fuse_reverse_inval_inode(struct fuse_conn *fc, u64 nodeid, loff_t offset, loff_t len) { struct fuse_inode *fi; struct inode *inode; pgoff_t pg_start; pgoff_t pg_end; inode = fuse_ilookup(fc, nodeid, NULL); if (!inode) return -ENOENT; fi = get_fuse_inode(inode); spin_lock(&fi->lock); fi->attr_version = atomic64_inc_return(&fc->attr_version); spin_unlock(&fi->lock); fuse_invalidate_attr(inode); forget_all_cached_acls(inode); if (offset >= 0) { pg_start = offset >> PAGE_SHIFT; if (len <= 0) pg_end = -1; else pg_end = (offset + len - 1) >> PAGE_SHIFT; invalidate_inode_pages2_range(inode->i_mapping, pg_start, pg_end); } iput(inode); return 0; } bool fuse_lock_inode(struct inode *inode) { bool locked = false; if (!get_fuse_conn(inode)->parallel_dirops) { mutex_lock(&get_fuse_inode(inode)->mutex); locked = true; } return locked; } void fuse_unlock_inode(struct inode *inode, bool locked) { if (locked) mutex_unlock(&get_fuse_inode(inode)->mutex); } static void fuse_umount_begin(struct super_block *sb) { struct fuse_conn *fc = get_fuse_conn_super(sb); if (fc->no_force_umount) return; fuse_abort_conn(fc); // Only retire block-device-based superblocks. if (sb->s_bdev != NULL) retire_super(sb); } static void fuse_send_destroy(struct fuse_mount *fm) { if (fm->fc->conn_init) { FUSE_ARGS(args); args.opcode = FUSE_DESTROY; args.force = true; args.nocreds = true; fuse_simple_request(fm, &args); } } static void convert_fuse_statfs(struct kstatfs *stbuf, struct fuse_kstatfs *attr) { stbuf->f_type = FUSE_SUPER_MAGIC; stbuf->f_bsize = attr->bsize; stbuf->f_frsize = attr->frsize; stbuf->f_blocks = attr->blocks; stbuf->f_bfree = attr->bfree; stbuf->f_bavail = attr->bavail; stbuf->f_files = attr->files; stbuf->f_ffree = attr->ffree; stbuf->f_namelen = attr->namelen; /* fsid is left zero */ } static int fuse_statfs(struct dentry *dentry, struct kstatfs *buf) { struct super_block *sb = dentry->d_sb; struct fuse_mount *fm = get_fuse_mount_super(sb); FUSE_ARGS(args); struct fuse_statfs_out outarg; int err; if (!fuse_allow_current_process(fm->fc)) { buf->f_type = FUSE_SUPER_MAGIC; return 0; } memset(&outarg, 0, sizeof(outarg)); args.in_numargs = 0; args.opcode = FUSE_STATFS; args.nodeid = get_node_id(d_inode(dentry)); args.out_numargs = 1; args.out_args[0].size = sizeof(outarg); args.out_args[0].value = &outarg; err = fuse_simple_request(fm, &args); if (!err) convert_fuse_statfs(buf, &outarg.st); return err; } static struct fuse_sync_bucket *fuse_sync_bucket_alloc(void) { struct fuse_sync_bucket *bucket; bucket = kzalloc(sizeof(*bucket), GFP_KERNEL | __GFP_NOFAIL); if (bucket) { init_waitqueue_head(&bucket->waitq); /* Initial active count */ atomic_set(&bucket->count, 1); } return bucket; } static void fuse_sync_fs_writes(struct fuse_conn *fc) { struct fuse_sync_bucket *bucket, *new_bucket; int count; new_bucket = fuse_sync_bucket_alloc(); spin_lock(&fc->lock); bucket = rcu_dereference_protected(fc->curr_bucket, 1); count = atomic_read(&bucket->count); WARN_ON(count < 1); /* No outstanding writes? */ if (count == 1) { spin_unlock(&fc->lock); kfree(new_bucket); return; } /* * Completion of new bucket depends on completion of this bucket, so add * one more count. */ atomic_inc(&new_bucket->count); rcu_assign_pointer(fc->curr_bucket, new_bucket); spin_unlock(&fc->lock); /* * Drop initial active count. At this point if all writes in this and * ancestor buckets complete, the count will go to zero and this task * will be woken up. */ atomic_dec(&bucket->count); wait_event(bucket->waitq, atomic_read(&bucket->count) == 0); /* Drop temp count on descendant bucket */ fuse_sync_bucket_dec(new_bucket); kfree_rcu(bucket, rcu); } static int fuse_sync_fs(struct super_block *sb, int wait) { struct fuse_mount *fm = get_fuse_mount_super(sb); struct fuse_conn *fc = fm->fc; struct fuse_syncfs_in inarg; FUSE_ARGS(args); int err; /* * Userspace cannot handle the wait == 0 case. Avoid a * gratuitous roundtrip. */ if (!wait) return 0; /* The filesystem is being unmounted. Nothing to do. */ if (!sb->s_root) return 0; if (!fc->sync_fs) return 0; fuse_sync_fs_writes(fc); memset(&inarg, 0, sizeof(inarg)); args.in_numargs = 1; args.in_args[0].size = sizeof(inarg); args.in_args[0].value = &inarg; args.opcode = FUSE_SYNCFS; args.nodeid = get_node_id(sb->s_root->d_inode); args.out_numargs = 0; err = fuse_simple_request(fm, &args); if (err == -ENOSYS) { fc->sync_fs = 0; err = 0; } return err; } enum { OPT_SOURCE, OPT_SUBTYPE, OPT_FD, OPT_ROOTMODE, OPT_USER_ID, OPT_GROUP_ID, OPT_DEFAULT_PERMISSIONS, OPT_ALLOW_OTHER, OPT_MAX_READ, OPT_BLKSIZE, OPT_ERR }; static const struct fs_parameter_spec fuse_fs_parameters[] = { fsparam_string ("source", OPT_SOURCE), fsparam_u32 ("fd", OPT_FD), fsparam_u32oct ("rootmode", OPT_ROOTMODE), fsparam_uid ("user_id", OPT_USER_ID), fsparam_gid ("group_id", OPT_GROUP_ID), fsparam_flag ("default_permissions", OPT_DEFAULT_PERMISSIONS), fsparam_flag ("allow_other", OPT_ALLOW_OTHER), fsparam_u32 ("max_read", OPT_MAX_READ), fsparam_u32 ("blksize", OPT_BLKSIZE), fsparam_string ("subtype", OPT_SUBTYPE), {} }; static int fuse_parse_param(struct fs_context *fsc, struct fs_parameter *param) { struct fs_parse_result result; struct fuse_fs_context *ctx = fsc->fs_private; int opt; kuid_t kuid; kgid_t kgid; if (fsc->purpose == FS_CONTEXT_FOR_RECONFIGURE) { /* * Ignore options coming from mount(MS_REMOUNT) for backward * compatibility. */ if (fsc->oldapi) return 0; return invalfc(fsc, "No changes allowed in reconfigure"); } opt = fs_parse(fsc, fuse_fs_parameters, param, &result); if (opt < 0) return opt; switch (opt) { case OPT_SOURCE: if (fsc->source) return invalfc(fsc, "Multiple sources specified"); fsc->source = param->string; param->string = NULL; break; case OPT_SUBTYPE: if (ctx->subtype) return invalfc(fsc, "Multiple subtypes specified"); ctx->subtype = param->string; param->string = NULL; return 0; case OPT_FD: ctx->fd = result.uint_32; ctx->fd_present = true; break; case OPT_ROOTMODE: if (!fuse_valid_type(result.uint_32)) return invalfc(fsc, "Invalid rootmode"); ctx->rootmode = result.uint_32; ctx->rootmode_present = true; break; case OPT_USER_ID: kuid = result.uid; /* * The requested uid must be representable in the * filesystem's idmapping. */ if (!kuid_has_mapping(fsc->user_ns, kuid)) return invalfc(fsc, "Invalid user_id"); ctx->user_id = kuid; ctx->user_id_present = true; break; case OPT_GROUP_ID: kgid = result.gid; /* * The requested gid must be representable in the * filesystem's idmapping. */ if (!kgid_has_mapping(fsc->user_ns, kgid)) return invalfc(fsc, "Invalid group_id"); ctx->group_id = kgid; ctx->group_id_present = true; break; case OPT_DEFAULT_PERMISSIONS: ctx->default_permissions = true; break; case OPT_ALLOW_OTHER: ctx->allow_other = true; break; case OPT_MAX_READ: ctx->max_read = result.uint_32; break; case OPT_BLKSIZE: if (!ctx->is_bdev) return invalfc(fsc, "blksize only supported for fuseblk"); ctx->blksize = result.uint_32; break; default: return -EINVAL; } return 0; } static void fuse_free_fsc(struct fs_context *fsc) { struct fuse_fs_context *ctx = fsc->fs_private; if (ctx) { kfree(ctx->subtype); kfree(ctx); } } static int fuse_show_options(struct seq_file *m, struct dentry *root) { struct super_block *sb = root->d_sb; struct fuse_conn *fc = get_fuse_conn_super(sb); if (fc->legacy_opts_show) { seq_printf(m, ",user_id=%u", from_kuid_munged(fc->user_ns, fc->user_id)); seq_printf(m, ",group_id=%u", from_kgid_munged(fc->user_ns, fc->group_id)); if (fc->default_permissions) seq_puts(m, ",default_permissions"); if (fc->allow_other) seq_puts(m, ",allow_other"); if (fc->max_read != ~0) seq_printf(m, ",max_read=%u", fc->max_read); if (sb->s_bdev && sb->s_blocksize != FUSE_DEFAULT_BLKSIZE) seq_printf(m, ",blksize=%lu", sb->s_blocksize); } #ifdef CONFIG_FUSE_DAX if (fc->dax_mode == FUSE_DAX_ALWAYS) seq_puts(m, ",dax=always"); else if (fc->dax_mode == FUSE_DAX_NEVER) seq_puts(m, ",dax=never"); else if (fc->dax_mode == FUSE_DAX_INODE_USER) seq_puts(m, ",dax=inode"); #endif return 0; } static void fuse_iqueue_init(struct fuse_iqueue *fiq, const struct fuse_iqueue_ops *ops, void *priv) { memset(fiq, 0, sizeof(struct fuse_iqueue)); spin_lock_init(&fiq->lock); init_waitqueue_head(&fiq->waitq); INIT_LIST_HEAD(&fiq->pending); INIT_LIST_HEAD(&fiq->interrupts); fiq->forget_list_tail = &fiq->forget_list_head; fiq->connected = 1; fiq->ops = ops; fiq->priv = priv; } static void fuse_pqueue_init(struct fuse_pqueue *fpq) { unsigned int i; spin_lock_init(&fpq->lock); for (i = 0; i < FUSE_PQ_HASH_SIZE; i++) INIT_LIST_HEAD(&fpq->processing[i]); INIT_LIST_HEAD(&fpq->io); fpq->connected = 1; } void fuse_conn_init(struct fuse_conn *fc, struct fuse_mount *fm, struct user_namespace *user_ns, const struct fuse_iqueue_ops *fiq_ops, void *fiq_priv) { memset(fc, 0, sizeof(*fc)); spin_lock_init(&fc->lock); spin_lock_init(&fc->bg_lock); init_rwsem(&fc->killsb); refcount_set(&fc->count, 1); atomic_set(&fc->dev_count, 1); init_waitqueue_head(&fc->blocked_waitq); fuse_iqueue_init(&fc->iq, fiq_ops, fiq_priv); INIT_LIST_HEAD(&fc->bg_queue); INIT_LIST_HEAD(&fc->entry); INIT_LIST_HEAD(&fc->devices); atomic_set(&fc->num_waiting, 0); fc->max_background = FUSE_DEFAULT_MAX_BACKGROUND; fc->congestion_threshold = FUSE_DEFAULT_CONGESTION_THRESHOLD; atomic64_set(&fc->khctr, 0); fc->polled_files = RB_ROOT; fc->blocked = 0; fc->initialized = 0; fc->connected = 1; atomic64_set(&fc->attr_version, 1); atomic64_set(&fc->evict_ctr, 1); get_random_bytes(&fc->scramble_key, sizeof(fc->scramble_key)); fc->pid_ns = get_pid_ns(task_active_pid_ns(current)); fc->user_ns = get_user_ns(user_ns); fc->max_pages = FUSE_DEFAULT_MAX_PAGES_PER_REQ; fc->max_pages_limit = fuse_max_pages_limit; if (IS_ENABLED(CONFIG_FUSE_PASSTHROUGH)) fuse_backing_files_init(fc); INIT_LIST_HEAD(&fc->mounts); list_add(&fm->fc_entry, &fc->mounts); fm->fc = fc; } EXPORT_SYMBOL_GPL(fuse_conn_init); static void delayed_release(struct rcu_head *p) { struct fuse_conn *fc = container_of(p, struct fuse_conn, rcu); put_user_ns(fc->user_ns); fc->release(fc); } void fuse_conn_put(struct fuse_conn *fc) { if (refcount_dec_and_test(&fc->count)) { struct fuse_iqueue *fiq = &fc->iq; struct fuse_sync_bucket *bucket; if (IS_ENABLED(CONFIG_FUSE_DAX)) fuse_dax_conn_free(fc);