Total coverage: 319476 (17%)of 1905728
2 2 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 // SPDX-License-Identifier: GPL-2.0-only /* * drivers/acpi/device_pm.c - ACPI device power management routines. * * Copyright (C) 2012, Intel Corp. * Author: Rafael J. Wysocki <rafael.j.wysocki@intel.com> * * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ * * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ #define pr_fmt(fmt) "PM: " fmt #include <linux/acpi.h> #include <linux/export.h> #include <linux/mutex.h> #include <linux/pm_qos.h> #include <linux/pm_domain.h> #include <linux/pm_runtime.h> #include <linux/suspend.h> #include "fan.h" #include "internal.h" /** * acpi_power_state_string - String representation of ACPI device power state. * @state: ACPI device power state to return the string representation of. */ const char *acpi_power_state_string(int state) { switch (state) { case ACPI_STATE_D0: return "D0"; case ACPI_STATE_D1: return "D1"; case ACPI_STATE_D2: return "D2"; case ACPI_STATE_D3_HOT: return "D3hot"; case ACPI_STATE_D3_COLD: return "D3cold"; default: return "(unknown)"; } } static int acpi_dev_pm_explicit_get(struct acpi_device *device, int *state) { unsigned long long psc; acpi_status status; status = acpi_evaluate_integer(device->handle, "_PSC", NULL, &psc); if (ACPI_FAILURE(status)) return -ENODEV; *state = psc; return 0; } /** * acpi_device_get_power - Get power state of an ACPI device. * @device: Device to get the power state of. * @state: Place to store the power state of the device. * * This function does not update the device's power.state field, but it may * update its parent's power.state field (when the parent's power state is * unknown and the device's power state turns out to be D0). * * Also, it does not update power resource reference counters to ensure that * the power state returned by it will be persistent and it may return a power * state shallower than previously set by acpi_device_set_power() for @device * (if that power state depends on any power resources). */ int acpi_device_get_power(struct acpi_device *device, int *state) { int result = ACPI_STATE_UNKNOWN; struct acpi_device *parent; int error; if (!device || !state) return -EINVAL; parent = acpi_dev_parent(device); if (!device->flags.power_manageable) { /* TBD: Non-recursive algorithm for walking up hierarchy. */ *state = parent ? parent->power.state : ACPI_STATE_D0; goto out; } /* * Get the device's power state from power resources settings and _PSC, * if available. */ if (device->power.flags.power_resources) { error = acpi_power_get_inferred_state(device, &result); if (error) return error; } if (device->power.flags.explicit_get) { int psc; error = acpi_dev_pm_explicit_get(device, &psc); if (error) return error; /* * The power resources settings may indicate a power state * shallower than the actual power state of the device, because * the same power resources may be referenced by other devices. * * For systems predating ACPI 4.0 we assume that D3hot is the * deepest state that can be supported. */ if (psc > result && psc < ACPI_STATE_D3_COLD) result = psc; else if (result == ACPI_STATE_UNKNOWN) result = psc > ACPI_STATE_D2 ? ACPI_STATE_D3_HOT : psc; } /* * If we were unsure about the device parent's power state up to this * point, the fact that the device is in D0 implies that the parent has * to be in D0 too, except if ignore_parent is set. */ if (!device->power.flags.ignore_parent && parent && parent->power.state == ACPI_STATE_UNKNOWN && result == ACPI_STATE_D0) parent->power.state = ACPI_STATE_D0; *state = result; out: acpi_handle_debug(device->handle, "Power state: %s\n", acpi_power_state_string(*state)); return 0; } static int acpi_dev_pm_explicit_set(struct acpi_device *adev, int state) { if (adev->power.states[state].flags.explicit_set) { char method[5] = { '_', 'P', 'S', '0' + state, '\0' }; acpi_status status; status = acpi_evaluate_object(adev->handle, method, NULL, NULL); if (ACPI_FAILURE(status)) return -ENODEV; } return 0; } /** * acpi_device_set_power - Set power state of an ACPI device. * @device: Device to set the power state of. * @state: New power state to set. * * Callers must ensure that the device is power manageable before using this * function. */ int acpi_device_set_power(struct acpi_device *device, int state) { int target_state = state; int result = 0; if (!device || !device->flags.power_manageable || (state < ACPI_STATE_D0) || (state > ACPI_STATE_D3_COLD)) return -EINVAL; acpi_handle_debug(device->handle, "Power state change: %s -> %s\n", acpi_power_state_string(device->power.state), acpi_power_state_string(state)); /* Make sure this is a valid target state */ /* There is a special case for D0 addressed below. */ if (state > ACPI_STATE_D0 && state == device->power.state) goto no_change; if (state == ACPI_STATE_D3_COLD) { /* * For transitions to D3cold we need to execute _PS3 and then * possibly drop references to the power resources in use. */ state = ACPI_STATE_D3_HOT; /* If D3cold is not supported, use D3hot as the target state. */ if (!device->power.states[ACPI_STATE_D3_COLD].flags.valid) target_state = state; } else if (!device->power.states[state].flags.valid) { acpi_handle_debug(device->handle, "Power state %s not supported\n", acpi_power_state_string(state)); return -ENODEV; } if (!device->power.flags.ignore_parent) { struct acpi_device *parent; parent = acpi_dev_parent(device); if (parent && state < parent->power.state) { acpi_handle_debug(device->handle, "Cannot transition to %s for parent in %s\n", acpi_power_state_string(state), acpi_power_state_string(parent->power.state)); return -ENODEV; } } /* * Transition Power * ---------------- * In accordance with ACPI 6, _PSx is executed before manipulating power * resources, unless the target state is D0, in which case _PS0 is * supposed to be executed after turning the power resources on. */ if (state > ACPI_STATE_D0) { /* * According to ACPI 6, devices cannot go from lower-power * (deeper) states to higher-power (shallower) states. */ if (state < device->power.state) { acpi_handle_debug(device->handle, "Cannot transition from %s to %s\n", acpi_power_state_string(device->power.state), acpi_power_state_string(state)); return -ENODEV; } /* * If the device goes from D3hot to D3cold, _PS3 has been * evaluated for it already, so skip it in that case. */ if (device->power.state < ACPI_STATE_D3_HOT) { result = acpi_dev_pm_explicit_set(device, state); if (result) goto end; } if (device->power.flags.power_resources) result = acpi_power_transition(device, target_state); } else { int cur_state = device->power.state; if (device->power.flags.power_resources) { result = acpi_power_transition(device, ACPI_STATE_D0); if (result) goto end; } if (cur_state == ACPI_STATE_D0) { int psc; /* Nothing to do here if _PSC is not present. */ if (!device->power.flags.explicit_get) goto no_change; /* * The power state of the device was set to D0 last * time, but that might have happened before a * system-wide transition involving the platform * firmware, so it may be necessary to evaluate _PS0 * for the device here. However, use extra care here * and evaluate _PSC to check the device's current power * state, and only invoke _PS0 if the evaluation of _PSC * is successful and it returns a power state different * from D0. */ result = acpi_dev_pm_explicit_get(device, &psc); if (result || psc == ACPI_STATE_D0) goto no_change; } result = acpi_dev_pm_explicit_set(device, ACPI_STATE_D0); } end: if (result) { acpi_handle_debug(device->handle, "Failed to change power state to %s\n", acpi_power_state_string(target_state)); } else { device->power.state = target_state; acpi_handle_debug(device->handle, "Power state changed to %s\n", acpi_power_state_string(target_state)); } return result; no_change: acpi_handle_debug(device->handle, "Already in %s\n", acpi_power_state_string(state)); return 0; } EXPORT_SYMBOL(acpi_device_set_power); int acpi_bus_set_power(acpi_handle handle, int state) { struct acpi_device *device = acpi_fetch_acpi_dev(handle); if (device) return acpi_device_set_power(device, state); return -ENODEV; } EXPORT_SYMBOL(acpi_bus_set_power); int acpi_bus_init_power(struct acpi_device *device) { int state; int result; if (!device) return -EINVAL; device->power.state = ACPI_STATE_UNKNOWN; if (!acpi_device_is_present(device)) { device->flags.initialized = false; return -ENXIO; } result = acpi_device_get_power(device, &state); if (result) return result; if (state < ACPI_STATE_D3_COLD && device->power.flags.power_resources) { /* Reference count the power resources. */ result = acpi_power_on_resources(device, state); if (result) return result; if (state == ACPI_STATE_D0) { /* * If _PSC is not present and the state inferred from * power resources appears to be D0, it still may be * necessary to execute _PS0 at this point, because * another device using the same power resources may * have been put into D0 previously and that's why we * see D0 here. */ result = acpi_dev_pm_explicit_set(device, state); if (result) return result; } } else if (state == ACPI_STATE_UNKNOWN) { /* * No power resources and missing _PSC? Cross fingers and make * it D0 in hope that this is what the BIOS put the device into. * [We tried to force D0 here by executing _PS0, but that broke * Toshiba P870-303 in a nasty way.] */ state = ACPI_STATE_D0; } device->power.state = state; return 0; } /** * acpi_device_fix_up_power - Force device with missing _PSC into D0. * @device: Device object whose power state is to be fixed up. * * Devices without power resources and _PSC, but having _PS0 and _PS3 defined, * are assumed to be put into D0 by the BIOS. However, in some cases that may * not be the case and this function should be used then. */ int acpi_device_fix_up_power(struct acpi_device *device) { int ret = 0; if (!device->power.flags.power_resources && !device->power.flags.explicit_get && device->power.state == ACPI_STATE_D0) ret = acpi_dev_pm_explicit_set(device, ACPI_STATE_D0); return ret; } EXPORT_SYMBOL_GPL(acpi_device_fix_up_power); static int fix_up_power_if_applicable(struct acpi_device *adev, void *not_used) { if (adev->status.present && adev->status.enabled) acpi_device_fix_up_power(adev); return 0; } /** * acpi_device_fix_up_power_extended - Force device and its children into D0. * @adev: Parent device object whose power state is to be fixed up. * * Call acpi_device_fix_up_power() for @adev and its children so long as they * are reported as present and enabled. */ void acpi_device_fix_up_power_extended(struct acpi_device *adev) { acpi_device_fix_up_power(adev); acpi_dev_for_each_child(adev, fix_up_power_if_applicable, NULL); } EXPORT_SYMBOL_GPL(acpi_device_fix_up_power_extended); /** * acpi_device_fix_up_power_children - Force a device's children into D0. * @adev: Parent device object whose children's power state is to be fixed up. * * Call acpi_device_fix_up_power() for @adev's children so long as they * are reported as present and enabled. */ void acpi_device_fix_up_power_children(struct acpi_device *adev) { acpi_dev_for_each_child(adev, fix_up_power_if_applicable, NULL); } EXPORT_SYMBOL_GPL(acpi_device_fix_up_power_children); int acpi_device_update_power(struct acpi_device *device, int *state_p) { int state; int result; if (device->power.state == ACPI_STATE_UNKNOWN) { result = acpi_bus_init_power(device); if (!result && state_p) *state_p = device->power.state; return result; } result = acpi_device_get_power(device, &state); if (result) return result; if (state == ACPI_STATE_UNKNOWN) { state = ACPI_STATE_D0; result = acpi_device_set_power(device, state); if (result) return result; } else { if (device->power.flags.power_resources) { /* * We don't need to really switch the state, bu we need * to update the power resources' reference counters. */ result = acpi_power_transition(device, state); if (result) return result; } device->power.state = state; } if (state_p) *state_p = state; return 0; } EXPORT_SYMBOL_GPL(acpi_device_update_power); int acpi_bus_update_power(acpi_handle handle, int *state_p) { struct acpi_device *device = acpi_fetch_acpi_dev(handle); if (device) return acpi_device_update_power(device, state_p); return -ENODEV; } EXPORT_SYMBOL_GPL(acpi_bus_update_power); bool acpi_bus_power_manageable(acpi_handle handle) { struct acpi_device *device = acpi_fetch_acpi_dev(handle); return device && device->flags.power_manageable; } EXPORT_SYMBOL(acpi_bus_power_manageable); static int acpi_power_up_if_adr_present(struct acpi_device *adev, void *not_used) { if (!(adev->flags.power_manageable && adev->pnp.type.bus_address)) return 0; acpi_handle_debug(adev->handle, "Power state: %s\n", acpi_power_state_string(adev->power.state)); if (adev->power.state == ACPI_STATE_D3_COLD) return acpi_device_set_power(adev, ACPI_STATE_D0); return 0; } /** * acpi_dev_power_up_children_with_adr - Power up childres with valid _ADR * @adev: Parent ACPI device object. * * Change the power states of the direct children of @adev that are in D3cold * and hold valid _ADR objects to D0 in order to allow bus (e.g. PCI) * enumeration code to access them. */ void acpi_dev_power_up_children_with_adr(struct acpi_device *adev) { acpi_dev_for_each_child(adev, acpi_power_up_if_adr_present, NULL); } /** * acpi_dev_power_state_for_wake - Deepest power state for wakeup signaling * @adev: ACPI companion of the target device. * * Evaluate _S0W for @adev and return the value produced by it or return * ACPI_STATE_UNKNOWN on errors (including _S0W not present). */ u8 acpi_dev_power_state_for_wake(struct acpi_device *adev) { unsigned long long state; acpi_status status; status = acpi_evaluate_integer(adev->handle, "_S0W", NULL, &state); if (ACPI_FAILURE(status)) return ACPI_STATE_UNKNOWN; return state; } #ifdef CONFIG_PM static DEFINE_MUTEX(acpi_pm_notifier_lock); static DEFINE_MUTEX(acpi_pm_notifier_install_lock); void acpi_pm_wakeup_event(struct device *dev) { pm_wakeup_dev_event(dev, 0, acpi_s2idle_wakeup()); } EXPORT_SYMBOL_GPL(acpi_pm_wakeup_event); static void acpi_pm_notify_handler(acpi_handle handle, u32 val, void *not_used) { struct acpi_device *adev; if (val != ACPI_NOTIFY_DEVICE_WAKE) return; acpi_handle_debug(handle, "Wake notify\n"); adev = acpi_get_acpi_dev(handle); if (!adev) return; mutex_lock(&acpi_pm_notifier_lock); if (adev->wakeup.flags.notifier_present) { pm_wakeup_ws_event(adev->wakeup.ws, 0, acpi_s2idle_wakeup()); if (adev->wakeup.context.func) { acpi_handle_debug(handle, "Running %pS for %s\n", adev->wakeup.context.func, dev_name(adev->wakeup.context.dev)); adev->wakeup.context.func(&adev->wakeup.context); } } mutex_unlock(&acpi_pm_notifier_lock); acpi_put_acpi_dev(adev); } /** * acpi_add_pm_notifier - Register PM notify handler for given ACPI device. * @adev: ACPI device to add the notify handler for. * @dev: Device to generate a wakeup event for while handling the notification. * @func: Work function to execute when handling the notification. * * NOTE: @adev need not be a run-wake or wakeup device to be a valid source of * PM wakeup events. For example, wakeup events may be generated for bridges * if one of the devices below the bridge is signaling wakeup, even if the * bridge itself doesn't have a wakeup GPE associated with it. */ acpi_status acpi_add_pm_notifier(struct acpi_device *adev, struct device *dev, void (*func)(struct acpi_device_wakeup_context *context)) { acpi_status status = AE_ALREADY_EXISTS; if (!dev && !func) return AE_BAD_PARAMETER; mutex_lock(&acpi_pm_notifier_install_lock); if (adev->wakeup.flags.notifier_present) goto out; status = acpi_install_notify_handler(adev->handle, ACPI_SYSTEM_NOTIFY, acpi_pm_notify_handler, NULL); if (ACPI_FAILURE(status)) goto out; mutex_lock(&acpi_pm_notifier_lock); adev->wakeup.ws = wakeup_source_register(dev, dev_name(&adev->dev)); adev->wakeup.context.dev = dev; adev->wakeup.context.func = func; adev->wakeup.flags.notifier_present = true; mutex_unlock(&acpi_pm_notifier_lock); out: mutex_unlock(&acpi_pm_notifier_install_lock); return status; } /** * acpi_remove_pm_notifier - Unregister PM notifier from given ACPI device. * @adev: ACPI device to remove the notifier from. */ acpi_status acpi_remove_pm_notifier(struct acpi_device *adev) { acpi_status status = AE_BAD_PARAMETER; mutex_lock(&acpi_pm_notifier_install_lock); if (!adev->wakeup.flags.notifier_present) goto out; status = acpi_remove_notify_handler(adev->handle, ACPI_SYSTEM_NOTIFY, acpi_pm_notify_handler); if (ACPI_FAILURE(status)) goto out; mutex_lock(&acpi_pm_notifier_lock); adev->wakeup.context.func = NULL; adev->wakeup.context.dev = NULL; wakeup_source_unregister(adev->wakeup.ws); adev->wakeup.flags.notifier_present = false; mutex_unlock(&acpi_pm_notifier_lock); out: mutex_unlock(&acpi_pm_notifier_install_lock); return status; } bool acpi_bus_can_wakeup(acpi_handle handle) { struct acpi_device *device = acpi_fetch_acpi_dev(handle); return device && device->wakeup.flags.valid; } EXPORT_SYMBOL(acpi_bus_can_wakeup); bool acpi_pm_device_can_wakeup(struct device *dev) { struct acpi_device *adev = ACPI_COMPANION(dev); return adev ? acpi_device_can_wakeup(adev) : false; } /** * acpi_dev_pm_get_state - Get preferred power state of ACPI device. * @dev: Device whose preferred target power state to return. * @adev: ACPI device node corresponding to @dev. * @target_state: System state to match the resultant device state. * @d_min_p: Location to store the highest power state available to the device. * @d_max_p: Location to store the lowest power state available to the device. * * Find the lowest power (highest number) and highest power (lowest number) ACPI * device power states that the device can be in while the system is in the * state represented by @target_state. Store the integer numbers representing * those stats in the memory locations pointed to by @d_max_p and @d_min_p, * respectively. * * Callers must ensure that @dev and @adev are valid pointers and that @adev * actually corresponds to @dev before using this function. * * Returns 0 on success or -ENODATA when one of the ACPI methods fails or * returns a value that doesn't make sense. The memory locations pointed to by * @d_max_p and @d_min_p are only modified on success. */ static int acpi_dev_pm_get_state(struct device *dev, struct acpi_device *adev, u32 target_state, int *d_min_p, int *d_max_p) { char method[] = { '_', 'S', '0' + target_state, 'D', '\0' }; acpi_handle handle = adev->handle; unsigned long long ret; int d_min, d_max; bool wakeup = false; bool has_sxd = false; acpi_status status; /* * If the system state is S0, the lowest power state the device can be * in is D3cold, unless the device has _S0W and is supposed to signal * wakeup, in which case the return value of _S0W has to be used as the * lowest power state available to the device. */ d_min = ACPI_STATE_D0; d_max = ACPI_STATE_D3_COLD; /* * If present, _SxD methods return the minimum D-state (highest power * state) we can use for the corresponding S-states. Otherwise, the * minimum D-state is D0 (ACPI 3.x). */ if (target_state > ACPI_STATE_S0) { /* * We rely on acpi_evaluate_integer() not clobbering the integer * provided if AE_NOT_FOUND is returned. */ ret = d_min; status = acpi_evaluate_integer(handle, method, NULL, &ret); if ((ACPI_FAILURE(status) && status != AE_NOT_FOUND) || ret > ACPI_STATE_D3_COLD) return -ENODATA; /* * We need to handle legacy systems where D3hot and D3cold are * the same and 3 is returned in both cases, so fall back to * D3cold if D3hot is not a valid state. */ if (!adev->power.states[ret].flags.valid) { if (ret == ACPI_STATE_D3_HOT) ret = ACPI_STATE_D3_COLD; else return -ENODATA; } if (status == AE_OK) has_sxd = true; d_min = ret; wakeup = device_may_wakeup(dev) && adev->wakeup.flags.valid && adev->wakeup.sleep_state >= target_state; } else if (device_may_wakeup(dev) && dev->power.wakeirq) { /* * The ACPI subsystem doesn't manage the wake bit for IRQs * defined with ExclusiveAndWake and SharedAndWake. Instead we * expect them to be managed via the PM subsystem. Drivers * should call dev_pm_set_wake_irq to register an IRQ as a wake * source. * * If a device has a wake IRQ attached we need to check the * _S0W method to get the correct wake D-state. Otherwise we * end up putting the device into D3Cold which will more than * likely disable wake functionality. */ wakeup = true; } else { /* ACPI GPE is specified in _PRW. */ wakeup = adev->wakeup.flags.valid; } /* * If _PRW says we can wake up the system from the target sleep state, * the D-state returned by _SxD is sufficient for that (we assume a * wakeup-aware driver if wake is set). Still, if _SxW exists * (ACPI 3.x), it should return the maximum (lowest power) D-state that * can wake the system. _S0W may be valid, too. */ if (wakeup) { method[3] = 'W'; status = acpi_evaluate_integer(handle, method, NULL, &ret); if (status == AE_NOT_FOUND) { /* No _SxW. In this case, the ACPI spec says that we * must not go into any power state deeper than the * value returned from _SxD. */ if (has_sxd && target_state > ACPI_STATE_S0) d_max = d_min; } else if (ACPI_SUCCESS(status) && ret <= ACPI_STATE_D3_COLD) { /* Fall back to D3cold if ret is not a valid state. */ if (!adev->power.states[ret].flags.valid) ret = ACPI_STATE_D3_COLD; d_max = ret > d_min ? ret : d_min; } else { return -ENODATA; } } if (d_min_p) *d_min_p = d_min; if (d_max_p) *d_max_p = d_max; return 0; } /** * acpi_pm_device_sleep_state - Get preferred power state of ACPI device. * @dev: Device whose preferred target power state to return. * @d_min_p: Location to store the upper limit of the allowed states range. * @d_max_in: Deepest low-power state to take into consideration. * Return value: Preferred power state of the device on success, -ENODEV * if there's no 'struct acpi_device' for @dev, -EINVAL if @d_max_in is * incorrect, or -ENODATA on ACPI method failure. * * The caller must ensure that @dev is valid before using this function. */ int acpi_pm_device_sleep_state(struct device *dev, int *d_min_p, int d_max_in) { struct acpi_device *adev; int ret, d_min, d_max; if (d_max_in < ACPI_STATE_D0 || d_max_in > ACPI_STATE_D3_COLD) return -EINVAL; if (d_max_in > ACPI_STATE_D2) { enum pm_qos_flags_status stat; stat = dev_pm_qos_flags(dev, PM_QOS_FLAG_NO_POWER_OFF); if (stat == PM_QOS_FLAGS_ALL) d_max_in = ACPI_STATE_D2; } adev = ACPI_COMPANION(dev); if (!adev) { dev_dbg(dev, "ACPI companion missing in %s!\n", __func__); return -ENODEV; } ret = acpi_dev_pm_get_state(dev, adev, acpi_target_system_state(), &d_min, &d_max); if (ret) return ret; if (d_max_in < d_min) return -EINVAL; if (d_max > d_max_in) { for (d_max = d_max_in; d_max > d_min; d_max--) { if (adev->power.states[d_max].flags.valid) break; } } if (d_min_p) *d_min_p = d_min; return d_max; } EXPORT_SYMBOL(acpi_pm_device_sleep_state); /** * acpi_pm_notify_work_func - ACPI devices wakeup notification work function. * @context: Device wakeup context. */ static void acpi_pm_notify_work_func(struct acpi_device_wakeup_context *context) { struct device *dev = context->dev; if (dev) { pm_wakeup_event(dev, 0); pm_request_resume(dev); } } static DEFINE_MUTEX(acpi_wakeup_lock); static int __acpi_device_wakeup_enable(struct acpi_device *adev, u32 target_state) { struct acpi_device_wakeup *wakeup = &adev->wakeup; acpi_status status; int error = 0; mutex_lock(&acpi_wakeup_lock); /* * If the device wakeup power is already enabled, disable it and enable * it again in case it depends on the configuration of subordinate * devices and the conditions have changed since it was enabled last * time. */ if (wakeup->enable_count > 0) acpi_disable_wakeup_device_power(adev); error = acpi_enable_wakeup_device_power(adev, target_state); if (error) { if (wakeup->enable_count > 0) { acpi_disable_gpe(wakeup->gpe_device, wakeup->gpe_number); wakeup->enable_count = 0; } goto out; } if (wakeup->enable_count > 0) goto inc; status = acpi_enable_gpe(wakeup->gpe_device, wakeup->gpe_number); if (ACPI_FAILURE(status)) { acpi_disable_wakeup_device_power(adev); error = -EIO; goto out; } acpi_handle_debug(adev->handle, "GPE%2X enabled for wakeup\n", (unsigned int)wakeup->gpe_number); inc: if (wakeup->enable_count < INT_MAX) wakeup->enable_count++; else acpi_handle_info(adev->handle, "Wakeup enable count out of bounds!\n"); out: mutex_unlock(&acpi_wakeup_lock); return error; } /** * acpi_device_wakeup_enable - Enable wakeup functionality for device. * @adev: ACPI device to enable wakeup functionality for. * @target_state: State the system is transitioning into. * * Enable the GPE associated with @adev so that it can generate wakeup signals * for the device in response to external (remote) events and enable wakeup * power for it. * * Callers must ensure that @adev is a valid ACPI device node before executing * this function. */ static int acpi_device_wakeup_enable(struct acpi_device *adev, u32 target_state) { return __acpi_device_wakeup_enable(adev, target_state); } /** * acpi_device_wakeup_disable - Disable wakeup functionality for device. * @adev: ACPI device to disable wakeup functionality for. * * Disable the GPE associated with @adev and disable wakeup power for it. * * Callers must ensure that @adev is a valid ACPI device node before executing * this function. */ static void acpi_device_wakeup_disable(struct acpi_device *adev) { struct acpi_device_wakeup *wakeup = &adev->wakeup; mutex_lock(&acpi_wakeup_lock); if (!wakeup->enable_count) goto out; acpi_disable_gpe(wakeup->gpe_device, wakeup->gpe_number); acpi_disable_wakeup_device_power(adev); wakeup->enable_count--; out: mutex_unlock(&acpi_wakeup_lock); } /** * acpi_pm_set_device_wakeup - Enable/disable remote wakeup for given device. * @dev: Device to enable/disable to generate wakeup events. * @enable: Whether to enable or disable the wakeup functionality. */ int acpi_pm_set_device_wakeup(struct device *dev, bool enable) { struct acpi_device *adev; int error; adev = ACPI_COMPANION(dev); if (!adev) { dev_dbg(dev, "ACPI companion missing in %s!\n", __func__); return -ENODEV; } if (!acpi_device_can_wakeup(adev)) return -EINVAL; if (!enable) { acpi_device_wakeup_disable(adev); dev_dbg(dev, "Wakeup disabled by ACPI\n"); return 0; } error = __acpi_device_wakeup_enable(adev, acpi_target_system_state()); if (!error) dev_dbg(dev, "Wakeup enabled by ACPI\n"); return error; } EXPORT_SYMBOL_GPL(acpi_pm_set_device_wakeup); /** * acpi_dev_pm_low_power - Put ACPI device into a low-power state. * @dev: Device to put into a low-power state. * @adev: ACPI device node corresponding to @dev. * @system_state: System state to choose the device state for. */ static int acpi_dev_pm_low_power(struct device *dev, struct acpi_device *adev, u32 system_state) { int ret, state; if (!acpi_device_power_manageable(adev)) return 0; ret = acpi_dev_pm_get_state(dev, adev, system_state, NULL, &state); return ret ? ret : acpi_device_set_power(adev, state); } /** * acpi_dev_pm_full_power - Put ACPI device into the full-power state. * @adev: ACPI device node to put into the full-power state. */ static int acpi_dev_pm_full_power(struct acpi_device *adev) { return acpi_device_power_manageable(adev) ? acpi_device_set_power(adev, ACPI_STATE_D0) : 0; } /** * acpi_dev_suspend - Put device into a low-power state using ACPI. * @dev: Device to put into a low-power state. * @wakeup: Whether or not to enable wakeup for the device. * * Put the given device into a low-power state using the standard ACPI * mechanism. Set up remote wakeup if desired, choose the state to put the * device into (this checks if remote wakeup is expected to work too), and set * the power state of the device. */ int acpi_dev_suspend(struct device *dev, bool wakeup) { struct acpi_device *adev = ACPI_COMPANION(dev); u32 target_state = acpi_target_system_state(); int error; if (!adev) return 0; if (wakeup && acpi_device_can_wakeup(adev)) { error = acpi_device_wakeup_enable(adev, target_state); if (error) return -EAGAIN; } else { wakeup = false; } error = acpi_dev_pm_low_power(dev, adev, target_state); if (error && wakeup) acpi_device_wakeup_disable(adev); return error; } EXPORT_SYMBOL_GPL(acpi_dev_suspend); /** * acpi_dev_resume - Put device into the full-power state using ACPI. * @dev: Device to put into the full-power state. * * Put the given device into the full-power state using the standard ACPI * mechanism. Set the power state of the device to ACPI D0 and disable wakeup. */ int acpi_dev_resume(struct device *dev) { struct acpi_device *adev = ACPI_COMPANION(dev); int error; if (!adev) return 0; error = acpi_dev_pm_full_power(adev); acpi_device_wakeup_disable(adev); return error; } EXPORT_SYMBOL_GPL(acpi_dev_resume); /** * acpi_subsys_runtime_suspend - Suspend device using ACPI. * @dev: Device to suspend. * * Carry out the generic runtime suspend procedure for @dev and use ACPI to put * it into a runtime low-power state. */ int acpi_subsys_runtime_suspend(struct device *dev) { int ret = pm_generic_runtime_suspend(dev); return ret ? ret : acpi_dev_suspend(dev, true); } EXPORT_SYMBOL_GPL(acpi_subsys_runtime_suspend); /** * acpi_subsys_runtime_resume - Resume device using ACPI. * @dev: Device to Resume. * * Use ACPI to put the given device into the full-power state and carry out the * generic runtime resume procedure for it. */ int acpi_subsys_runtime_resume(struct device *dev) { int ret = acpi_dev_resume(dev); return ret ? ret : pm_generic_runtime_resume(dev); } EXPORT_SYMBOL_GPL(acpi_subsys_runtime_resume); #ifdef CONFIG_PM_SLEEP static bool acpi_dev_needs_resume(struct device *dev, struct acpi_device *adev) { u32 sys_target = acpi_target_system_state(); int ret, state; if (!pm_runtime_suspended(dev) || !adev || (adev->wakeup.flags.valid && device_may_wakeup(dev) != !!adev->wakeup.prepare_count)) return true; if (sys_target == ACPI_STATE_S0) return false; if (adev->power.flags.dsw_present) return true; ret = acpi_dev_pm_get_state(dev, adev, sys_target, NULL, &state); if (ret) return true; return state != adev->power.state; } /** * acpi_subsys_prepare - Prepare device for system transition to a sleep state. * @dev: Device to prepare. */ int acpi_subsys_prepare(struct device *dev) { struct acpi_device *adev = ACPI_COMPANION(dev); dev_pm_set_strict_midlayer(dev, true); if (dev->driver && dev->driver->pm && dev->driver->pm->prepare) { int ret = dev->driver->pm->prepare(dev); if (ret < 0) return ret; if (!ret && dev_pm_test_driver_flags(dev, DPM_FLAG_SMART_PREPARE)) return 0; } return !acpi_dev_needs_resume(dev, adev); } EXPORT_SYMBOL_GPL(acpi_subsys_prepare); /** * acpi_subsys_complete - Finalize device's resume during system resume. * @dev: Device to handle. */ void acpi_subsys_complete(struct device *dev) { pm_generic_complete(dev); /* * If the device had been runtime-suspended before the system went into * the sleep state it is going out of and it has never been resumed till * now, resume it in case the firmware powered it up. */ if (pm_runtime_suspended(dev) && pm_resume_via_firmware()) pm_request_resume(dev); dev_pm_set_strict_midlayer(dev, false); } EXPORT_SYMBOL_GPL(acpi_subsys_complete); /** * acpi_subsys_suspend - Run the device driver's suspend callback. * @dev: Device to handle. * * Follow PCI and resume devices from runtime suspend before running their * system suspend callbacks, unless the driver can cope with runtime-suspended * devices during system suspend and there are no ACPI-specific reasons for * resuming them. */ int acpi_subsys_suspend(struct device *dev) { if (!dev_pm_smart_suspend(dev) || acpi_dev_needs_resume(dev, ACPI_COMPANION(dev))) pm_runtime_resume(dev); return pm_generic_suspend(dev); } EXPORT_SYMBOL_GPL(acpi_subsys_suspend); /** * acpi_subsys_suspend_late - Suspend device using ACPI. * @dev: Device to suspend. * * Carry out the generic late suspend procedure for @dev and use ACPI to put * it into a low-power state during system transition into a sleep state. */ int acpi_subsys_suspend_late(struct device *dev) { int ret; if (dev_pm_skip_suspend(dev)) return 0; ret = pm_generic_suspend_late(dev); return ret ? ret : acpi_dev_suspend(dev, device_may_wakeup(dev)); } EXPORT_SYMBOL_GPL(acpi_subsys_suspend_late); /** * acpi_subsys_suspend_noirq - Run the device driver's "noirq" suspend callback. * @dev: Device to suspend. */ int acpi_subsys_suspend_noirq(struct device *dev) { int ret; if (dev_pm_skip_suspend(dev)) return 0; ret = pm_generic_suspend_noirq(dev); if (ret) return ret; /* * If the target system sleep state is suspend-to-idle, it is sufficient * to check whether or not the device's wakeup settings are good for * runtime PM. Otherwise, the pm_resume_via_firmware() check will cause * acpi_subsys_complete() to take care of fixing up the device's state * anyway, if need be. */ if (device_can_wakeup(dev) && !device_may_wakeup(dev)) dev->power.may_skip_resume = false; return 0; } EXPORT_SYMBOL_GPL(acpi_subsys_suspend_noirq); /** * acpi_subsys_resume_noirq - Run the device driver's "noirq" resume callback. * @dev: Device to handle. */ static int acpi_subsys_resume_noirq(struct device *dev) { if (dev_pm_skip_resume(dev)) return 0; return pm_generic_resume_noirq(dev); } /** * acpi_subsys_resume_early - Resume device using ACPI. * @dev: Device to Resume. * * Use ACPI to put the given device into the full-power state and carry out the * generic early resume procedure for it during system transition into the * working state, but only do that if device either defines early resume * handler, or does not define power operations at all. Otherwise powering up * of the device is postponed to the normal resume phase. */ static int acpi_subsys_resume_early(struct device *dev) { const struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL; int ret; if (dev_pm_skip_resume(dev)) return 0; if (pm && !pm->resume_early) { dev_dbg(dev, "Postponing ACPI PM to normal resume stage\n"); return 0; } ret = acpi_dev_resume(dev); return ret ? ret : pm_generic_resume_early(dev); } /** * acpi_subsys_resume - Resume device using ACPI. * @dev: Device to Resume. * * Use ACPI to put the given device into the full-power state if it has not been * powered up during early resume phase, and carry out the generic resume * procedure for it during system transition into the working state. */ static int acpi_subsys_resume(struct device *dev) { const struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL; int ret = 0; if (!dev_pm_skip_resume(dev) && pm && !pm->resume_early) { dev_dbg(dev, "Applying postponed ACPI PM\n"); ret = acpi_dev_resume(dev); } return ret ? ret : pm_generic_resume(dev); } /** * acpi_subsys_freeze - Run the device driver's freeze callback. * @dev: Device to handle. */ int acpi_subsys_freeze(struct device *dev) { /* * Resume all runtime-suspended devices before creating a snapshot * image of system memory, because the restore kernel generally cannot * be expected to always handle them consistently and they need to be * put into the runtime-active metastate during system resume anyway, * so it is better to ensure that the state saved in the image will be * always consistent with that. */ pm_runtime_resume(dev); return pm_generic_freeze(dev); } EXPORT_SYMBOL_GPL(acpi_subsys_freeze); /** * acpi_subsys_restore_early - Restore device using ACPI. * @dev: Device to restore. */ int acpi_subsys_restore_early(struct device *dev) { int ret = acpi_dev_resume(dev); return ret ? ret : pm_generic_restore_early(dev); } EXPORT_SYMBOL_GPL(acpi_subsys_restore_early); /** * acpi_subsys_poweroff - Run the device driver's poweroff callback. * @dev: Device to handle. * * Follow PCI and resume devices from runtime suspend before running their * system poweroff callbacks, unless the driver can cope with runtime-suspended * devices during system suspend and there are no ACPI-specific reasons for * resuming them. */ int acpi_subsys_poweroff(struct device *dev) { if (!dev_pm_smart_suspend(dev) || acpi_dev_needs_resume(dev, ACPI_COMPANION(dev))) pm_runtime_resume(dev); return pm_generic_poweroff(dev); } EXPORT_SYMBOL_GPL(acpi_subsys_poweroff); /** * acpi_subsys_poweroff_late - Run the device driver's poweroff callback. * @dev: Device to handle. * * Carry out the generic late poweroff procedure for @dev and use ACPI to put * it into a low-power state during system transition into a sleep state. */ static int acpi_subsys_poweroff_late(struct device *dev) { int ret; if (dev_pm_skip_suspend(dev)) return 0; ret = pm_generic_poweroff_late(dev); if (ret) return ret; return acpi_dev_suspend(dev, device_may_wakeup(dev)); } /** * acpi_subsys_poweroff_noirq - Run the driver's "noirq" poweroff callback. * @dev: Device to suspend. */ static int acpi_subsys_poweroff_noirq(struct device *dev) { if (dev_pm_skip_suspend(dev)) return 0; return pm_generic_poweroff_noirq(dev); } #endif /* CONFIG_PM_SLEEP */ static void acpi_dev_pm_detach(struct device *dev, bool power_off); static struct dev_pm_domain acpi_general_pm_domain = { .ops = { .runtime_suspend = acpi_subsys_runtime_suspend, .runtime_resume = acpi_subsys_runtime_resume, #ifdef CONFIG_PM_SLEEP .prepare = acpi_subsys_prepare, .complete = acpi_subsys_complete, .suspend = acpi_subsys_suspend, .resume = acpi_subsys_resume, .suspend_late = acpi_subsys_suspend_late, .suspend_noirq = acpi_subsys_suspend_noirq, .resume_noirq = acpi_subsys_resume_noirq, .resume_early = acpi_subsys_resume_early, .freeze = acpi_subsys_freeze, .poweroff = acpi_subsys_poweroff, .poweroff_late = acpi_subsys_poweroff_late, .poweroff_noirq = acpi_subsys_poweroff_noirq, .restore_early = acpi_subsys_restore_early, #endif }, .detach = acpi_dev_pm_detach, }; /** * acpi_dev_pm_detach - Remove ACPI power management from the device. * @dev: Device to take care of. * @power_off: Whether or not to try to remove power from the device. * * Remove the device from the general ACPI PM domain and remove its wakeup * notifier. If @power_off is set, additionally remove power from the device if * possible. * * Callers must ensure proper synchronization of this function with power * management callbacks. */ static void acpi_dev_pm_detach(struct device *dev, bool power_off) { struct acpi_device *adev = ACPI_COMPANION(dev); if (adev && dev->pm_domain == &acpi_general_pm_domain) { dev_pm_domain_set(dev, NULL); acpi_remove_pm_notifier(adev); if (power_off) { /* * If the device's PM QoS resume latency limit or flags * have been exposed to user space, they have to be * hidden at this point, so that they don't affect the * choice of the low-power state to put the device into. */ dev_pm_qos_hide_latency_limit(dev); dev_pm_qos_hide_flags(dev); acpi_device_wakeup_disable(adev); acpi_dev_pm_low_power(dev, adev, ACPI_STATE_S0); } } } /** * acpi_dev_pm_attach - Prepare device for ACPI power management. * @dev: Device to prepare. * @power_on: Whether or not to power on the device. * * If @dev has a valid ACPI handle that has a valid struct acpi_device object * attached to it, install a wakeup notification handler for the device and * add it to the general ACPI PM domain. If @power_on is set, the device will * be put into the ACPI D0 state before the function returns. * * This assumes that the @dev's bus type uses generic power management callbacks * (or doesn't use any power management callbacks at all). * * Callers must ensure proper synchronization of this function with power * management callbacks. */ int acpi_dev_pm_attach(struct device *dev, bool power_on) { /* * Skip devices whose ACPI companions match the device IDs below, * because they require special power management handling incompatible * with the generic ACPI PM domain. */ static const struct acpi_device_id special_pm_ids[] = { ACPI_FAN_DEVICE_IDS, {} }; struct acpi_device *adev = ACPI_COMPANION(dev); if (!adev || !acpi_match_device_ids(adev, special_pm_ids)) return 0; /* * Skip devices whose ACPI companions don't support power management and * don't have a wakeup GPE. */ if (!acpi_device_power_manageable(adev) && !acpi_device_can_wakeup(adev)) { dev_dbg(dev, "No ACPI power management or wakeup GPE\n"); return 0; } /* * Only attach the power domain to the first device if the * companion is shared by multiple. This is to prevent doing power * management twice. */ if (!acpi_device_is_first_physical_node(adev, dev)) return 0; acpi_add_pm_notifier(adev, dev, acpi_pm_notify_work_func); dev_pm_domain_set(dev, &acpi_general_pm_domain); if (power_on) { acpi_dev_pm_full_power(adev); acpi_device_wakeup_disable(adev); } return 1; } EXPORT_SYMBOL_GPL(acpi_dev_pm_attach); /** * acpi_storage_d3 - Check if D3 should be used in the suspend path * @dev: Device to check * * Return %true if the platform firmware wants @dev to be programmed * into D3hot or D3cold (if supported) in the suspend path, or %false * when there is no specific preference. On some platforms, if this * hint is ignored, @dev may remain unresponsive after suspending the * platform as a whole. * * Although the property has storage in the name it actually is * applied to the PCIe slot and plugging in a non-storage device the * same platform restrictions will likely apply. */ bool acpi_storage_d3(struct device *dev) { struct acpi_device *adev = ACPI_COMPANION(dev); u8 val; if (force_storage_d3()) return true; if (!adev) return false; if (fwnode_property_read_u8(acpi_fwnode_handle(adev), "StorageD3Enable", &val)) return false; return val == 1; } EXPORT_SYMBOL_GPL(acpi_storage_d3); /** * acpi_dev_state_d0 - Tell if the device is in D0 power state * @dev: Physical device the ACPI power state of which to check * * On a system without ACPI, return true. On a system with ACPI, return true if * the current ACPI power state of the device is D0, or false otherwise. * * Note that the power state of a device is not well-defined after it has been * passed to acpi_device_set_power() and before that function returns, so it is * not valid to ask for the ACPI power state of the device in that time frame. * * This function is intended to be used in a driver's probe or remove * function. See Documentation/firmware-guide/acpi/non-d0-probe.rst for * more information. */ bool acpi_dev_state_d0(struct device *dev) { struct acpi_device *adev = ACPI_COMPANION(dev); if (!adev) return true; return adev->power.state == ACPI_STATE_D0; } EXPORT_SYMBOL_GPL(acpi_dev_state_d0); #endif /* CONFIG_PM */
7 7 6 2 3 3 2 4 6 1 1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 // SPDX-License-Identifier: GPL-2.0-only /* * Marvell NFC-over-USB driver: USB interface related functions * * Copyright (C) 2014, Marvell International Ltd. */ #include <linux/module.h> #include <linux/usb.h> #include <linux/nfc.h> #include <net/nfc/nci.h> #include <net/nfc/nci_core.h> #include "nfcmrvl.h" static struct usb_device_id nfcmrvl_table[] = { { USB_DEVICE_AND_INTERFACE_INFO(0x1286, 0x2046, USB_CLASS_VENDOR_SPEC, 4, 1) }, { } /* Terminating entry */ }; MODULE_DEVICE_TABLE(usb, nfcmrvl_table); #define NFCMRVL_USB_BULK_RUNNING 1 #define NFCMRVL_USB_SUSPENDING 2 struct nfcmrvl_usb_drv_data { struct usb_device *udev; struct usb_interface *intf; unsigned long flags; struct work_struct waker; struct usb_anchor tx_anchor; struct usb_anchor bulk_anchor; struct usb_anchor deferred; int tx_in_flight; /* protects tx_in_flight */ spinlock_t txlock; struct usb_endpoint_descriptor *bulk_tx_ep; struct usb_endpoint_descriptor *bulk_rx_ep; int suspend_count; struct nfcmrvl_private *priv; }; static int nfcmrvl_inc_tx(struct nfcmrvl_usb_drv_data *drv_data) { unsigned long flags; int rv; spin_lock_irqsave(&drv_data->txlock, flags); rv = test_bit(NFCMRVL_USB_SUSPENDING, &drv_data->flags); if (!rv) drv_data->tx_in_flight++; spin_unlock_irqrestore(&drv_data->txlock, flags); return rv; } static void nfcmrvl_bulk_complete(struct urb *urb) { struct nfcmrvl_usb_drv_data *drv_data = urb->context; int err; dev_dbg(&drv_data->udev->dev, "urb %p status %d count %d\n", urb, urb->status, urb->actual_length); if (!test_bit(NFCMRVL_NCI_RUNNING, &drv_data->flags)) return; if (!urb->status) { struct sk_buff *skb; skb = nci_skb_alloc(drv_data->priv->ndev, urb->actual_length, GFP_ATOMIC); if (!skb) { nfc_err(&drv_data->udev->dev, "failed to alloc mem\n"); } else { skb_put_data(skb, urb->transfer_buffer, urb->actual_length); if (nfcmrvl_nci_recv_frame(drv_data->priv, skb) < 0) nfc_err(&drv_data->udev->dev, "corrupted Rx packet\n"); } } if (!test_bit(NFCMRVL_USB_BULK_RUNNING, &drv_data->flags)) return; usb_anchor_urb(urb, &drv_data->bulk_anchor); usb_mark_last_busy(drv_data->udev); err = usb_submit_urb(urb, GFP_ATOMIC); if (err) { /* -EPERM: urb is being killed; * -ENODEV: device got disconnected */ if (err != -EPERM && err != -ENODEV) nfc_err(&drv_data->udev->dev, "urb %p failed to resubmit (%d)\n", urb, -err); usb_unanchor_urb(urb); } } static int nfcmrvl_submit_bulk_urb(struct nfcmrvl_usb_drv_data *drv_data, gfp_t mem_flags) { struct urb *urb; unsigned char *buf; unsigned int pipe; int err, size = NFCMRVL_NCI_MAX_EVENT_SIZE; if (!drv_data->bulk_rx_ep) return -ENODEV; urb = usb_alloc_urb(0, mem_flags); if (!urb) return -ENOMEM; buf = kmalloc(size, mem_flags); if (!buf) { usb_free_urb(urb); return -ENOMEM; } pipe = usb_rcvbulkpipe(drv_data->udev, drv_data->bulk_rx_ep->bEndpointAddress); usb_fill_bulk_urb(urb, drv_data->udev, pipe, buf, size, nfcmrvl_bulk_complete, drv_data); urb->transfer_flags |= URB_FREE_BUFFER; usb_mark_last_busy(drv_data->udev); usb_anchor_urb(urb, &drv_data->bulk_anchor); err = usb_submit_urb(urb, mem_flags); if (err) { if (err != -EPERM && err != -ENODEV) nfc_err(&drv_data->udev->dev, "urb %p submission failed (%d)\n", urb, -err); usb_unanchor_urb(urb); } usb_free_urb(urb); return err; } static void nfcmrvl_tx_complete(struct urb *urb) { struct sk_buff *skb = urb->context; struct nci_dev *ndev = (struct nci_dev *)skb->dev; struct nfcmrvl_private *priv = nci_get_drvdata(ndev); struct nfcmrvl_usb_drv_data *drv_data = priv->drv_data; unsigned long flags; nfc_info(priv->dev, "urb %p status %d count %d\n", urb, urb->status, urb->actual_length); spin_lock_irqsave(&drv_data->txlock, flags); drv_data->tx_in_flight--; spin_unlock_irqrestore(&drv_data->txlock, flags); kfree(urb->setup_packet); kfree_skb(skb); } static int nfcmrvl_usb_nci_open(struct nfcmrvl_private *priv) { struct nfcmrvl_usb_drv_data *drv_data = priv->drv_data; int err; err = usb_autopm_get_interface(drv_data->intf); if (err) return err; drv_data->intf->needs_remote_wakeup = 1; err = nfcmrvl_submit_bulk_urb(drv_data, GFP_KERNEL); if (err) goto failed; set_bit(NFCMRVL_USB_BULK_RUNNING, &drv_data->flags); nfcmrvl_submit_bulk_urb(drv_data, GFP_KERNEL); usb_autopm_put_interface(drv_data->intf); return 0; failed: usb_autopm_put_interface(drv_data->intf); return err; } static void nfcmrvl_usb_stop_traffic(struct nfcmrvl_usb_drv_data *drv_data) { usb_kill_anchored_urbs(&drv_data->bulk_anchor); } static int nfcmrvl_usb_nci_close(struct nfcmrvl_private *priv) { struct nfcmrvl_usb_drv_data *drv_data = priv->drv_data; int err; cancel_work_sync(&drv_data->waker); clear_bit(NFCMRVL_USB_BULK_RUNNING, &drv_data->flags); nfcmrvl_usb_stop_traffic(drv_data); usb_kill_anchored_urbs(&drv_data->tx_anchor); err = usb_autopm_get_interface(drv_data->intf); if (err) goto failed; drv_data->intf->needs_remote_wakeup = 0; usb_autopm_put_interface(drv_data->intf); failed: usb_scuttle_anchored_urbs(&drv_data->deferred); return 0; } static int nfcmrvl_usb_nci_send(struct nfcmrvl_private *priv, struct sk_buff *skb) { struct nfcmrvl_usb_drv_data *drv_data = priv->drv_data; struct urb *urb; unsigned int pipe; int err; if (!drv_data->bulk_tx_ep) return -ENODEV; urb = usb_alloc_urb(0, GFP_ATOMIC); if (!urb) return -ENOMEM; pipe = usb_sndbulkpipe(drv_data->udev, drv_data->bulk_tx_ep->bEndpointAddress); usb_fill_bulk_urb(urb, drv_data->udev, pipe, skb->data, skb->len, nfcmrvl_tx_complete, skb); err = nfcmrvl_inc_tx(drv_data); if (err) { usb_anchor_urb(urb, &drv_data->deferred); schedule_work(&drv_data->waker); err = 0; goto done; } usb_anchor_urb(urb, &drv_data->tx_anchor); err = usb_submit_urb(urb, GFP_ATOMIC); if (err) { if (err != -EPERM && err != -ENODEV) nfc_err(&drv_data->udev->dev, "urb %p submission failed (%d)\n", urb, -err); kfree(urb->setup_packet); usb_unanchor_urb(urb); } else { usb_mark_last_busy(drv_data->udev); } done: usb_free_urb(urb); return err; } static const struct nfcmrvl_if_ops usb_ops = { .nci_open = nfcmrvl_usb_nci_open, .nci_close = nfcmrvl_usb_nci_close, .nci_send = nfcmrvl_usb_nci_send, }; static void nfcmrvl_waker(struct work_struct *work) { struct nfcmrvl_usb_drv_data *drv_data = container_of(work, struct nfcmrvl_usb_drv_data, waker); int err; err = usb_autopm_get_interface(drv_data->intf); if (err) return; usb_autopm_put_interface(drv_data->intf); } static int nfcmrvl_probe(struct usb_interface *intf, const struct usb_device_id *id) { struct nfcmrvl_usb_drv_data *drv_data; struct nfcmrvl_private *priv; int i; struct usb_device *udev = interface_to_usbdev(intf); struct nfcmrvl_platform_data config; /* No configuration for USB */ memset(&config, 0, sizeof(config)); config.reset_n_io = -EINVAL; nfc_info(&udev->dev, "intf %p id %p\n", intf, id); drv_data = devm_kzalloc(&intf->dev, sizeof(*drv_data), GFP_KERNEL); if (!drv_data) return -ENOMEM; for (i = 0; i < intf->cur_altsetting->desc.bNumEndpoints; i++) { struct usb_endpoint_descriptor *ep_desc; ep_desc = &intf->cur_altsetting->endpoint[i].desc; if (!drv_data->bulk_tx_ep && usb_endpoint_is_bulk_out(ep_desc)) { drv_data->bulk_tx_ep = ep_desc; } else if (!drv_data->bulk_rx_ep && usb_endpoint_is_bulk_in(ep_desc)) { drv_data->bulk_rx_ep = ep_desc; } } if (!drv_data->bulk_tx_ep || !drv_data->bulk_rx_ep) return -ENODEV; drv_data->udev = udev; drv_data->intf = intf; INIT_WORK(&drv_data->waker, nfcmrvl_waker); spin_lock_init(&drv_data->txlock); init_usb_anchor(&drv_data->tx_anchor); init_usb_anchor(&drv_data->bulk_anchor); init_usb_anchor(&drv_data->deferred); priv = nfcmrvl_nci_register_dev(NFCMRVL_PHY_USB, drv_data, &usb_ops, &intf->dev, &config); if (IS_ERR(priv)) return PTR_ERR(priv); drv_data->priv = priv; drv_data->priv->support_fw_dnld = false; usb_set_intfdata(intf, drv_data); return 0; } static void nfcmrvl_disconnect(struct usb_interface *intf) { struct nfcmrvl_usb_drv_data *drv_data = usb_get_intfdata(intf); if (!drv_data) return; nfc_info(&drv_data->udev->dev, "intf %p\n", intf); nfcmrvl_nci_unregister_dev(drv_data->priv); usb_set_intfdata(drv_data->intf, NULL); } #ifdef CONFIG_PM static int nfcmrvl_suspend(struct usb_interface *intf, pm_message_t message) { struct nfcmrvl_usb_drv_data *drv_data = usb_get_intfdata(intf); nfc_info(&drv_data->udev->dev, "intf %p\n", intf); if (drv_data->suspend_count++) return 0; spin_lock_irq(&drv_data->txlock); if (!(PMSG_IS_AUTO(message) && drv_data->tx_in_flight)) { set_bit(NFCMRVL_USB_SUSPENDING, &drv_data->flags); spin_unlock_irq(&drv_data->txlock); } else { spin_unlock_irq(&drv_data->txlock); drv_data->suspend_count--; return -EBUSY; } nfcmrvl_usb_stop_traffic(drv_data); usb_kill_anchored_urbs(&drv_data->tx_anchor); return 0; } static void nfcmrvl_play_deferred(struct nfcmrvl_usb_drv_data *drv_data) { struct urb *urb; int err; while ((urb = usb_get_from_anchor(&drv_data->deferred))) { usb_anchor_urb(urb, &drv_data->tx_anchor); err = usb_submit_urb(urb, GFP_ATOMIC); if (err) { kfree(urb->setup_packet); usb_unanchor_urb(urb); usb_free_urb(urb); break; } drv_data->tx_in_flight++; usb_free_urb(urb); } /* Cleanup the rest deferred urbs. */ while ((urb = usb_get_from_anchor(&drv_data->deferred))) { kfree(urb->setup_packet); usb_free_urb(urb); } } static int nfcmrvl_resume(struct usb_interface *intf) { struct nfcmrvl_usb_drv_data *drv_data = usb_get_intfdata(intf); int err = 0; nfc_info(&drv_data->udev->dev, "intf %p\n", intf); if (--drv_data->suspend_count) return 0; if (!test_bit(NFCMRVL_NCI_RUNNING, &drv_data->flags)) goto done; if (test_bit(NFCMRVL_USB_BULK_RUNNING, &drv_data->flags)) { err = nfcmrvl_submit_bulk_urb(drv_data, GFP_NOIO); if (err) { clear_bit(NFCMRVL_USB_BULK_RUNNING, &drv_data->flags); goto failed; } nfcmrvl_submit_bulk_urb(drv_data, GFP_NOIO); } spin_lock_irq(&drv_data->txlock); nfcmrvl_play_deferred(drv_data); clear_bit(NFCMRVL_USB_SUSPENDING, &drv_data->flags); spin_unlock_irq(&drv_data->txlock); return 0; failed: usb_scuttle_anchored_urbs(&drv_data->deferred); done: spin_lock_irq(&drv_data->txlock); clear_bit(NFCMRVL_USB_SUSPENDING, &drv_data->flags); spin_unlock_irq(&drv_data->txlock); return err; } #endif static struct usb_driver nfcmrvl_usb_driver = { .name = "nfcmrvl", .probe = nfcmrvl_probe, .disconnect = nfcmrvl_disconnect, #ifdef CONFIG_PM .suspend = nfcmrvl_suspend, .resume = nfcmrvl_resume, .reset_resume = nfcmrvl_resume, #endif .id_table = nfcmrvl_table, .supports_autosuspend = 1, .disable_hub_initiated_lpm = 1, .soft_unbind = 1, }; module_usb_driver(nfcmrvl_usb_driver); MODULE_AUTHOR("Marvell International Ltd."); MODULE_DESCRIPTION("Marvell NFC-over-USB driver"); MODULE_LICENSE("GPL v2");
29 29 74 8 75 1 2 18 18 18 2 2 2 1 3 3 2 7 48 55 55 55 1 1 7 37 23 23 6 3 3 3 3 3 12 3 4 12 1 11 1 1 1 16 16 16 16 16 29 29 29 27 29 2 2 2 2 1985 1981 1977 8 8 2 2 1 5 13 13 13 13 13 7 31 31 1 1 1 3 1 3 3 1 5 1 1 2 1 7 1 1 2 2 2 6 67 1 2 53 11 49 2 9 4 60 58 6 1 1 2 1 1 12 1 1 2 1 2 3 2 3 2 4 1 5 11 4 3 3 2 2 2 2 6 1 2 1 2 14 1 2 1 1 4 2 3 3 2 3 2 3 1 1 11 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 /* * net/tipc/bearer.c: TIPC bearer code * * Copyright (c) 1996-2006, 2013-2016, Ericsson AB * Copyright (c) 2004-2006, 2010-2013, Wind River Systems * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the names of the copyright holders nor the names of its * contributors may be used to endorse or promote products derived from * this software without specific prior written permission. * * Alternatively, this software may be distributed under the terms of the * GNU General Public License ("GPL") version 2 as published by the Free * Software Foundation. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ #include <net/sock.h> #include "core.h" #include "bearer.h" #include "link.h" #include "discover.h" #include "monitor.h" #include "bcast.h" #include "netlink.h" #include "udp_media.h" #include "trace.h" #include "crypto.h" #define MAX_ADDR_STR 60 static struct tipc_media * const media_info_array[] = { &eth_media_info, #ifdef CONFIG_TIPC_MEDIA_IB &ib_media_info, #endif #ifdef CONFIG_TIPC_MEDIA_UDP &udp_media_info, #endif NULL }; static struct tipc_bearer *bearer_get(struct net *net, int bearer_id) { struct tipc_net *tn = tipc_net(net); return rcu_dereference(tn->bearer_list[bearer_id]); } static void bearer_disable(struct net *net, struct tipc_bearer *b); static int tipc_l2_rcv_msg(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev); /** * tipc_media_find - locates specified media object by name * @name: name to locate */ struct tipc_media *tipc_media_find(const char *name) { u32 i; for (i = 0; media_info_array[i] != NULL; i++) { if (!strcmp(media_info_array[i]->name, name)) break; } return media_info_array[i]; } /** * media_find_id - locates specified media object by type identifier * @type: type identifier to locate */ static struct tipc_media *media_find_id(u8 type) { u32 i; for (i = 0; media_info_array[i] != NULL; i++) { if (media_info_array[i]->type_id == type) break; } return media_info_array[i]; } /** * tipc_media_addr_printf - record media address in print buffer * @buf: output buffer * @len: output buffer size remaining * @a: input media address */ int tipc_media_addr_printf(char *buf, int len, struct tipc_media_addr *a) { char addr_str[MAX_ADDR_STR]; struct tipc_media *m; int ret; m = media_find_id(a->media_id); if (m && !m->addr2str(a, addr_str, sizeof(addr_str))) ret = scnprintf(buf, len, "%s(%s)", m->name, addr_str); else { u32 i; ret = scnprintf(buf, len, "UNKNOWN(%u)", a->media_id); for (i = 0; i < sizeof(a->value); i++) ret += scnprintf(buf + ret, len - ret, "-%x", a->value[i]); } return ret; } /** * bearer_name_validate - validate & (optionally) deconstruct bearer name * @name: ptr to bearer name string * @name_parts: ptr to area for bearer name components (or NULL if not needed) * * Return: 1 if bearer name is valid, otherwise 0. */ static int bearer_name_validate(const char *name, struct tipc_bearer_names *name_parts) { char name_copy[TIPC_MAX_BEARER_NAME]; char *media_name; char *if_name; u32 media_len; u32 if_len; /* copy bearer name & ensure length is OK */ if (strscpy(name_copy, name, TIPC_MAX_BEARER_NAME) < 0) return 0; /* ensure all component parts of bearer name are present */ media_name = name_copy; if_name = strchr(media_name, ':'); if (if_name == NULL) return 0; *(if_name++) = 0; media_len = if_name - media_name; if_len = strlen(if_name) + 1; /* validate component parts of bearer name */ if ((media_len <= 1) || (media_len > TIPC_MAX_MEDIA_NAME) || (if_len <= 1) || (if_len > TIPC_MAX_IF_NAME)) return 0; /* return bearer name components, if necessary */ if (name_parts) { if (strscpy(name_parts->media_name, media_name, TIPC_MAX_MEDIA_NAME) < 0) return 0; if (strscpy(name_parts->if_name, if_name, TIPC_MAX_IF_NAME) < 0) return 0; } return 1; } /** * tipc_bearer_find - locates bearer object with matching bearer name * @net: the applicable net namespace * @name: bearer name to locate */ struct tipc_bearer *tipc_bearer_find(struct net *net, const char *name) { struct tipc_net *tn = tipc_net(net); struct tipc_bearer *b; u32 i; for (i = 0; i < MAX_BEARERS; i++) { b = rtnl_dereference(tn->bearer_list[i]); if (b && (!strcmp(b->name, name))) return b; } return NULL; } /* tipc_bearer_get_name - get the bearer name from its id. * @net: network namespace * @name: a pointer to the buffer where the name will be stored. * @bearer_id: the id to get the name from. */ int tipc_bearer_get_name(struct net *net, char *name, u32 bearer_id) { struct tipc_net *tn = tipc_net(net); struct tipc_bearer *b; if (bearer_id >= MAX_BEARERS) return -EINVAL; b = rtnl_dereference(tn->bearer_list[bearer_id]); if (!b) return -EINVAL; strcpy(name, b->name); return 0; } void tipc_bearer_add_dest(struct net *net, u32 bearer_id, u32 dest) { struct tipc_bearer *b; rcu_read_lock(); b = bearer_get(net, bearer_id); if (b) tipc_disc_add_dest(b->disc); rcu_read_unlock(); } void tipc_bearer_remove_dest(struct net *net, u32 bearer_id, u32 dest) { struct tipc_bearer *b; rcu_read_lock(); b = bearer_get(net, bearer_id); if (b) tipc_disc_remove_dest(b->disc); rcu_read_unlock(); } /** * tipc_enable_bearer - enable bearer with the given name * @net: the applicable net namespace * @name: bearer name to enable * @disc_domain: bearer domain * @prio: bearer priority * @attr: nlattr array * @extack: netlink extended ack */ static int tipc_enable_bearer(struct net *net, const char *name, u32 disc_domain, u32 prio, struct nlattr *attr[], struct netlink_ext_ack *extack) { struct tipc_net *tn = tipc_net(net); struct tipc_bearer_names b_names; int with_this_prio = 1; struct tipc_bearer *b; struct tipc_media *m; struct sk_buff *skb; int bearer_id = 0; int res = -EINVAL; char *errstr = ""; u32 i; if (!bearer_name_validate(name, &b_names)) { NL_SET_ERR_MSG(extack, "Illegal name"); return res; } if (prio > TIPC_MAX_LINK_PRI && prio != TIPC_MEDIA_LINK_PRI) { errstr = "illegal priority"; NL_SET_ERR_MSG(extack, "Illegal priority"); goto rejected; } m = tipc_media_find(b_names.media_name); if (!m) { errstr = "media not registered"; NL_SET_ERR_MSG(extack, "Media not registered"); goto rejected; } if (prio == TIPC_MEDIA_LINK_PRI) prio = m->priority; /* Check new bearer vs existing ones and find free bearer id if any */ bearer_id = MAX_BEARERS; i = MAX_BEARERS; while (i-- != 0) { b = rtnl_dereference(tn->bearer_list[i]); if (!b) { bearer_id = i; continue; } if (!strcmp(name, b->name)) { errstr = "already enabled"; NL_SET_ERR_MSG(extack, "Already enabled"); goto rejected; } if (b->priority == prio && (++with_this_prio > 2)) { pr_warn("Bearer <%s>: already 2 bearers with priority %u\n", name, prio); if (prio == TIPC_MIN_LINK_PRI) { errstr = "cannot adjust to lower"; NL_SET_ERR_MSG(extack, "Cannot adjust to lower"); goto rejected; } pr_warn("Bearer <%s>: trying with adjusted priority\n", name); prio--; bearer_id = MAX_BEARERS; i = MAX_BEARERS; with_this_prio = 1; } } if (bearer_id >= MAX_BEARERS) { errstr = "max 3 bearers permitted"; NL_SET_ERR_MSG(extack, "Max 3 bearers permitted"); goto rejected; } b = kzalloc(sizeof(*b), GFP_ATOMIC); if (!b) return -ENOMEM; strscpy(b->name, name); b->media = m; res = m->enable_media(net, b, attr); if (res) { kfree(b); errstr = "failed to enable media"; NL_SET_ERR_MSG(extack, "Failed to enable media"); goto rejected; } b->identity = bearer_id; b->tolerance = m->tolerance; b->min_win = m->min_win; b->max_win = m->max_win; b->domain = disc_domain; b->net_plane = bearer_id + 'A'; b->priority = prio; refcount_set(&b->refcnt, 1); res = tipc_disc_create(net, b, &b->bcast_addr, &skb); if (res) { bearer_disable(net, b); errstr = "failed to create discoverer"; NL_SET_ERR_MSG(extack, "Failed to create discoverer"); goto rejected; } /* Create monitoring data before accepting activate messages */ if (tipc_mon_create(net, bearer_id)) { bearer_disable(net, b); kfree_skb(skb); return -ENOMEM; } test_and_set_bit_lock(0, &b->up); rcu_assign_pointer(tn->bearer_list[bearer_id], b); if (skb) tipc_bearer_xmit_skb(net, bearer_id, skb, &b->bcast_addr); pr_info("Enabled bearer <%s>, priority %u\n", name, prio); return res; rejected: pr_warn("Enabling of bearer <%s> rejected, %s\n", name, errstr); return res; } /** * tipc_reset_bearer - Reset all links established over this bearer * @net: the applicable net namespace * @b: the target bearer */ static int tipc_reset_bearer(struct net *net, struct tipc_bearer *b) { pr_info("Resetting bearer <%s>\n", b->name); tipc_node_delete_links(net, b->identity); tipc_disc_reset(net, b); return 0; } bool tipc_bearer_hold(struct tipc_bearer *b) { return (b && refcount_inc_not_zero(&b->refcnt)); } void tipc_bearer_put(struct tipc_bearer *b) { if (b && refcount_dec_and_test(&b->refcnt)) kfree_rcu(b, rcu); } /** * bearer_disable - disable this bearer * @net: the applicable net namespace * @b: the bearer to disable * * Note: This routine assumes caller holds RTNL lock. */ static void bearer_disable(struct net *net, struct tipc_bearer *b) { struct tipc_net *tn = tipc_net(net); int bearer_id = b->identity; pr_info("Disabling bearer <%s>\n", b->name); clear_bit_unlock(0, &b->up); tipc_node_delete_links(net, bearer_id); b->media->disable_media(b); RCU_INIT_POINTER(b->media_ptr, NULL); if (b->disc) tipc_disc_delete(b->disc); RCU_INIT_POINTER(tn->bearer_list[bearer_id], NULL); tipc_bearer_put(b); tipc_mon_delete(net, bearer_id); } int tipc_enable_l2_media(struct net *net, struct tipc_bearer *b, struct nlattr *attr[]) { char *dev_name = strchr((const char *)b->name, ':') + 1; int hwaddr_len = b->media->hwaddr_len; u8 node_id[NODE_ID_LEN] = {0,}; struct net_device *dev; /* Find device with specified name */ dev = dev_get_by_name(net, dev_name); if (!dev) return -ENODEV; if (tipc_mtu_bad(dev)) { dev_put(dev); return -EINVAL; } if (dev == net->loopback_dev) { dev_put(dev); pr_info("Enabling <%s> not permitted\n", b->name); return -EINVAL; } /* Autoconfigure own node identity if needed */ if (!tipc_own_id(net) && hwaddr_len <= NODE_ID_LEN) { memcpy(node_id, dev->dev_addr, hwaddr_len); tipc_net_init(net, node_id, 0); } if (!tipc_own_id(net)) { dev_put(dev); pr_warn("Failed to obtain node identity\n"); return -EINVAL; } /* Associate TIPC bearer with L2 bearer */ rcu_assign_pointer(b->media_ptr, dev); b->pt.dev = dev; b->pt.type = htons(ETH_P_TIPC); b->pt.func = tipc_l2_rcv_msg; dev_add_pack(&b->pt); memset(&b->bcast_addr, 0, sizeof(b->bcast_addr)); memcpy(b->bcast_addr.value, dev->broadcast, hwaddr_len); b->bcast_addr.media_id = b->media->type_id; b->bcast_addr.broadcast = TIPC_BROADCAST_SUPPORT; b->mtu = dev->mtu; b->media->raw2addr(b, &b->addr, (const char *)dev->dev_addr); rcu_assign_pointer(dev->tipc_ptr, b); return 0; } /* tipc_disable_l2_media - detach TIPC bearer from an L2 interface * @b: the target bearer * * Mark L2 bearer as inactive so that incoming buffers are thrown away */ void tipc_disable_l2_media(struct tipc_bearer *b) { struct net_device *dev; dev = (struct net_device *)rtnl_dereference(b->media_ptr); dev_remove_pack(&b->pt); RCU_INIT_POINTER(dev->tipc_ptr, NULL); synchronize_net(); dev_put(dev); } /** * tipc_l2_send_msg - send a TIPC packet out over an L2 interface * @net: the associated network namespace * @skb: the packet to be sent * @b: the bearer through which the packet is to be sent * @dest: peer destination address */ int tipc_l2_send_msg(struct net *net, struct sk_buff *skb, struct tipc_bearer *b, struct tipc_media_addr *dest) { struct net_device *dev; int delta; dev = (struct net_device *)rcu_dereference(b->media_ptr); if (!dev) return 0; delta = SKB_DATA_ALIGN(dev->hard_header_len - skb_headroom(skb)); if ((delta > 0) && pskb_expand_head(skb, delta, 0, GFP_ATOMIC)) { kfree_skb(skb); return 0; } skb_reset_network_header(skb); skb->dev = dev; skb->protocol = htons(ETH_P_TIPC); dev_hard_header(skb, dev, ETH_P_TIPC, dest->value, dev->dev_addr, skb->len); dev_queue_xmit(skb); return 0; } bool tipc_bearer_bcast_support(struct net *net, u32 bearer_id) { bool supp = false; struct tipc_bearer *b; rcu_read_lock(); b = bearer_get(net, bearer_id); if (b) supp = (b->bcast_addr.broadcast == TIPC_BROADCAST_SUPPORT); rcu_read_unlock(); return supp; } int tipc_bearer_mtu(struct net *net, u32 bearer_id) { int mtu = 0; struct tipc_bearer *b; rcu_read_lock(); b = bearer_get(net, bearer_id); if (b) mtu = b->mtu; rcu_read_unlock(); return mtu; } int tipc_bearer_min_mtu(struct net *net, u32 bearer_id) { int mtu = TIPC_MIN_BEARER_MTU; struct tipc_bearer *b; rcu_read_lock(); b = bearer_get(net, bearer_id); if (b) mtu += b->encap_hlen; rcu_read_unlock(); return mtu; } /* tipc_bearer_xmit_skb - sends buffer to destination over bearer */ void tipc_bearer_xmit_skb(struct net *net, u32 bearer_id, struct sk_buff *skb, struct tipc_media_addr *dest) { struct tipc_msg *hdr = buf_msg(skb); struct tipc_bearer *b; rcu_read_lock(); b = bearer_get(net, bearer_id); if (likely(b && (test_bit(0, &b->up) || msg_is_reset(hdr)))) { #ifdef CONFIG_TIPC_CRYPTO tipc_crypto_xmit(net, &skb, b, dest, NULL); if (skb) #endif b->media->send_msg(net, skb, b, dest); } else { kfree_skb(skb); } rcu_read_unlock(); } /* tipc_bearer_xmit() -send buffer to destination over bearer */ void tipc_bearer_xmit(struct net *net, u32 bearer_id, struct sk_buff_head *xmitq, struct tipc_media_addr *dst, struct tipc_node *__dnode) { struct tipc_bearer *b; struct sk_buff *skb, *tmp; if (skb_queue_empty(xmitq)) return; rcu_read_lock(); b = bearer_get(net, bearer_id); if (unlikely(!b)) __skb_queue_purge(xmitq); skb_queue_walk_safe(xmitq, skb, tmp) { __skb_dequeue(xmitq); if (likely(test_bit(0, &b->up) || msg_is_reset(buf_msg(skb)))) { #ifdef CONFIG_TIPC_CRYPTO tipc_crypto_xmit(net, &skb, b, dst, __dnode); if (skb) #endif b->media->send_msg(net, skb, b, dst); } else { kfree_skb(skb); } } rcu_read_unlock(); } /* tipc_bearer_bc_xmit() - broadcast buffers to all destinations */ void tipc_bearer_bc_xmit(struct net *net, u32 bearer_id, struct sk_buff_head *xmitq) { struct tipc_net *tn = tipc_net(net); struct tipc_media_addr *dst; int net_id = tn->net_id; struct tipc_bearer *b; struct sk_buff *skb, *tmp; struct tipc_msg *hdr; rcu_read_lock(); b = bearer_get(net, bearer_id); if (unlikely(!b || !test_bit(0, &b->up))) __skb_queue_purge(xmitq); skb_queue_walk_safe(xmitq, skb, tmp) { hdr = buf_msg(skb); msg_set_non_seq(hdr, 1); msg_set_mc_netid(hdr, net_id); __skb_dequeue(xmitq); dst = &b->bcast_addr; #ifdef CONFIG_TIPC_CRYPTO tipc_crypto_xmit(net, &skb, b, dst, NULL); if (skb) #endif b->media->send_msg(net, skb, b, dst); } rcu_read_unlock(); } /** * tipc_l2_rcv_msg - handle incoming TIPC message from an interface * @skb: the received message * @dev: the net device that the packet was received on * @pt: the packet_type structure which was used to register this handler * @orig_dev: the original receive net device in case the device is a bond * * Accept only packets explicitly sent to this node, or broadcast packets; * ignores packets sent using interface multicast, and traffic sent to other * nodes (which can happen if interface is running in promiscuous mode). */ static int tipc_l2_rcv_msg(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev) { struct tipc_bearer *b; rcu_read_lock(); b = rcu_dereference(dev->tipc_ptr) ?: rcu_dereference(orig_dev->tipc_ptr); if (likely(b && test_bit(0, &b->up) && (skb->pkt_type <= PACKET_MULTICAST))) { skb_mark_not_on_list(skb); TIPC_SKB_CB(skb)->flags = 0; tipc_rcv(dev_net(b->pt.dev), skb, b); rcu_read_unlock(); return NET_RX_SUCCESS; } rcu_read_unlock(); kfree_skb(skb); return NET_RX_DROP; } /** * tipc_l2_device_event - handle device events from network device * @nb: the context of the notification * @evt: the type of event * @ptr: the net device that the event was on * * This function is called by the Ethernet driver in case of link * change event. */ static int tipc_l2_device_event(struct notifier_block *nb, unsigned long evt, void *ptr) { struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct net *net = dev_net(dev); struct tipc_bearer *b; b = rtnl_dereference(dev->tipc_ptr); if (!b) return NOTIFY_DONE; trace_tipc_l2_device_event(dev, b, evt); switch (evt) { case NETDEV_CHANGE: if (netif_carrier_ok(dev) && netif_oper_up(dev)) { test_and_set_bit_lock(0, &b->up); break; } fallthrough; case NETDEV_GOING_DOWN: clear_bit_unlock(0, &b->up); tipc_reset_bearer(net, b); break; case NETDEV_UP: test_and_set_bit_lock(0, &b->up); break; case NETDEV_CHANGEMTU: if (tipc_mtu_bad(dev)) { bearer_disable(net, b); break; } b->mtu = dev->mtu; tipc_reset_bearer(net, b); break; case NETDEV_CHANGEADDR: b->media->raw2addr(b, &b->addr, (const char *)dev->dev_addr); tipc_reset_bearer(net, b); break; case NETDEV_UNREGISTER: case NETDEV_CHANGENAME: bearer_disable(net, b); break; } return NOTIFY_OK; } static struct notifier_block notifier = { .notifier_call = tipc_l2_device_event, .priority = 0, }; int tipc_bearer_setup(void) { return register_netdevice_notifier(&notifier); } void tipc_bearer_cleanup(void) { unregister_netdevice_notifier(&notifier); } void tipc_bearer_stop(struct net *net) { struct tipc_net *tn = tipc_net(net); struct tipc_bearer *b; u32 i; for (i = 0; i < MAX_BEARERS; i++) { b = rtnl_dereference(tn->bearer_list[i]); if (b) { bearer_disable(net, b); tn->bearer_list[i] = NULL; } } } void tipc_clone_to_loopback(struct net *net, struct sk_buff_head *pkts) { struct net_device *dev = net->loopback_dev; struct sk_buff *skb, *_skb; int exp; skb_queue_walk(pkts, _skb) { skb = pskb_copy(_skb, GFP_ATOMIC); if (!skb) continue; exp = SKB_DATA_ALIGN(dev->hard_header_len - skb_headroom(skb)); if (exp > 0 && pskb_expand_head(skb, exp, 0, GFP_ATOMIC)) { kfree_skb(skb); continue; } skb_reset_network_header(skb); dev_hard_header(skb, dev, ETH_P_TIPC, dev->dev_addr, dev->dev_addr, skb->len); skb->dev = dev; skb->pkt_type = PACKET_HOST; skb->ip_summed = CHECKSUM_UNNECESSARY; skb->protocol = eth_type_trans(skb, dev); netif_rx(skb); } } static int tipc_loopback_rcv_pkt(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *od) { consume_skb(skb); return NET_RX_SUCCESS; } int tipc_attach_loopback(struct net *net) { struct net_device *dev = net->loopback_dev; struct tipc_net *tn = tipc_net(net); if (!dev) return -ENODEV; netdev_hold(dev, &tn->loopback_pt.dev_tracker, GFP_KERNEL); tn->loopback_pt.dev = dev; tn->loopback_pt.type = htons(ETH_P_TIPC); tn->loopback_pt.func = tipc_loopback_rcv_pkt; dev_add_pack(&tn->loopback_pt); return 0; } void tipc_detach_loopback(struct net *net) { struct tipc_net *tn = tipc_net(net); dev_remove_pack(&tn->loopback_pt); netdev_put(net->loopback_dev, &tn->loopback_pt.dev_tracker); } /* Caller should hold rtnl_lock to protect the bearer */ static int __tipc_nl_add_bearer(struct tipc_nl_msg *msg, struct tipc_bearer *bearer, int nlflags) { void *hdr; struct nlattr *attrs; struct nlattr *prop; hdr = genlmsg_put(msg->skb, msg->portid, msg->seq, &tipc_genl_family, nlflags, TIPC_NL_BEARER_GET); if (!hdr) return -EMSGSIZE; attrs = nla_nest_start_noflag(msg->skb, TIPC_NLA_BEARER); if (!attrs) goto msg_full; if (nla_put_string(msg->skb, TIPC_NLA_BEARER_NAME, bearer->name)) goto attr_msg_full; prop = nla_nest_start_noflag(msg->skb, TIPC_NLA_BEARER_PROP); if (!prop) goto prop_msg_full; if (nla_put_u32(msg->skb, TIPC_NLA_PROP_PRIO, bearer->priority)) goto prop_msg_full; if (nla_put_u32(msg->skb, TIPC_NLA_PROP_TOL, bearer->tolerance)) goto prop_msg_full; if (nla_put_u32(msg->skb, TIPC_NLA_PROP_WIN, bearer->max_win)) goto prop_msg_full; if (bearer->media->type_id == TIPC_MEDIA_TYPE_UDP) if (nla_put_u32(msg->skb, TIPC_NLA_PROP_MTU, bearer->mtu)) goto prop_msg_full; nla_nest_end(msg->skb, prop); #ifdef CONFIG_TIPC_MEDIA_UDP if (bearer->media->type_id == TIPC_MEDIA_TYPE_UDP) { if (tipc_udp_nl_add_bearer_data(msg, bearer)) goto attr_msg_full; } #endif nla_nest_end(msg->skb, attrs); genlmsg_end(msg->skb, hdr); return 0; prop_msg_full: nla_nest_cancel(msg->skb, prop); attr_msg_full: nla_nest_cancel(msg->skb, attrs); msg_full: genlmsg_cancel(msg->skb, hdr); return -EMSGSIZE; } int tipc_nl_bearer_dump(struct sk_buff *skb, struct netlink_callback *cb) { int err; int i = cb->args[0]; struct tipc_bearer *bearer; struct tipc_nl_msg msg; struct net *net = sock_net(skb->sk); struct tipc_net *tn = tipc_net(net); if (i == MAX_BEARERS) return 0; msg.skb = skb; msg.portid = NETLINK_CB(cb->skb).portid; msg.seq = cb->nlh->nlmsg_seq; rtnl_lock(); for (i = 0; i < MAX_BEARERS; i++) { bearer = rtnl_dereference(tn->bearer_list[i]); if (!bearer) continue; err = __tipc_nl_add_bearer(&msg, bearer, NLM_F_MULTI); if (err) break; } rtnl_unlock(); cb->args[0] = i; return skb->len; } int tipc_nl_bearer_get(struct sk_buff *skb, struct genl_info *info) { int err; char *name; struct sk_buff *rep; struct tipc_bearer *bearer; struct tipc_nl_msg msg; struct nlattr *attrs[TIPC_NLA_BEARER_MAX + 1]; struct net *net = genl_info_net(info); if (!info->attrs[TIPC_NLA_BEARER]) return -EINVAL; err = nla_parse_nested_deprecated(attrs, TIPC_NLA_BEARER_MAX, info->attrs[TIPC_NLA_BEARER], tipc_nl_bearer_policy, info->extack); if (err) return err; if (!attrs[TIPC_NLA_BEARER_NAME]) return -EINVAL; name = nla_data(attrs[TIPC_NLA_BEARER_NAME]); rep = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL); if (!rep) return -ENOMEM; msg.skb = rep; msg.portid = info->snd_portid; msg.seq = info->snd_seq; rtnl_lock(); bearer = tipc_bearer_find(net, name); if (!bearer) { err = -EINVAL; NL_SET_ERR_MSG(info->extack, "Bearer not found"); goto err_out; } err = __tipc_nl_add_bearer(&msg, bearer, 0); if (err) goto err_out; rtnl_unlock(); return genlmsg_reply(rep, info); err_out: rtnl_unlock(); nlmsg_free(rep); return err; } int __tipc_nl_bearer_disable(struct sk_buff *skb, struct genl_info *info) { int err; char *name; struct tipc_bearer *bearer; struct nlattr *attrs[TIPC_NLA_BEARER_MAX + 1]; struct net *net = sock_net(skb->sk); if (!info->attrs[TIPC_NLA_BEARER]) return -EINVAL; err = nla_parse_nested_deprecated(attrs, TIPC_NLA_BEARER_MAX, info->attrs[TIPC_NLA_BEARER], tipc_nl_bearer_policy, info->extack); if (err) return err; if (!attrs[TIPC_NLA_BEARER_NAME]) return -EINVAL; name = nla_data(attrs[TIPC_NLA_BEARER_NAME]); bearer = tipc_bearer_find(net, name); if (!bearer) { NL_SET_ERR_MSG(info->extack, "Bearer not found"); return -EINVAL; } bearer_disable(net, bearer); return 0; } int tipc_nl_bearer_disable(struct sk_buff *skb, struct genl_info *info) { int err; rtnl_lock(); err = __tipc_nl_bearer_disable(skb, info); rtnl_unlock(); return err; } int __tipc_nl_bearer_enable(struct sk_buff *skb, struct genl_info *info) { int err; char *bearer; struct nlattr *attrs[TIPC_NLA_BEARER_MAX + 1]; struct net *net = sock_net(skb->sk); u32 domain = 0; u32 prio; prio = TIPC_MEDIA_LINK_PRI; if (!info->attrs[TIPC_NLA_BEARER]) return -EINVAL; err = nla_parse_nested_deprecated(attrs, TIPC_NLA_BEARER_MAX, info->attrs[TIPC_NLA_BEARER], tipc_nl_bearer_policy, info->extack); if (err) return err; if (!attrs[TIPC_NLA_BEARER_NAME]) return -EINVAL; bearer = nla_data(attrs[TIPC_NLA_BEARER_NAME]); if (attrs[TIPC_NLA_BEARER_DOMAIN]) domain = nla_get_u32(attrs[TIPC_NLA_BEARER_DOMAIN]); if (attrs[TIPC_NLA_BEARER_PROP]) { struct nlattr *props[TIPC_NLA_PROP_MAX + 1]; err = tipc_nl_parse_link_prop(attrs[TIPC_NLA_BEARER_PROP], props); if (err) return err; if (props[TIPC_NLA_PROP_PRIO]) prio = nla_get_u32(props[TIPC_NLA_PROP_PRIO]); } return tipc_enable_bearer(net, bearer, domain, prio, attrs, info->extack); } int tipc_nl_bearer_enable(struct sk_buff *skb, struct genl_info *info) { int err; rtnl_lock(); err = __tipc_nl_bearer_enable(skb, info); rtnl_unlock(); return err; } int tipc_nl_bearer_add(struct sk_buff *skb, struct genl_info *info) { int err; char *name; struct tipc_bearer *b; struct nlattr *attrs[TIPC_NLA_BEARER_MAX + 1]; struct net *net = sock_net(skb->sk); if (!info->attrs[TIPC_NLA_BEARER]) return -EINVAL; err = nla_parse_nested_deprecated(attrs, TIPC_NLA_BEARER_MAX, info->attrs[TIPC_NLA_BEARER], tipc_nl_bearer_policy, info->extack); if (err) return err; if (!attrs[TIPC_NLA_BEARER_NAME]) return -EINVAL; name = nla_data(attrs[TIPC_NLA_BEARER_NAME]); rtnl_lock(); b = tipc_bearer_find(net, name); if (!b) { NL_SET_ERR_MSG(info->extack, "Bearer not found"); err = -EINVAL; goto out; } #ifdef CONFIG_TIPC_MEDIA_UDP if (attrs[TIPC_NLA_BEARER_UDP_OPTS]) { if (b->media->type_id != TIPC_MEDIA_TYPE_UDP) { NL_SET_ERR_MSG(info->extack, "UDP option is unsupported"); err = -EINVAL; goto out; } err = tipc_udp_nl_bearer_add(b, attrs[TIPC_NLA_BEARER_UDP_OPTS]); } #endif out: rtnl_unlock(); return err; } int __tipc_nl_bearer_set(struct sk_buff *skb, struct genl_info *info) { struct tipc_bearer *b; struct nlattr *attrs[TIPC_NLA_BEARER_MAX + 1]; struct net *net = sock_net(skb->sk); char *name; int err; if (!info->attrs[TIPC_NLA_BEARER]) return -EINVAL; err = nla_parse_nested_deprecated(attrs, TIPC_NLA_BEARER_MAX, info->attrs[TIPC_NLA_BEARER], tipc_nl_bearer_policy, info->extack); if (err) return err; if (!attrs[TIPC_NLA_BEARER_NAME]) return -EINVAL; name = nla_data(attrs[TIPC_NLA_BEARER_NAME]); b = tipc_bearer_find(net, name); if (!b) { NL_SET_ERR_MSG(info->extack, "Bearer not found"); return -EINVAL; } if (attrs[TIPC_NLA_BEARER_PROP]) { struct nlattr *props[TIPC_NLA_PROP_MAX + 1]; err = tipc_nl_parse_link_prop(attrs[TIPC_NLA_BEARER_PROP], props); if (err) return err; if (props[TIPC_NLA_PROP_TOL]) { b->tolerance = nla_get_u32(props[TIPC_NLA_PROP_TOL]); tipc_node_apply_property(net, b, TIPC_NLA_PROP_TOL); } if (props[TIPC_NLA_PROP_PRIO]) b->priority = nla_get_u32(props[TIPC_NLA_PROP_PRIO]); if (props[TIPC_NLA_PROP_WIN]) b->max_win = nla_get_u32(props[TIPC_NLA_PROP_WIN]); if (props[TIPC_NLA_PROP_MTU]) { if (b->media->type_id != TIPC_MEDIA_TYPE_UDP) { NL_SET_ERR_MSG(info->extack, "MTU property is unsupported"); return -EINVAL; } #ifdef CONFIG_TIPC_MEDIA_UDP if (nla_get_u32(props[TIPC_NLA_PROP_MTU]) < b->encap_hlen + TIPC_MIN_BEARER_MTU) { NL_SET_ERR_MSG(info->extack, "MTU value is out-of-range"); return -EINVAL; } b->mtu = nla_get_u32(props[TIPC_NLA_PROP_MTU]); tipc_node_apply_property(net, b, TIPC_NLA_PROP_MTU); #endif } } return 0; } int tipc_nl_bearer_set(struct sk_buff *skb, struct genl_info *info) { int err; rtnl_lock(); err = __tipc_nl_bearer_set(skb, info); rtnl_unlock(); return err; } static int __tipc_nl_add_media(struct tipc_nl_msg *msg, struct tipc_media *media, int nlflags) { void *hdr; struct nlattr *attrs; struct nlattr *prop; hdr = genlmsg_put(msg->skb, msg->portid, msg->seq, &tipc_genl_family, nlflags, TIPC_NL_MEDIA_GET); if (!hdr) return -EMSGSIZE; attrs = nla_nest_start_noflag(msg->skb, TIPC_NLA_MEDIA); if (!attrs) goto msg_full; if (nla_put_string(msg->skb, TIPC_NLA_MEDIA_NAME, media->name)) goto attr_msg_full; prop = nla_nest_start_noflag(msg->skb, TIPC_NLA_MEDIA_PROP); if (!prop) goto prop_msg_full; if (nla_put_u32(msg->skb, TIPC_NLA_PROP_PRIO, media->priority)) goto prop_msg_full; if (nla_put_u32(msg->skb, TIPC_NLA_PROP_TOL, media->tolerance)) goto prop_msg_full; if (nla_put_u32(msg->skb, TIPC_NLA_PROP_WIN, media->max_win)) goto prop_msg_full; if (media->type_id == TIPC_MEDIA_TYPE_UDP) if (nla_put_u32(msg->skb, TIPC_NLA_PROP_MTU, media->mtu)) goto prop_msg_full; nla_nest_end(msg->skb, prop); nla_nest_end(msg->skb, attrs); genlmsg_end(msg->skb, hdr); return 0; prop_msg_full: nla_nest_cancel(msg->skb, prop); attr_msg_full: nla_nest_cancel(msg->skb, attrs); msg_full: genlmsg_cancel(msg->skb, hdr); return -EMSGSIZE; } int tipc_nl_media_dump(struct sk_buff *skb, struct netlink_callback *cb) { int err; int i = cb->args[0]; struct tipc_nl_msg msg; if (i == MAX_MEDIA) return 0; msg.skb = skb; msg.portid = NETLINK_CB(cb->skb).portid; msg.seq = cb->nlh->nlmsg_seq; rtnl_lock(); for (; media_info_array[i] != NULL; i++) { err = __tipc_nl_add_media(&msg, media_info_array[i], NLM_F_MULTI); if (err) break; } rtnl_unlock(); cb->args[0] = i; return skb->len; } int tipc_nl_media_get(struct sk_buff *skb, struct genl_info *info) { int err; char *name; struct tipc_nl_msg msg; struct tipc_media *media; struct sk_buff *rep; struct nlattr *attrs[TIPC_NLA_MEDIA_MAX + 1]; if (!info->attrs[TIPC_NLA_MEDIA]) return -EINVAL; err = nla_parse_nested_deprecated(attrs, TIPC_NLA_MEDIA_MAX, info->attrs[TIPC_NLA_MEDIA], tipc_nl_media_policy, info->extack); if (err) return err; if (!attrs[TIPC_NLA_MEDIA_NAME]) return -EINVAL; name = nla_data(attrs[TIPC_NLA_MEDIA_NAME]); rep = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL); if (!rep) return -ENOMEM; msg.skb = rep; msg.portid = info->snd_portid; msg.seq = info->snd_seq; rtnl_lock(); media = tipc_media_find(name); if (!media) { NL_SET_ERR_MSG(info->extack, "Media not found"); err = -EINVAL; goto err_out; } err = __tipc_nl_add_media(&msg, media, 0); if (err) goto err_out; rtnl_unlock(); return genlmsg_reply(rep, info); err_out: rtnl_unlock(); nlmsg_free(rep); return err; } int __tipc_nl_media_set(struct sk_buff *skb, struct genl_info *info) { int err; char *name; struct tipc_media *m; struct nlattr *attrs[TIPC_NLA_MEDIA_MAX + 1]; if (!info->attrs[TIPC_NLA_MEDIA]) return -EINVAL; err = nla_parse_nested_deprecated(attrs, TIPC_NLA_MEDIA_MAX, info->attrs[TIPC_NLA_MEDIA], tipc_nl_media_policy, info->extack); if (!attrs[TIPC_NLA_MEDIA_NAME]) return -EINVAL; name = nla_data(attrs[TIPC_NLA_MEDIA_NAME]); m = tipc_media_find(name); if (!m) { NL_SET_ERR_MSG(info->extack, "Media not found"); return -EINVAL; } if (attrs[TIPC_NLA_MEDIA_PROP]) { struct nlattr *props[TIPC_NLA_PROP_MAX + 1]; err = tipc_nl_parse_link_prop(attrs[TIPC_NLA_MEDIA_PROP], props); if (err) return err; if (props[TIPC_NLA_PROP_TOL]) m->tolerance = nla_get_u32(props[TIPC_NLA_PROP_TOL]); if (props[TIPC_NLA_PROP_PRIO]) m->priority = nla_get_u32(props[TIPC_NLA_PROP_PRIO]); if (props[TIPC_NLA_PROP_WIN]) m->max_win = nla_get_u32(props[TIPC_NLA_PROP_WIN]); if (props[TIPC_NLA_PROP_MTU]) { if (m->type_id != TIPC_MEDIA_TYPE_UDP) { NL_SET_ERR_MSG(info->extack, "MTU property is unsupported"); return -EINVAL; } #ifdef CONFIG_TIPC_MEDIA_UDP if (tipc_udp_mtu_bad(nla_get_u32 (props[TIPC_NLA_PROP_MTU]))) { NL_SET_ERR_MSG(info->extack, "MTU value is out-of-range"); return -EINVAL; } m->mtu = nla_get_u32(props[TIPC_NLA_PROP_MTU]); #endif } } return 0; } int tipc_nl_media_set(struct sk_buff *skb, struct genl_info *info) { int err; rtnl_lock(); err = __tipc_nl_media_set(skb, info); rtnl_unlock(); return err; }
3 3 3 2 3 3 1 2 3 3 3 3 3 1 2 3 3 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 // SPDX-License-Identifier: GPL-2.0-or-later /* * net/sched/em_meta.c Metadata ematch * * Authors: Thomas Graf <tgraf@suug.ch> * * ========================================================================== * * The metadata ematch compares two meta objects where each object * represents either a meta value stored in the kernel or a static * value provided by userspace. The objects are not provided by * userspace itself but rather a definition providing the information * to build them. Every object is of a certain type which must be * equal to the object it is being compared to. * * The definition of a objects conists of the type (meta type), a * identifier (meta id) and additional type specific information. * The meta id is either TCF_META_TYPE_VALUE for values provided by * userspace or a index to the meta operations table consisting of * function pointers to type specific meta data collectors returning * the value of the requested meta value. * * lvalue rvalue * +-----------+ +-----------+ * | type: INT | | type: INT | * def | id: DEV | | id: VALUE | * | data: | | data: 3 | * +-----------+ +-----------+ * | | * ---> meta_ops[INT][DEV](...) | * | | * ----------- | * V V * +-----------+ +-----------+ * | type: INT | | type: INT | * obj | id: DEV | | id: VALUE | * | data: 2 |<--data got filled out | data: 3 | * +-----------+ +-----------+ * | | * --------------> 2 equals 3 <-------------- * * This is a simplified schema, the complexity varies depending * on the meta type. Obviously, the length of the data must also * be provided for non-numeric types. * * Additionally, type dependent modifiers such as shift operators * or mask may be applied to extend the functionality. As of now, * the variable length type supports shifting the byte string to * the right, eating up any number of octets and thus supporting * wildcard interface name comparisons such as "ppp%" matching * ppp0..9. * * NOTE: Certain meta values depend on other subsystems and are * only available if that subsystem is enabled in the kernel. */ #include <linux/slab.h> #include <linux/module.h> #include <linux/types.h> #include <linux/kernel.h> #include <linux/sched.h> #include <linux/sched/loadavg.h> #include <linux/string.h> #include <linux/skbuff.h> #include <linux/random.h> #include <linux/if_vlan.h> #include <linux/tc_ematch/tc_em_meta.h> #include <net/dst.h> #include <net/route.h> #include <net/pkt_cls.h> #include <net/sock.h> struct meta_obj { unsigned long value; unsigned int len; }; struct meta_value { struct tcf_meta_val hdr; unsigned long val; unsigned int len; }; struct meta_match { struct meta_value lvalue; struct meta_value rvalue; }; static inline int meta_id(struct meta_value *v) { return TCF_META_ID(v->hdr.kind); } static inline int meta_type(struct meta_value *v) { return TCF_META_TYPE(v->hdr.kind); } #define META_COLLECTOR(FUNC) static void meta_##FUNC(struct sk_buff *skb, \ struct tcf_pkt_info *info, struct meta_value *v, \ struct meta_obj *dst, int *err) /************************************************************************** * System status & misc **************************************************************************/ META_COLLECTOR(int_random) { get_random_bytes(&dst->value, sizeof(dst->value)); } static inline unsigned long fixed_loadavg(int load) { int rnd_load = load + (FIXED_1/200); int rnd_frac = ((rnd_load & (FIXED_1-1)) * 100) >> FSHIFT; return ((rnd_load >> FSHIFT) * 100) + rnd_frac; } META_COLLECTOR(int_loadavg_0) { dst->value = fixed_loadavg(avenrun[0]); } META_COLLECTOR(int_loadavg_1) { dst->value = fixed_loadavg(avenrun[1]); } META_COLLECTOR(int_loadavg_2) { dst->value = fixed_loadavg(avenrun[2]); } /************************************************************************** * Device names & indices **************************************************************************/ static inline int int_dev(struct net_device *dev, struct meta_obj *dst) { if (unlikely(dev == NULL)) return -1; dst->value = dev->ifindex; return 0; } static inline int var_dev(struct net_device *dev, struct meta_obj *dst) { if (unlikely(dev == NULL)) return -1; dst->value = (unsigned long) dev->name; dst->len = strlen(dev->name); return 0; } META_COLLECTOR(int_dev) { *err = int_dev(skb->dev, dst); } META_COLLECTOR(var_dev) { *err = var_dev(skb->dev, dst); } /************************************************************************** * vlan tag **************************************************************************/ META_COLLECTOR(int_vlan_tag) { unsigned short tag; if (skb_vlan_tag_present(skb)) dst->value = skb_vlan_tag_get(skb); else if (!__vlan_get_tag(skb, &tag)) dst->value = tag; else *err = -1; } /************************************************************************** * skb attributes **************************************************************************/ META_COLLECTOR(int_priority) { dst->value = skb->priority; } META_COLLECTOR(int_protocol) { /* Let userspace take care of the byte ordering */ dst->value = skb_protocol(skb, false); } META_COLLECTOR(int_pkttype) { dst->value = skb->pkt_type; } META_COLLECTOR(int_pktlen) { dst->value = skb->len; } META_COLLECTOR(int_datalen) { dst->value = skb->data_len; } META_COLLECTOR(int_maclen) { dst->value = skb->mac_len; } META_COLLECTOR(int_rxhash) { dst->value = skb_get_hash(skb); } /************************************************************************** * Netfilter **************************************************************************/ META_COLLECTOR(int_mark) { dst->value = skb->mark; } /************************************************************************** * Traffic Control **************************************************************************/ META_COLLECTOR(int_tcindex) { dst->value = skb->tc_index; } /************************************************************************** * Routing **************************************************************************/ META_COLLECTOR(int_rtclassid) { if (unlikely(skb_dst(skb) == NULL)) *err = -1; else #ifdef CONFIG_IP_ROUTE_CLASSID dst->value = skb_dst(skb)->tclassid; #else dst->value = 0; #endif } META_COLLECTOR(int_rtiif) { if (unlikely(skb_rtable(skb) == NULL)) *err = -1; else dst->value = inet_iif(skb); } /************************************************************************** * Socket Attributes **************************************************************************/ #define skip_nonlocal(skb) \ (unlikely(skb->sk == NULL)) META_COLLECTOR(int_sk_family) { if (skip_nonlocal(skb)) { *err = -1; return; } dst->value = skb->sk->sk_family; } META_COLLECTOR(int_sk_state) { if (skip_nonlocal(skb)) { *err = -1; return; } dst->value = skb->sk->sk_state; } META_COLLECTOR(int_sk_reuse) { if (skip_nonlocal(skb)) { *err = -1; return; } dst->value = skb->sk->sk_reuse; } META_COLLECTOR(int_sk_bound_if) { if (skip_nonlocal(skb)) { *err = -1; return; } /* No error if bound_dev_if is 0, legal userspace check */ dst->value = skb->sk->sk_bound_dev_if; } META_COLLECTOR(var_sk_bound_if) { int bound_dev_if; if (skip_nonlocal(skb)) { *err = -1; return; } bound_dev_if = READ_ONCE(skb->sk->sk_bound_dev_if); if (bound_dev_if == 0) { dst->value = (unsigned long) "any"; dst->len = 3; } else { struct net_device *dev; rcu_read_lock(); dev = dev_get_by_index_rcu(sock_net(skb->sk), bound_dev_if); *err = var_dev(dev, dst); rcu_read_unlock(); } } META_COLLECTOR(int_sk_refcnt) { if (skip_nonlocal(skb)) { *err = -1; return; } dst->value = refcount_read(&skb->sk->sk_refcnt); } META_COLLECTOR(int_sk_rcvbuf) { const struct sock *sk = skb_to_full_sk(skb); if (!sk) { *err = -1; return; } dst->value = sk->sk_rcvbuf; } META_COLLECTOR(int_sk_shutdown) { const struct sock *sk = skb_to_full_sk(skb); if (!sk) { *err = -1; return; } dst->value = sk->sk_shutdown; } META_COLLECTOR(int_sk_proto) { const struct sock *sk = skb_to_full_sk(skb); if (!sk) { *err = -1; return; } dst->value = sk->sk_protocol; } META_COLLECTOR(int_sk_type) { const struct sock *sk = skb_to_full_sk(skb); if (!sk) { *err = -1; return; } dst->value = sk->sk_type; } META_COLLECTOR(int_sk_rmem_alloc) { const struct sock *sk = skb_to_full_sk(skb); if (!sk) { *err = -1; return; } dst->value = sk_rmem_alloc_get(sk); } META_COLLECTOR(int_sk_wmem_alloc) { const struct sock *sk = skb_to_full_sk(skb); if (!sk) { *err = -1; return; } dst->value = sk_wmem_alloc_get(sk); } META_COLLECTOR(int_sk_omem_alloc) { const struct sock *sk = skb_to_full_sk(skb); if (!sk) { *err = -1; return; } dst->value = atomic_read(&sk->sk_omem_alloc); } META_COLLECTOR(int_sk_rcv_qlen) { const struct sock *sk = skb_to_full_sk(skb); if (!sk) { *err = -1; return; } dst->value = sk->sk_receive_queue.qlen; } META_COLLECTOR(int_sk_snd_qlen) { const struct sock *sk = skb_to_full_sk(skb); if (!sk) { *err = -1; return; } dst->value = sk->sk_write_queue.qlen; } META_COLLECTOR(int_sk_wmem_queued) { const struct sock *sk = skb_to_full_sk(skb); if (!sk) { *err = -1; return; } dst->value = READ_ONCE(sk->sk_wmem_queued); } META_COLLECTOR(int_sk_fwd_alloc) { const struct sock *sk = skb_to_full_sk(skb); if (!sk) { *err = -1; return; } dst->value = READ_ONCE(sk->sk_forward_alloc); } META_COLLECTOR(int_sk_sndbuf) { const struct sock *sk = skb_to_full_sk(skb); if (!sk) { *err = -1; return; } dst->value = sk->sk_sndbuf; } META_COLLECTOR(int_sk_alloc) { const struct sock *sk = skb_to_full_sk(skb); if (!sk) { *err = -1; return; } dst->value = (__force int) sk->sk_allocation; } META_COLLECTOR(int_sk_hash) { if (skip_nonlocal(skb)) { *err = -1; return; } dst->value = skb->sk->sk_hash; } META_COLLECTOR(int_sk_lingertime) { const struct sock *sk = skb_to_full_sk(skb); if (!sk) { *err = -1; return; } dst->value = READ_ONCE(sk->sk_lingertime) / HZ; } META_COLLECTOR(int_sk_err_qlen) { const struct sock *sk = skb_to_full_sk(skb); if (!sk) { *err = -1; return; } dst->value = sk->sk_error_queue.qlen; } META_COLLECTOR(int_sk_ack_bl) { const struct sock *sk = skb_to_full_sk(skb); if (!sk) { *err = -1; return; } dst->value = READ_ONCE(sk->sk_ack_backlog); } META_COLLECTOR(int_sk_max_ack_bl) { const struct sock *sk = skb_to_full_sk(skb); if (!sk) { *err = -1; return; } dst->value = READ_ONCE(sk->sk_max_ack_backlog); } META_COLLECTOR(int_sk_prio) { const struct sock *sk = skb_to_full_sk(skb); if (!sk) { *err = -1; return; } dst->value = READ_ONCE(sk->sk_priority); } META_COLLECTOR(int_sk_rcvlowat) { const struct sock *sk = skb_to_full_sk(skb); if (!sk) { *err = -1; return; } dst->value = READ_ONCE(sk->sk_rcvlowat); } META_COLLECTOR(int_sk_rcvtimeo) { const struct sock *sk = skb_to_full_sk(skb); if (!sk) { *err = -1; return; } dst->value = READ_ONCE(sk->sk_rcvtimeo) / HZ; } META_COLLECTOR(int_sk_sndtimeo) { const struct sock *sk = skb_to_full_sk(skb); if (!sk) { *err = -1; return; } dst->value = READ_ONCE(sk->sk_sndtimeo) / HZ; } META_COLLECTOR(int_sk_sendmsg_off) { const struct sock *sk = skb_to_full_sk(skb); if (!sk) { *err = -1; return; } dst->value = sk->sk_frag.offset; } META_COLLECTOR(int_sk_write_pend) { const struct sock *sk = skb_to_full_sk(skb); if (!sk) { *err = -1; return; } dst->value = sk->sk_write_pending; } /************************************************************************** * Meta value collectors assignment table **************************************************************************/ struct meta_ops { void (*get)(struct sk_buff *, struct tcf_pkt_info *, struct meta_value *, struct meta_obj *, int *); }; #define META_ID(name) TCF_META_ID_##name #define META_FUNC(name) { .get = meta_##name } /* Meta value operations table listing all meta value collectors and * assigns them to a type and meta id. */ static struct meta_ops __meta_ops[TCF_META_TYPE_MAX + 1][TCF_META_ID_MAX + 1] = { [TCF_META_TYPE_VAR] = { [META_ID(DEV)] = META_FUNC(var_dev), [META_ID(SK_BOUND_IF)] = META_FUNC(var_sk_bound_if), }, [TCF_META_TYPE_INT] = { [META_ID(RANDOM)] = META_FUNC(int_random), [META_ID(LOADAVG_0)] = META_FUNC(int_loadavg_0), [META_ID(LOADAVG_1)] = META_FUNC(int_loadavg_1), [META_ID(LOADAVG_2)] = META_FUNC(int_loadavg_2), [META_ID(DEV)] = META_FUNC(int_dev), [META_ID(PRIORITY)] = META_FUNC(int_priority), [META_ID(PROTOCOL)] = META_FUNC(int_protocol), [META_ID(PKTTYPE)] = META_FUNC(int_pkttype), [META_ID(PKTLEN)] = META_FUNC(int_pktlen), [META_ID(DATALEN)] = META_FUNC(int_datalen), [META_ID(MACLEN)] = META_FUNC(int_maclen), [META_ID(NFMARK)] = META_FUNC(int_mark), [META_ID(TCINDEX)] = META_FUNC(int_tcindex), [META_ID(RTCLASSID)] = META_FUNC(int_rtclassid), [META_ID(RTIIF)] = META_FUNC(int_rtiif), [META_ID(SK_FAMILY)] = META_FUNC(int_sk_family), [META_ID(SK_STATE)] = META_FUNC(int_sk_state), [META_ID(SK_REUSE)] = META_FUNC(int_sk_reuse), [META_ID(SK_BOUND_IF)] = META_FUNC(int_sk_bound_if), [META_ID(SK_REFCNT)] = META_FUNC(int_sk_refcnt), [META_ID(SK_RCVBUF)] = META_FUNC(int_sk_rcvbuf), [META_ID(SK_SNDBUF)] = META_FUNC(int_sk_sndbuf), [META_ID(SK_SHUTDOWN)] = META_FUNC(int_sk_shutdown), [META_ID(SK_PROTO)] = META_FUNC(int_sk_proto), [META_ID(SK_TYPE)] = META_FUNC(int_sk_type), [META_ID(SK_RMEM_ALLOC)] = META_FUNC(int_sk_rmem_alloc), [META_ID(SK_WMEM_ALLOC)] = META_FUNC(int_sk_wmem_alloc), [META_ID(SK_OMEM_ALLOC)] = META_FUNC(int_sk_omem_alloc), [META_ID(SK_WMEM_QUEUED)] = META_FUNC(int_sk_wmem_queued), [META_ID(SK_RCV_QLEN)] = META_FUNC(int_sk_rcv_qlen), [META_ID(SK_SND_QLEN)] = META_FUNC(int_sk_snd_qlen), [META_ID(SK_ERR_QLEN)] = META_FUNC(int_sk_err_qlen), [META_ID(SK_FORWARD_ALLOCS)] = META_FUNC(int_sk_fwd_alloc), [META_ID(SK_ALLOCS)] = META_FUNC(int_sk_alloc), [META_ID(SK_HASH)] = META_FUNC(int_sk_hash), [META_ID(SK_LINGERTIME)] = META_FUNC(int_sk_lingertime), [META_ID(SK_ACK_BACKLOG)] = META_FUNC(int_sk_ack_bl), [META_ID(SK_MAX_ACK_BACKLOG)] = META_FUNC(int_sk_max_ack_bl), [META_ID(SK_PRIO)] = META_FUNC(int_sk_prio), [META_ID(SK_RCVLOWAT)] = META_FUNC(int_sk_rcvlowat), [META_ID(SK_RCVTIMEO)] = META_FUNC(int_sk_rcvtimeo), [META_ID(SK_SNDTIMEO)] = META_FUNC(int_sk_sndtimeo), [META_ID(SK_SENDMSG_OFF)] = META_FUNC(int_sk_sendmsg_off), [META_ID(SK_WRITE_PENDING)] = META_FUNC(int_sk_write_pend), [META_ID(VLAN_TAG)] = META_FUNC(int_vlan_tag), [META_ID(RXHASH)] = META_FUNC(int_rxhash), } }; static inline struct meta_ops *meta_ops(struct meta_value *val) { return &__meta_ops[meta_type(val)][meta_id(val)]; } /************************************************************************** * Type specific operations for TCF_META_TYPE_VAR **************************************************************************/ static int meta_var_compare(struct meta_obj *a, struct meta_obj *b) { int r = a->len - b->len; if (r == 0) r = memcmp((void *) a->value, (void *) b->value, a->len); return r; } static int meta_var_change(struct meta_value *dst, struct nlattr *nla) { int len = nla_len(nla); dst->val = (unsigned long)kmemdup(nla_data(nla), len, GFP_KERNEL); if (dst->val == 0UL) return -ENOMEM; dst->len = len; return 0; } static void meta_var_destroy(struct meta_value *v) { kfree((void *) v->val); } static void meta_var_apply_extras(struct meta_value *v, struct meta_obj *dst) { int shift = v->hdr.shift; if (shift && shift < dst->len) dst->len -= shift; } static int meta_var_dump(struct sk_buff *skb, struct meta_value *v, int tlv) { if (v->val && v->len && nla_put(skb, tlv, v->len, (void *) v->val)) goto nla_put_failure; return 0; nla_put_failure: return -1; } /************************************************************************** * Type specific operations for TCF_META_TYPE_INT **************************************************************************/ static int meta_int_compare(struct meta_obj *a, struct meta_obj *b) { /* Let gcc optimize it, the unlikely is not really based on * some numbers but jump free code for mismatches seems * more logical. */ if (unlikely(a->value == b->value)) return 0; else if (a->value < b->value) return -1; else return 1; } static int meta_int_change(struct meta_value *dst, struct nlattr *nla) { if (nla_len(nla) >= sizeof(unsigned long)) { dst->val = *(unsigned long *) nla_data(nla); dst->len = sizeof(unsigned long); } else if (nla_len(nla) == sizeof(u32)) { dst->val = nla_get_u32(nla); dst->len = sizeof(u32); } else return -EINVAL; return 0; } static void meta_int_apply_extras(struct meta_value *v, struct meta_obj *dst) { if (v->hdr.shift) dst->value >>= v->hdr.shift; if (v->val) dst->value &= v->val; } static int meta_int_dump(struct sk_buff *skb, struct meta_value *v, int tlv) { if (v->len == sizeof(unsigned long)) { if (nla_put(skb, tlv, sizeof(unsigned long), &v->val)) goto nla_put_failure; } else if (v->len == sizeof(u32)) { if (nla_put_u32(skb, tlv, v->val)) goto nla_put_failure; } return 0; nla_put_failure: return -1; } /************************************************************************** * Type specific operations table **************************************************************************/ struct meta_type_ops { void (*destroy)(struct meta_value *); int (*compare)(struct meta_obj *, struct meta_obj *); int (*change)(struct meta_value *, struct nlattr *); void (*apply_extras)(struct meta_value *, struct meta_obj *); int (*dump)(struct sk_buff *, struct meta_value *, int); }; static const struct meta_type_ops __meta_type_ops[TCF_META_TYPE_MAX + 1] = { [TCF_META_TYPE_VAR] = { .destroy = meta_var_destroy, .compare = meta_var_compare, .change = meta_var_change, .apply_extras = meta_var_apply_extras, .dump = meta_var_dump }, [TCF_META_TYPE_INT] = { .compare = meta_int_compare, .change = meta_int_change, .apply_extras = meta_int_apply_extras, .dump = meta_int_dump } }; static inline const struct meta_type_ops *meta_type_ops(struct meta_value *v) { return &__meta_type_ops[meta_type(v)]; } /************************************************************************** * Core **************************************************************************/ static int meta_get(struct sk_buff *skb, struct tcf_pkt_info *info, struct meta_value *v, struct meta_obj *dst) { int err = 0; if (meta_id(v) == TCF_META_ID_VALUE) { dst->value = v->val; dst->len = v->len; return 0; } meta_ops(v)->get(skb, info, v, dst, &err); if (err < 0) return err; if (meta_type_ops(v)->apply_extras) meta_type_ops(v)->apply_extras(v, dst); return 0; } static int em_meta_match(struct sk_buff *skb, struct tcf_ematch *m, struct tcf_pkt_info *info) { int r; struct meta_match *meta = (struct meta_match *) m->data; struct meta_obj l_value, r_value; if (meta_get(skb, info, &meta->lvalue, &l_value) < 0 || meta_get(skb, info, &meta->rvalue, &r_value) < 0) return 0; r = meta_type_ops(&meta->lvalue)->compare(&l_value, &r_value); switch (meta->lvalue.hdr.op) { case TCF_EM_OPND_EQ: return !r; case TCF_EM_OPND_LT: return r < 0; case TCF_EM_OPND_GT: return r > 0; } return 0; } static void meta_delete(struct meta_match *meta) { if (meta) { const struct meta_type_ops *ops = meta_type_ops(&meta->lvalue); if (ops && ops->destroy) { ops->destroy(&meta->lvalue); ops->destroy(&meta->rvalue); } } kfree(meta); } static inline int meta_change_data(struct meta_value *dst, struct nlattr *nla) { if (nla) { if (nla_len(nla) == 0) return -EINVAL; return meta_type_ops(dst)->change(dst, nla); } return 0; } static inline int meta_is_supported(struct meta_value *val) { return !meta_id(val) || meta_ops(val)->get; } static const struct nla_policy meta_policy[TCA_EM_META_MAX + 1] = { [TCA_EM_META_HDR] = { .len = sizeof(struct tcf_meta_hdr) }, }; static int em_meta_change(struct net *net, void *data, int len, struct tcf_ematch *m) { int err; struct nlattr *tb[TCA_EM_META_MAX + 1]; struct tcf_meta_hdr *hdr; struct meta_match *meta = NULL; err = nla_parse_deprecated(tb, TCA_EM_META_MAX, data, len, meta_policy, NULL); if (err < 0) goto errout; err = -EINVAL; if (tb[TCA_EM_META_HDR] == NULL) goto errout; hdr = nla_data(tb[TCA_EM_META_HDR]); if (TCF_META_TYPE(hdr->left.kind) != TCF_META_TYPE(hdr->right.kind) || TCF_META_TYPE(hdr->left.kind) > TCF_META_TYPE_MAX || TCF_META_ID(hdr->left.kind) > TCF_META_ID_MAX || TCF_META_ID(hdr->right.kind) > TCF_META_ID_MAX) goto errout; meta = kzalloc(sizeof(*meta), GFP_KERNEL); if (meta == NULL) { err = -ENOMEM; goto errout; } memcpy(&meta->lvalue.hdr, &hdr->left, sizeof(hdr->left)); memcpy(&meta->rvalue.hdr, &hdr->right, sizeof(hdr->right)); if (!meta_is_supported(&meta->lvalue) || !meta_is_supported(&meta->rvalue)) { err = -EOPNOTSUPP; goto errout; } if (meta_change_data(&meta->lvalue, tb[TCA_EM_META_LVALUE]) < 0 || meta_change_data(&meta->rvalue, tb[TCA_EM_META_RVALUE]) < 0) goto errout; m->datalen = sizeof(*meta); m->data = (unsigned long) meta; err = 0; errout: if (err && meta) meta_delete(meta); return err; } static void em_meta_destroy(struct tcf_ematch *m) { if (m) meta_delete((struct meta_match *) m->data); } static int em_meta_dump(struct sk_buff *skb, struct tcf_ematch *em) { struct meta_match *meta = (struct meta_match *) em->data; struct tcf_meta_hdr hdr; const struct meta_type_ops *ops; memset(&hdr, 0, sizeof(hdr)); memcpy(&hdr.left, &meta->lvalue.hdr, sizeof(hdr.left)); memcpy(&hdr.right, &meta->rvalue.hdr, sizeof(hdr.right)); if (nla_put(skb, TCA_EM_META_HDR, sizeof(hdr), &hdr)) goto nla_put_failure; ops = meta_type_ops(&meta->lvalue); if (ops->dump(skb, &meta->lvalue, TCA_EM_META_LVALUE) < 0 || ops->dump(skb, &meta->rvalue, TCA_EM_META_RVALUE) < 0) goto nla_put_failure; return 0; nla_put_failure: return -1; } static struct tcf_ematch_ops em_meta_ops = { .kind = TCF_EM_META, .change = em_meta_change, .match = em_meta_match, .destroy = em_meta_destroy, .dump = em_meta_dump, .owner = THIS_MODULE, .link = LIST_HEAD_INIT(em_meta_ops.link) }; static int __init init_em_meta(void) { return tcf_em_register(&em_meta_ops); } static void __exit exit_em_meta(void) { tcf_em_unregister(&em_meta_ops); } MODULE_DESCRIPTION("ematch classifier for various internal kernel metadata, skb metadata and sk metadata"); MODULE_LICENSE("GPL"); module_init(init_em_meta); module_exit(exit_em_meta); MODULE_ALIAS_TCF_EMATCH(TCF_EM_META);
3 3 3 2 1 3 3 3 3 3 1 1 2 2 2 2 1 10 10 10 10 10 10 8 2 1 2 7 4 7 7 7 7 3 7 7 3 7 6 3 3 3 4 4 3 9 5 3 4 4 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 // SPDX-License-Identifier: GPL-2.0-only #include <linux/net_tstamp.h> #include <linux/phy.h> #include <linux/phy_link_topology.h> #include <linux/ptp_clock_kernel.h> #include <net/netdev_lock.h> #include "netlink.h" #include "common.h" #include "bitset.h" #include "ts.h" struct tsinfo_req_info { struct ethnl_req_info base; struct hwtstamp_provider_desc hwprov_desc; }; struct tsinfo_reply_data { struct ethnl_reply_data base; struct kernel_ethtool_ts_info ts_info; struct ethtool_ts_stats stats; }; #define TSINFO_REQINFO(__req_base) \ container_of(__req_base, struct tsinfo_req_info, base) #define TSINFO_REPDATA(__reply_base) \ container_of(__reply_base, struct tsinfo_reply_data, base) #define ETHTOOL_TS_STAT_CNT \ (__ETHTOOL_A_TS_STAT_CNT - (ETHTOOL_A_TS_STAT_UNSPEC + 1)) const struct nla_policy ethnl_tsinfo_get_policy[ETHTOOL_A_TSINFO_MAX + 1] = { [ETHTOOL_A_TSINFO_HEADER] = NLA_POLICY_NESTED(ethnl_header_policy_stats), [ETHTOOL_A_TSINFO_HWTSTAMP_PROVIDER] = NLA_POLICY_NESTED(ethnl_ts_hwtst_prov_policy), }; int ts_parse_hwtst_provider(const struct nlattr *nest, struct hwtstamp_provider_desc *hwprov_desc, struct netlink_ext_ack *extack, bool *mod) { struct nlattr *tb[ARRAY_SIZE(ethnl_ts_hwtst_prov_policy)]; int ret; ret = nla_parse_nested(tb, ARRAY_SIZE(ethnl_ts_hwtst_prov_policy) - 1, nest, ethnl_ts_hwtst_prov_policy, extack); if (ret < 0) return ret; if (NL_REQ_ATTR_CHECK(extack, nest, tb, ETHTOOL_A_TS_HWTSTAMP_PROVIDER_INDEX) || NL_REQ_ATTR_CHECK(extack, nest, tb, ETHTOOL_A_TS_HWTSTAMP_PROVIDER_QUALIFIER)) return -EINVAL; ethnl_update_u32(&hwprov_desc->index, tb[ETHTOOL_A_TS_HWTSTAMP_PROVIDER_INDEX], mod); ethnl_update_u32(&hwprov_desc->qualifier, tb[ETHTOOL_A_TS_HWTSTAMP_PROVIDER_QUALIFIER], mod); return 0; } static int tsinfo_parse_request(struct ethnl_req_info *req_base, struct nlattr **tb, struct netlink_ext_ack *extack) { struct tsinfo_req_info *req = TSINFO_REQINFO(req_base); bool mod = false; req->hwprov_desc.index = -1; if (!tb[ETHTOOL_A_TSINFO_HWTSTAMP_PROVIDER]) return 0; return ts_parse_hwtst_provider(tb[ETHTOOL_A_TSINFO_HWTSTAMP_PROVIDER], &req->hwprov_desc, extack, &mod); } static int tsinfo_prepare_data(const struct ethnl_req_info *req_base, struct ethnl_reply_data *reply_base, const struct genl_info *info) { struct tsinfo_reply_data *data = TSINFO_REPDATA(reply_base); struct tsinfo_req_info *req = TSINFO_REQINFO(req_base); struct net_device *dev = reply_base->dev; int ret; ret = ethnl_ops_begin(dev); if (ret < 0) return ret; if (req->hwprov_desc.index != -1) { ret = ethtool_get_ts_info_by_phc(dev, &data->ts_info, &req->hwprov_desc); ethnl_ops_complete(dev); return ret; } if (req_base->flags & ETHTOOL_FLAG_STATS) { ethtool_stats_init((u64 *)&data->stats, sizeof(data->stats) / sizeof(u64)); if (dev->ethtool_ops->get_ts_stats) dev->ethtool_ops->get_ts_stats(dev, &data->stats); } ret = __ethtool_get_ts_info(dev, &data->ts_info); ethnl_ops_complete(dev); return ret; } static int tsinfo_reply_size(const struct ethnl_req_info *req_base, const struct ethnl_reply_data *reply_base) { const struct tsinfo_reply_data *data = TSINFO_REPDATA(reply_base); bool compact = req_base->flags & ETHTOOL_FLAG_COMPACT_BITSETS; const struct kernel_ethtool_ts_info *ts_info = &data->ts_info; int len = 0; int ret; BUILD_BUG_ON(__SOF_TIMESTAMPING_CNT > 32); BUILD_BUG_ON(__HWTSTAMP_TX_CNT > 32); BUILD_BUG_ON(__HWTSTAMP_FILTER_CNT > 32); if (ts_info->so_timestamping) { ret = ethnl_bitset32_size(&ts_info->so_timestamping, NULL, __SOF_TIMESTAMPING_CNT, sof_timestamping_names, compact); if (ret < 0) return ret; len += ret; /* _TSINFO_TIMESTAMPING */ } if (ts_info->tx_types) { ret = ethnl_bitset32_size(&ts_info->tx_types, NULL, __HWTSTAMP_TX_CNT, ts_tx_type_names, compact); if (ret < 0) return ret; len += ret; /* _TSINFO_TX_TYPES */ } if (ts_info->rx_filters) { ret = ethnl_bitset32_size(&ts_info->rx_filters, NULL, __HWTSTAMP_FILTER_CNT, ts_rx_filter_names, compact); if (ret < 0) return ret; len += ret; /* _TSINFO_RX_FILTERS */ } if (ts_info->phc_index >= 0) { len += nla_total_size(sizeof(u32)); /* _TSINFO_PHC_INDEX */ /* _TSINFO_HWTSTAMP_PROVIDER */ len += nla_total_size(0) + 2 * nla_total_size(sizeof(u32)); } if (ts_info->phc_source) { len += nla_total_size(sizeof(u32)); /* _TSINFO_HWTSTAMP_SOURCE */ if (ts_info->phc_phyindex) /* _TSINFO_HWTSTAMP_PHYINDEX */ len += nla_total_size(sizeof(u32)); } if (req_base->flags & ETHTOOL_FLAG_STATS) len += nla_total_size(0) + /* _TSINFO_STATS */ nla_total_size_64bit(sizeof(u64)) * ETHTOOL_TS_STAT_CNT; return len; } static int tsinfo_put_stat(struct sk_buff *skb, u64 val, u16 attrtype) { if (val == ETHTOOL_STAT_NOT_SET) return 0; if (nla_put_uint(skb, attrtype, val)) return -EMSGSIZE; return 0; } static int tsinfo_put_stats(struct sk_buff *skb, const struct ethtool_ts_stats *stats) { struct nlattr *nest; nest = nla_nest_start(skb, ETHTOOL_A_TSINFO_STATS); if (!nest) return -EMSGSIZE; if (tsinfo_put_stat(skb, stats->tx_stats.pkts, ETHTOOL_A_TS_STAT_TX_PKTS) || tsinfo_put_stat(skb, stats->tx_stats.onestep_pkts_unconfirmed, ETHTOOL_A_TS_STAT_TX_ONESTEP_PKTS_UNCONFIRMED) || tsinfo_put_stat(skb, stats->tx_stats.lost, ETHTOOL_A_TS_STAT_TX_LOST) || tsinfo_put_stat(skb, stats->tx_stats.err, ETHTOOL_A_TS_STAT_TX_ERR)) goto err_cancel; nla_nest_end(skb, nest); return 0; err_cancel: nla_nest_cancel(skb, nest); return -EMSGSIZE; } static int tsinfo_fill_reply(struct sk_buff *skb, const struct ethnl_req_info *req_base, const struct ethnl_reply_data *reply_base) { const struct tsinfo_reply_data *data = TSINFO_REPDATA(reply_base); bool compact = req_base->flags & ETHTOOL_FLAG_COMPACT_BITSETS; const struct kernel_ethtool_ts_info *ts_info = &data->ts_info; int ret; if (ts_info->so_timestamping) { ret = ethnl_put_bitset32(skb, ETHTOOL_A_TSINFO_TIMESTAMPING, &ts_info->so_timestamping, NULL, __SOF_TIMESTAMPING_CNT, sof_timestamping_names, compact); if (ret < 0) return ret; } if (ts_info->tx_types) { ret = ethnl_put_bitset32(skb, ETHTOOL_A_TSINFO_TX_TYPES, &ts_info->tx_types, NULL, __HWTSTAMP_TX_CNT, ts_tx_type_names, compact); if (ret < 0) return ret; } if (ts_info->rx_filters) { ret = ethnl_put_bitset32(skb, ETHTOOL_A_TSINFO_RX_FILTERS, &ts_info->rx_filters, NULL, __HWTSTAMP_FILTER_CNT, ts_rx_filter_names, compact); if (ret < 0) return ret; } if (ts_info->phc_index >= 0) { struct nlattr *nest; ret = nla_put_u32(skb, ETHTOOL_A_TSINFO_PHC_INDEX, ts_info->phc_index); if (ret) return -EMSGSIZE; nest = nla_nest_start(skb, ETHTOOL_A_TSINFO_HWTSTAMP_PROVIDER); if (!nest) return -EMSGSIZE; if (nla_put_u32(skb, ETHTOOL_A_TS_HWTSTAMP_PROVIDER_INDEX, ts_info->phc_index) || nla_put_u32(skb, ETHTOOL_A_TS_HWTSTAMP_PROVIDER_QUALIFIER, ts_info->phc_qualifier)) { nla_nest_cancel(skb, nest); return -EMSGSIZE; } nla_nest_end(skb, nest); } if (ts_info->phc_source) { if (nla_put_u32(skb, ETHTOOL_A_TSINFO_HWTSTAMP_SOURCE, ts_info->phc_source)) return -EMSGSIZE; if (ts_info->phc_phyindex && nla_put_u32(skb, ETHTOOL_A_TSINFO_HWTSTAMP_PHYINDEX, ts_info->phc_phyindex)) return -EMSGSIZE; } if (req_base->flags & ETHTOOL_FLAG_STATS && tsinfo_put_stats(skb, &data->stats)) return -EMSGSIZE; return 0; } struct ethnl_tsinfo_dump_ctx { struct tsinfo_req_info *req_info; struct tsinfo_reply_data *reply_data; unsigned long pos_ifindex; bool netdev_dump_done; unsigned long pos_phyindex; enum hwtstamp_provider_qualifier pos_phcqualifier; }; static void *ethnl_tsinfo_prepare_dump(struct sk_buff *skb, struct net_device *dev, struct tsinfo_reply_data *reply_data, struct netlink_callback *cb) { struct ethnl_tsinfo_dump_ctx *ctx = (void *)cb->ctx; void *ehdr = NULL; ehdr = ethnl_dump_put(skb, cb, ETHTOOL_MSG_TSINFO_GET_REPLY); if (!ehdr) return ERR_PTR(-EMSGSIZE); reply_data = ctx->reply_data; memset(reply_data, 0, sizeof(*reply_data)); reply_data->base.dev = dev; reply_data->ts_info.cmd = ETHTOOL_GET_TS_INFO; reply_data->ts_info.phc_index = -1; return ehdr; } static int ethnl_tsinfo_end_dump(struct sk_buff *skb, struct net_device *dev, struct tsinfo_req_info *req_info, struct tsinfo_reply_data *reply_data, void *ehdr) { int ret; reply_data->ts_info.so_timestamping |= SOF_TIMESTAMPING_RX_SOFTWARE | SOF_TIMESTAMPING_SOFTWARE; ret = ethnl_fill_reply_header(skb, dev, ETHTOOL_A_TSINFO_HEADER); if (ret < 0) return ret; ret = tsinfo_fill_reply(skb, &req_info->base, &reply_data->base); if (ret < 0) return ret; reply_data->base.dev = NULL; genlmsg_end(skb, ehdr); return ret; } static int ethnl_tsinfo_dump_one_phydev(struct sk_buff *skb, struct net_device *dev, struct phy_device *phydev, struct netlink_callback *cb) { struct ethnl_tsinfo_dump_ctx *ctx = (void *)cb->ctx; struct tsinfo_reply_data *reply_data; struct tsinfo_req_info *req_info; void *ehdr = NULL; int ret = 0; if (!phy_has_tsinfo(phydev)) return -EOPNOTSUPP; reply_data = ctx->reply_data; req_info = ctx->req_info; ehdr = ethnl_tsinfo_prepare_dump(skb, dev, reply_data, cb); if (IS_ERR(ehdr)) return PTR_ERR(ehdr); ret = phy_ts_info(phydev, &reply_data->ts_info); if (ret < 0) goto err; if (reply_data->ts_info.phc_index >= 0) { reply_data->ts_info.phc_source = HWTSTAMP_SOURCE_PHYLIB; reply_data->ts_info.phc_phyindex = phydev->phyindex; } ret = ethnl_tsinfo_end_dump(skb, dev, req_info, reply_data, ehdr); if (ret < 0) goto err; return ret; err: genlmsg_cancel(skb, ehdr); return ret; } static int ethnl_tsinfo_dump_one_netdev(struct sk_buff *skb, struct net_device *dev, struct netlink_callback *cb) { struct ethnl_tsinfo_dump_ctx *ctx = (void *)cb->ctx; const struct ethtool_ops *ops = dev->ethtool_ops; struct tsinfo_reply_data *reply_data; struct tsinfo_req_info *req_info; void *ehdr = NULL; int ret = 0; if (!ops->get_ts_info) return -EOPNOTSUPP; reply_data = ctx->reply_data; req_info = ctx->req_info; for (; ctx->pos_phcqualifier < HWTSTAMP_PROVIDER_QUALIFIER_CNT; ctx->pos_phcqualifier++) { if (!net_support_hwtstamp_qualifier(dev, ctx->pos_phcqualifier)) continue; ehdr = ethnl_tsinfo_prepare_dump(skb, dev, reply_data, cb); if (IS_ERR(ehdr)) { ret = PTR_ERR(ehdr); goto err; } reply_data->ts_info.phc_qualifier = ctx->pos_phcqualifier; ret = ops->get_ts_info(dev, &reply_data->ts_info); if (ret < 0) goto err; if (reply_data->ts_info.phc_index >= 0) reply_data->ts_info.phc_source = HWTSTAMP_SOURCE_NETDEV; ret = ethnl_tsinfo_end_dump(skb, dev, req_info, reply_data, ehdr); if (ret < 0) goto err; } return ret; err: genlmsg_cancel(skb, ehdr); return ret; } static int ethnl_tsinfo_dump_one_net_topo(struct sk_buff *skb, struct net_device *dev, struct netlink_callback *cb) { struct ethnl_tsinfo_dump_ctx *ctx = (void *)cb->ctx; struct phy_device_node *pdn; int ret = 0; if (!ctx->netdev_dump_done) { ret = ethnl_tsinfo_dump_one_netdev(skb, dev, cb); if (ret < 0 && ret != -EOPNOTSUPP) return ret; ctx->netdev_dump_done = true; } if (!dev->link_topo) { if (phy_has_tsinfo(dev->phydev)) { ret = ethnl_tsinfo_dump_one_phydev(skb, dev, dev->phydev, cb); if (ret < 0 && ret != -EOPNOTSUPP) return ret; } return 0; } xa_for_each_start(&dev->link_topo->phys, ctx->pos_phyindex, pdn, ctx->pos_phyindex) { if (phy_has_tsinfo(pdn->phy)) { ret = ethnl_tsinfo_dump_one_phydev(skb, dev, pdn->phy, cb); if (ret < 0 && ret != -EOPNOTSUPP) return ret; } } return ret; } int ethnl_tsinfo_dumpit(struct sk_buff *skb, struct netlink_callback *cb) { struct ethnl_tsinfo_dump_ctx *ctx = (void *)cb->ctx; struct net *net = sock_net(skb->sk); struct net_device *dev; int ret = 0; rtnl_lock(); if (ctx->req_info->base.dev) { dev = ctx->req_info->base.dev; netdev_lock_ops(dev); ret = ethnl_tsinfo_dump_one_net_topo(skb, dev, cb); netdev_unlock_ops(dev); } else { for_each_netdev_dump(net, dev, ctx->pos_ifindex) { netdev_lock_ops(dev); ret = ethnl_tsinfo_dump_one_net_topo(skb, dev, cb); netdev_unlock_ops(dev); if (ret < 0 && ret != -EOPNOTSUPP) break; ctx->pos_phyindex = 0; ctx->netdev_dump_done = false; ctx->pos_phcqualifier = HWTSTAMP_PROVIDER_QUALIFIER_PRECISE; } } rtnl_unlock(); return ret; } int ethnl_tsinfo_start(struct netlink_callback *cb) { const struct genl_dumpit_info *info = genl_dumpit_info(cb); struct ethnl_tsinfo_dump_ctx *ctx = (void *)cb->ctx; struct nlattr **tb = info->info.attrs; struct tsinfo_reply_data *reply_data; struct tsinfo_req_info *req_info; int ret; BUILD_BUG_ON(sizeof(*ctx) > sizeof(cb->ctx)); req_info = kzalloc(sizeof(*req_info), GFP_KERNEL); if (!req_info) return -ENOMEM; reply_data = kzalloc(sizeof(*reply_data), GFP_KERNEL); if (!reply_data) { ret = -ENOMEM; goto free_req_info; } ret = ethnl_parse_header_dev_get(&req_info->base, tb[ETHTOOL_A_TSINFO_HEADER], sock_net(cb->skb->sk), cb->extack, false); if (ret < 0) goto free_reply_data; ctx->req_info = req_info; ctx->reply_data = reply_data; ctx->pos_ifindex = 0; ctx->pos_phyindex = 0; ctx->netdev_dump_done = false; ctx->pos_phcqualifier = HWTSTAMP_PROVIDER_QUALIFIER_PRECISE; return 0; free_reply_data: kfree(reply_data); free_req_info: kfree(req_info); return ret; } int ethnl_tsinfo_done(struct netlink_callback *cb) { struct ethnl_tsinfo_dump_ctx *ctx = (void *)cb->ctx; struct tsinfo_req_info *req_info = ctx->req_info; ethnl_parse_header_dev_put(&req_info->base); kfree(ctx->reply_data); kfree(ctx->req_info); return 0; } const struct ethnl_request_ops ethnl_tsinfo_request_ops = { .request_cmd = ETHTOOL_MSG_TSINFO_GET, .reply_cmd = ETHTOOL_MSG_TSINFO_GET_REPLY, .hdr_attr = ETHTOOL_A_TSINFO_HEADER, .req_info_size = sizeof(struct tsinfo_req_info), .reply_data_size = sizeof(struct tsinfo_reply_data), .parse_request = tsinfo_parse_request, .prepare_data = tsinfo_prepare_data, .reply_size = tsinfo_reply_size, .fill_reply = tsinfo_fill_reply, };
142 10 14 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _KBD_KERN_H #define _KBD_KERN_H #include <linux/tty.h> #include <linux/interrupt.h> #include <linux/keyboard.h> extern char *func_table[MAX_NR_FUNC]; /* * kbd->xxx contains the VC-local things (flag settings etc..) * * Note: externally visible are LED_SCR, LED_NUM, LED_CAP defined in kd.h * The code in KDGETLED / KDSETLED depends on the internal and * external order being the same. * * Note: lockstate is used as index in the array key_map. */ struct kbd_struct { unsigned char lockstate; /* 8 modifiers - the names do not have any meaning at all; they can be associated to arbitrarily chosen keys */ #define VC_SHIFTLOCK KG_SHIFT /* shift lock mode */ #define VC_ALTGRLOCK KG_ALTGR /* altgr lock mode */ #define VC_CTRLLOCK KG_CTRL /* control lock mode */ #define VC_ALTLOCK KG_ALT /* alt lock mode */ #define VC_SHIFTLLOCK KG_SHIFTL /* shiftl lock mode */ #define VC_SHIFTRLOCK KG_SHIFTR /* shiftr lock mode */ #define VC_CTRLLLOCK KG_CTRLL /* ctrll lock mode */ #define VC_CTRLRLOCK KG_CTRLR /* ctrlr lock mode */ unsigned char slockstate; /* for `sticky' Shift, Ctrl, etc. */ unsigned char ledmode:1; #define LED_SHOW_FLAGS 0 /* traditional state */ #define LED_SHOW_IOCTL 1 /* only change leds upon ioctl */ unsigned char ledflagstate:4; /* flags, not lights */ unsigned char default_ledflagstate:4; #define VC_SCROLLOCK 0 /* scroll-lock mode */ #define VC_NUMLOCK 1 /* numeric lock mode */ #define VC_CAPSLOCK 2 /* capslock mode */ #define VC_KANALOCK 3 /* kanalock mode */ unsigned char kbdmode:3; /* one 3-bit value */ #define VC_XLATE 0 /* translate keycodes using keymap */ #define VC_MEDIUMRAW 1 /* medium raw (keycode) mode */ #define VC_RAW 2 /* raw (scancode) mode */ #define VC_UNICODE 3 /* Unicode mode */ #define VC_OFF 4 /* disabled mode */ unsigned char modeflags:5; #define VC_APPLIC 0 /* application key mode */ #define VC_CKMODE 1 /* cursor key mode */ #define VC_REPEAT 2 /* keyboard repeat */ #define VC_CRLF 3 /* 0 - enter sends CR, 1 - enter sends CRLF */ #define VC_META 4 /* 0 - meta, 1 - meta=prefix with ESC */ }; extern int kbd_init(void); extern void setledstate(struct kbd_struct *kbd, unsigned int led); extern int do_poke_blanked_console; extern void (*kbd_ledfunc)(unsigned int led); extern int set_console(int nr); extern void schedule_console_callback(void); static inline int vc_kbd_mode(struct kbd_struct * kbd, int flag) { return ((kbd->modeflags >> flag) & 1); } static inline int vc_kbd_led(struct kbd_struct * kbd, int flag) { return ((kbd->ledflagstate >> flag) & 1); } static inline void set_vc_kbd_mode(struct kbd_struct * kbd, int flag) { kbd->modeflags |= 1 << flag; } static inline void set_vc_kbd_led(struct kbd_struct * kbd, int flag) { kbd->ledflagstate |= 1 << flag; } static inline void clr_vc_kbd_mode(struct kbd_struct * kbd, int flag) { kbd->modeflags &= ~(1 << flag); } static inline void clr_vc_kbd_led(struct kbd_struct * kbd, int flag) { kbd->ledflagstate &= ~(1 << flag); } static inline void chg_vc_kbd_lock(struct kbd_struct * kbd, int flag) { kbd->lockstate ^= 1 << flag; } static inline void chg_vc_kbd_slock(struct kbd_struct * kbd, int flag) { kbd->slockstate ^= 1 << flag; } static inline void chg_vc_kbd_mode(struct kbd_struct * kbd, int flag) { kbd->modeflags ^= 1 << flag; } static inline void chg_vc_kbd_led(struct kbd_struct * kbd, int flag) { kbd->ledflagstate ^= 1 << flag; } #define U(x) ((x) ^ 0xf000) #define BRL_UC_ROW 0x2800 /* keyboard.c */ struct console; void vt_set_leds_compute_shiftstate(void); /* defkeymap.c */ extern unsigned int keymap_count; #endif
1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 // SPDX-License-Identifier: GPL-2.0 /* * Miscellaneous cgroup controller * * Copyright 2020 Google LLC * Author: Vipin Sharma <vipinsh@google.com> */ #include <linux/limits.h> #include <linux/cgroup.h> #include <linux/errno.h> #include <linux/atomic.h> #include <linux/slab.h> #include <linux/misc_cgroup.h> #define MAX_STR "max" #define MAX_NUM U64_MAX /* Miscellaneous res name, keep it in sync with enum misc_res_type */ static const char *const misc_res_name[] = { #ifdef CONFIG_KVM_AMD_SEV /* AMD SEV ASIDs resource */ "sev", /* AMD SEV-ES ASIDs resource */ "sev_es", #endif #ifdef CONFIG_INTEL_TDX_HOST /* Intel TDX HKIDs resource */ "tdx", #endif }; /* Root misc cgroup */ static struct misc_cg root_cg; /* * Miscellaneous resources capacity for the entire machine. 0 capacity means * resource is not initialized or not present in the host. * * root_cg.max and capacity are independent of each other. root_cg.max can be * more than the actual capacity. We are using Limits resource distribution * model of cgroup for miscellaneous controller. */ static u64 misc_res_capacity[MISC_CG_RES_TYPES]; /** * parent_misc() - Get the parent of the passed misc cgroup. * @cgroup: cgroup whose parent needs to be fetched. * * Context: Any context. * Return: * * struct misc_cg* - Parent of the @cgroup. * * %NULL - If @cgroup is null or the passed cgroup does not have a parent. */ static struct misc_cg *parent_misc(struct misc_cg *cgroup) { return cgroup ? css_misc(cgroup->css.parent) : NULL; } /** * valid_type() - Check if @type is valid or not. * @type: misc res type. * * Context: Any context. * Return: * * true - If valid type. * * false - If not valid type. */ static inline bool valid_type(enum misc_res_type type) { return type >= 0 && type < MISC_CG_RES_TYPES; } /** * misc_cg_set_capacity() - Set the capacity of the misc cgroup res. * @type: Type of the misc res. * @capacity: Supported capacity of the misc res on the host. * * If capacity is 0 then the charging a misc cgroup fails for that type. * * Context: Any context. * Return: * * %0 - Successfully registered the capacity. * * %-EINVAL - If @type is invalid. */ int misc_cg_set_capacity(enum misc_res_type type, u64 capacity) { if (!valid_type(type)) return -EINVAL; WRITE_ONCE(misc_res_capacity[type], capacity); return 0; } EXPORT_SYMBOL_GPL(misc_cg_set_capacity); /** * misc_cg_cancel_charge() - Cancel the charge from the misc cgroup. * @type: Misc res type in misc cg to cancel the charge from. * @cg: Misc cgroup to cancel charge from. * @amount: Amount to cancel. * * Context: Any context. */ static void misc_cg_cancel_charge(enum misc_res_type type, struct misc_cg *cg, u64 amount) { WARN_ONCE(atomic64_add_negative(-amount, &cg->res[type].usage), "misc cgroup resource %s became less than 0", misc_res_name[type]); } static void misc_cg_update_watermark(struct misc_res *res, u64 new_usage) { u64 old; while (true) { old = atomic64_read(&res->watermark); if (new_usage <= old) break; if (atomic64_cmpxchg(&res->watermark, old, new_usage) == old) break; } } static void misc_cg_event(enum misc_res_type type, struct misc_cg *cg) { atomic64_inc(&cg->res[type].events_local); cgroup_file_notify(&cg->events_local_file); for (; parent_misc(cg); cg = parent_misc(cg)) { atomic64_inc(&cg->res[type].events); cgroup_file_notify(&cg->events_file); } } /** * misc_cg_try_charge() - Try charging the misc cgroup. * @type: Misc res type to charge. * @cg: Misc cgroup which will be charged. * @amount: Amount to charge. * * Charge @amount to the misc cgroup. Caller must use the same cgroup during * the uncharge call. * * Context: Any context. * Return: * * %0 - If successfully charged. * * -EINVAL - If @type is invalid or misc res has 0 capacity. * * -EBUSY - If max limit will be crossed or total usage will be more than the * capacity. */ int misc_cg_try_charge(enum misc_res_type type, struct misc_cg *cg, u64 amount) { struct misc_cg *i, *j; int ret; struct misc_res *res; u64 new_usage; if (!(valid_type(type) && cg && READ_ONCE(misc_res_capacity[type]))) return -EINVAL; if (!amount) return 0; for (i = cg; i; i = parent_misc(i)) { res = &i->res[type]; new_usage = atomic64_add_return(amount, &res->usage); if (new_usage > READ_ONCE(res->max) || new_usage > READ_ONCE(misc_res_capacity[type])) { ret = -EBUSY; goto err_charge; } misc_cg_update_watermark(res, new_usage); } return 0; err_charge: misc_cg_event(type, i); for (j = cg; j != i; j = parent_misc(j)) misc_cg_cancel_charge(type, j, amount); misc_cg_cancel_charge(type, i, amount); return ret; } EXPORT_SYMBOL_GPL(misc_cg_try_charge); /** * misc_cg_uncharge() - Uncharge the misc cgroup. * @type: Misc res type which was charged. * @cg: Misc cgroup which will be uncharged. * @amount: Charged amount. * * Context: Any context. */ void misc_cg_uncharge(enum misc_res_type type, struct misc_cg *cg, u64 amount) { struct misc_cg *i; if (!(amount && valid_type(type) && cg)) return; for (i = cg; i; i = parent_misc(i)) misc_cg_cancel_charge(type, i, amount); } EXPORT_SYMBOL_GPL(misc_cg_uncharge); /** * misc_cg_max_show() - Show the misc cgroup max limit. * @sf: Interface file * @v: Arguments passed * * Context: Any context. * Return: 0 to denote successful print. */ static int misc_cg_max_show(struct seq_file *sf, void *v) { int i; struct misc_cg *cg = css_misc(seq_css(sf)); u64 max; for (i = 0; i < MISC_CG_RES_TYPES; i++) { if (READ_ONCE(misc_res_capacity[i])) { max = READ_ONCE(cg->res[i].max); if (max == MAX_NUM) seq_printf(sf, "%s max\n", misc_res_name[i]); else seq_printf(sf, "%s %llu\n", misc_res_name[i], max); } } return 0; } /** * misc_cg_max_write() - Update the maximum limit of the cgroup. * @of: Handler for the file. * @buf: Data from the user. It should be either "max", 0, or a positive * integer. * @nbytes: Number of bytes of the data. * @off: Offset in the file. * * User can pass data like: * echo sev 23 > misc.max, OR * echo sev max > misc.max * * Context: Any context. * Return: * * >= 0 - Number of bytes processed in the input. * * -EINVAL - If buf is not valid. * * -ERANGE - If number is bigger than the u64 capacity. */ static ssize_t misc_cg_max_write(struct kernfs_open_file *of, char *buf, size_t nbytes, loff_t off) { struct misc_cg *cg; u64 max; int ret = 0, i; enum misc_res_type type = MISC_CG_RES_TYPES; char *token; buf = strstrip(buf); token = strsep(&buf, " "); if (!token || !buf) return -EINVAL; for (i = 0; i < MISC_CG_RES_TYPES; i++) { if (!strcmp(misc_res_name[i], token)) { type = i; break; } } if (type == MISC_CG_RES_TYPES) return -EINVAL; if (!strcmp(MAX_STR, buf)) { max = MAX_NUM; } else { ret = kstrtou64(buf, 0, &max); if (ret) return ret; } cg = css_misc(of_css(of)); if (READ_ONCE(misc_res_capacity[type])) WRITE_ONCE(cg->res[type].max, max); else ret = -EINVAL; return ret ? ret : nbytes; } /** * misc_cg_current_show() - Show the current usage of the misc cgroup. * @sf: Interface file * @v: Arguments passed * * Context: Any context. * Return: 0 to denote successful print. */ static int misc_cg_current_show(struct seq_file *sf, void *v) { int i; u64 usage; struct misc_cg *cg = css_misc(seq_css(sf)); for (i = 0; i < MISC_CG_RES_TYPES; i++) { usage = atomic64_read(&cg->res[i].usage); if (READ_ONCE(misc_res_capacity[i]) || usage) seq_printf(sf, "%s %llu\n", misc_res_name[i], usage); } return 0; } /** * misc_cg_peak_show() - Show the peak usage of the misc cgroup. * @sf: Interface file * @v: Arguments passed * * Context: Any context. * Return: 0 to denote successful print. */ static int misc_cg_peak_show(struct seq_file *sf, void *v) { int i; u64 watermark; struct misc_cg *cg = css_misc(seq_css(sf)); for (i = 0; i < MISC_CG_RES_TYPES; i++) { watermark = atomic64_read(&cg->res[i].watermark); if (READ_ONCE(misc_res_capacity[i]) || watermark) seq_printf(sf, "%s %llu\n", misc_res_name[i], watermark); } return 0; } /** * misc_cg_capacity_show() - Show the total capacity of misc res on the host. * @sf: Interface file * @v: Arguments passed * * Only present in the root cgroup directory. * * Context: Any context. * Return: 0 to denote successful print. */ static int misc_cg_capacity_show(struct seq_file *sf, void *v) { int i; u64 cap; for (i = 0; i < MISC_CG_RES_TYPES; i++) { cap = READ_ONCE(misc_res_capacity[i]); if (cap) seq_printf(sf, "%s %llu\n", misc_res_name[i], cap); } return 0; } static int __misc_events_show(struct seq_file *sf, bool local) { struct misc_cg *cg = css_misc(seq_css(sf)); u64 events; int i; for (i = 0; i < MISC_CG_RES_TYPES; i++) { if (local) events = atomic64_read(&cg->res[i].events_local); else events = atomic64_read(&cg->res[i].events); if (READ_ONCE(misc_res_capacity[i]) || events) seq_printf(sf, "%s.max %llu\n", misc_res_name[i], events); } return 0; } static int misc_events_show(struct seq_file *sf, void *v) { return __misc_events_show(sf, false); } static int misc_events_local_show(struct seq_file *sf, void *v) { return __misc_events_show(sf, true); } /* Misc cgroup interface files */ static struct cftype misc_cg_files[] = { { .name = "max", .write = misc_cg_max_write, .seq_show = misc_cg_max_show, .flags = CFTYPE_NOT_ON_ROOT, }, { .name = "current", .seq_show = misc_cg_current_show, }, { .name = "peak", .seq_show = misc_cg_peak_show, }, { .name = "capacity", .seq_show = misc_cg_capacity_show, .flags = CFTYPE_ONLY_ON_ROOT, }, { .name = "events", .flags = CFTYPE_NOT_ON_ROOT, .file_offset = offsetof(struct misc_cg, events_file), .seq_show = misc_events_show, }, { .name = "events.local", .flags = CFTYPE_NOT_ON_ROOT, .file_offset = offsetof(struct misc_cg, events_local_file), .seq_show = misc_events_local_show, }, {} }; /** * misc_cg_alloc() - Allocate misc cgroup. * @parent_css: Parent cgroup. * * Context: Process context. * Return: * * struct cgroup_subsys_state* - css of the allocated cgroup. * * ERR_PTR(-ENOMEM) - No memory available to allocate. */ static struct cgroup_subsys_state * misc_cg_alloc(struct cgroup_subsys_state *parent_css) { enum misc_res_type i; struct misc_cg *cg; if (!parent_css) { cg = &root_cg; } else { cg = kzalloc(sizeof(*cg), GFP_KERNEL); if (!cg) return ERR_PTR(-ENOMEM); } for (i = 0; i < MISC_CG_RES_TYPES; i++) { WRITE_ONCE(cg->res[i].max, MAX_NUM); atomic64_set(&cg->res[i].usage, 0); } return &cg->css; } /** * misc_cg_free() - Free the misc cgroup. * @css: cgroup subsys object. * * Context: Any context. */ static void misc_cg_free(struct cgroup_subsys_state *css) { kfree(css_misc(css)); } /* Cgroup controller callbacks */ struct cgroup_subsys misc_cgrp_subsys = { .css_alloc = misc_cg_alloc, .css_free = misc_cg_free, .legacy_cftypes = misc_cg_files, .dfl_cftypes = misc_cg_files, };
2 2 104 104 16 16 1 16 2 15 1 16 2 15 16 16 1 16 3 3 21 21 1 1 21 21 21 21 21 16 4 6 6 16 1 16 16 7 16 16 16 16 16 16 16 116 116 116 1 115 4 4 4 116 71 76 50 116 18 18 116 116 116 106 16 104 18 104 1 1 1 127 127 20 20 119 119 18 18 18 144 34 34 13 2 18 103 103 103 65 82 59 108 111 111 111 111 4 1 3 1 182 92 145 60 115 113 5 100 100 100 100 100 100 100 100 100 91 116 77 91 100 100 100 100 100 24 100 100 12 12 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 // SPDX-License-Identifier: GPL-2.0 /* * Copyright (c) 2000-2003 Silicon Graphics, Inc. * All Rights Reserved. */ #include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_format.h" #include "xfs_log_format.h" #include "xfs_shared.h" #include "xfs_trans_resv.h" #include "xfs_bit.h" #include "xfs_mount.h" #include "xfs_defer.h" #include "xfs_inode.h" #include "xfs_bmap.h" #include "xfs_quota.h" #include "xfs_trans.h" #include "xfs_buf_item.h" #include "xfs_trans_space.h" #include "xfs_trans_priv.h" #include "xfs_qm.h" #include "xfs_trace.h" #include "xfs_log.h" #include "xfs_bmap_btree.h" #include "xfs_error.h" #include "xfs_health.h" /* * Lock order: * * ip->i_lock * qi->qi_tree_lock * dquot->q_qlock * dquot->q_flush (xfs_dqflock() and friends) * qi->qi_lru_lock * * If two dquots need to be locked the order is user before group/project, * otherwise by the lowest id first, see xfs_dqlock2. */ struct kmem_cache *xfs_dqtrx_cache; static struct kmem_cache *xfs_dquot_cache; static struct lock_class_key xfs_dquot_group_class; static struct lock_class_key xfs_dquot_project_class; /* Record observations of quota corruption with the health tracking system. */ static void xfs_dquot_mark_sick( struct xfs_dquot *dqp) { struct xfs_mount *mp = dqp->q_mount; switch (dqp->q_type) { case XFS_DQTYPE_USER: xfs_fs_mark_sick(mp, XFS_SICK_FS_UQUOTA); break; case XFS_DQTYPE_GROUP: xfs_fs_mark_sick(mp, XFS_SICK_FS_GQUOTA); break; case XFS_DQTYPE_PROJ: xfs_fs_mark_sick(mp, XFS_SICK_FS_PQUOTA); break; default: ASSERT(0); break; } } /* * Detach the dquot buffer if it's still attached, because we can get called * through dqpurge after a log shutdown. Caller must hold the dqflock or have * otherwise isolated the dquot. */ void xfs_dquot_detach_buf( struct xfs_dquot *dqp) { struct xfs_dq_logitem *qlip = &dqp->q_logitem; struct xfs_buf *bp = NULL; spin_lock(&qlip->qli_lock); if (qlip->qli_item.li_buf) { bp = qlip->qli_item.li_buf; qlip->qli_item.li_buf = NULL; } spin_unlock(&qlip->qli_lock); if (bp) { xfs_buf_lock(bp); list_del_init(&qlip->qli_item.li_bio_list); xfs_buf_relse(bp); } } /* * This is called to free all the memory associated with a dquot */ void xfs_qm_dqdestroy( struct xfs_dquot *dqp) { ASSERT(list_empty(&dqp->q_lru)); ASSERT(dqp->q_logitem.qli_item.li_buf == NULL); kvfree(dqp->q_logitem.qli_item.li_lv_shadow); mutex_destroy(&dqp->q_qlock); XFS_STATS_DEC(dqp->q_mount, xs_qm_dquot); kmem_cache_free(xfs_dquot_cache, dqp); } /* * If default limits are in force, push them into the dquot now. * We overwrite the dquot limits only if they are zero and this * is not the root dquot. */ void xfs_qm_adjust_dqlimits( struct xfs_dquot *dq) { struct xfs_mount *mp = dq->q_mount; struct xfs_quotainfo *q = mp->m_quotainfo; struct xfs_def_quota *defq; int prealloc = 0; ASSERT(dq->q_id); defq = xfs_get_defquota(q, xfs_dquot_type(dq)); if (!dq->q_blk.softlimit) { dq->q_blk.softlimit = defq->blk.soft; prealloc = 1; } if (!dq->q_blk.hardlimit) { dq->q_blk.hardlimit = defq->blk.hard; prealloc = 1; } if (!dq->q_ino.softlimit) dq->q_ino.softlimit = defq->ino.soft; if (!dq->q_ino.hardlimit) dq->q_ino.hardlimit = defq->ino.hard; if (!dq->q_rtb.softlimit) dq->q_rtb.softlimit = defq->rtb.soft; if (!dq->q_rtb.hardlimit) dq->q_rtb.hardlimit = defq->rtb.hard; if (prealloc) xfs_dquot_set_prealloc_limits(dq); } /* Set the expiration time of a quota's grace period. */ time64_t xfs_dquot_set_timeout( struct xfs_mount *mp, time64_t timeout) { struct xfs_quotainfo *qi = mp->m_quotainfo; return clamp_t(time64_t, timeout, qi->qi_expiry_min, qi->qi_expiry_max); } /* Set the length of the default grace period. */ time64_t xfs_dquot_set_grace_period( time64_t grace) { return clamp_t(time64_t, grace, XFS_DQ_GRACE_MIN, XFS_DQ_GRACE_MAX); } /* * Determine if this quota counter is over either limit and set the quota * timers as appropriate. */ static inline void xfs_qm_adjust_res_timer( struct xfs_mount *mp, struct xfs_dquot_res *res, struct xfs_quota_limits *qlim) { ASSERT(res->hardlimit == 0 || res->softlimit <= res->hardlimit); if ((res->softlimit && res->count > res->softlimit) || (res->hardlimit && res->count > res->hardlimit)) { if (res->timer == 0) res->timer = xfs_dquot_set_timeout(mp, ktime_get_real_seconds() + qlim->time); } else { res->timer = 0; } } /* * Check the limits and timers of a dquot and start or reset timers * if necessary. * This gets called even when quota enforcement is OFF, which makes our * life a little less complicated. (We just don't reject any quota * reservations in that case, when enforcement is off). * We also return 0 as the values of the timers in Q_GETQUOTA calls, when * enforcement's off. * In contrast, warnings are a little different in that they don't * 'automatically' get started when limits get exceeded. They do * get reset to zero, however, when we find the count to be under * the soft limit (they are only ever set non-zero via userspace). */ void xfs_qm_adjust_dqtimers( struct xfs_dquot *dq) { struct xfs_mount *mp = dq->q_mount; struct xfs_quotainfo *qi = mp->m_quotainfo; struct xfs_def_quota *defq; ASSERT(dq->q_id); defq = xfs_get_defquota(qi, xfs_dquot_type(dq)); xfs_qm_adjust_res_timer(dq->q_mount, &dq->q_blk, &defq->blk); xfs_qm_adjust_res_timer(dq->q_mount, &dq->q_ino, &defq->ino); xfs_qm_adjust_res_timer(dq->q_mount, &dq->q_rtb, &defq->rtb); } /* * initialize a buffer full of dquots and log the whole thing */ void xfs_qm_init_dquot_blk( struct xfs_trans *tp, xfs_dqid_t id, xfs_dqtype_t type, struct xfs_buf *bp) { struct xfs_mount *mp = tp->t_mountp; struct xfs_quotainfo *q = mp->m_quotainfo; struct xfs_dqblk *d; xfs_dqid_t curid; unsigned int qflag; unsigned int blftype; int i; ASSERT(tp); ASSERT(xfs_buf_islocked(bp)); switch (type) { case XFS_DQTYPE_USER: qflag = XFS_UQUOTA_CHKD; blftype = XFS_BLF_UDQUOT_BUF; break; case XFS_DQTYPE_PROJ: qflag = XFS_PQUOTA_CHKD; blftype = XFS_BLF_PDQUOT_BUF; break; case XFS_DQTYPE_GROUP: qflag = XFS_GQUOTA_CHKD; blftype = XFS_BLF_GDQUOT_BUF; break; default: ASSERT(0); return; } d = bp->b_addr; /* * ID of the first dquot in the block - id's are zero based. */ curid = id - (id % q->qi_dqperchunk); memset(d, 0, BBTOB(q->qi_dqchunklen)); for (i = 0; i < q->qi_dqperchunk; i++, d++, curid++) { d->dd_diskdq.d_magic = cpu_to_be16(XFS_DQUOT_MAGIC); d->dd_diskdq.d_version = XFS_DQUOT_VERSION; d->dd_diskdq.d_id = cpu_to_be32(curid); d->dd_diskdq.d_type = type; if (curid > 0 && xfs_has_bigtime(mp)) d->dd_diskdq.d_type |= XFS_DQTYPE_BIGTIME; if (xfs_has_crc(mp)) { uuid_copy(&d->dd_uuid, &mp->m_sb.sb_meta_uuid); xfs_update_cksum((char *)d, sizeof(struct xfs_dqblk), XFS_DQUOT_CRC_OFF); } } xfs_trans_dquot_buf(tp, bp, blftype); /* * quotacheck uses delayed writes to update all the dquots on disk in an * efficient manner instead of logging the individual dquot changes as * they are made. However if we log the buffer allocated here and crash * after quotacheck while the logged initialisation is still in the * active region of the log, log recovery can replay the dquot buffer * initialisation over the top of the checked dquots and corrupt quota * accounting. * * To avoid this problem, quotacheck cannot log the initialised buffer. * We must still dirty the buffer and write it back before the * allocation transaction clears the log. Therefore, mark the buffer as * ordered instead of logging it directly. This is safe for quotacheck * because it detects and repairs allocated but initialized dquot blocks * in the quota inodes. */ if (!(mp->m_qflags & qflag)) xfs_trans_ordered_buf(tp, bp); else xfs_trans_log_buf(tp, bp, 0, BBTOB(q->qi_dqchunklen) - 1); } static void xfs_dquot_set_prealloc( struct xfs_dquot_pre *pre, const struct xfs_dquot_res *res) { xfs_qcnt_t space; pre->q_prealloc_hi_wmark = res->hardlimit; pre->q_prealloc_lo_wmark = res->softlimit; space = div_u64(pre->q_prealloc_hi_wmark, 100); if (!pre->q_prealloc_lo_wmark) pre->q_prealloc_lo_wmark = space * 95; pre->q_low_space[XFS_QLOWSP_1_PCNT] = space; pre->q_low_space[XFS_QLOWSP_3_PCNT] = space * 3; pre->q_low_space[XFS_QLOWSP_5_PCNT] = space * 5; } /* * Initialize the dynamic speculative preallocation thresholds. The lo/hi * watermarks correspond to the soft and hard limits by default. If a soft limit * is not specified, we use 95% of the hard limit. */ void xfs_dquot_set_prealloc_limits(struct xfs_dquot *dqp) { xfs_dquot_set_prealloc(&dqp->q_blk_prealloc, &dqp->q_blk); xfs_dquot_set_prealloc(&dqp->q_rtb_prealloc, &dqp->q_rtb); } /* * Ensure that the given in-core dquot has a buffer on disk backing it, and * return the buffer locked and held. This is called when the bmapi finds a * hole. */ STATIC int xfs_dquot_disk_alloc( struct xfs_dquot *dqp, struct xfs_buf **bpp) { struct xfs_bmbt_irec map; struct xfs_trans *tp; struct xfs_mount *mp = dqp->q_mount; struct xfs_buf *bp; xfs_dqtype_t qtype = xfs_dquot_type(dqp); struct xfs_inode *quotip = xfs_quota_inode(mp, qtype); int nmaps = 1; int error; trace_xfs_dqalloc(dqp); error = xfs_trans_alloc(mp, &M_RES(mp)->tr_qm_dqalloc, XFS_QM_DQALLOC_SPACE_RES(mp), 0, 0, &tp); if (error) return error; xfs_ilock(quotip, XFS_ILOCK_EXCL); xfs_trans_ijoin(tp, quotip, 0); if (!xfs_this_quota_on(dqp->q_mount, qtype)) { /* * Return if this type of quotas is turned off while we didn't * have an inode lock */ error = -ESRCH; goto err_cancel; } error = xfs_iext_count_extend(tp, quotip, XFS_DATA_FORK, XFS_IEXT_ADD_NOSPLIT_CNT); if (error) goto err_cancel; /* Create the block mapping. */ error = xfs_bmapi_write(tp, quotip, dqp->q_fileoffset, XFS_DQUOT_CLUSTER_SIZE_FSB, XFS_BMAPI_METADATA, 0, &map, &nmaps); if (error) goto err_cancel; ASSERT(map.br_blockcount == XFS_DQUOT_CLUSTER_SIZE_FSB); ASSERT((map.br_startblock != DELAYSTARTBLOCK) && (map.br_startblock != HOLESTARTBLOCK)); /* * Keep track of the blkno to save a lookup later */ dqp->q_blkno = XFS_FSB_TO_DADDR(mp, map.br_startblock); /* now we can just get the buffer (there's nothing to read yet) */ error = xfs_trans_get_buf(tp, mp->m_ddev_targp, dqp->q_blkno, mp->m_quotainfo->qi_dqchunklen, 0, &bp); if (error) goto err_cancel; bp->b_ops = &xfs_dquot_buf_ops; /* * Make a chunk of dquots out of this buffer and log * the entire thing. */ xfs_qm_init_dquot_blk(tp, dqp->q_id, qtype, bp); xfs_buf_set_ref(bp, XFS_DQUOT_REF); /* * Hold the buffer and join it to the dfops so that we'll still own * the buffer when we return to the caller. The buffer disposal on * error must be paid attention to very carefully, as it has been * broken since commit efa092f3d4c6 "[XFS] Fixes a bug in the quota * code when allocating a new dquot record" in 2005, and the later * conversion to xfs_defer_ops in commit 310a75a3c6c747 failed to keep * the buffer locked across the _defer_finish call. We can now do * this correctly with xfs_defer_bjoin. * * Above, we allocated a disk block for the dquot information and used * get_buf to initialize the dquot. If the _defer_finish fails, the old * transaction is gone but the new buffer is not joined or held to any * transaction, so we must _buf_relse it. * * If everything succeeds, the caller of this function is returned a * buffer that is locked and held to the transaction. The caller * is responsible for unlocking any buffer passed back, either * manually or by committing the transaction. On error, the buffer is * released and not passed back. * * Keep the quota inode ILOCKed until after the transaction commit to * maintain the atomicity of bmap/rmap updates. */ xfs_trans_bhold(tp, bp); error = xfs_trans_commit(tp); xfs_iunlock(quotip, XFS_ILOCK_EXCL); if (error) { xfs_buf_relse(bp); return error; } *bpp = bp; return 0; err_cancel: xfs_trans_cancel(tp); xfs_iunlock(quotip, XFS_ILOCK_EXCL); return error; } /* * Read in the in-core dquot's on-disk metadata and return the buffer. * Returns ENOENT to signal a hole. */ STATIC int xfs_dquot_disk_read( struct xfs_mount *mp, struct xfs_dquot *dqp, struct xfs_buf **bpp) { struct xfs_bmbt_irec map; struct xfs_buf *bp; xfs_dqtype_t qtype = xfs_dquot_type(dqp); struct xfs_inode *quotip = xfs_quota_inode(mp, qtype); uint lock_mode; int nmaps = 1; int error; lock_mode = xfs_ilock_data_map_shared(quotip); if (!xfs_this_quota_on(mp, qtype)) { /* * Return if this type of quotas is turned off while we * didn't have the quota inode lock. */ xfs_iunlock(quotip, lock_mode); return -ESRCH; } /* * Find the block map; no allocations yet */ error = xfs_bmapi_read(quotip, dqp->q_fileoffset, XFS_DQUOT_CLUSTER_SIZE_FSB, &map, &nmaps, 0); xfs_iunlock(quotip, lock_mode); if (error) return error; ASSERT(nmaps == 1); ASSERT(map.br_blockcount >= 1); ASSERT(map.br_startblock != DELAYSTARTBLOCK); if (map.br_startblock == HOLESTARTBLOCK) return -ENOENT; trace_xfs_dqtobp_read(dqp); /* * store the blkno etc so that we don't have to do the * mapping all the time */ dqp->q_blkno = XFS_FSB_TO_DADDR(mp, map.br_startblock); error = xfs_trans_read_buf(mp, NULL, mp->m_ddev_targp, dqp->q_blkno, mp->m_quotainfo->qi_dqchunklen, 0, &bp, &xfs_dquot_buf_ops); if (xfs_metadata_is_sick(error)) xfs_dquot_mark_sick(dqp); if (error) { ASSERT(bp == NULL); return error; } ASSERT(xfs_buf_islocked(bp)); xfs_buf_set_ref(bp, XFS_DQUOT_REF); *bpp = bp; return 0; } /* Allocate and initialize everything we need for an incore dquot. */ STATIC struct xfs_dquot * xfs_dquot_alloc( struct xfs_mount *mp, xfs_dqid_t id, xfs_dqtype_t type) { struct xfs_dquot *dqp; dqp = kmem_cache_zalloc(xfs_dquot_cache, GFP_KERNEL | __GFP_NOFAIL); dqp->q_type = type; dqp->q_id = id; dqp->q_mount = mp; INIT_LIST_HEAD(&dqp->q_lru); mutex_init(&dqp->q_qlock); init_waitqueue_head(&dqp->q_pinwait); dqp->q_fileoffset = (xfs_fileoff_t)id / mp->m_quotainfo->qi_dqperchunk; /* * Offset of dquot in the (fixed sized) dquot chunk. */ dqp->q_bufoffset = (id % mp->m_quotainfo->qi_dqperchunk) * sizeof(struct xfs_dqblk); /* * Because we want to use a counting completion, complete * the flush completion once to allow a single access to * the flush completion without blocking. */ init_completion(&dqp->q_flush); complete(&dqp->q_flush); /* * Make sure group quotas have a different lock class than user * quotas. */ switch (type) { case XFS_DQTYPE_USER: /* uses the default lock class */ break; case XFS_DQTYPE_GROUP: lockdep_set_class(&dqp->q_qlock, &xfs_dquot_group_class); break; case XFS_DQTYPE_PROJ: lockdep_set_class(&dqp->q_qlock, &xfs_dquot_project_class); break; default: ASSERT(0); break; } xfs_qm_dquot_logitem_init(dqp); XFS_STATS_INC(mp, xs_qm_dquot); return dqp; } /* Check the ondisk dquot's id and type match what the incore dquot expects. */ static bool xfs_dquot_check_type( struct xfs_dquot *dqp, struct xfs_disk_dquot *ddqp) { uint8_t ddqp_type; uint8_t dqp_type; ddqp_type = ddqp->d_type & XFS_DQTYPE_REC_MASK; dqp_type = xfs_dquot_type(dqp); if (be32_to_cpu(ddqp->d_id) != dqp->q_id) return false; /* * V5 filesystems always expect an exact type match. V4 filesystems * expect an exact match for user dquots and for non-root group and * project dquots. */ if (xfs_has_crc(dqp->q_mount) || dqp_type == XFS_DQTYPE_USER || dqp->q_id != 0) return ddqp_type == dqp_type; /* * V4 filesystems support either group or project quotas, but not both * at the same time. The non-user quota file can be switched between * group and project quota uses depending on the mount options, which * means that we can encounter the other type when we try to load quota * defaults. Quotacheck will soon reset the entire quota file * (including the root dquot) anyway, but don't log scary corruption * reports to dmesg. */ return ddqp_type == XFS_DQTYPE_GROUP || ddqp_type == XFS_DQTYPE_PROJ; } /* Copy the in-core quota fields in from the on-disk buffer. */ STATIC int xfs_dquot_from_disk( struct xfs_dquot *dqp, struct xfs_buf *bp) { struct xfs_dqblk *dqb = xfs_buf_offset(bp, dqp->q_bufoffset); struct xfs_disk_dquot *ddqp = &dqb->dd_diskdq; /* * Ensure that we got the type and ID we were looking for. * Everything else was checked by the dquot buffer verifier. */ if (!xfs_dquot_check_type(dqp, ddqp)) { xfs_alert_tag(bp->b_mount, XFS_PTAG_VERIFIER_ERROR, "Metadata corruption detected at %pS, quota %u", __this_address, dqp->q_id); xfs_alert(bp->b_mount, "Unmount and run xfs_repair"); xfs_dquot_mark_sick(dqp); return -EFSCORRUPTED; } /* copy everything from disk dquot to the incore dquot */ dqp->q_type = ddqp->d_type; dqp->q_blk.hardlimit = be64_to_cpu(ddqp->d_blk_hardlimit); dqp->q_blk.softlimit = be64_to_cpu(ddqp->d_blk_softlimit); dqp->q_ino.hardlimit = be64_to_cpu(ddqp->d_ino_hardlimit); dqp->q_ino.softlimit = be64_to_cpu(ddqp->d_ino_softlimit); dqp->q_rtb.hardlimit = be64_to_cpu(ddqp->d_rtb_hardlimit); dqp->q_rtb.softlimit = be64_to_cpu(ddqp->d_rtb_softlimit); dqp->q_blk.count = be64_to_cpu(ddqp->d_bcount); dqp->q_ino.count = be64_to_cpu(ddqp->d_icount); dqp->q_rtb.count = be64_to_cpu(ddqp->d_rtbcount); dqp->q_blk.timer = xfs_dquot_from_disk_ts(ddqp, ddqp->d_btimer); dqp->q_ino.timer = xfs_dquot_from_disk_ts(ddqp, ddqp->d_itimer); dqp->q_rtb.timer = xfs_dquot_from_disk_ts(ddqp, ddqp->d_rtbtimer); /* * Reservation counters are defined as reservation plus current usage * to avoid having to add every time. */ dqp->q_blk.reserved = dqp->q_blk.count; dqp->q_ino.reserved = dqp->q_ino.count; dqp->q_rtb.reserved = dqp->q_rtb.count; /* initialize the dquot speculative prealloc thresholds */ xfs_dquot_set_prealloc_limits(dqp); return 0; } /* Copy the in-core quota fields into the on-disk buffer. */ void xfs_dquot_to_disk( struct xfs_disk_dquot *ddqp, struct xfs_dquot *dqp) { ddqp->d_magic = cpu_to_be16(XFS_DQUOT_MAGIC); ddqp->d_version = XFS_DQUOT_VERSION; ddqp->d_type = dqp->q_type; ddqp->d_id = cpu_to_be32(dqp->q_id); ddqp->d_pad0 = 0; ddqp->d_pad = 0; ddqp->d_blk_hardlimit = cpu_to_be64(dqp->q_blk.hardlimit); ddqp->d_blk_softlimit = cpu_to_be64(dqp->q_blk.softlimit); ddqp->d_ino_hardlimit = cpu_to_be64(dqp->q_ino.hardlimit); ddqp->d_ino_softlimit = cpu_to_be64(dqp->q_ino.softlimit); ddqp->d_rtb_hardlimit = cpu_to_be64(dqp->q_rtb.hardlimit); ddqp->d_rtb_softlimit = cpu_to_be64(dqp->q_rtb.softlimit); ddqp->d_bcount = cpu_to_be64(dqp->q_blk.count); ddqp->d_icount = cpu_to_be64(dqp->q_ino.count); ddqp->d_rtbcount = cpu_to_be64(dqp->q_rtb.count); ddqp->d_bwarns = 0; ddqp->d_iwarns = 0; ddqp->d_rtbwarns = 0; ddqp->d_btimer = xfs_dquot_to_disk_ts(dqp, dqp->q_blk.timer); ddqp->d_itimer = xfs_dquot_to_disk_ts(dqp, dqp->q_ino.timer); ddqp->d_rtbtimer = xfs_dquot_to_disk_ts(dqp, dqp->q_rtb.timer); } /* * Read in the ondisk dquot using dqtobp() then copy it to an incore version, * and release the buffer immediately. If @can_alloc is true, fill any * holes in the on-disk metadata. */ static int xfs_qm_dqread( struct xfs_mount *mp, xfs_dqid_t id, xfs_dqtype_t type, bool can_alloc, struct xfs_dquot **dqpp) { struct xfs_dquot *dqp; struct xfs_buf *bp; int error; dqp = xfs_dquot_alloc(mp, id, type); trace_xfs_dqread(dqp); /* Try to read the buffer, allocating if necessary. */ error = xfs_dquot_disk_read(mp, dqp, &bp); if (error == -ENOENT && can_alloc) error = xfs_dquot_disk_alloc(dqp, &bp); if (error) goto err; /* * At this point we should have a clean locked buffer. Copy the data * to the incore dquot and release the buffer since the incore dquot * has its own locking protocol so we needn't tie up the buffer any * further. */ ASSERT(xfs_buf_islocked(bp)); error = xfs_dquot_from_disk(dqp, bp); xfs_buf_relse(bp); if (error) goto err; *dqpp = dqp; return error; err: trace_xfs_dqread_fail(dqp); xfs_qm_dqdestroy(dqp); *dqpp = NULL; return error; } /* * Advance to the next id in the current chunk, or if at the * end of the chunk, skip ahead to first id in next allocated chunk * using the SEEK_DATA interface. */ static int xfs_dq_get_next_id( struct xfs_mount *mp, xfs_dqtype_t type, xfs_dqid_t *id) { struct xfs_inode *quotip = xfs_quota_inode(mp, type); xfs_dqid_t next_id = *id + 1; /* simple advance */ uint lock_flags; struct xfs_bmbt_irec got; struct xfs_iext_cursor cur; xfs_fsblock_t start; int error = 0; /* If we'd wrap past the max ID, stop */ if (next_id < *id) return -ENOENT; /* If new ID is within the current chunk, advancing it sufficed */ if (next_id % mp->m_quotainfo->qi_dqperchunk) { *id = next_id; return 0; } /* Nope, next_id is now past the current chunk, so find the next one */ start = (xfs_fsblock_t)next_id / mp->m_quotainfo->qi_dqperchunk; lock_flags = xfs_ilock_data_map_shared(quotip); error = xfs_iread_extents(NULL, quotip, XFS_DATA_FORK); if (error) return error; if (xfs_iext_lookup_extent(quotip, &quotip->i_df, start, &cur, &got)) { /* contiguous chunk, bump startoff for the id calculation */ if (got.br_startoff < start) got.br_startoff = start; *id = got.br_startoff * mp->m_quotainfo->qi_dqperchunk; } else { error = -ENOENT; } xfs_iunlock(quotip, lock_flags); return error; } /* * Look up the dquot in the in-core cache. If found, the dquot is returned * locked and ready to go. */ static struct xfs_dquot * xfs_qm_dqget_cache_lookup( struct xfs_mount *mp, xfs_dqid_t id, xfs_dqtype_t type) { struct xfs_quotainfo *qi = mp->m_quotainfo; struct radix_tree_root *tree = xfs_dquot_tree(qi, type); struct xfs_dquot *dqp; restart: mutex_lock(&qi->qi_tree_lock); dqp = radix_tree_lookup(tree, id); if (!dqp) { mutex_unlock(&qi->qi_tree_lock); XFS_STATS_INC(mp, xs_qm_dqcachemisses); return NULL; } if (!lockref_get_not_dead(&dqp->q_lockref)) { mutex_unlock(&qi->qi_tree_lock); trace_xfs_dqget_freeing(dqp); delay(1); goto restart; } mutex_unlock(&qi->qi_tree_lock); trace_xfs_dqget_hit(dqp); XFS_STATS_INC(mp, xs_qm_dqcachehits); return dqp; } /* * Try to insert a new dquot into the in-core cache. If an error occurs the * caller should throw away the dquot and start over. Otherwise, the dquot * is returned (and held by the cache) as if there had been a cache hit. * * The insert needs to be done under memalloc_nofs context because the radix * tree can do memory allocation during insert. The qi->qi_tree_lock is taken in * memory reclaim when freeing unused dquots, so we cannot have the radix tree * node allocation recursing into filesystem reclaim whilst we hold the * qi_tree_lock. */ static int xfs_qm_dqget_cache_insert( struct xfs_mount *mp, xfs_dqid_t id, xfs_dqtype_t type, struct xfs_dquot *dqp) { struct xfs_quotainfo *qi = mp->m_quotainfo; struct radix_tree_root *tree = xfs_dquot_tree(qi, type); unsigned int nofs_flags; int error; nofs_flags = memalloc_nofs_save(); mutex_lock(&qi->qi_tree_lock); error = radix_tree_insert(tree, id, dqp); if (unlikely(error)) { trace_xfs_dqget_dup(dqp); goto out_unlock; } lockref_init(&dqp->q_lockref); qi->qi_dquots++; out_unlock: mutex_unlock(&qi->qi_tree_lock); memalloc_nofs_restore(nofs_flags); return error; } /* Check our input parameters. */ static int xfs_qm_dqget_checks( struct xfs_mount *mp, xfs_dqtype_t type) { switch (type) { case XFS_DQTYPE_USER: if (!XFS_IS_UQUOTA_ON(mp)) return -ESRCH; return 0; case XFS_DQTYPE_GROUP: if (!XFS_IS_GQUOTA_ON(mp)) return -ESRCH; return 0; case XFS_DQTYPE_PROJ: if (!XFS_IS_PQUOTA_ON(mp)) return -ESRCH; return 0; default: WARN_ON_ONCE(0); return -EINVAL; } } /* * Given the file system, id, and type (UDQUOT/GDQUOT/PDQUOT), return a * dquot, doing an allocation (if requested) as needed. */ int xfs_qm_dqget( struct xfs_mount *mp, xfs_dqid_t id, xfs_dqtype_t type, bool can_alloc, struct xfs_dquot **O_dqpp) { struct xfs_dquot *dqp; int error; error = xfs_qm_dqget_checks(mp, type); if (error) return error; restart: dqp = xfs_qm_dqget_cache_lookup(mp, id, type); if (dqp) goto found; error = xfs_qm_dqread(mp, id, type, can_alloc, &dqp); if (error) return error; error = xfs_qm_dqget_cache_insert(mp, id, type, dqp); if (error) { xfs_qm_dqdestroy(dqp); if (error == -EEXIST) { /* * Duplicate found. Just throw away the new dquot and * start over. */ XFS_STATS_INC(mp, xs_qm_dquot_dups); goto restart; } return error; } trace_xfs_dqget_miss(dqp); found: *O_dqpp = dqp; return 0; } /* * Given a dquot id and type, read and initialize a dquot from the on-disk * metadata. This function is only for use during quota initialization so * it ignores the dquot cache assuming that the dquot shrinker isn't set up. * The caller is responsible for _qm_dqdestroy'ing the returned dquot. */ int xfs_qm_dqget_uncached( struct xfs_mount *mp, xfs_dqid_t id, xfs_dqtype_t type, struct xfs_dquot **dqpp) { int error; error = xfs_qm_dqget_checks(mp, type); if (error) return error; return xfs_qm_dqread(mp, id, type, 0, dqpp); } /* Return the quota id for a given inode and type. */ xfs_dqid_t xfs_qm_id_for_quotatype( struct xfs_inode *ip, xfs_dqtype_t type) { switch (type) { case XFS_DQTYPE_USER: return i_uid_read(VFS_I(ip)); case XFS_DQTYPE_GROUP: return i_gid_read(VFS_I(ip)); case XFS_DQTYPE_PROJ: return ip->i_projid; } ASSERT(0); return 0; } /* * Return the dquot for a given inode and type. If @can_alloc is true, then * allocate blocks if needed. The inode's ILOCK must be held and it must not * have already had an inode attached. */ int xfs_qm_dqget_inode( struct xfs_inode *ip, xfs_dqtype_t type, bool can_alloc, struct xfs_dquot **dqpp) { struct xfs_mount *mp = ip->i_mount; struct xfs_dquot *dqp; xfs_dqid_t id; int error; ASSERT(!*dqpp); xfs_assert_ilocked(ip, XFS_ILOCK_EXCL); error = xfs_qm_dqget_checks(mp, type); if (error) return error; xfs_assert_ilocked(ip, XFS_ILOCK_EXCL); ASSERT(xfs_inode_dquot(ip, type) == NULL); ASSERT(!xfs_is_metadir_inode(ip)); id = xfs_qm_id_for_quotatype(ip, type); restart: dqp = xfs_qm_dqget_cache_lookup(mp, id, type); if (dqp) goto found; /* * Dquot cache miss. We don't want to keep the inode lock across * a (potential) disk read. Also we don't want to deal with the lock * ordering between quotainode and this inode. OTOH, dropping the inode * lock here means dealing with a chown that can happen before * we re-acquire the lock. */ xfs_iunlock(ip, XFS_ILOCK_EXCL); error = xfs_qm_dqread(mp, id, type, can_alloc, &dqp); xfs_ilock(ip, XFS_ILOCK_EXCL); if (error) return error; /* * A dquot could be attached to this inode by now, since we had * dropped the ilock. */ if (xfs_this_quota_on(mp, type)) { struct xfs_dquot *dqp1; dqp1 = xfs_inode_dquot(ip, type); if (dqp1) { xfs_qm_dqdestroy(dqp); dqp = dqp1; goto dqret; } } else { /* inode stays locked on return */ xfs_qm_dqdestroy(dqp); return -ESRCH; } error = xfs_qm_dqget_cache_insert(mp, id, type, dqp); if (error) { xfs_qm_dqdestroy(dqp); if (error == -EEXIST) { /* * Duplicate found. Just throw away the new dquot and * start over. */ XFS_STATS_INC(mp, xs_qm_dquot_dups); goto restart; } return error; } dqret: xfs_assert_ilocked(ip, XFS_ILOCK_EXCL); trace_xfs_dqget_miss(dqp); found: trace_xfs_dqattach_get(dqp); *dqpp = dqp; return 0; } /* * Starting at @id and progressing upwards, look for an initialized incore * dquot, lock it, and return it. */ int xfs_qm_dqget_next( struct xfs_mount *mp, xfs_dqid_t id, xfs_dqtype_t type, struct xfs_dquot **dqpp) { struct xfs_dquot *dqp; int error = 0; *dqpp = NULL; for (; !error; error = xfs_dq_get_next_id(mp, type, &id)) { error = xfs_qm_dqget(mp, id, type, false, &dqp); if (error == -ENOENT) continue; else if (error != 0) break; mutex_lock(&dqp->q_qlock); if (!XFS_IS_DQUOT_UNINITIALIZED(dqp)) { *dqpp = dqp; return 0; } mutex_unlock(&dqp->q_qlock); xfs_qm_dqrele(dqp); } return error; } /* * Release a reference to the dquot. */ void xfs_qm_dqrele( struct xfs_dquot *dqp) { if (!dqp) return; trace_xfs_dqrele(dqp); if (lockref_put_or_lock(&dqp->q_lockref)) return; if (!--dqp->q_lockref.count) { struct xfs_quotainfo *qi = dqp->q_mount->m_quotainfo; trace_xfs_dqrele_free(dqp); if (list_lru_add_obj(&qi->qi_lru, &dqp->q_lru)) XFS_STATS_INC(dqp->q_mount, xs_qm_dquot_unused); } spin_unlock(&dqp->q_lockref.lock); } /* * This is the dquot flushing I/O completion routine. It is called * from interrupt level when the buffer containing the dquot is * flushed to disk. It is responsible for removing the dquot logitem * from the AIL if it has not been re-logged, and unlocking the dquot's * flush lock. This behavior is very similar to that of inodes.. */ static void xfs_qm_dqflush_done( struct xfs_log_item *lip) { struct xfs_dq_logitem *qlip = container_of(lip, struct xfs_dq_logitem, qli_item); struct xfs_dquot *dqp = qlip->qli_dquot; struct xfs_ail *ailp = lip->li_ailp; struct xfs_buf *bp = NULL; xfs_lsn_t tail_lsn; /* * We only want to pull the item from the AIL if its * location in the log has not changed since we started the flush. * Thus, we only bother if the dquot's lsn has * not changed. First we check the lsn outside the lock * since it's cheaper, and then we recheck while * holding the lock before removing the dquot from the AIL. */ if (test_bit(XFS_LI_IN_AIL, &lip->li_flags) && (lip->li_lsn == qlip->qli_flush_lsn || test_bit(XFS_LI_FAILED, &lip->li_flags))) { spin_lock(&ailp->ail_lock); clear_bit(XFS_LI_FAILED, &lip->li_flags); if (lip->li_lsn == qlip->qli_flush_lsn) { /* xfs_ail_update_finish() drops the AIL lock */ tail_lsn = xfs_ail_delete_one(ailp, lip); xfs_ail_update_finish(ailp, tail_lsn); } else { spin_unlock(&ailp->ail_lock); } } /* * If this dquot hasn't been dirtied since initiating the last dqflush, * release the buffer reference. We already unlinked this dquot item * from the buffer. */ spin_lock(&qlip->qli_lock); if (!qlip->qli_dirty) { bp = lip->li_buf; lip->li_buf = NULL; } spin_unlock(&qlip->qli_lock); if (bp) xfs_buf_rele(bp); /* * Release the dq's flush lock since we're done with it. */ xfs_dqfunlock(dqp); } void xfs_buf_dquot_iodone( struct xfs_buf *bp) { struct xfs_log_item *lip, *n; list_for_each_entry_safe(lip, n, &bp->b_li_list, li_bio_list) { list_del_init(&lip->li_bio_list); xfs_qm_dqflush_done(lip); } } /* Check incore dquot for errors before we flush. */ static xfs_failaddr_t xfs_qm_dqflush_check( struct xfs_dquot *dqp) { xfs_dqtype_t type = xfs_dquot_type(dqp); if (type != XFS_DQTYPE_USER && type != XFS_DQTYPE_GROUP && type != XFS_DQTYPE_PROJ) return __this_address; if (dqp->q_id == 0) return NULL; if (dqp->q_blk.softlimit && dqp->q_blk.count > dqp->q_blk.softlimit && !dqp->q_blk.timer) return __this_address; if (dqp->q_ino.softlimit && dqp->q_ino.count > dqp->q_ino.softlimit && !dqp->q_ino.timer) return __this_address; if (dqp->q_rtb.softlimit && dqp->q_rtb.count > dqp->q_rtb.softlimit && !dqp->q_rtb.timer) return __this_address; /* bigtime flag should never be set on root dquots */ if (dqp->q_type & XFS_DQTYPE_BIGTIME) { if (!xfs_has_bigtime(dqp->q_mount)) return __this_address; if (dqp->q_id == 0) return __this_address; } return NULL; } /* * Get the buffer containing the on-disk dquot. * * Requires dquot flush lock, will clear the dirty flag, delete the quota log * item from the AIL, and shut down the system if something goes wrong. */ static int xfs_dquot_read_buf( struct xfs_trans *tp, struct xfs_dquot *dqp, struct xfs_buf **bpp) { struct xfs_mount *mp = dqp->q_mount; struct xfs_buf *bp = NULL; int error; error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, dqp->q_blkno, mp->m_quotainfo->qi_dqchunklen, 0, &bp, &xfs_dquot_buf_ops); if (xfs_metadata_is_sick(error)) xfs_dquot_mark_sick(dqp); if (error) goto out_abort; *bpp = bp; return 0; out_abort: dqp->q_flags &= ~XFS_DQFLAG_DIRTY; xfs_trans_ail_delete(&dqp->q_logitem.qli_item, 0); xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE); return error; } /* * Attach a dquot buffer to this dquot to avoid allocating a buffer during a * dqflush, since dqflush can be called from reclaim context. Caller must hold * the dqlock. */ int xfs_dquot_attach_buf( struct xfs_trans *tp, struct xfs_dquot *dqp) { struct xfs_dq_logitem *qlip = &dqp->q_logitem; struct xfs_log_item *lip = &qlip->qli_item; int error; spin_lock(&qlip->qli_lock); if (!lip->li_buf) { struct xfs_buf *bp = NULL; spin_unlock(&qlip->qli_lock); error = xfs_dquot_read_buf(tp, dqp, &bp); if (error) return error; /* * Hold the dquot buffer so that we retain our ref to it after * detaching it from the transaction, then give that ref to the * dquot log item so that the AIL does not have to read the * dquot buffer to push this item. */ xfs_buf_hold(bp); xfs_trans_brelse(tp, bp); spin_lock(&qlip->qli_lock); lip->li_buf = bp; } qlip->qli_dirty = true; spin_unlock(&qlip->qli_lock); return 0; } /* * Get a new reference the dquot buffer attached to this dquot for a dqflush * operation. * * Returns 0 and a NULL bp if none was attached to the dquot; 0 and a locked * bp; or -EAGAIN if the buffer could not be locked. */ int xfs_dquot_use_attached_buf( struct xfs_dquot *dqp, struct xfs_buf **bpp) { struct xfs_buf *bp = dqp->q_logitem.qli_item.li_buf; /* * A NULL buffer can happen if the dquot dirty flag was set but the * filesystem shut down before transaction commit happened. In that * case we're not going to flush anyway. */ if (!bp) { ASSERT(xfs_is_shutdown(dqp->q_mount)); *bpp = NULL; return 0; } if (!xfs_buf_trylock(bp)) return -EAGAIN; xfs_buf_hold(bp); *bpp = bp; return 0; } /* * Write a modified dquot to disk. * The dquot must be locked and the flush lock too taken by caller. * The flush lock will not be unlocked until the dquot reaches the disk, * but the dquot is free to be unlocked and modified by the caller * in the interim. Dquot is still locked on return. This behavior is * identical to that of inodes. */ int xfs_qm_dqflush( struct xfs_dquot *dqp, struct xfs_buf *bp) { struct xfs_mount *mp = dqp->q_mount; struct xfs_dq_logitem *qlip = &dqp->q_logitem; struct xfs_log_item *lip = &qlip->qli_item; struct xfs_dqblk *dqblk; xfs_failaddr_t fa; int error; ASSERT(XFS_DQ_IS_LOCKED(dqp)); ASSERT(!completion_done(&dqp->q_flush)); ASSERT(atomic_read(&dqp->q_pincount) == 0); trace_xfs_dqflush(dqp); fa = xfs_qm_dqflush_check(dqp); if (fa) { xfs_alert(mp, "corrupt dquot ID 0x%x in memory at %pS", dqp->q_id, fa); xfs_dquot_mark_sick(dqp); error = -EFSCORRUPTED; goto out_abort; } /* Flush the incore dquot to the ondisk buffer. */ dqblk = xfs_buf_offset(bp, dqp->q_bufoffset); xfs_dquot_to_disk(&dqblk->dd_diskdq, dqp); /* * Clear the dirty field and remember the flush lsn for later use. */ dqp->q_flags &= ~XFS_DQFLAG_DIRTY; /* * We hold the dquot lock, so nobody can dirty it while we're * scheduling the write out. Clear the dirty-since-flush flag. */ spin_lock(&qlip->qli_lock); qlip->qli_dirty = false; spin_unlock(&qlip->qli_lock); xfs_trans_ail_copy_lsn(mp->m_ail, &qlip->qli_flush_lsn, &lip->li_lsn); /* * copy the lsn into the on-disk dquot now while we have the in memory * dquot here. This can't be done later in the write verifier as we * can't get access to the log item at that point in time. * * We also calculate the CRC here so that the on-disk dquot in the * buffer always has a valid CRC. This ensures there is no possibility * of a dquot without an up-to-date CRC getting to disk. */ if (xfs_has_crc(mp)) { dqblk->dd_lsn = cpu_to_be64(lip->li_lsn); xfs_update_cksum((char *)dqblk, sizeof(struct xfs_dqblk), XFS_DQUOT_CRC_OFF); } /* * Attach the dquot to the buffer so that we can remove this dquot from * the AIL and release the flush lock once the dquot is synced to disk. */ bp->b_iodone = xfs_buf_dquot_iodone; list_add_tail(&lip->li_bio_list, &bp->b_li_list); /* * If the buffer is pinned then push on the log so we won't * get stuck waiting in the write for too long. */ if (xfs_buf_ispinned(bp)) { trace_xfs_dqflush_force(dqp); xfs_log_force(mp, 0); } trace_xfs_dqflush_done(dqp); return 0; out_abort: dqp->q_flags &= ~XFS_DQFLAG_DIRTY; xfs_trans_ail_delete(lip, 0); xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE); xfs_dqfunlock(dqp); return error; } /* * Lock two xfs_dquot structures. * * To avoid deadlocks we always lock the quota structure with * the lowerd id first. */ void xfs_dqlock2( struct xfs_dquot *d1, struct xfs_dquot *d2) { if (d1 && d2) { ASSERT(d1 != d2); if (d1->q_id > d2->q_id) { mutex_lock(&d2->q_qlock); mutex_lock_nested(&d1->q_qlock, XFS_QLOCK_NESTED); } else { mutex_lock(&d1->q_qlock); mutex_lock_nested(&d2->q_qlock, XFS_QLOCK_NESTED); } } else if (d1) { mutex_lock(&d1->q_qlock); } else if (d2) { mutex_lock(&d2->q_qlock); } } static int xfs_dqtrx_cmp( const void *a, const void *b) { const struct xfs_dqtrx *qa = a; const struct xfs_dqtrx *qb = b; if (qa->qt_dquot->q_id > qb->qt_dquot->q_id) return 1; if (qa->qt_dquot->q_id < qb->qt_dquot->q_id) return -1; return 0; } void xfs_dqlockn( struct xfs_dqtrx *q) { unsigned int i; BUILD_BUG_ON(XFS_QM_TRANS_MAXDQS > MAX_LOCKDEP_SUBCLASSES); /* Sort in order of dquot id, do not allow duplicates */ for (i = 0; i < XFS_QM_TRANS_MAXDQS && q[i].qt_dquot != NULL; i++) { unsigned int j; for (j = 0; j < i; j++) ASSERT(q[i].qt_dquot != q[j].qt_dquot); } if (i == 0) return; sort(q, i, sizeof(struct xfs_dqtrx), xfs_dqtrx_cmp, NULL); mutex_lock(&q[0].qt_dquot->q_qlock); for (i = 1; i < XFS_QM_TRANS_MAXDQS && q[i].qt_dquot != NULL; i++) mutex_lock_nested(&q[i].qt_dquot->q_qlock, XFS_QLOCK_NESTED + i - 1); } int __init xfs_qm_init(void) { xfs_dquot_cache = kmem_cache_create("xfs_dquot", sizeof(struct xfs_dquot), 0, 0, NULL); if (!xfs_dquot_cache) goto out; xfs_dqtrx_cache = kmem_cache_create("xfs_dqtrx", sizeof(struct xfs_dquot_acct), 0, 0, NULL); if (!xfs_dqtrx_cache) goto out_free_dquot_cache; return 0; out_free_dquot_cache: kmem_cache_destroy(xfs_dquot_cache); out: return -ENOMEM; } void xfs_qm_exit(void) { kmem_cache_destroy(xfs_dqtrx_cache); kmem_cache_destroy(xfs_dquot_cache); }
3 3 3 2 1 11 1 3 4 1 1 43 43 11 1 1 9 10 7 1 6 6 1 2 10 10 4 2 1 1 1 11 9 5 12 4 4 1 1 1 1 1 1 17 5 12 11 6 12 12 12 12 12 25 14 13 34 25 19 17 17 18 14 29 34 13 21 29 5 7 7 7 29 29 29 29 29 29 12 29 29 29 29 29 29 32 6 22 22 21 21 21 7 20 2 18 19 1 1 18 20 18 18 17 17 17 12 9 1 8 8 9 40 39 39 18 22 38 28 2 1 18 8 28 18 1 4 4 4 1 2 55 83 78 1 4 2 1 60 18 1 71 6 1 70 6 1 45 16 33 30 2 1 1 50 50 5 45 45 45 5 1 39 41 3 2 42 76 67 1 8 5 2 45 27 2 2 22 50 17 6 3 53 3 58 7 59 6 51 12 2 2 58 3 56 5 5 2 52 1 4 4 54 1 2 3 1 58 9 45 3 11 21 19 12 14 23 34 55 55 5 16 42 8 5 6 8 6 2 5 6 6 2 22 1 13 8 8 8 11 11 70 131 1 4 1 107 2 98 1 3 1 1 132 2 131 1 132 1982 1979 2 44 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282 2283 2284 2285 2286 2287 2288 2289 2290 2291 2292 2293 2294 2295 2296 2297 2298 2299 2300 2301 2302 2303 2304 2305 2306 2307 2308 2309 2310 2311 2312 2313 2314 2315 2316 2317 2318 2319 2320 2321 2322 2323 2324 2325 2326 2327 2328 2329 2330 2331 2332 2333 2334 2335 2336 2337 2338 2339 2340 2341 2342 2343 2344 2345 2346 2347 2348 2349 2350 2351 2352 2353 2354 2355 2356 2357 2358 2359 2360 2361 2362 2363 2364 2365 2366 2367 2368 2369 2370 2371 2372 2373 2374 2375 2376 2377 2378 2379 2380 2381 2382 2383 2384 2385 2386 2387 2388 2389 2390 2391 2392 2393 2394 2395 2396 2397 2398 2399 2400 2401 2402 2403 2404 2405 2406 2407 2408 2409 2410 2411 2412 2413 2414 2415 2416 2417 2418 2419 2420 2421 2422 2423 2424 2425 2426 2427 2428 2429 2430 2431 2432 2433 2434 2435 2436 2437 2438 2439 2440 2441 2442 2443 2444 2445 2446 2447 2448 2449 2450 2451 2452 2453 2454 2455 2456 2457 2458 2459 2460 2461 2462 2463 2464 2465 2466 2467 2468 2469 2470 2471 2472 2473 2474 2475 2476 2477 2478 2479 2480 2481 2482 2483 2484 2485 2486 2487 2488 2489 2490 2491 2492 2493 2494 2495 2496 2497 2498 2499 2500 2501 2502 2503 2504 2505 // SPDX-License-Identifier: GPL-2.0-only /* * GENEVE: Generic Network Virtualization Encapsulation * * Copyright (c) 2015 Red Hat, Inc. */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/ethtool.h> #include <linux/kernel.h> #include <linux/module.h> #include <linux/etherdevice.h> #include <linux/hash.h> #include <net/ipv6_stubs.h> #include <net/dst_metadata.h> #include <net/gro_cells.h> #include <net/rtnetlink.h> #include <net/geneve.h> #include <net/gro.h> #include <net/netdev_lock.h> #include <net/protocol.h> #define GENEVE_NETDEV_VER "0.6" #define GENEVE_N_VID (1u << 24) #define GENEVE_VID_MASK (GENEVE_N_VID - 1) #define VNI_HASH_BITS 10 #define VNI_HASH_SIZE (1<<VNI_HASH_BITS) static bool log_ecn_error = true; module_param(log_ecn_error, bool, 0644); MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN"); #define GENEVE_VER 0 #define GENEVE_BASE_HLEN (sizeof(struct udphdr) + sizeof(struct genevehdr)) #define GENEVE_IPV4_HLEN (ETH_HLEN + sizeof(struct iphdr) + GENEVE_BASE_HLEN) #define GENEVE_IPV6_HLEN (ETH_HLEN + sizeof(struct ipv6hdr) + GENEVE_BASE_HLEN) #define GENEVE_OPT_NETDEV_CLASS 0x100 #define GENEVE_OPT_GRO_HINT_SIZE 8 #define GENEVE_OPT_GRO_HINT_TYPE 1 #define GENEVE_OPT_GRO_HINT_LEN 1 struct geneve_opt_gro_hint { u8 inner_proto_id:2, nested_is_v6:1; u8 nested_nh_offset; u8 nested_tp_offset; u8 nested_hdr_len; }; struct geneve_skb_cb { unsigned int gro_hint_len; struct geneve_opt_gro_hint gro_hint; }; #define GENEVE_SKB_CB(__skb) ((struct geneve_skb_cb *)&((__skb)->cb[0])) /* per-network namespace private data for this module */ struct geneve_net { struct list_head geneve_list; /* sock_list is protected by rtnl lock */ struct list_head sock_list; }; static unsigned int geneve_net_id; struct geneve_dev_node { struct hlist_node hlist; struct geneve_dev *geneve; }; struct geneve_config { bool collect_md; bool use_udp6_rx_checksums; bool ttl_inherit; bool gro_hint; enum ifla_geneve_df df; bool inner_proto_inherit; u16 port_min; u16 port_max; /* Must be last --ends in a flexible-array member. */ struct ip_tunnel_info info; }; /* Pseudo network device */ struct geneve_dev { struct geneve_dev_node hlist4; /* vni hash table for IPv4 socket */ #if IS_ENABLED(CONFIG_IPV6) struct geneve_dev_node hlist6; /* vni hash table for IPv6 socket */ #endif struct net *net; /* netns for packet i/o */ struct net_device *dev; /* netdev for geneve tunnel */ struct geneve_sock __rcu *sock4; /* IPv4 socket used for geneve tunnel */ #if IS_ENABLED(CONFIG_IPV6) struct geneve_sock __rcu *sock6; /* IPv6 socket used for geneve tunnel */ #endif struct list_head next; /* geneve's per namespace list */ struct gro_cells gro_cells; struct geneve_config cfg; }; struct geneve_sock { bool collect_md; bool gro_hint; struct list_head list; struct socket *sock; struct rcu_head rcu; int refcnt; struct hlist_head vni_list[VNI_HASH_SIZE]; }; static const __be16 proto_id_map[] = { htons(ETH_P_TEB), htons(ETH_P_IPV6), htons(ETH_P_IP) }; static int proto_to_id(__be16 proto) { int i; for (i = 0; i < ARRAY_SIZE(proto_id_map); i++) if (proto_id_map[i] == proto) return i; return -1; } static inline __u32 geneve_net_vni_hash(u8 vni[3]) { __u32 vnid; vnid = (vni[0] << 16) | (vni[1] << 8) | vni[2]; return hash_32(vnid, VNI_HASH_BITS); } static __be64 vni_to_tunnel_id(const __u8 *vni) { #ifdef __BIG_ENDIAN return (vni[0] << 16) | (vni[1] << 8) | vni[2]; #else return (__force __be64)(((__force u64)vni[0] << 40) | ((__force u64)vni[1] << 48) | ((__force u64)vni[2] << 56)); #endif } /* Convert 64 bit tunnel ID to 24 bit VNI. */ static void tunnel_id_to_vni(__be64 tun_id, __u8 *vni) { #ifdef __BIG_ENDIAN vni[0] = (__force __u8)(tun_id >> 16); vni[1] = (__force __u8)(tun_id >> 8); vni[2] = (__force __u8)tun_id; #else vni[0] = (__force __u8)((__force u64)tun_id >> 40); vni[1] = (__force __u8)((__force u64)tun_id >> 48); vni[2] = (__force __u8)((__force u64)tun_id >> 56); #endif } static bool eq_tun_id_and_vni(u8 *tun_id, u8 *vni) { return !memcmp(vni, &tun_id[5], 3); } static sa_family_t geneve_get_sk_family(struct geneve_sock *gs) { return gs->sock->sk->sk_family; } static struct geneve_dev *geneve_lookup(struct geneve_sock *gs, __be32 addr, u8 vni[]) { struct hlist_head *vni_list_head; struct geneve_dev_node *node; __u32 hash; /* Find the device for this VNI */ hash = geneve_net_vni_hash(vni); vni_list_head = &gs->vni_list[hash]; hlist_for_each_entry_rcu(node, vni_list_head, hlist) { if (eq_tun_id_and_vni((u8 *)&node->geneve->cfg.info.key.tun_id, vni) && addr == node->geneve->cfg.info.key.u.ipv4.dst) return node->geneve; } return NULL; } #if IS_ENABLED(CONFIG_IPV6) static struct geneve_dev *geneve6_lookup(struct geneve_sock *gs, struct in6_addr addr6, u8 vni[]) { struct hlist_head *vni_list_head; struct geneve_dev_node *node; __u32 hash; /* Find the device for this VNI */ hash = geneve_net_vni_hash(vni); vni_list_head = &gs->vni_list[hash]; hlist_for_each_entry_rcu(node, vni_list_head, hlist) { if (eq_tun_id_and_vni((u8 *)&node->geneve->cfg.info.key.tun_id, vni) && ipv6_addr_equal(&addr6, &node->geneve->cfg.info.key.u.ipv6.dst)) return node->geneve; } return NULL; } #endif static inline struct genevehdr *geneve_hdr(const struct sk_buff *skb) { return (struct genevehdr *)(udp_hdr(skb) + 1); } static struct geneve_dev *geneve_lookup_skb(struct geneve_sock *gs, struct sk_buff *skb) { static u8 zero_vni[3]; u8 *vni; if (geneve_get_sk_family(gs) == AF_INET) { struct iphdr *iph; __be32 addr; iph = ip_hdr(skb); /* outer IP header... */ if (gs->collect_md) { vni = zero_vni; addr = 0; } else { vni = geneve_hdr(skb)->vni; addr = iph->saddr; } return geneve_lookup(gs, addr, vni); #if IS_ENABLED(CONFIG_IPV6) } else if (geneve_get_sk_family(gs) == AF_INET6) { static struct in6_addr zero_addr6; struct ipv6hdr *ip6h; struct in6_addr addr6; ip6h = ipv6_hdr(skb); /* outer IPv6 header... */ if (gs->collect_md) { vni = zero_vni; addr6 = zero_addr6; } else { vni = geneve_hdr(skb)->vni; addr6 = ip6h->saddr; } return geneve6_lookup(gs, addr6, vni); #endif } return NULL; } /* geneve receive/decap routine */ static void geneve_rx(struct geneve_dev *geneve, struct geneve_sock *gs, struct sk_buff *skb, const struct genevehdr *gnvh) { struct metadata_dst *tun_dst = NULL; unsigned int len; int nh, err = 0; void *oiph; if (ip_tunnel_collect_metadata() || gs->collect_md) { IP_TUNNEL_DECLARE_FLAGS(flags) = { }; __set_bit(IP_TUNNEL_KEY_BIT, flags); __assign_bit(IP_TUNNEL_OAM_BIT, flags, gnvh->oam); __assign_bit(IP_TUNNEL_CRIT_OPT_BIT, flags, gnvh->critical); tun_dst = udp_tun_rx_dst(skb, geneve_get_sk_family(gs), flags, vni_to_tunnel_id(gnvh->vni), gnvh->opt_len * 4); if (!tun_dst) { dev_dstats_rx_dropped(geneve->dev); goto drop; } /* Update tunnel dst according to Geneve options. */ ip_tunnel_flags_zero(flags); __set_bit(IP_TUNNEL_GENEVE_OPT_BIT, flags); ip_tunnel_info_opts_set(&tun_dst->u.tun_info, gnvh->options, gnvh->opt_len * 4, flags); } else { /* Drop packets w/ critical options, * since we don't support any... */ if (gnvh->critical) { DEV_STATS_INC(geneve->dev, rx_frame_errors); DEV_STATS_INC(geneve->dev, rx_errors); goto drop; } } if (tun_dst) skb_dst_set(skb, &tun_dst->dst); if (gnvh->proto_type == htons(ETH_P_TEB)) { skb_reset_mac_header(skb); skb->protocol = eth_type_trans(skb, geneve->dev); skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN); /* Ignore packet loops (and multicast echo) */ if (ether_addr_equal(eth_hdr(skb)->h_source, geneve->dev->dev_addr)) { DEV_STATS_INC(geneve->dev, rx_errors); goto drop; } } else { skb_reset_mac_header(skb); skb->dev = geneve->dev; skb->pkt_type = PACKET_HOST; } /* Save offset of outer header relative to skb->head, * because we are going to reset the network header to the inner header * and might change skb->head. */ nh = skb_network_header(skb) - skb->head; skb_reset_network_header(skb); if (!pskb_inet_may_pull(skb)) { DEV_STATS_INC(geneve->dev, rx_length_errors); DEV_STATS_INC(geneve->dev, rx_errors); goto drop; } /* Get the outer header. */ oiph = skb->head + nh; if (geneve_get_sk_family(gs) == AF_INET) err = IP_ECN_decapsulate(oiph, skb); #if IS_ENABLED(CONFIG_IPV6) else err = IP6_ECN_decapsulate(oiph, skb); #endif if (unlikely(err)) { if (log_ecn_error) { if (geneve_get_sk_family(gs) == AF_INET) net_info_ratelimited("non-ECT from %pI4 " "with TOS=%#x\n", &((struct iphdr *)oiph)->saddr, ((struct iphdr *)oiph)->tos); #if IS_ENABLED(CONFIG_IPV6) else net_info_ratelimited("non-ECT from %pI6\n", &((struct ipv6hdr *)oiph)->saddr); #endif } if (err > 1) { DEV_STATS_INC(geneve->dev, rx_frame_errors); DEV_STATS_INC(geneve->dev, rx_errors); goto drop; } } /* Skip the additional GRO stage when hints are in use. */ len = skb->len; if (skb->encapsulation) err = netif_rx(skb); else err = gro_cells_receive(&geneve->gro_cells, skb); if (likely(err == NET_RX_SUCCESS)) dev_dstats_rx_add(geneve->dev, len); return; drop: /* Consume bad packet */ kfree_skb(skb); } /* Setup stats when device is created */ static int geneve_init(struct net_device *dev) { struct geneve_dev *geneve = netdev_priv(dev); int err; err = gro_cells_init(&geneve->gro_cells, dev); if (err) return err; err = dst_cache_init(&geneve->cfg.info.dst_cache, GFP_KERNEL); if (err) { gro_cells_destroy(&geneve->gro_cells); return err; } netdev_lockdep_set_classes(dev); return 0; } static void geneve_uninit(struct net_device *dev) { struct geneve_dev *geneve = netdev_priv(dev); dst_cache_destroy(&geneve->cfg.info.dst_cache); gro_cells_destroy(&geneve->gro_cells); } static int geneve_hlen(const struct genevehdr *gh) { return sizeof(*gh) + gh->opt_len * 4; } /* * Look for GRO hint in the genenve options; if not found or does not pass basic * sanitization return 0, otherwise the offset WRT the geneve hdr start. */ static unsigned int geneve_opt_gro_hint_off(const struct genevehdr *gh, __be16 *type, unsigned int *gh_len) { struct geneve_opt *opt = (void *)(gh + 1); unsigned int id, opt_len = gh->opt_len; struct geneve_opt_gro_hint *gro_hint; while (opt_len >= (GENEVE_OPT_GRO_HINT_SIZE >> 2)) { if (opt->opt_class == htons(GENEVE_OPT_NETDEV_CLASS) && opt->type == GENEVE_OPT_GRO_HINT_TYPE && opt->length == GENEVE_OPT_GRO_HINT_LEN) goto found; /* check for bad opt len */ if (opt->length + 1 >= opt_len) return 0; /* next opt */ opt_len -= opt->length + 1; opt = ((void *)opt) + ((opt->length + 1) << 2); } return 0; found: gro_hint = (struct geneve_opt_gro_hint *)opt->opt_data; /* * Sanitize the hinted hdrs: the nested transport is UDP and must fit * the overall hinted hdr size. */ if (gro_hint->nested_tp_offset + sizeof(struct udphdr) > gro_hint->nested_hdr_len) return 0; if (gro_hint->nested_nh_offset + (gro_hint->nested_is_v6 ? sizeof(struct ipv6hdr) : sizeof(struct iphdr)) > gro_hint->nested_tp_offset) return 0; /* Allow only supported L2. */ id = gro_hint->inner_proto_id; if (id >= ARRAY_SIZE(proto_id_map)) return 0; *type = proto_id_map[id]; *gh_len += gro_hint->nested_hdr_len; return (void *)gro_hint - (void *)gh; } static const struct geneve_opt_gro_hint * geneve_opt_gro_hint(const struct genevehdr *gh, unsigned int hint_off) { return (const struct geneve_opt_gro_hint *)((void *)gh + hint_off); } static unsigned int geneve_sk_gro_hint_off(const struct sock *sk, const struct genevehdr *gh, __be16 *type, unsigned int *gh_len) { const struct geneve_sock *gs = rcu_dereference_sk_user_data(sk); if (!gs || !gs->gro_hint) return 0; return geneve_opt_gro_hint_off(gh, type, gh_len); } /* Validate the packet headers pointed by data WRT the provided hint */ static bool geneve_opt_gro_hint_validate(void *data, const struct geneve_opt_gro_hint *gro_hint) { void *nested_nh = data + gro_hint->nested_nh_offset; struct iphdr *iph; if (gro_hint->nested_is_v6) { struct ipv6hdr *ipv6h = nested_nh; struct ipv6_opt_hdr *opth; int offset, len; if (ipv6h->nexthdr == IPPROTO_UDP) return true; offset = sizeof(*ipv6h) + gro_hint->nested_nh_offset; while (offset + sizeof(*opth) <= gro_hint->nested_tp_offset) { opth = data + offset; len = ipv6_optlen(opth); if (len + offset > gro_hint->nested_tp_offset) return false; if (opth->nexthdr == IPPROTO_UDP) return true; offset += len; } return false; } iph = nested_nh; if (*(u8 *)iph != 0x45 || ip_is_fragment(iph) || iph->protocol != IPPROTO_UDP || ip_fast_csum((u8 *)iph, 5)) return false; return true; } /* * Validate the skb headers following the specified geneve hdr vs the * provided hint, including nested L4 checksum. * The caller already ensured that the relevant amount of data is available * in the linear part. */ static bool geneve_opt_gro_hint_validate_csum(const struct sk_buff *skb, const struct genevehdr *gh, const struct geneve_opt_gro_hint *gro_hint) { unsigned int plen, gh_len = geneve_hlen(gh); void *nested = (void *)gh + gh_len; struct udphdr *nested_uh; unsigned int nested_len; struct ipv6hdr *ipv6h; struct iphdr *iph; __wsum csum, psum; if (!geneve_opt_gro_hint_validate(nested, gro_hint)) return false; /* Use GRO hints with nested csum only if the outer header has csum. */ nested_uh = nested + gro_hint->nested_tp_offset; if (!nested_uh->check || skb->ip_summed == CHECKSUM_PARTIAL) return true; if (!NAPI_GRO_CB(skb)->csum_valid) return false; /* Compute the complete checksum up to the nested transport. */ plen = gh_len + gro_hint->nested_tp_offset; csum = csum_sub(NAPI_GRO_CB(skb)->csum, csum_partial(gh, plen, 0)); nested_len = skb_gro_len(skb) - plen; /* Compute the nested pseudo header csum. */ ipv6h = nested + gro_hint->nested_nh_offset; iph = (struct iphdr *)ipv6h; psum = gro_hint->nested_is_v6 ? ~csum_unfold(csum_ipv6_magic(&ipv6h->saddr, &ipv6h->daddr, nested_len, IPPROTO_UDP, 0)) : csum_tcpudp_nofold(iph->saddr, iph->daddr, nested_len, IPPROTO_UDP, 0); return !csum_fold(csum_add(psum, csum)); } static int geneve_post_decap_hint(const struct sock *sk, struct sk_buff *skb, unsigned int gh_len, struct genevehdr **geneveh) { const struct geneve_opt_gro_hint *gro_hint; unsigned int len, total_len, hint_off; struct ipv6hdr *ipv6h; struct iphdr *iph; struct udphdr *uh; __be16 p; hint_off = geneve_sk_gro_hint_off(sk, *geneveh, &p, &len); if (!hint_off) return 0; if (!skb_is_gso(skb)) return 0; gro_hint = geneve_opt_gro_hint(*geneveh, hint_off); if (unlikely(!pskb_may_pull(skb, gro_hint->nested_hdr_len))) return -ENOMEM; *geneveh = geneve_hdr(skb); gro_hint = geneve_opt_gro_hint(*geneveh, hint_off); /* * Validate hints from untrusted source before accessing * the headers; csum will be checked later by the nested * protocol rx path. */ if (unlikely(skb_shinfo(skb)->gso_type & SKB_GSO_DODGY && !geneve_opt_gro_hint_validate(skb->data, gro_hint))) return -EINVAL; ipv6h = (void *)skb->data + gro_hint->nested_nh_offset; iph = (struct iphdr *)ipv6h; total_len = skb->len - gro_hint->nested_nh_offset; if (total_len > GRO_LEGACY_MAX_SIZE) return -E2BIG; /* * After stripping the outer encap, the packet still carries a * tunnel encapsulation: the nested one. */ skb->encapsulation = 1; /* GSO expect a valid transpor header, move it to the current one. */ skb_set_transport_header(skb, gro_hint->nested_tp_offset); /* Adjust the nested IP{6} hdr to actual GSO len. */ if (gro_hint->nested_is_v6) { ipv6h->payload_len = htons(total_len - sizeof(*ipv6h)); } else { __be16 old_len = iph->tot_len; iph->tot_len = htons(total_len); /* For IPv4 additionally adjust the nested csum. */ csum_replace2(&iph->check, old_len, iph->tot_len); ip_send_check(iph); } /* Adjust the nested UDP header len and checksum. */ uh = udp_hdr(skb); uh->len = htons(skb->len - gro_hint->nested_tp_offset); if (uh->check) { len = skb->len - gro_hint->nested_nh_offset; skb_shinfo(skb)->gso_type |= SKB_GSO_UDP_TUNNEL_CSUM; if (gro_hint->nested_is_v6) uh->check = ~udp_v6_check(len, &ipv6h->saddr, &ipv6h->daddr, 0); else uh->check = ~udp_v4_check(len, iph->saddr, iph->daddr, 0); } else { skb_shinfo(skb)->gso_type |= SKB_GSO_UDP_TUNNEL; } return 0; } /* Callback from net/ipv4/udp.c to receive packets */ static int geneve_udp_encap_recv(struct sock *sk, struct sk_buff *skb) { struct genevehdr *geneveh; struct geneve_dev *geneve; struct geneve_sock *gs; __be16 inner_proto; int opts_len; /* Need UDP and Geneve header to be present */ if (unlikely(!pskb_may_pull(skb, GENEVE_BASE_HLEN))) goto drop; /* Return packets with reserved bits set */ geneveh = geneve_hdr(skb); if (unlikely(geneveh->ver != GENEVE_VER)) goto drop; gs = rcu_dereference_sk_user_data(sk); if (!gs) goto drop; geneve = geneve_lookup_skb(gs, skb); if (!geneve) goto drop; inner_proto = geneveh->proto_type; if (unlikely((!geneve->cfg.inner_proto_inherit && inner_proto != htons(ETH_P_TEB)))) { dev_dstats_rx_dropped(geneve->dev); goto drop; } opts_len = geneveh->opt_len * 4; if (iptunnel_pull_header(skb, GENEVE_BASE_HLEN + opts_len, inner_proto, !net_eq(geneve->net, dev_net(geneve->dev)))) { dev_dstats_rx_dropped(geneve->dev); goto drop; } /* * After hint processing, the transport header points to the inner one * and we can't use anymore on geneve_hdr(). */ geneveh = geneve_hdr(skb); if (geneve_post_decap_hint(sk, skb, sizeof(struct genevehdr) + opts_len, &geneveh)) { DEV_STATS_INC(geneve->dev, rx_errors); goto drop; } geneve_rx(geneve, gs, skb, geneveh); return 0; drop: /* Consume bad packet */ kfree_skb(skb); return 0; } /* Callback from net/ipv{4,6}/udp.c to check that we have a tunnel for errors */ static int geneve_udp_encap_err_lookup(struct sock *sk, struct sk_buff *skb) { struct genevehdr *geneveh; struct geneve_sock *gs; u8 zero_vni[3] = { 0 }; u8 *vni = zero_vni; if (!pskb_may_pull(skb, skb_transport_offset(skb) + GENEVE_BASE_HLEN)) return -EINVAL; geneveh = geneve_hdr(skb); if (geneveh->ver != GENEVE_VER) return -EINVAL; if (geneveh->proto_type != htons(ETH_P_TEB)) return -EINVAL; gs = rcu_dereference_sk_user_data(sk); if (!gs) return -ENOENT; if (geneve_get_sk_family(gs) == AF_INET) { struct iphdr *iph = ip_hdr(skb); __be32 addr4 = 0; if (!gs->collect_md) { vni = geneve_hdr(skb)->vni; addr4 = iph->daddr; } return geneve_lookup(gs, addr4, vni) ? 0 : -ENOENT; } #if IS_ENABLED(CONFIG_IPV6) if (geneve_get_sk_family(gs) == AF_INET6) { struct ipv6hdr *ip6h = ipv6_hdr(skb); struct in6_addr addr6; memset(&addr6, 0, sizeof(struct in6_addr)); if (!gs->collect_md) { vni = geneve_hdr(skb)->vni; addr6 = ip6h->daddr; } return geneve6_lookup(gs, addr6, vni) ? 0 : -ENOENT; } #endif return -EPFNOSUPPORT; } static struct socket *geneve_create_sock(struct net *net, bool ipv6, __be16 port, bool ipv6_rx_csum) { struct socket *sock; struct udp_port_cfg udp_conf; int err; memset(&udp_conf, 0, sizeof(udp_conf)); if (ipv6) { udp_conf.family = AF_INET6; udp_conf.ipv6_v6only = 1; udp_conf.use_udp6_rx_checksums = ipv6_rx_csum; } else { udp_conf.family = AF_INET; udp_conf.local_ip.s_addr = htonl(INADDR_ANY); } udp_conf.local_udp_port = port; /* Open UDP socket */ err = udp_sock_create(net, &udp_conf, &sock); if (err < 0) return ERR_PTR(err); udp_allow_gso(sock->sk); return sock; } static bool geneve_hdr_match(struct sk_buff *skb, const struct genevehdr *gh, const struct genevehdr *gh2, unsigned int hint_off) { const struct geneve_opt_gro_hint *gro_hint; void *nested, *nested2, *nh, *nh2; struct udphdr *udp, *udp2; unsigned int gh_len; /* Match the geneve hdr and options */ if (gh->opt_len != gh2->opt_len) return false; gh_len = geneve_hlen(gh); if (memcmp(gh, gh2, gh_len)) return false; if (!hint_off) return true; /* * When gro is present consider the nested headers as part * of the geneve options */ nested = (void *)gh + gh_len; nested2 = (void *)gh2 + gh_len; gro_hint = geneve_opt_gro_hint(gh, hint_off); if (!memcmp(nested, nested2, gro_hint->nested_hdr_len)) return true; /* * The nested headers differ; the packets can still belong to * the same flow when IPs/proto/ports match; if so flushing is * required. */ nh = nested + gro_hint->nested_nh_offset; nh2 = nested2 + gro_hint->nested_nh_offset; if (gro_hint->nested_is_v6) { struct ipv6hdr *iph = nh, *iph2 = nh2; unsigned int nested_nlen; __be32 first_word; first_word = *(__be32 *)iph ^ *(__be32 *)iph2; if ((first_word & htonl(0xF00FFFFF)) || !ipv6_addr_equal(&iph->saddr, &iph2->saddr) || !ipv6_addr_equal(&iph->daddr, &iph2->daddr) || iph->nexthdr != iph2->nexthdr) return false; nested_nlen = gro_hint->nested_tp_offset - gro_hint->nested_nh_offset; if (nested_nlen > sizeof(struct ipv6hdr) && (memcmp(iph + 1, iph2 + 1, nested_nlen - sizeof(struct ipv6hdr)))) return false; } else { struct iphdr *iph = nh, *iph2 = nh2; if ((iph->protocol ^ iph2->protocol) | ((__force u32)iph->saddr ^ (__force u32)iph2->saddr) | ((__force u32)iph->daddr ^ (__force u32)iph2->daddr)) return false; } udp = nested + gro_hint->nested_tp_offset; udp2 = nested2 + gro_hint->nested_tp_offset; if (udp->source != udp2->source || udp->dest != udp2->dest || udp->check != udp2->check) return false; NAPI_GRO_CB(skb)->flush = 1; return true; } static struct sk_buff *geneve_gro_receive(struct sock *sk, struct list_head *head, struct sk_buff *skb) { unsigned int hlen, gh_len, off_gnv, hint_off; const struct geneve_opt_gro_hint *gro_hint; const struct packet_offload *ptype; struct genevehdr *gh, *gh2; struct sk_buff *pp = NULL; struct sk_buff *p; int flush = 1; __be16 type; off_gnv = skb_gro_offset(skb); hlen = off_gnv + sizeof(*gh); gh = skb_gro_header(skb, hlen, off_gnv); if (unlikely(!gh)) goto out; if (gh->ver != GENEVE_VER || gh->oam) goto out; gh_len = geneve_hlen(gh); type = gh->proto_type; hlen = off_gnv + gh_len; if (!skb_gro_may_pull(skb, hlen)) { gh = skb_gro_header_slow(skb, hlen, off_gnv); if (unlikely(!gh)) goto out; } /* The GRO hint/nested hdr could use a different ethernet type. */ hint_off = geneve_sk_gro_hint_off(sk, gh, &type, &gh_len); if (hint_off) { /* * If the hint is present, and nested hdr validation fails, do * not attempt plain GRO: it will ignore inner hdrs and cause * OoO. */ gh = skb_gro_header(skb, off_gnv + gh_len, off_gnv); if (unlikely(!gh)) goto out; gro_hint = geneve_opt_gro_hint(gh, hint_off); if (!geneve_opt_gro_hint_validate_csum(skb, gh, gro_hint)) goto out; } list_for_each_entry(p, head, list) { if (!NAPI_GRO_CB(p)->same_flow) continue; gh2 = (struct genevehdr *)(p->data + off_gnv); if (!geneve_hdr_match(skb, gh, gh2, hint_off)) { NAPI_GRO_CB(p)->same_flow = 0; continue; } } skb_gro_pull(skb, gh_len); skb_gro_postpull_rcsum(skb, gh, gh_len); if (likely(type == htons(ETH_P_TEB))) return call_gro_receive(eth_gro_receive, head, skb); ptype = gro_find_receive_by_type(type); if (!ptype) goto out; pp = call_gro_receive(ptype->callbacks.gro_receive, head, skb); flush = 0; out: skb_gro_flush_final(skb, pp, flush); return pp; } static int geneve_gro_complete(struct sock *sk, struct sk_buff *skb, int nhoff) { struct genevehdr *gh; struct packet_offload *ptype; __be16 type; int gh_len; int err = -ENOSYS; gh = (struct genevehdr *)(skb->data + nhoff); gh_len = geneve_hlen(gh); type = gh->proto_type; geneve_opt_gro_hint_off(gh, &type, &gh_len); /* since skb->encapsulation is set, eth_gro_complete() sets the inner mac header */ if (likely(type == htons(ETH_P_TEB))) return eth_gro_complete(skb, nhoff + gh_len); ptype = gro_find_complete_by_type(type); if (ptype) err = ptype->callbacks.gro_complete(skb, nhoff + gh_len); skb_set_inner_mac_header(skb, nhoff + gh_len); return err; } /* Create new listen socket if needed */ static struct geneve_sock *geneve_socket_create(struct net *net, __be16 port, bool ipv6, bool ipv6_rx_csum) { struct geneve_net *gn = net_generic(net, geneve_net_id); struct geneve_sock *gs; struct socket *sock; struct udp_tunnel_sock_cfg tunnel_cfg; int h; gs = kzalloc(sizeof(*gs), GFP_KERNEL); if (!gs) return ERR_PTR(-ENOMEM); sock = geneve_create_sock(net, ipv6, port, ipv6_rx_csum); if (IS_ERR(sock)) { kfree(gs); return ERR_CAST(sock); } gs->sock = sock; gs->refcnt = 1; for (h = 0; h < VNI_HASH_SIZE; ++h) INIT_HLIST_HEAD(&gs->vni_list[h]); /* Initialize the geneve udp offloads structure */ udp_tunnel_notify_add_rx_port(gs->sock, UDP_TUNNEL_TYPE_GENEVE); /* Mark socket as an encapsulation socket */ memset(&tunnel_cfg, 0, sizeof(tunnel_cfg)); tunnel_cfg.sk_user_data = gs; tunnel_cfg.encap_type = 1; tunnel_cfg.gro_receive = geneve_gro_receive; tunnel_cfg.gro_complete = geneve_gro_complete; tunnel_cfg.encap_rcv = geneve_udp_encap_recv; tunnel_cfg.encap_err_lookup = geneve_udp_encap_err_lookup; tunnel_cfg.encap_destroy = NULL; setup_udp_tunnel_sock(net, sock, &tunnel_cfg); list_add(&gs->list, &gn->sock_list); return gs; } static void __geneve_sock_release(struct geneve_sock *gs) { if (!gs || --gs->refcnt) return; list_del(&gs->list); udp_tunnel_notify_del_rx_port(gs->sock, UDP_TUNNEL_TYPE_GENEVE); udp_tunnel_sock_release(gs->sock); kfree_rcu(gs, rcu); } static void geneve_sock_release(struct geneve_dev *geneve) { struct geneve_sock *gs4 = rtnl_dereference(geneve->sock4); #if IS_ENABLED(CONFIG_IPV6) struct geneve_sock *gs6 = rtnl_dereference(geneve->sock6); rcu_assign_pointer(geneve->sock6, NULL); #endif rcu_assign_pointer(geneve->sock4, NULL); synchronize_net(); __geneve_sock_release(gs4); #if IS_ENABLED(CONFIG_IPV6) __geneve_sock_release(gs6); #endif } static struct geneve_sock *geneve_find_sock(struct geneve_net *gn, sa_family_t family, __be16 dst_port, bool gro_hint) { struct geneve_sock *gs; list_for_each_entry(gs, &gn->sock_list, list) { if (inet_sk(gs->sock->sk)->inet_sport == dst_port && geneve_get_sk_family(gs) == family && gs->gro_hint == gro_hint) { return gs; } } return NULL; } static int geneve_sock_add(struct geneve_dev *geneve, bool ipv6) { struct net *net = geneve->net; struct geneve_net *gn = net_generic(net, geneve_net_id); bool gro_hint = geneve->cfg.gro_hint; struct geneve_dev_node *node; struct geneve_sock *gs; __u8 vni[3]; __u32 hash; gs = geneve_find_sock(gn, ipv6 ? AF_INET6 : AF_INET, geneve->cfg.info.key.tp_dst, gro_hint); if (gs) { gs->refcnt++; goto out; } gs = geneve_socket_create(net, geneve->cfg.info.key.tp_dst, ipv6, geneve->cfg.use_udp6_rx_checksums); if (IS_ERR(gs)) return PTR_ERR(gs); out: gs->collect_md = geneve->cfg.collect_md; gs->gro_hint = gro_hint; #if IS_ENABLED(CONFIG_IPV6) if (ipv6) { rcu_assign_pointer(geneve->sock6, gs); node = &geneve->hlist6; } else #endif { rcu_assign_pointer(geneve->sock4, gs); node = &geneve->hlist4; } node->geneve = geneve; tunnel_id_to_vni(geneve->cfg.info.key.tun_id, vni); hash = geneve_net_vni_hash(vni); hlist_add_head_rcu(&node->hlist, &gs->vni_list[hash]); return 0; } static int geneve_open(struct net_device *dev) { struct geneve_dev *geneve = netdev_priv(dev); bool metadata = geneve->cfg.collect_md; bool ipv4, ipv6; int ret = 0; ipv6 = geneve->cfg.info.mode & IP_TUNNEL_INFO_IPV6 || metadata; ipv4 = !ipv6 || metadata; #if IS_ENABLED(CONFIG_IPV6) if (ipv6) { ret = geneve_sock_add(geneve, true); if (ret < 0 && ret != -EAFNOSUPPORT) ipv4 = false; } #endif if (ipv4) ret = geneve_sock_add(geneve, false); if (ret < 0) geneve_sock_release(geneve); return ret; } static int geneve_stop(struct net_device *dev) { struct geneve_dev *geneve = netdev_priv(dev); hlist_del_init_rcu(&geneve->hlist4.hlist); #if IS_ENABLED(CONFIG_IPV6) hlist_del_init_rcu(&geneve->hlist6.hlist); #endif geneve_sock_release(geneve); return 0; } static void geneve_build_header(struct genevehdr *geneveh, const struct ip_tunnel_info *info, __be16 inner_proto) { geneveh->ver = GENEVE_VER; geneveh->opt_len = info->options_len / 4; geneveh->oam = test_bit(IP_TUNNEL_OAM_BIT, info->key.tun_flags); geneveh->critical = test_bit(IP_TUNNEL_CRIT_OPT_BIT, info->key.tun_flags); geneveh->rsvd1 = 0; tunnel_id_to_vni(info->key.tun_id, geneveh->vni); geneveh->proto_type = inner_proto; geneveh->rsvd2 = 0; if (test_bit(IP_TUNNEL_GENEVE_OPT_BIT, info->key.tun_flags)) ip_tunnel_info_opts_get(geneveh->options, info); } static int geneve_build_gro_hint_opt(const struct geneve_dev *geneve, struct sk_buff *skb) { struct geneve_skb_cb *cb = GENEVE_SKB_CB(skb); struct geneve_opt_gro_hint *hint; unsigned int nhlen; bool nested_is_v6; int id; BUILD_BUG_ON(sizeof(skb->cb) < sizeof(struct geneve_skb_cb)); cb->gro_hint_len = 0; /* Try to add the GRO hint only in case of double encap. */ if (!geneve->cfg.gro_hint || !skb->encapsulation) return 0; /* * The nested headers must fit the geneve opt len fields and the * nested encap must carry a nested transport (UDP) header. */ nhlen = skb_inner_mac_header(skb) - skb->data; if (nhlen > 255 || !skb_transport_header_was_set(skb) || skb->inner_protocol_type != ENCAP_TYPE_ETHER || (skb_transport_offset(skb) + sizeof(struct udphdr) > nhlen)) return 0; id = proto_to_id(skb->inner_protocol); if (id < 0) return 0; nested_is_v6 = skb->protocol == htons(ETH_P_IPV6); if (nested_is_v6) { int start = skb_network_offset(skb) + sizeof(struct ipv6hdr); u8 proto = ipv6_hdr(skb)->nexthdr; __be16 foff; if (ipv6_skip_exthdr(skb, start, &proto, &foff) < 0 || proto != IPPROTO_UDP) return 0; } else { if (ip_hdr(skb)->protocol != IPPROTO_UDP) return 0; } hint = &cb->gro_hint; memset(hint, 0, sizeof(*hint)); hint->inner_proto_id = id; hint->nested_is_v6 = skb->protocol == htons(ETH_P_IPV6); hint->nested_nh_offset = skb_network_offset(skb); hint->nested_tp_offset = skb_transport_offset(skb); hint->nested_hdr_len = nhlen; cb->gro_hint_len = GENEVE_OPT_GRO_HINT_SIZE; return GENEVE_OPT_GRO_HINT_SIZE; } static void geneve_put_gro_hint_opt(struct genevehdr *gnvh, int opt_size, const struct geneve_opt_gro_hint *hint) { struct geneve_opt *gro_opt; /* geneve_build_header() did not took in account the GRO hint. */ gnvh->opt_len = (opt_size + GENEVE_OPT_GRO_HINT_SIZE) >> 2; gro_opt = (void *)(gnvh + 1) + opt_size; memset(gro_opt, 0, sizeof(*gro_opt)); gro_opt->opt_class = htons(GENEVE_OPT_NETDEV_CLASS); gro_opt->type = GENEVE_OPT_GRO_HINT_TYPE; gro_opt->length = GENEVE_OPT_GRO_HINT_LEN; memcpy(gro_opt + 1, hint, sizeof(*hint)); } static int geneve_build_skb(struct dst_entry *dst, struct sk_buff *skb, const struct ip_tunnel_info *info, const struct geneve_dev *geneve, int ip_hdr_len) { bool udp_sum = test_bit(IP_TUNNEL_CSUM_BIT, info->key.tun_flags); bool inner_proto_inherit = geneve->cfg.inner_proto_inherit; bool xnet = !net_eq(geneve->net, dev_net(geneve->dev)); struct geneve_skb_cb *cb = GENEVE_SKB_CB(skb); struct genevehdr *gnvh; __be16 inner_proto; bool double_encap; int min_headroom; int opt_size; int err; skb_reset_mac_header(skb); skb_scrub_packet(skb, xnet); opt_size = info->options_len + cb->gro_hint_len; min_headroom = LL_RESERVED_SPACE(dst->dev) + dst->header_len + GENEVE_BASE_HLEN + opt_size + ip_hdr_len; err = skb_cow_head(skb, min_headroom); if (unlikely(err)) goto free_dst; double_encap = udp_tunnel_handle_partial(skb); err = udp_tunnel_handle_offloads(skb, udp_sum); if (err) goto free_dst; gnvh = __skb_push(skb, sizeof(*gnvh) + opt_size); inner_proto = inner_proto_inherit ? skb->protocol : htons(ETH_P_TEB); geneve_build_header(gnvh, info, inner_proto); if (cb->gro_hint_len) geneve_put_gro_hint_opt(gnvh, info->options_len, &cb->gro_hint); udp_tunnel_set_inner_protocol(skb, double_encap, inner_proto); return 0; free_dst: dst_release(dst); return err; } static u8 geneve_get_dsfield(struct sk_buff *skb, struct net_device *dev, const struct ip_tunnel_info *info, bool *use_cache) { struct geneve_dev *geneve = netdev_priv(dev); u8 dsfield; dsfield = info->key.tos; if (dsfield == 1 && !geneve->cfg.collect_md) { dsfield = ip_tunnel_get_dsfield(ip_hdr(skb), skb); *use_cache = false; } return dsfield; } static int geneve_xmit_skb(struct sk_buff *skb, struct net_device *dev, struct geneve_dev *geneve, const struct ip_tunnel_info *info) { struct geneve_sock *gs4 = rcu_dereference(geneve->sock4); const struct ip_tunnel_key *key = &info->key; struct rtable *rt; bool use_cache; __u8 tos, ttl; __be16 df = 0; __be32 saddr; __be16 sport; int err; if (skb_vlan_inet_prepare(skb, geneve->cfg.inner_proto_inherit)) return -EINVAL; if (!gs4) return -EIO; use_cache = ip_tunnel_dst_cache_usable(skb, info); tos = geneve_get_dsfield(skb, dev, info, &use_cache); sport = udp_flow_src_port(geneve->net, skb, geneve->cfg.port_min, geneve->cfg.port_max, true); rt = udp_tunnel_dst_lookup(skb, dev, geneve->net, 0, &saddr, &info->key, sport, geneve->cfg.info.key.tp_dst, tos, use_cache ? (struct dst_cache *)&info->dst_cache : NULL); if (IS_ERR(rt)) return PTR_ERR(rt); err = skb_tunnel_check_pmtu(skb, &rt->dst, GENEVE_IPV4_HLEN + info->options_len + geneve_build_gro_hint_opt(geneve, skb), netif_is_any_bridge_port(dev)); if (err < 0) { dst_release(&rt->dst); return err; } else if (err) { struct ip_tunnel_info *info; info = skb_tunnel_info(skb); if (info) { struct ip_tunnel_info *unclone; unclone = skb_tunnel_info_unclone(skb); if (unlikely(!unclone)) { dst_release(&rt->dst); return -ENOMEM; } unclone->key.u.ipv4.dst = saddr; unclone->key.u.ipv4.src = info->key.u.ipv4.dst; } if (!pskb_may_pull(skb, ETH_HLEN)) { dst_release(&rt->dst); return -EINVAL; } skb->protocol = eth_type_trans(skb, geneve->dev); __netif_rx(skb); dst_release(&rt->dst); return -EMSGSIZE; } tos = ip_tunnel_ecn_encap(tos, ip_hdr(skb), skb); if (geneve->cfg.collect_md) { ttl = key->ttl; df = test_bit(IP_TUNNEL_DONT_FRAGMENT_BIT, key->tun_flags) ? htons(IP_DF) : 0; } else { if (geneve->cfg.ttl_inherit) ttl = ip_tunnel_get_ttl(ip_hdr(skb), skb); else ttl = key->ttl; ttl = ttl ? : ip4_dst_hoplimit(&rt->dst); if (geneve->cfg.df == GENEVE_DF_SET) { df = htons(IP_DF); } else if (geneve->cfg.df == GENEVE_DF_INHERIT) { struct ethhdr *eth = skb_eth_hdr(skb); if (ntohs(eth->h_proto) == ETH_P_IPV6) { df = htons(IP_DF); } else if (ntohs(eth->h_proto) == ETH_P_IP) { struct iphdr *iph = ip_hdr(skb); if (iph->frag_off & htons(IP_DF)) df = htons(IP_DF); } } } err = geneve_build_skb(&rt->dst, skb, info, geneve, sizeof(struct iphdr)); if (unlikely(err)) return err; udp_tunnel_xmit_skb(rt, gs4->sock->sk, skb, saddr, info->key.u.ipv4.dst, tos, ttl, df, sport, geneve->cfg.info.key.tp_dst, !net_eq(geneve->net, dev_net(geneve->dev)), !test_bit(IP_TUNNEL_CSUM_BIT, info->key.tun_flags), 0); return 0; } #if IS_ENABLED(CONFIG_IPV6) static int geneve6_xmit_skb(struct sk_buff *skb, struct net_device *dev, struct geneve_dev *geneve, const struct ip_tunnel_info *info) { struct geneve_sock *gs6 = rcu_dereference(geneve->sock6); const struct ip_tunnel_key *key = &info->key; struct dst_entry *dst = NULL; struct in6_addr saddr; bool use_cache; __u8 prio, ttl; __be16 sport; int err; if (skb_vlan_inet_prepare(skb, geneve->cfg.inner_proto_inherit)) return -EINVAL; if (!gs6) return -EIO; use_cache = ip_tunnel_dst_cache_usable(skb, info); prio = geneve_get_dsfield(skb, dev, info, &use_cache); sport = udp_flow_src_port(geneve->net, skb, geneve->cfg.port_min, geneve->cfg.port_max, true); dst = udp_tunnel6_dst_lookup(skb, dev, geneve->net, gs6->sock, 0, &saddr, key, sport, geneve->cfg.info.key.tp_dst, prio, use_cache ? (struct dst_cache *)&info->dst_cache : NULL); if (IS_ERR(dst)) return PTR_ERR(dst); err = skb_tunnel_check_pmtu(skb, dst, GENEVE_IPV6_HLEN + info->options_len + geneve_build_gro_hint_opt(geneve, skb), netif_is_any_bridge_port(dev)); if (err < 0) { dst_release(dst); return err; } else if (err) { struct ip_tunnel_info *info = skb_tunnel_info(skb); if (info) { struct ip_tunnel_info *unclone; unclone = skb_tunnel_info_unclone(skb); if (unlikely(!unclone)) { dst_release(dst); return -ENOMEM; } unclone->key.u.ipv6.dst = saddr; unclone->key.u.ipv6.src = info->key.u.ipv6.dst; } if (!pskb_may_pull(skb, ETH_HLEN)) { dst_release(dst); return -EINVAL; } skb->protocol = eth_type_trans(skb, geneve->dev); __netif_rx(skb); dst_release(dst); return -EMSGSIZE; } prio = ip_tunnel_ecn_encap(prio, ip_hdr(skb), skb); if (geneve->cfg.collect_md) { ttl = key->ttl; } else { if (geneve->cfg.ttl_inherit) ttl = ip_tunnel_get_ttl(ip_hdr(skb), skb); else ttl = key->ttl; ttl = ttl ? : ip6_dst_hoplimit(dst); } err = geneve_build_skb(dst, skb, info, geneve, sizeof(struct ipv6hdr)); if (unlikely(err)) return err; udp_tunnel6_xmit_skb(dst, gs6->sock->sk, skb, dev, &saddr, &key->u.ipv6.dst, prio, ttl, info->key.label, sport, geneve->cfg.info.key.tp_dst, !test_bit(IP_TUNNEL_CSUM_BIT, info->key.tun_flags), 0); return 0; } #endif static netdev_tx_t geneve_xmit(struct sk_buff *skb, struct net_device *dev) { struct geneve_dev *geneve = netdev_priv(dev); struct ip_tunnel_info *info = NULL; int err; if (geneve->cfg.collect_md) { info = skb_tunnel_info(skb); if (unlikely(!info || !(info->mode & IP_TUNNEL_INFO_TX))) { netdev_dbg(dev, "no tunnel metadata\n"); dev_kfree_skb(skb); dev_dstats_tx_dropped(dev); return NETDEV_TX_OK; } } else { info = &geneve->cfg.info; } rcu_read_lock(); #if IS_ENABLED(CONFIG_IPV6) if (info->mode & IP_TUNNEL_INFO_IPV6) err = geneve6_xmit_skb(skb, dev, geneve, info); else #endif err = geneve_xmit_skb(skb, dev, geneve, info); rcu_read_unlock(); if (likely(!err)) return NETDEV_TX_OK; if (err != -EMSGSIZE) dev_kfree_skb(skb); if (err == -ELOOP) DEV_STATS_INC(dev, collisions); else if (err == -ENETUNREACH) DEV_STATS_INC(dev, tx_carrier_errors); DEV_STATS_INC(dev, tx_errors); return NETDEV_TX_OK; } static int geneve_change_mtu(struct net_device *dev, int new_mtu) { if (new_mtu > dev->max_mtu) new_mtu = dev->max_mtu; else if (new_mtu < dev->min_mtu) new_mtu = dev->min_mtu; WRITE_ONCE(dev->mtu, new_mtu); return 0; } static int geneve_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb) { struct ip_tunnel_info *info = skb_tunnel_info(skb); struct geneve_dev *geneve = netdev_priv(dev); __be16 sport; if (ip_tunnel_info_af(info) == AF_INET) { struct rtable *rt; struct geneve_sock *gs4 = rcu_dereference(geneve->sock4); bool use_cache; __be32 saddr; u8 tos; if (!gs4) return -EIO; use_cache = ip_tunnel_dst_cache_usable(skb, info); tos = geneve_get_dsfield(skb, dev, info, &use_cache); sport = udp_flow_src_port(geneve->net, skb, geneve->cfg.port_min, geneve->cfg.port_max, true); rt = udp_tunnel_dst_lookup(skb, dev, geneve->net, 0, &saddr, &info->key, sport, geneve->cfg.info.key.tp_dst, tos, use_cache ? &info->dst_cache : NULL); if (IS_ERR(rt)) return PTR_ERR(rt); ip_rt_put(rt); info->key.u.ipv4.src = saddr; #if IS_ENABLED(CONFIG_IPV6) } else if (ip_tunnel_info_af(info) == AF_INET6) { struct dst_entry *dst; struct geneve_sock *gs6 = rcu_dereference(geneve->sock6); struct in6_addr saddr; bool use_cache; u8 prio; if (!gs6) return -EIO; use_cache = ip_tunnel_dst_cache_usable(skb, info); prio = geneve_get_dsfield(skb, dev, info, &use_cache); sport = udp_flow_src_port(geneve->net, skb, geneve->cfg.port_min, geneve->cfg.port_max, true); dst = udp_tunnel6_dst_lookup(skb, dev, geneve->net, gs6->sock, 0, &saddr, &info->key, sport, geneve->cfg.info.key.tp_dst, prio, use_cache ? &info->dst_cache : NULL); if (IS_ERR(dst)) return PTR_ERR(dst); dst_release(dst); info->key.u.ipv6.src = saddr; #endif } else { return -EINVAL; } info->key.tp_src = sport; info->key.tp_dst = geneve->cfg.info.key.tp_dst; return 0; } static const struct net_device_ops geneve_netdev_ops = { .ndo_init = geneve_init, .ndo_uninit = geneve_uninit, .ndo_open = geneve_open, .ndo_stop = geneve_stop, .ndo_start_xmit = geneve_xmit, .ndo_change_mtu = geneve_change_mtu, .ndo_validate_addr = eth_validate_addr, .ndo_set_mac_address = eth_mac_addr, .ndo_fill_metadata_dst = geneve_fill_metadata_dst, }; static void geneve_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *drvinfo) { strscpy(drvinfo->version, GENEVE_NETDEV_VER, sizeof(drvinfo->version)); strscpy(drvinfo->driver, "geneve", sizeof(drvinfo->driver)); } static const struct ethtool_ops geneve_ethtool_ops = { .get_drvinfo = geneve_get_drvinfo, .get_link = ethtool_op_get_link, }; /* Info for udev, that this is a virtual tunnel endpoint */ static const struct device_type geneve_type = { .name = "geneve", }; /* Calls the ndo_udp_tunnel_add of the caller in order to * supply the listening GENEVE udp ports. Callers are expected * to implement the ndo_udp_tunnel_add. */ static void geneve_offload_rx_ports(struct net_device *dev, bool push) { struct net *net = dev_net(dev); struct geneve_net *gn = net_generic(net, geneve_net_id); struct geneve_sock *gs; ASSERT_RTNL(); list_for_each_entry(gs, &gn->sock_list, list) { if (push) { udp_tunnel_push_rx_port(dev, gs->sock, UDP_TUNNEL_TYPE_GENEVE); } else { udp_tunnel_drop_rx_port(dev, gs->sock, UDP_TUNNEL_TYPE_GENEVE); } } } /* Initialize the device structure. */ static void geneve_setup(struct net_device *dev) { ether_setup(dev); dev->netdev_ops = &geneve_netdev_ops; dev->ethtool_ops = &geneve_ethtool_ops; dev->needs_free_netdev = true; SET_NETDEV_DEVTYPE(dev, &geneve_type); dev->features |= NETIF_F_SG | NETIF_F_HW_CSUM | NETIF_F_FRAGLIST; dev->features |= NETIF_F_RXCSUM; dev->features |= NETIF_F_GSO_SOFTWARE; /* Partial features are disabled by default. */ dev->hw_features |= NETIF_F_SG | NETIF_F_HW_CSUM | NETIF_F_FRAGLIST; dev->hw_features |= NETIF_F_RXCSUM; dev->hw_features |= NETIF_F_GSO_SOFTWARE; dev->hw_features |= UDP_TUNNEL_PARTIAL_FEATURES; dev->hw_features |= NETIF_F_GSO_PARTIAL; dev->hw_enc_features = dev->hw_features; dev->gso_partial_features = UDP_TUNNEL_PARTIAL_FEATURES; dev->mangleid_features = NETIF_F_GSO_PARTIAL; dev->pcpu_stat_type = NETDEV_PCPU_STAT_DSTATS; /* MTU range: 68 - (something less than 65535) */ dev->min_mtu = ETH_MIN_MTU; /* The max_mtu calculation does not take account of GENEVE * options, to avoid excluding potentially valid * configurations. This will be further reduced by IPvX hdr size. */ dev->max_mtu = IP_MAX_MTU - GENEVE_BASE_HLEN - dev->hard_header_len; netif_keep_dst(dev); dev->priv_flags &= ~IFF_TX_SKB_SHARING; dev->priv_flags |= IFF_LIVE_ADDR_CHANGE | IFF_NO_QUEUE; dev->lltx = true; eth_hw_addr_random(dev); } static const struct nla_policy geneve_policy[IFLA_GENEVE_MAX + 1] = { [IFLA_GENEVE_UNSPEC] = { .strict_start_type = IFLA_GENEVE_INNER_PROTO_INHERIT }, [IFLA_GENEVE_ID] = { .type = NLA_U32 }, [IFLA_GENEVE_REMOTE] = { .len = sizeof_field(struct iphdr, daddr) }, [IFLA_GENEVE_REMOTE6] = { .len = sizeof(struct in6_addr) }, [IFLA_GENEVE_TTL] = { .type = NLA_U8 }, [IFLA_GENEVE_TOS] = { .type = NLA_U8 }, [IFLA_GENEVE_LABEL] = { .type = NLA_U32 }, [IFLA_GENEVE_PORT] = { .type = NLA_U16 }, [IFLA_GENEVE_COLLECT_METADATA] = { .type = NLA_FLAG }, [IFLA_GENEVE_UDP_CSUM] = { .type = NLA_U8 }, [IFLA_GENEVE_UDP_ZERO_CSUM6_TX] = { .type = NLA_U8 }, [IFLA_GENEVE_UDP_ZERO_CSUM6_RX] = { .type = NLA_U8 }, [IFLA_GENEVE_TTL_INHERIT] = { .type = NLA_U8 }, [IFLA_GENEVE_DF] = { .type = NLA_U8 }, [IFLA_GENEVE_INNER_PROTO_INHERIT] = { .type = NLA_FLAG }, [IFLA_GENEVE_PORT_RANGE] = NLA_POLICY_EXACT_LEN(sizeof(struct ifla_geneve_port_range)), [IFLA_GENEVE_GRO_HINT] = { .type = NLA_FLAG }, }; static int geneve_validate(struct nlattr *tb[], struct nlattr *data[], struct netlink_ext_ack *extack) { if (tb[IFLA_ADDRESS]) { if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN) { NL_SET_ERR_MSG_ATTR(extack, tb[IFLA_ADDRESS], "Provided link layer address is not Ethernet"); return -EINVAL; } if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS]))) { NL_SET_ERR_MSG_ATTR(extack, tb[IFLA_ADDRESS], "Provided Ethernet address is not unicast"); return -EADDRNOTAVAIL; } } if (!data) { NL_SET_ERR_MSG(extack, "Not enough attributes provided to perform the operation"); return -EINVAL; } if (data[IFLA_GENEVE_ID]) { __u32 vni = nla_get_u32(data[IFLA_GENEVE_ID]); if (vni >= GENEVE_N_VID) { NL_SET_ERR_MSG_ATTR(extack, data[IFLA_GENEVE_ID], "Geneve ID must be lower than 16777216"); return -ERANGE; } } if (data[IFLA_GENEVE_DF]) { enum ifla_geneve_df df = nla_get_u8(data[IFLA_GENEVE_DF]); if (df < 0 || df > GENEVE_DF_MAX) { NL_SET_ERR_MSG_ATTR(extack, data[IFLA_GENEVE_DF], "Invalid DF attribute"); return -EINVAL; } } if (data[IFLA_GENEVE_PORT_RANGE]) { const struct ifla_geneve_port_range *p; p = nla_data(data[IFLA_GENEVE_PORT_RANGE]); if (ntohs(p->high) < ntohs(p->low)) { NL_SET_ERR_MSG_ATTR(extack, data[IFLA_GENEVE_PORT_RANGE], "Invalid source port range"); return -EINVAL; } } return 0; } static struct geneve_dev *geneve_find_dev(struct geneve_net *gn, const struct ip_tunnel_info *info, bool *tun_on_same_port, bool *tun_collect_md) { struct geneve_dev *geneve, *t = NULL; *tun_on_same_port = false; *tun_collect_md = false; list_for_each_entry(geneve, &gn->geneve_list, next) { if (info->key.tp_dst == geneve->cfg.info.key.tp_dst) { *tun_collect_md = geneve->cfg.collect_md; *tun_on_same_port = true; } if (info->key.tun_id == geneve->cfg.info.key.tun_id && info->key.tp_dst == geneve->cfg.info.key.tp_dst && !memcmp(&info->key.u, &geneve->cfg.info.key.u, sizeof(info->key.u))) t = geneve; } return t; } static bool is_tnl_info_zero(const struct ip_tunnel_info *info) { return !(info->key.tun_id || info->key.tos || !ip_tunnel_flags_empty(info->key.tun_flags) || info->key.ttl || info->key.label || info->key.tp_src || memchr_inv(&info->key.u, 0, sizeof(info->key.u))); } static bool geneve_dst_addr_equal(struct ip_tunnel_info *a, struct ip_tunnel_info *b) { if (ip_tunnel_info_af(a) == AF_INET) return a->key.u.ipv4.dst == b->key.u.ipv4.dst; else return ipv6_addr_equal(&a->key.u.ipv6.dst, &b->key.u.ipv6.dst); } static int geneve_configure(struct net *net, struct net_device *dev, struct netlink_ext_ack *extack, const struct geneve_config *cfg) { struct geneve_net *gn = net_generic(net, geneve_net_id); struct geneve_dev *t, *geneve = netdev_priv(dev); const struct ip_tunnel_info *info = &cfg->info; bool tun_collect_md, tun_on_same_port; int err, encap_len; if (cfg->collect_md && !is_tnl_info_zero(info)) { NL_SET_ERR_MSG(extack, "Device is externally controlled, so attributes (VNI, Port, and so on) must not be specified"); return -EINVAL; } geneve->net = net; geneve->dev = dev; t = geneve_find_dev(gn, info, &tun_on_same_port, &tun_collect_md); if (t) return -EBUSY; /* make enough headroom for basic scenario */ encap_len = GENEVE_BASE_HLEN + ETH_HLEN; if (!cfg->collect_md && ip_tunnel_info_af(info) == AF_INET) { encap_len += sizeof(struct iphdr); dev->max_mtu -= sizeof(struct iphdr); } else { encap_len += sizeof(struct ipv6hdr); dev->max_mtu -= sizeof(struct ipv6hdr); } dev->needed_headroom = encap_len + ETH_HLEN; if (cfg->collect_md) { if (tun_on_same_port) { NL_SET_ERR_MSG(extack, "There can be only one externally controlled device on a destination port"); return -EPERM; } } else { if (tun_collect_md) { NL_SET_ERR_MSG(extack, "There already exists an externally controlled device on this destination port"); return -EPERM; } } dst_cache_reset(&geneve->cfg.info.dst_cache); memcpy(&geneve->cfg, cfg, sizeof(*cfg)); if (geneve->cfg.inner_proto_inherit) { dev->header_ops = NULL; dev->type = ARPHRD_NONE; dev->hard_header_len = 0; dev->addr_len = 0; dev->flags = IFF_POINTOPOINT | IFF_NOARP; } err = register_netdevice(dev); if (err) return err; list_add(&geneve->next, &gn->geneve_list); return 0; } static void init_tnl_info(struct ip_tunnel_info *info, __u16 dst_port) { memset(info, 0, sizeof(*info)); info->key.tp_dst = htons(dst_port); } static int geneve_nl2info(struct nlattr *tb[], struct nlattr *data[], struct netlink_ext_ack *extack, struct geneve_config *cfg, bool changelink) { struct ip_tunnel_info *info = &cfg->info; int attrtype; if (data[IFLA_GENEVE_REMOTE] && data[IFLA_GENEVE_REMOTE6]) { NL_SET_ERR_MSG(extack, "Cannot specify both IPv4 and IPv6 Remote addresses"); return -EINVAL; } if (data[IFLA_GENEVE_REMOTE]) { if (changelink && (ip_tunnel_info_af(info) == AF_INET6)) { attrtype = IFLA_GENEVE_REMOTE; goto change_notsup; } info->key.u.ipv4.dst = nla_get_in_addr(data[IFLA_GENEVE_REMOTE]); if (ipv4_is_multicast(info->key.u.ipv4.dst)) { NL_SET_ERR_MSG_ATTR(extack, data[IFLA_GENEVE_REMOTE], "Remote IPv4 address cannot be Multicast"); return -EINVAL; } } if (data[IFLA_GENEVE_REMOTE6]) { #if IS_ENABLED(CONFIG_IPV6) if (changelink && (ip_tunnel_info_af(info) == AF_INET)) { attrtype = IFLA_GENEVE_REMOTE6; goto change_notsup; } info->mode = IP_TUNNEL_INFO_IPV6; info->key.u.ipv6.dst = nla_get_in6_addr(data[IFLA_GENEVE_REMOTE6]); if (ipv6_addr_type(&info->key.u.ipv6.dst) & IPV6_ADDR_LINKLOCAL) { NL_SET_ERR_MSG_ATTR(extack, data[IFLA_GENEVE_REMOTE6], "Remote IPv6 address cannot be link-local"); return -EINVAL; } if (ipv6_addr_is_multicast(&info->key.u.ipv6.dst)) { NL_SET_ERR_MSG_ATTR(extack, data[IFLA_GENEVE_REMOTE6], "Remote IPv6 address cannot be Multicast"); return -EINVAL; } __set_bit(IP_TUNNEL_CSUM_BIT, info->key.tun_flags); cfg->use_udp6_rx_checksums = true; #else NL_SET_ERR_MSG_ATTR(extack, data[IFLA_GENEVE_REMOTE6], "IPv6 support not enabled in the kernel"); return -EPFNOSUPPORT; #endif } if (data[IFLA_GENEVE_ID]) { __u32 vni; __u8 tvni[3]; __be64 tunid; vni = nla_get_u32(data[IFLA_GENEVE_ID]); tvni[0] = (vni & 0x00ff0000) >> 16; tvni[1] = (vni & 0x0000ff00) >> 8; tvni[2] = vni & 0x000000ff; tunid = vni_to_tunnel_id(tvni); if (changelink && (tunid != info->key.tun_id)) { attrtype = IFLA_GENEVE_ID; goto change_notsup; } info->key.tun_id = tunid; } if (data[IFLA_GENEVE_TTL_INHERIT]) { if (nla_get_u8(data[IFLA_GENEVE_TTL_INHERIT])) cfg->ttl_inherit = true; else cfg->ttl_inherit = false; } else if (data[IFLA_GENEVE_TTL]) { info->key.ttl = nla_get_u8(data[IFLA_GENEVE_TTL]); cfg->ttl_inherit = false; } if (data[IFLA_GENEVE_TOS]) info->key.tos = nla_get_u8(data[IFLA_GENEVE_TOS]); if (data[IFLA_GENEVE_DF]) cfg->df = nla_get_u8(data[IFLA_GENEVE_DF]); if (data[IFLA_GENEVE_LABEL]) { info->key.label = nla_get_be32(data[IFLA_GENEVE_LABEL]) & IPV6_FLOWLABEL_MASK; if (info->key.label && (!(info->mode & IP_TUNNEL_INFO_IPV6))) { NL_SET_ERR_MSG_ATTR(extack, data[IFLA_GENEVE_LABEL], "Label attribute only applies for IPv6 Geneve devices"); return -EINVAL; } } if (data[IFLA_GENEVE_PORT]) { if (changelink) { attrtype = IFLA_GENEVE_PORT; goto change_notsup; } info->key.tp_dst = nla_get_be16(data[IFLA_GENEVE_PORT]); } if (data[IFLA_GENEVE_PORT_RANGE]) { const struct ifla_geneve_port_range *p; if (changelink) { attrtype = IFLA_GENEVE_PORT_RANGE; goto change_notsup; } p = nla_data(data[IFLA_GENEVE_PORT_RANGE]); cfg->port_min = ntohs(p->low); cfg->port_max = ntohs(p->high); } if (data[IFLA_GENEVE_COLLECT_METADATA]) { if (changelink) { attrtype = IFLA_GENEVE_COLLECT_METADATA; goto change_notsup; } cfg->collect_md = true; } if (data[IFLA_GENEVE_UDP_CSUM]) { if (changelink) { attrtype = IFLA_GENEVE_UDP_CSUM; goto change_notsup; } if (nla_get_u8(data[IFLA_GENEVE_UDP_CSUM])) __set_bit(IP_TUNNEL_CSUM_BIT, info->key.tun_flags); } if (data[IFLA_GENEVE_UDP_ZERO_CSUM6_TX]) { #if IS_ENABLED(CONFIG_IPV6) if (changelink) { attrtype = IFLA_GENEVE_UDP_ZERO_CSUM6_TX; goto change_notsup; } if (nla_get_u8(data[IFLA_GENEVE_UDP_ZERO_CSUM6_TX])) __clear_bit(IP_TUNNEL_CSUM_BIT, info->key.tun_flags); #else NL_SET_ERR_MSG_ATTR(extack, data[IFLA_GENEVE_UDP_ZERO_CSUM6_TX], "IPv6 support not enabled in the kernel"); return -EPFNOSUPPORT; #endif } if (data[IFLA_GENEVE_UDP_ZERO_CSUM6_RX]) { #if IS_ENABLED(CONFIG_IPV6) if (changelink) { attrtype = IFLA_GENEVE_UDP_ZERO_CSUM6_RX; goto change_notsup; } if (nla_get_u8(data[IFLA_GENEVE_UDP_ZERO_CSUM6_RX])) cfg->use_udp6_rx_checksums = false; #else NL_SET_ERR_MSG_ATTR(extack, data[IFLA_GENEVE_UDP_ZERO_CSUM6_RX], "IPv6 support not enabled in the kernel"); return -EPFNOSUPPORT; #endif } if (data[IFLA_GENEVE_INNER_PROTO_INHERIT]) { if (changelink) { attrtype = IFLA_GENEVE_INNER_PROTO_INHERIT; goto change_notsup; } cfg->inner_proto_inherit = true; } if (data[IFLA_GENEVE_GRO_HINT]) { if (changelink) { attrtype = IFLA_GENEVE_GRO_HINT; goto change_notsup; } cfg->gro_hint = true; } return 0; change_notsup: NL_SET_ERR_MSG_ATTR(extack, data[attrtype], "Changing VNI, Port, endpoint IP address family, external, inner_proto_inherit, gro_hint and UDP checksum attributes are not supported"); return -EOPNOTSUPP; } static void geneve_link_config(struct net_device *dev, struct ip_tunnel_info *info, struct nlattr *tb[]) { struct geneve_dev *geneve = netdev_priv(dev); int ldev_mtu = 0; if (tb[IFLA_MTU]) { geneve_change_mtu(dev, nla_get_u32(tb[IFLA_MTU])); return; } switch (ip_tunnel_info_af(info)) { case AF_INET: { struct flowi4 fl4 = { .daddr = info->key.u.ipv4.dst }; struct rtable *rt = ip_route_output_key(geneve->net, &fl4); if (!IS_ERR(rt) && rt->dst.dev) { ldev_mtu = rt->dst.dev->mtu - GENEVE_IPV4_HLEN; ip_rt_put(rt); } break; } #if IS_ENABLED(CONFIG_IPV6) case AF_INET6: { struct rt6_info *rt; if (!__in6_dev_get(dev)) break; rt = rt6_lookup(geneve->net, &info->key.u.ipv6.dst, NULL, 0, NULL, 0); if (rt && rt->dst.dev) ldev_mtu = rt->dst.dev->mtu - GENEVE_IPV6_HLEN; ip6_rt_put(rt); break; } #endif } if (ldev_mtu <= 0) return; geneve_change_mtu(dev, ldev_mtu - info->options_len); } static int geneve_newlink(struct net_device *dev, struct rtnl_newlink_params *params, struct netlink_ext_ack *extack) { struct net *link_net = rtnl_newlink_link_net(params); struct nlattr **data = params->data; struct nlattr **tb = params->tb; struct geneve_config cfg = { .df = GENEVE_DF_UNSET, .use_udp6_rx_checksums = false, .ttl_inherit = false, .collect_md = false, .port_min = 1, .port_max = USHRT_MAX, }; int err; init_tnl_info(&cfg.info, GENEVE_UDP_PORT); err = geneve_nl2info(tb, data, extack, &cfg, false); if (err) return err; err = geneve_configure(link_net, dev, extack, &cfg); if (err) return err; geneve_link_config(dev, &cfg.info, tb); return 0; } /* Quiesces the geneve device data path for both TX and RX. * * On transmit geneve checks for non-NULL geneve_sock before it proceeds. * So, if we set that socket to NULL under RCU and wait for synchronize_net() * to complete for the existing set of in-flight packets to be transmitted, * then we would have quiesced the transmit data path. All the future packets * will get dropped until we unquiesce the data path. * * On receive geneve dereference the geneve_sock stashed in the socket. So, * if we set that to NULL under RCU and wait for synchronize_net() to * complete, then we would have quiesced the receive data path. */ static void geneve_quiesce(struct geneve_dev *geneve, struct geneve_sock **gs4, struct geneve_sock **gs6) { *gs4 = rtnl_dereference(geneve->sock4); rcu_assign_pointer(geneve->sock4, NULL); if (*gs4) rcu_assign_sk_user_data((*gs4)->sock->sk, NULL); #if IS_ENABLED(CONFIG_IPV6) *gs6 = rtnl_dereference(geneve->sock6); rcu_assign_pointer(geneve->sock6, NULL); if (*gs6) rcu_assign_sk_user_data((*gs6)->sock->sk, NULL); #else *gs6 = NULL; #endif synchronize_net(); } /* Resumes the geneve device data path for both TX and RX. */ static void geneve_unquiesce(struct geneve_dev *geneve, struct geneve_sock *gs4, struct geneve_sock __maybe_unused *gs6) { rcu_assign_pointer(geneve->sock4, gs4); if (gs4) rcu_assign_sk_user_data(gs4->sock->sk, gs4); #if IS_ENABLED(CONFIG_IPV6) rcu_assign_pointer(geneve->sock6, gs6); if (gs6) rcu_assign_sk_user_data(gs6->sock->sk, gs6); #endif synchronize_net(); } static int geneve_changelink(struct net_device *dev, struct nlattr *tb[], struct nlattr *data[], struct netlink_ext_ack *extack) { struct geneve_dev *geneve = netdev_priv(dev); struct geneve_sock *gs4, *gs6; struct geneve_config cfg; int err; /* If the geneve device is configured for metadata (or externally * controlled, for example, OVS), then nothing can be changed. */ if (geneve->cfg.collect_md) return -EOPNOTSUPP; /* Start with the existing info. */ memcpy(&cfg, &geneve->cfg, sizeof(cfg)); err = geneve_nl2info(tb, data, extack, &cfg, true); if (err) return err; if (!geneve_dst_addr_equal(&geneve->cfg.info, &cfg.info)) { dst_cache_reset(&cfg.info.dst_cache); geneve_link_config(dev, &cfg.info, tb); } geneve_quiesce(geneve, &gs4, &gs6); memcpy(&geneve->cfg, &cfg, sizeof(cfg)); geneve_unquiesce(geneve, gs4, gs6); return 0; } static void geneve_dellink(struct net_device *dev, struct list_head *head) { struct geneve_dev *geneve = netdev_priv(dev); list_del(&geneve->next); unregister_netdevice_queue(dev, head); } static size_t geneve_get_size(const struct net_device *dev) { return nla_total_size(sizeof(__u32)) + /* IFLA_GENEVE_ID */ nla_total_size(sizeof(struct in6_addr)) + /* IFLA_GENEVE_REMOTE{6} */ nla_total_size(sizeof(__u8)) + /* IFLA_GENEVE_TTL */ nla_total_size(sizeof(__u8)) + /* IFLA_GENEVE_TOS */ nla_total_size(sizeof(__u8)) + /* IFLA_GENEVE_DF */ nla_total_size(sizeof(__be32)) + /* IFLA_GENEVE_LABEL */ nla_total_size(sizeof(__be16)) + /* IFLA_GENEVE_PORT */ nla_total_size(0) + /* IFLA_GENEVE_COLLECT_METADATA */ nla_total_size(sizeof(__u8)) + /* IFLA_GENEVE_UDP_CSUM */ nla_total_size(sizeof(__u8)) + /* IFLA_GENEVE_UDP_ZERO_CSUM6_TX */ nla_total_size(sizeof(__u8)) + /* IFLA_GENEVE_UDP_ZERO_CSUM6_RX */ nla_total_size(sizeof(__u8)) + /* IFLA_GENEVE_TTL_INHERIT */ nla_total_size(0) + /* IFLA_GENEVE_INNER_PROTO_INHERIT */ nla_total_size(sizeof(struct ifla_geneve_port_range)) + /* IFLA_GENEVE_PORT_RANGE */ nla_total_size(0) + /* IFLA_GENEVE_GRO_HINT */ 0; } static int geneve_fill_info(struct sk_buff *skb, const struct net_device *dev) { struct geneve_dev *geneve = netdev_priv(dev); struct ip_tunnel_info *info = &geneve->cfg.info; bool ttl_inherit = geneve->cfg.ttl_inherit; bool metadata = geneve->cfg.collect_md; struct ifla_geneve_port_range ports = { .low = htons(geneve->cfg.port_min), .high = htons(geneve->cfg.port_max), }; __u8 tmp_vni[3]; __u32 vni; tunnel_id_to_vni(info->key.tun_id, tmp_vni); vni = (tmp_vni[0] << 16) | (tmp_vni[1] << 8) | tmp_vni[2]; if (nla_put_u32(skb, IFLA_GENEVE_ID, vni)) goto nla_put_failure; if (!metadata && ip_tunnel_info_af(info) == AF_INET) { if (nla_put_in_addr(skb, IFLA_GENEVE_REMOTE, info->key.u.ipv4.dst)) goto nla_put_failure; if (nla_put_u8(skb, IFLA_GENEVE_UDP_CSUM, test_bit(IP_TUNNEL_CSUM_BIT, info->key.tun_flags))) goto nla_put_failure; #if IS_ENABLED(CONFIG_IPV6) } else if (!metadata) { if (nla_put_in6_addr(skb, IFLA_GENEVE_REMOTE6, &info->key.u.ipv6.dst)) goto nla_put_failure; if (nla_put_u8(skb, IFLA_GENEVE_UDP_ZERO_CSUM6_TX, !test_bit(IP_TUNNEL_CSUM_BIT, info->key.tun_flags))) goto nla_put_failure; #endif } if (nla_put_u8(skb, IFLA_GENEVE_TTL, info->key.ttl) || nla_put_u8(skb, IFLA_GENEVE_TOS, info->key.tos) || nla_put_be32(skb, IFLA_GENEVE_LABEL, info->key.label)) goto nla_put_failure; if (nla_put_u8(skb, IFLA_GENEVE_DF, geneve->cfg.df)) goto nla_put_failure; if (nla_put_be16(skb, IFLA_GENEVE_PORT, info->key.tp_dst)) goto nla_put_failure; if (metadata && nla_put_flag(skb, IFLA_GENEVE_COLLECT_METADATA)) goto nla_put_failure; #if IS_ENABLED(CONFIG_IPV6) if (nla_put_u8(skb, IFLA_GENEVE_UDP_ZERO_CSUM6_RX, !geneve->cfg.use_udp6_rx_checksums)) goto nla_put_failure; #endif if (nla_put_u8(skb, IFLA_GENEVE_TTL_INHERIT, ttl_inherit)) goto nla_put_failure; if (geneve->cfg.inner_proto_inherit && nla_put_flag(skb, IFLA_GENEVE_INNER_PROTO_INHERIT)) goto nla_put_failure; if (nla_put(skb, IFLA_GENEVE_PORT_RANGE, sizeof(ports), &ports)) goto nla_put_failure; if (geneve->cfg.gro_hint && nla_put_flag(skb, IFLA_GENEVE_GRO_HINT)) goto nla_put_failure; return 0; nla_put_failure: return -EMSGSIZE; } static struct rtnl_link_ops geneve_link_ops __read_mostly = { .kind = "geneve", .maxtype = IFLA_GENEVE_MAX, .policy = geneve_policy, .priv_size = sizeof(struct geneve_dev), .setup = geneve_setup, .validate = geneve_validate, .newlink = geneve_newlink, .changelink = geneve_changelink, .dellink = geneve_dellink, .get_size = geneve_get_size, .fill_info = geneve_fill_info, }; struct net_device *geneve_dev_create_fb(struct net *net, const char *name, u8 name_assign_type, u16 dst_port) { struct nlattr *tb[IFLA_MAX + 1]; struct net_device *dev; LIST_HEAD(list_kill); int err; struct geneve_config cfg = { .df = GENEVE_DF_UNSET, .use_udp6_rx_checksums = true, .ttl_inherit = false, .collect_md = true, .port_min = 1, .port_max = USHRT_MAX, }; memset(tb, 0, sizeof(tb)); dev = rtnl_create_link(net, name, name_assign_type, &geneve_link_ops, tb, NULL); if (IS_ERR(dev)) return dev; init_tnl_info(&cfg.info, dst_port); err = geneve_configure(net, dev, NULL, &cfg); if (err) { free_netdev(dev); return ERR_PTR(err); } /* openvswitch users expect packet sizes to be unrestricted, * so set the largest MTU we can. */ err = geneve_change_mtu(dev, IP_MAX_MTU); if (err) goto err; err = rtnl_configure_link(dev, NULL, 0, NULL); if (err < 0) goto err; return dev; err: geneve_dellink(dev, &list_kill); unregister_netdevice_many(&list_kill); return ERR_PTR(err); } EXPORT_SYMBOL_GPL(geneve_dev_create_fb); static int geneve_netdevice_event(struct notifier_block *unused, unsigned long event, void *ptr) { struct net_device *dev = netdev_notifier_info_to_dev(ptr); if (event == NETDEV_UDP_TUNNEL_PUSH_INFO) geneve_offload_rx_ports(dev, true); else if (event == NETDEV_UDP_TUNNEL_DROP_INFO) geneve_offload_rx_ports(dev, false); return NOTIFY_DONE; } static struct notifier_block geneve_notifier_block __read_mostly = { .notifier_call = geneve_netdevice_event, }; static __net_init int geneve_init_net(struct net *net) { struct geneve_net *gn = net_generic(net, geneve_net_id); INIT_LIST_HEAD(&gn->geneve_list); INIT_LIST_HEAD(&gn->sock_list); return 0; } static void __net_exit geneve_exit_rtnl_net(struct net *net, struct list_head *dev_to_kill) { struct geneve_net *gn = net_generic(net, geneve_net_id); struct geneve_dev *geneve, *next; list_for_each_entry_safe(geneve, next, &gn->geneve_list, next) geneve_dellink(geneve->dev, dev_to_kill); } static void __net_exit geneve_exit_net(struct net *net) { const struct geneve_net *gn = net_generic(net, geneve_net_id); WARN_ON_ONCE(!list_empty(&gn->sock_list)); } static struct pernet_operations geneve_net_ops = { .init = geneve_init_net, .exit_rtnl = geneve_exit_rtnl_net, .exit = geneve_exit_net, .id = &geneve_net_id, .size = sizeof(struct geneve_net), }; static int __init geneve_init_module(void) { int rc; rc = register_pernet_subsys(&geneve_net_ops); if (rc) goto out1; rc = register_netdevice_notifier(&geneve_notifier_block); if (rc) goto out2; rc = rtnl_link_register(&geneve_link_ops); if (rc) goto out3; return 0; out3: unregister_netdevice_notifier(&geneve_notifier_block); out2: unregister_pernet_subsys(&geneve_net_ops); out1: return rc; } late_initcall(geneve_init_module); static void __exit geneve_cleanup_module(void) { rtnl_link_unregister(&geneve_link_ops); unregister_netdevice_notifier(&geneve_notifier_block); unregister_pernet_subsys(&geneve_net_ops); } module_exit(geneve_cleanup_module); MODULE_LICENSE("GPL"); MODULE_VERSION(GENEVE_NETDEV_VER); MODULE_AUTHOR("John W. Linville <linville@tuxdriver.com>"); MODULE_DESCRIPTION("Interface driver for GENEVE encapsulated traffic"); MODULE_ALIAS_RTNL_LINK("geneve");
1 1 6 6 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 // SPDX-License-Identifier: GPL-2.0-or-later /* * Copyright (c) International Business Machines Corp., 2006 * * Author: Artem Bityutskiy (Битюцкий Артём) */ #include "ubi.h" #include <linux/debugfs.h> #include <linux/uaccess.h> #include <linux/module.h> #include <linux/seq_file.h> #include <linux/fault-inject.h> #ifdef CONFIG_MTD_UBI_FAULT_INJECTION static DECLARE_FAULT_ATTR(fault_eccerr_attr); static DECLARE_FAULT_ATTR(fault_bitflips_attr); static DECLARE_FAULT_ATTR(fault_read_failure_attr); static DECLARE_FAULT_ATTR(fault_write_failure_attr); static DECLARE_FAULT_ATTR(fault_erase_failure_attr); static DECLARE_FAULT_ATTR(fault_power_cut_attr); static DECLARE_FAULT_ATTR(fault_io_ff_attr); static DECLARE_FAULT_ATTR(fault_io_ff_bitflips_attr); static DECLARE_FAULT_ATTR(fault_bad_hdr_attr); static DECLARE_FAULT_ATTR(fault_bad_hdr_ebadmsg_attr); #define FAIL_ACTION(name, fault_attr) \ bool should_fail_##name(void) \ { \ return should_fail(&fault_attr, 1); \ } FAIL_ACTION(eccerr, fault_eccerr_attr) FAIL_ACTION(bitflips, fault_bitflips_attr) FAIL_ACTION(read_failure, fault_read_failure_attr) FAIL_ACTION(write_failure, fault_write_failure_attr) FAIL_ACTION(erase_failure, fault_erase_failure_attr) FAIL_ACTION(power_cut, fault_power_cut_attr) FAIL_ACTION(io_ff, fault_io_ff_attr) FAIL_ACTION(io_ff_bitflips, fault_io_ff_bitflips_attr) FAIL_ACTION(bad_hdr, fault_bad_hdr_attr) FAIL_ACTION(bad_hdr_ebadmsg, fault_bad_hdr_ebadmsg_attr) #endif /** * ubi_dump_flash - dump a region of flash. * @ubi: UBI device description object * @pnum: the physical eraseblock number to dump * @offset: the starting offset within the physical eraseblock to dump * @len: the length of the region to dump */ void ubi_dump_flash(struct ubi_device *ubi, int pnum, int offset, int len) { int err; size_t read; void *buf; loff_t addr = (loff_t)pnum * ubi->peb_size + offset; buf = vmalloc(len); if (!buf) return; err = mtd_read(ubi->mtd, addr, len, &read, buf); if (err && err != -EUCLEAN) { ubi_err(ubi, "err %d while reading %d bytes from PEB %d:%d, read %zd bytes", err, len, pnum, offset, read); goto out; } ubi_msg(ubi, "dumping %d bytes of data from PEB %d, offset %d", len, pnum, offset); print_hex_dump(KERN_DEBUG, "", DUMP_PREFIX_OFFSET, 32, 1, buf, len, 1); out: vfree(buf); return; } /** * ubi_dump_ec_hdr - dump an erase counter header. * @ec_hdr: the erase counter header to dump */ void ubi_dump_ec_hdr(const struct ubi_ec_hdr *ec_hdr) { pr_err("Erase counter header dump:\n"); pr_err("\tmagic %#08x\n", be32_to_cpu(ec_hdr->magic)); pr_err("\tversion %d\n", (int)ec_hdr->version); pr_err("\tec %llu\n", (long long)be64_to_cpu(ec_hdr->ec)); pr_err("\tvid_hdr_offset %d\n", be32_to_cpu(ec_hdr->vid_hdr_offset)); pr_err("\tdata_offset %d\n", be32_to_cpu(ec_hdr->data_offset)); pr_err("\timage_seq %d\n", be32_to_cpu(ec_hdr->image_seq)); pr_err("\thdr_crc %#08x\n", be32_to_cpu(ec_hdr->hdr_crc)); pr_err("erase counter header hexdump:\n"); print_hex_dump(KERN_DEBUG, "", DUMP_PREFIX_OFFSET, 32, 1, ec_hdr, UBI_EC_HDR_SIZE, 1); } /** * ubi_dump_vid_hdr - dump a volume identifier header. * @vid_hdr: the volume identifier header to dump */ void ubi_dump_vid_hdr(const struct ubi_vid_hdr *vid_hdr) { pr_err("Volume identifier header dump:\n"); pr_err("\tmagic %08x\n", be32_to_cpu(vid_hdr->magic)); pr_err("\tversion %d\n", (int)vid_hdr->version); pr_err("\tvol_type %d\n", (int)vid_hdr->vol_type); pr_err("\tcopy_flag %d\n", (int)vid_hdr->copy_flag); pr_err("\tcompat %d\n", (int)vid_hdr->compat); pr_err("\tvol_id %d\n", be32_to_cpu(vid_hdr->vol_id)); pr_err("\tlnum %d\n", be32_to_cpu(vid_hdr->lnum)); pr_err("\tdata_size %d\n", be32_to_cpu(vid_hdr->data_size)); pr_err("\tused_ebs %d\n", be32_to_cpu(vid_hdr->used_ebs)); pr_err("\tdata_pad %d\n", be32_to_cpu(vid_hdr->data_pad)); pr_err("\tsqnum %llu\n", (unsigned long long)be64_to_cpu(vid_hdr->sqnum)); pr_err("\thdr_crc %08x\n", be32_to_cpu(vid_hdr->hdr_crc)); pr_err("Volume identifier header hexdump:\n"); print_hex_dump(KERN_DEBUG, "", DUMP_PREFIX_OFFSET, 32, 1, vid_hdr, UBI_VID_HDR_SIZE, 1); } /** * ubi_dump_vol_info - dump volume information. * @vol: UBI volume description object */ void ubi_dump_vol_info(const struct ubi_volume *vol) { pr_err("Volume information dump:\n"); pr_err("\tvol_id %d\n", vol->vol_id); pr_err("\treserved_pebs %d\n", vol->reserved_pebs); pr_err("\talignment %d\n", vol->alignment); pr_err("\tdata_pad %d\n", vol->data_pad); pr_err("\tvol_type %d\n", vol->vol_type); pr_err("\tname_len %d\n", vol->name_len); pr_err("\tusable_leb_size %d\n", vol->usable_leb_size); pr_err("\tused_ebs %d\n", vol->used_ebs); pr_err("\tused_bytes %lld\n", vol->used_bytes); pr_err("\tlast_eb_bytes %d\n", vol->last_eb_bytes); pr_err("\tcorrupted %d\n", vol->corrupted); pr_err("\tupd_marker %d\n", vol->upd_marker); pr_err("\tskip_check %d\n", vol->skip_check); if (vol->name_len <= UBI_VOL_NAME_MAX && strnlen(vol->name, vol->name_len + 1) == vol->name_len) { pr_err("\tname %s\n", vol->name); } else { pr_err("\t1st 5 characters of name: %c%c%c%c%c\n", vol->name[0], vol->name[1], vol->name[2], vol->name[3], vol->name[4]); } } /** * ubi_dump_vtbl_record - dump a &struct ubi_vtbl_record object. * @r: the object to dump * @idx: volume table index */ void ubi_dump_vtbl_record(const struct ubi_vtbl_record *r, int idx) { int name_len = be16_to_cpu(r->name_len); pr_err("Volume table record %d dump:\n", idx); pr_err("\treserved_pebs %d\n", be32_to_cpu(r->reserved_pebs)); pr_err("\talignment %d\n", be32_to_cpu(r->alignment)); pr_err("\tdata_pad %d\n", be32_to_cpu(r->data_pad)); pr_err("\tvol_type %d\n", (int)r->vol_type); pr_err("\tupd_marker %d\n", (int)r->upd_marker); pr_err("\tname_len %d\n", name_len); if (r->name[0] == '\0') { pr_err("\tname NULL\n"); return; } if (name_len <= UBI_VOL_NAME_MAX && strnlen(&r->name[0], name_len + 1) == name_len) { pr_err("\tname %s\n", &r->name[0]); } else { pr_err("\t1st 5 characters of name: %c%c%c%c%c\n", r->name[0], r->name[1], r->name[2], r->name[3], r->name[4]); } pr_err("\tcrc %#08x\n", be32_to_cpu(r->crc)); } /** * ubi_dump_av - dump a &struct ubi_ainf_volume object. * @av: the object to dump */ void ubi_dump_av(const struct ubi_ainf_volume *av) { pr_err("Volume attaching information dump:\n"); pr_err("\tvol_id %d\n", av->vol_id); pr_err("\thighest_lnum %d\n", av->highest_lnum); pr_err("\tleb_count %d\n", av->leb_count); pr_err("\tcompat %d\n", av->compat); pr_err("\tvol_type %d\n", av->vol_type); pr_err("\tused_ebs %d\n", av->used_ebs); pr_err("\tlast_data_size %d\n", av->last_data_size); pr_err("\tdata_pad %d\n", av->data_pad); } /** * ubi_dump_aeb - dump a &struct ubi_ainf_peb object. * @aeb: the object to dump * @type: object type: 0 - not corrupted, 1 - corrupted */ void ubi_dump_aeb(const struct ubi_ainf_peb *aeb, int type) { pr_err("eraseblock attaching information dump:\n"); pr_err("\tec %d\n", aeb->ec); pr_err("\tpnum %d\n", aeb->pnum); if (type == 0) { pr_err("\tlnum %d\n", aeb->lnum); pr_err("\tscrub %d\n", aeb->scrub); pr_err("\tsqnum %llu\n", aeb->sqnum); } } /** * ubi_dump_mkvol_req - dump a &struct ubi_mkvol_req object. * @req: the object to dump */ void ubi_dump_mkvol_req(const struct ubi_mkvol_req *req) { char nm[17]; pr_err("Volume creation request dump:\n"); pr_err("\tvol_id %d\n", req->vol_id); pr_err("\talignment %d\n", req->alignment); pr_err("\tbytes %lld\n", (long long)req->bytes); pr_err("\tvol_type %d\n", req->vol_type); pr_err("\tname_len %d\n", req->name_len); memcpy(nm, req->name, 16); nm[16] = 0; pr_err("\t1st 16 characters of name: %s\n", nm); } /* * Root directory for UBI stuff in debugfs. Contains sub-directories which * contain the stuff specific to particular UBI devices. */ static struct dentry *dfs_rootdir; #ifdef CONFIG_MTD_UBI_FAULT_INJECTION static void dfs_create_fault_entry(struct dentry *parent) { struct dentry *dir; dir = debugfs_create_dir("fault_inject", parent); if (IS_ERR_OR_NULL(dir)) { int err = dir ? PTR_ERR(dir) : -ENODEV; pr_warn("UBI error: cannot create \"fault_inject\" debugfs directory, error %d\n", err); return; } fault_create_debugfs_attr("emulate_eccerr", dir, &fault_eccerr_attr); fault_create_debugfs_attr("emulate_read_failure", dir, &fault_read_failure_attr); fault_create_debugfs_attr("emulate_bitflips", dir, &fault_bitflips_attr); fault_create_debugfs_attr("emulate_write_failure", dir, &fault_write_failure_attr); fault_create_debugfs_attr("emulate_erase_failure", dir, &fault_erase_failure_attr); fault_create_debugfs_attr("emulate_power_cut", dir, &fault_power_cut_attr); fault_create_debugfs_attr("emulate_io_ff", dir, &fault_io_ff_attr); fault_create_debugfs_attr("emulate_io_ff_bitflips", dir, &fault_io_ff_bitflips_attr); fault_create_debugfs_attr("emulate_bad_hdr", dir, &fault_bad_hdr_attr); fault_create_debugfs_attr("emulate_bad_hdr_ebadmsg", dir, &fault_bad_hdr_ebadmsg_attr); } #endif /** * ubi_debugfs_init - create UBI debugfs directory. * * Create UBI debugfs directory. Returns zero in case of success and a negative * error code in case of failure. */ int ubi_debugfs_init(void) { if (!IS_ENABLED(CONFIG_DEBUG_FS)) return 0; dfs_rootdir = debugfs_create_dir("ubi", NULL); if (IS_ERR_OR_NULL(dfs_rootdir)) { int err = dfs_rootdir ? PTR_ERR(dfs_rootdir) : -ENODEV; pr_err("UBI error: cannot create \"ubi\" debugfs directory, error %d\n", err); return err; } #ifdef CONFIG_MTD_UBI_FAULT_INJECTION dfs_create_fault_entry(dfs_rootdir); #endif return 0; } /** * ubi_debugfs_exit - remove UBI debugfs directory. */ void ubi_debugfs_exit(void) { if (IS_ENABLED(CONFIG_DEBUG_FS)) debugfs_remove(dfs_rootdir); } /* Read an UBI debugfs file */ static ssize_t dfs_file_read(struct file *file, char __user *user_buf, size_t count, loff_t *ppos) { unsigned long ubi_num = (unsigned long)file->private_data; struct dentry *dent = file->f_path.dentry; struct ubi_device *ubi; struct ubi_debug_info *d; char buf[16]; int val; ubi = ubi_get_device(ubi_num); if (!ubi) return -ENODEV; d = &ubi->dbg; if (dent == d->dfs_chk_gen) val = d->chk_gen; else if (dent == d->dfs_chk_io) val = d->chk_io; else if (dent == d->dfs_chk_fastmap) val = d->chk_fastmap; else if (dent == d->dfs_disable_bgt) val = d->disable_bgt; else if (dent == d->dfs_emulate_bitflips) val = d->emulate_bitflips; else if (dent == d->dfs_emulate_io_failures) val = d->emulate_io_failures; else if (dent == d->dfs_emulate_failures) { snprintf(buf, sizeof(buf), "0x%04x\n", d->emulate_failures); count = simple_read_from_buffer(user_buf, count, ppos, buf, strlen(buf)); goto out; } else if (dent == d->dfs_emulate_power_cut) { snprintf(buf, sizeof(buf), "%u\n", d->emulate_power_cut); count = simple_read_from_buffer(user_buf, count, ppos, buf, strlen(buf)); goto out; } else if (dent == d->dfs_power_cut_min) { snprintf(buf, sizeof(buf), "%u\n", d->power_cut_min); count = simple_read_from_buffer(user_buf, count, ppos, buf, strlen(buf)); goto out; } else if (dent == d->dfs_power_cut_max) { snprintf(buf, sizeof(buf), "%u\n", d->power_cut_max); count = simple_read_from_buffer(user_buf, count, ppos, buf, strlen(buf)); goto out; } else { count = -EINVAL; goto out; } if (val) buf[0] = '1'; else buf[0] = '0'; buf[1] = '\n'; buf[2] = 0x00; count = simple_read_from_buffer(user_buf, count, ppos, buf, 2); out: ubi_put_device(ubi); return count; } /* Write an UBI debugfs file */ static ssize_t dfs_file_write(struct file *file, const char __user *user_buf, size_t count, loff_t *ppos) { unsigned long ubi_num = (unsigned long)file->private_data; struct dentry *dent = file->f_path.dentry; struct ubi_device *ubi; struct ubi_debug_info *d; size_t buf_size; char buf[16] = {0}; int val; ubi = ubi_get_device(ubi_num); if (!ubi) return -ENODEV; d = &ubi->dbg; buf_size = min_t(size_t, count, (sizeof(buf) - 1)); if (copy_from_user(buf, user_buf, buf_size)) { count = -EFAULT; goto out; } if (dent == d->dfs_emulate_failures) { if (kstrtouint(buf, 0, &d->emulate_failures) != 0) count = -EINVAL; goto out; } else if (dent == d->dfs_power_cut_min) { if (kstrtouint(buf, 0, &d->power_cut_min) != 0) count = -EINVAL; goto out; } else if (dent == d->dfs_power_cut_max) { if (kstrtouint(buf, 0, &d->power_cut_max) != 0) count = -EINVAL; goto out; } else if (dent == d->dfs_emulate_power_cut) { if (kstrtoint(buf, 0, &val) != 0) count = -EINVAL; else d->emulate_power_cut = val; goto out; } if (buf[0] == '1') val = 1; else if (buf[0] == '0') val = 0; else { count = -EINVAL; goto out; } if (dent == d->dfs_chk_gen) d->chk_gen = val; else if (dent == d->dfs_chk_io) d->chk_io = val; else if (dent == d->dfs_chk_fastmap) d->chk_fastmap = val; else if (dent == d->dfs_disable_bgt) d->disable_bgt = val; else if (dent == d->dfs_emulate_bitflips) d->emulate_bitflips = val; else if (dent == d->dfs_emulate_io_failures) d->emulate_io_failures = val; else count = -EINVAL; out: ubi_put_device(ubi); return count; } /* File operations for all UBI debugfs files except * detailed_erase_block_info */ static const struct file_operations dfs_fops = { .read = dfs_file_read, .write = dfs_file_write, .open = simple_open, .owner = THIS_MODULE, }; /* As long as the position is less then that total number of erase blocks, * we still have more to print. */ static void *eraseblk_count_seq_start(struct seq_file *s, loff_t *pos) { struct ubi_device *ubi = s->private; if (*pos < ubi->peb_count) return pos; return NULL; } /* Since we are using the position as the iterator, we just need to check if we * are done and increment the position. */ static void *eraseblk_count_seq_next(struct seq_file *s, void *v, loff_t *pos) { struct ubi_device *ubi = s->private; (*pos)++; if (*pos < ubi->peb_count) return pos; return NULL; } static void eraseblk_count_seq_stop(struct seq_file *s, void *v) { } static int eraseblk_count_seq_show(struct seq_file *s, void *iter) { struct ubi_device *ubi = s->private; struct ubi_wl_entry *wl; int *block_number = iter; int erase_count = -1; int err; /* If this is the start, print a header */ if (*block_number == 0) seq_puts(s, "physical_block_number\terase_count\n"); err = ubi_io_is_bad(ubi, *block_number); if (err) return err; spin_lock(&ubi->wl_lock); wl = ubi->lookuptbl[*block_number]; if (wl) erase_count = wl->ec; spin_unlock(&ubi->wl_lock); if (erase_count < 0) return 0; seq_printf(s, "%-22d\t%-11d\n", *block_number, erase_count); return 0; } static const struct seq_operations eraseblk_count_seq_ops = { .start = eraseblk_count_seq_start, .next = eraseblk_count_seq_next, .stop = eraseblk_count_seq_stop, .show = eraseblk_count_seq_show }; static int eraseblk_count_open(struct inode *inode, struct file *f) { struct seq_file *s; int err; err = seq_open(f, &eraseblk_count_seq_ops); if (err) return err; s = f->private_data; s->private = ubi_get_device((unsigned long)inode->i_private); if (!s->private) return -ENODEV; else return 0; } static int eraseblk_count_release(struct inode *inode, struct file *f) { struct seq_file *s = f->private_data; struct ubi_device *ubi = s->private; ubi_put_device(ubi); return seq_release(inode, f); } static const struct file_operations eraseblk_count_fops = { .owner = THIS_MODULE, .open = eraseblk_count_open, .read = seq_read, .llseek = seq_lseek, .release = eraseblk_count_release, }; /** * ubi_debugfs_init_dev - initialize debugfs for an UBI device. * @ubi: UBI device description object * * This function creates all debugfs files for UBI device @ubi. Returns zero in * case of success and a negative error code in case of failure. */ int ubi_debugfs_init_dev(struct ubi_device *ubi) { unsigned long ubi_num = ubi->ubi_num; struct ubi_debug_info *d = &ubi->dbg; umode_t mode = S_IRUSR | S_IWUSR; int n; if (!IS_ENABLED(CONFIG_DEBUG_FS)) return 0; n = snprintf(d->dfs_dir_name, UBI_DFS_DIR_LEN, UBI_DFS_DIR_NAME, ubi->ubi_num); if (n >= UBI_DFS_DIR_LEN) { /* The array size is too small */ return -EINVAL; } d->dfs_dir = debugfs_create_dir(d->dfs_dir_name, dfs_rootdir); d->dfs_chk_gen = debugfs_create_file("chk_gen", mode, d->dfs_dir, (void *)ubi_num, &dfs_fops); d->dfs_chk_io = debugfs_create_file("chk_io", mode, d->dfs_dir, (void *)ubi_num, &dfs_fops); d->dfs_chk_fastmap = debugfs_create_file("chk_fastmap", mode, d->dfs_dir, (void *)ubi_num, &dfs_fops); d->dfs_disable_bgt = debugfs_create_file("tst_disable_bgt", mode, d->dfs_dir, (void *)ubi_num, &dfs_fops); d->dfs_emulate_bitflips = debugfs_create_file("tst_emulate_bitflips", mode, d->dfs_dir, (void *)ubi_num, &dfs_fops); d->dfs_emulate_io_failures = debugfs_create_file("tst_emulate_io_failures", mode, d->dfs_dir, (void *)ubi_num, &dfs_fops); d->dfs_emulate_power_cut = debugfs_create_file("tst_emulate_power_cut", mode, d->dfs_dir, (void *)ubi_num, &dfs_fops); d->dfs_power_cut_min = debugfs_create_file("tst_emulate_power_cut_min", mode, d->dfs_dir, (void *)ubi_num, &dfs_fops); d->dfs_power_cut_max = debugfs_create_file("tst_emulate_power_cut_max", mode, d->dfs_dir, (void *)ubi_num, &dfs_fops); debugfs_create_file("detailed_erase_block_info", S_IRUSR, d->dfs_dir, (void *)ubi_num, &eraseblk_count_fops); #ifdef CONFIG_MTD_UBI_FAULT_INJECTION d->dfs_emulate_failures = debugfs_create_file("emulate_failures", mode, d->dfs_dir, (void *)ubi_num, &dfs_fops); #endif return 0; } /** * ubi_debugfs_exit_dev - free all debugfs files corresponding to device @ubi * @ubi: UBI device description object */ void ubi_debugfs_exit_dev(struct ubi_device *ubi) { if (IS_ENABLED(CONFIG_DEBUG_FS)) debugfs_remove_recursive(ubi->dbg.dfs_dir); } /** * ubi_dbg_power_cut - emulate a power cut if it is time to do so * @ubi: UBI device description object * @caller: Flags set to indicate from where the function is being called * * Returns non-zero if a power cut was emulated, zero if not. */ int ubi_dbg_power_cut(struct ubi_device *ubi, int caller) { unsigned int range; if ((ubi->dbg.emulate_power_cut & caller) == 0) return 0; if (ubi->dbg.power_cut_counter == 0) { ubi->dbg.power_cut_counter = ubi->dbg.power_cut_min; if (ubi->dbg.power_cut_max > ubi->dbg.power_cut_min) { range = ubi->dbg.power_cut_max - ubi->dbg.power_cut_min; ubi->dbg.power_cut_counter += get_random_u32_below(range); } return 0; } ubi->dbg.power_cut_counter--; if (ubi->dbg.power_cut_counter) return 0; return 1; }
10 8 1 1 1 1 9 2 7 1 3 4 1 5 9 10 10 8 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 // SPDX-License-Identifier: GPL-2.0-only /* * linux/fs/befs/linuxvfs.c * * Copyright (C) 2001 Will Dyson <will_dyson@pobox.com * */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/module.h> #include <linux/slab.h> #include <linux/fs.h> #include <linux/fs_context.h> #include <linux/fs_parser.h> #include <linux/errno.h> #include <linux/filelock.h> #include <linux/stat.h> #include <linux/nls.h> #include <linux/buffer_head.h> #include <linux/vfs.h> #include <linux/namei.h> #include <linux/sched.h> #include <linux/cred.h> #include <linux/exportfs.h> #include <linux/seq_file.h> #include <linux/blkdev.h> #include "befs.h" #include "btree.h" #include "inode.h" #include "datastream.h" #include "super.h" #include "io.h" MODULE_DESCRIPTION("BeOS File System (BeFS) driver"); MODULE_AUTHOR("Will Dyson"); MODULE_LICENSE("GPL"); /* The units the vfs expects inode->i_blocks to be in */ #define VFS_BLOCK_SIZE 512 static int befs_readdir(struct file *, struct dir_context *); static int befs_get_block(struct inode *, sector_t, struct buffer_head *, int); static int befs_read_folio(struct file *file, struct folio *folio); static sector_t befs_bmap(struct address_space *mapping, sector_t block); static struct dentry *befs_lookup(struct inode *, struct dentry *, unsigned int); static struct inode *befs_iget(struct super_block *, unsigned long); static struct inode *befs_alloc_inode(struct super_block *sb); static void befs_free_inode(struct inode *inode); static void befs_destroy_inodecache(void); static int befs_symlink_read_folio(struct file *, struct folio *); static int befs_utf2nls(struct super_block *sb, const char *in, int in_len, char **out, int *out_len); static int befs_nls2utf(struct super_block *sb, const char *in, int in_len, char **out, int *out_len); static void befs_put_super(struct super_block *); static int befs_statfs(struct dentry *, struct kstatfs *); static int befs_show_options(struct seq_file *, struct dentry *); static struct dentry *befs_fh_to_dentry(struct super_block *sb, struct fid *fid, int fh_len, int fh_type); static struct dentry *befs_fh_to_parent(struct super_block *sb, struct fid *fid, int fh_len, int fh_type); static struct dentry *befs_get_parent(struct dentry *child); static void befs_free_fc(struct fs_context *fc); static const struct super_operations befs_sops = { .alloc_inode = befs_alloc_inode, /* allocate a new inode */ .free_inode = befs_free_inode, /* deallocate an inode */ .put_super = befs_put_super, /* uninit super */ .statfs = befs_statfs, /* statfs */ .show_options = befs_show_options, }; /* slab cache for befs_inode_info objects */ static struct kmem_cache *befs_inode_cachep; static const struct file_operations befs_dir_operations = { .read = generic_read_dir, .iterate_shared = befs_readdir, .llseek = generic_file_llseek, .setlease = generic_setlease, }; static const struct inode_operations befs_dir_inode_operations = { .lookup = befs_lookup, }; static const struct address_space_operations befs_aops = { .read_folio = befs_read_folio, .bmap = befs_bmap, }; static const struct address_space_operations befs_symlink_aops = { .read_folio = befs_symlink_read_folio, }; static const struct export_operations befs_export_operations = { .encode_fh = generic_encode_ino32_fh, .fh_to_dentry = befs_fh_to_dentry, .fh_to_parent = befs_fh_to_parent, .get_parent = befs_get_parent, }; /* * Called by generic_file_read() to read a folio of data * * In turn, simply calls a generic block read function and * passes it the address of befs_get_block, for mapping file * positions to disk blocks. */ static int befs_read_folio(struct file *file, struct folio *folio) { return block_read_full_folio(folio, befs_get_block); } static sector_t befs_bmap(struct address_space *mapping, sector_t block) { return generic_block_bmap(mapping, block, befs_get_block); } /* * Generic function to map a file position (block) to a * disk offset (passed back in bh_result). * * Used by many higher level functions. * * Calls befs_fblock2brun() in datastream.c to do the real work. */ static int befs_get_block(struct inode *inode, sector_t block, struct buffer_head *bh_result, int create) { struct super_block *sb = inode->i_sb; befs_data_stream *ds = &BEFS_I(inode)->i_data.ds; befs_block_run run = BAD_IADDR; int res; ulong disk_off; befs_debug(sb, "---> befs_get_block() for inode %lu, block %ld", (unsigned long)inode->i_ino, (long)block); if (create) { befs_error(sb, "befs_get_block() was asked to write to " "block %ld in inode %lu", (long)block, (unsigned long)inode->i_ino); return -EPERM; } res = befs_fblock2brun(sb, ds, block, &run); if (res != BEFS_OK) { befs_error(sb, "<--- %s for inode %lu, block %ld ERROR", __func__, (unsigned long)inode->i_ino, (long)block); return -EFBIG; } disk_off = (ulong) iaddr2blockno(sb, &run); map_bh(bh_result, inode->i_sb, disk_off); befs_debug(sb, "<--- %s for inode %lu, block %ld, disk address %lu", __func__, (unsigned long)inode->i_ino, (long)block, (unsigned long)disk_off); return 0; } static struct dentry * befs_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags) { struct inode *inode; struct super_block *sb = dir->i_sb; const befs_data_stream *ds = &BEFS_I(dir)->i_data.ds; befs_off_t offset; int ret; int utfnamelen; char *utfname; const char *name = dentry->d_name.name; befs_debug(sb, "---> %s name %pd inode %ld", __func__, dentry, dir->i_ino); /* Convert to UTF-8 */ if (BEFS_SB(sb)->nls) { ret = befs_nls2utf(sb, name, strlen(name), &utfname, &utfnamelen); if (ret < 0) { befs_debug(sb, "<--- %s ERROR", __func__); return ERR_PTR(ret); } ret = befs_btree_find(sb, ds, utfname, &offset); kfree(utfname); } else { ret = befs_btree_find(sb, ds, name, &offset); } if (ret == BEFS_BT_NOT_FOUND) { befs_debug(sb, "<--- %s %pd not found", __func__, dentry); inode = NULL; } else if (ret != BEFS_OK || offset == 0) { befs_error(sb, "<--- %s Error", __func__); inode = ERR_PTR(-ENODATA); } else { inode = befs_iget(dir->i_sb, (ino_t) offset); } befs_debug(sb, "<--- %s", __func__); return d_splice_alias(inode, dentry); } static int befs_readdir(struct file *file, struct dir_context *ctx) { struct inode *inode = file_inode(file); struct super_block *sb = inode->i_sb; const befs_data_stream *ds = &BEFS_I(inode)->i_data.ds; befs_off_t value; int result; size_t keysize; char keybuf[BEFS_NAME_LEN + 1]; befs_debug(sb, "---> %s name %pD, inode %ld, ctx->pos %lld", __func__, file, inode->i_ino, ctx->pos); while (1) { result = befs_btree_read(sb, ds, ctx->pos, BEFS_NAME_LEN + 1, keybuf, &keysize, &value); if (result == BEFS_ERR) { befs_debug(sb, "<--- %s ERROR", __func__); befs_error(sb, "IO error reading %pD (inode %lu)", file, inode->i_ino); return -EIO; } else if (result == BEFS_BT_END) { befs_debug(sb, "<--- %s END", __func__); return 0; } else if (result == BEFS_BT_EMPTY) { befs_debug(sb, "<--- %s Empty directory", __func__); return 0; } /* Convert to NLS */ if (BEFS_SB(sb)->nls) { char *nlsname; int nlsnamelen; result = befs_utf2nls(sb, keybuf, keysize, &nlsname, &nlsnamelen); if (result < 0) { befs_debug(sb, "<--- %s ERROR", __func__); return result; } if (!dir_emit(ctx, nlsname, nlsnamelen, (ino_t) value, DT_UNKNOWN)) { kfree(nlsname); return 0; } kfree(nlsname); } else { if (!dir_emit(ctx, keybuf, keysize, (ino_t) value, DT_UNKNOWN)) return 0; } ctx->pos++; } } static struct inode * befs_alloc_inode(struct super_block *sb) { struct befs_inode_info *bi; bi = alloc_inode_sb(sb, befs_inode_cachep, GFP_KERNEL); if (!bi) return NULL; return &bi->vfs_inode; } static void befs_free_inode(struct inode *inode) { kmem_cache_free(befs_inode_cachep, BEFS_I(inode)); } static void init_once(void *foo) { struct befs_inode_info *bi = (struct befs_inode_info *) foo; inode_init_once(&bi->vfs_inode); } static struct inode *befs_iget(struct super_block *sb, unsigned long ino) { struct buffer_head *bh; befs_inode *raw_inode; struct befs_sb_info *befs_sb = BEFS_SB(sb); struct befs_inode_info *befs_ino; struct inode *inode; befs_debug(sb, "---> %s inode = %lu", __func__, ino); inode = iget_locked(sb, ino); if (!inode) return ERR_PTR(-ENOMEM); if (!(inode_state_read_once(inode) & I_NEW)) return inode; befs_ino = BEFS_I(inode); /* convert from vfs's inode number to befs's inode number */ befs_ino->i_inode_num = blockno2iaddr(sb, inode->i_ino); befs_debug(sb, " real inode number [%u, %hu, %hu]", befs_ino->i_inode_num.allocation_group, befs_ino->i_inode_num.start, befs_ino->i_inode_num.len); bh = sb_bread(sb, inode->i_ino); if (!bh) { befs_error(sb, "unable to read inode block - " "inode = %lu", inode->i_ino); goto unacquire_none; } raw_inode = (befs_inode *) bh->b_data; befs_dump_inode(sb, raw_inode); if (befs_check_inode(sb, raw_inode, inode->i_ino) != BEFS_OK) { befs_error(sb, "Bad inode: %lu", inode->i_ino); goto unacquire_bh; } inode->i_mode = (umode_t) fs32_to_cpu(sb, raw_inode->mode); /* * set uid and gid. But since current BeOS is single user OS, so * you can change by "uid" or "gid" options. */ inode->i_uid = befs_sb->mount_opts.use_uid ? befs_sb->mount_opts.uid : make_kuid(&init_user_ns, fs32_to_cpu(sb, raw_inode->uid)); inode->i_gid = befs_sb->mount_opts.use_gid ? befs_sb->mount_opts.gid : make_kgid(&init_user_ns, fs32_to_cpu(sb, raw_inode->gid)); set_nlink(inode, 1); /* * BEFS's time is 64 bits, but current VFS is 32 bits... * BEFS don't have access time. Nor inode change time. VFS * doesn't have creation time. * Also, the lower 16 bits of the last_modified_time and * create_time are just a counter to help ensure uniqueness * for indexing purposes. (PFD, page 54) */ inode_set_mtime(inode, fs64_to_cpu(sb, raw_inode->last_modified_time) >> 16, 0);/* lower 16 bits are not a time */ inode_set_ctime_to_ts(inode, inode_get_mtime(inode)); inode_set_atime_to_ts(inode, inode_get_mtime(inode)); befs_ino->i_inode_num = fsrun_to_cpu(sb, raw_inode->inode_num); befs_ino->i_parent = fsrun_to_cpu(sb, raw_inode->parent); befs_ino->i_attribute = fsrun_to_cpu(sb, raw_inode->attributes); befs_ino->i_flags = fs32_to_cpu(sb, raw_inode->flags); if (S_ISLNK(inode->i_mode) && !(befs_ino->i_flags & BEFS_LONG_SYMLINK)){ inode->i_size = 0; inode->i_blocks = befs_sb->block_size / VFS_BLOCK_SIZE; strscpy(befs_ino->i_data.symlink, raw_inode->data.symlink, BEFS_SYMLINK_LEN); } else { int num_blks; befs_ino->i_data.ds = fsds_to_cpu(sb, &raw_inode->data.datastream); num_blks = befs_count_blocks(sb, &befs_ino->i_data.ds); inode->i_blocks = num_blks * (befs_sb->block_size / VFS_BLOCK_SIZE); inode->i_size = befs_ino->i_data.ds.size; } inode->i_mapping->a_ops = &befs_aops; if (S_ISREG(inode->i_mode)) { inode->i_fop = &generic_ro_fops; } else if (S_ISDIR(inode->i_mode)) { inode->i_op = &befs_dir_inode_operations; inode->i_fop = &befs_dir_operations; } else if (S_ISLNK(inode->i_mode)) { if (befs_ino->i_flags & BEFS_LONG_SYMLINK) { inode->i_op = &page_symlink_inode_operations; inode_nohighmem(inode); inode->i_mapping->a_ops = &befs_symlink_aops; } else { inode->i_link = befs_ino->i_data.symlink; inode->i_op = &simple_symlink_inode_operations; } } else { befs_error(sb, "Inode %lu is not a regular file, " "directory or symlink. THAT IS WRONG! BeFS has no " "on disk special files", inode->i_ino); goto unacquire_bh; } brelse(bh); befs_debug(sb, "<--- %s", __func__); unlock_new_inode(inode); return inode; unacquire_bh: brelse(bh); unacquire_none: iget_failed(inode); befs_debug(sb, "<--- %s - Bad inode", __func__); return ERR_PTR(-EIO); } /* Initialize the inode cache. Called at fs setup. * * Taken from NFS implementation by Al Viro. */ static int __init befs_init_inodecache(void) { befs_inode_cachep = kmem_cache_create_usercopy("befs_inode_cache", sizeof(struct befs_inode_info), 0, SLAB_RECLAIM_ACCOUNT | SLAB_ACCOUNT, offsetof(struct befs_inode_info, i_data.symlink), sizeof_field(struct befs_inode_info, i_data.symlink), init_once); if (befs_inode_cachep == NULL) return -ENOMEM; return 0; } /* Called at fs teardown. * * Taken from NFS implementation by Al Viro. */ static void befs_destroy_inodecache(void) { /* * Make sure all delayed rcu free inodes are flushed before we * destroy cache. */ rcu_barrier(); kmem_cache_destroy(befs_inode_cachep); } /* * The inode of symbolic link is different to data stream. * The data stream become link name. Unless the LONG_SYMLINK * flag is set. */ static int befs_symlink_read_folio(struct file *unused, struct folio *folio) { struct inode *inode = folio->mapping->host; struct super_block *sb = inode->i_sb; struct befs_inode_info *befs_ino = BEFS_I(inode); befs_data_stream *data = &befs_ino->i_data.ds; befs_off_t len = data->size; char *link = folio_address(folio); int err = -EIO; if (len == 0 || len > PAGE_SIZE) { befs_error(sb, "Long symlink with illegal length"); goto fail; } befs_debug(sb, "Follow long symlink"); if (befs_read_lsymlink(sb, data, link, len) != len) { befs_error(sb, "Failed to read entire long symlink"); goto fail; } link[len - 1] = '\0'; err = 0; fail: folio_end_read(folio, err == 0); return err; } /* * UTF-8 to NLS charset convert routine * * Uses uni2char() / char2uni() rather than the nls tables directly */ static int befs_utf2nls(struct super_block *sb, const char *in, int in_len, char **out, int *out_len) { struct nls_table *nls = BEFS_SB(sb)->nls; int i, o; unicode_t uni; int unilen, utflen; char *result; /* The utf8->nls conversion won't make the final nls string bigger * than the utf one, but if the string is pure ascii they'll have the * same width and an extra char is needed to save the additional \0 */ int maxlen = in_len + 1; befs_debug(sb, "---> %s", __func__); if (!nls) { befs_error(sb, "%s called with no NLS table loaded", __func__); return -EINVAL; } *out = result = kmalloc(maxlen, GFP_NOFS); if (!*out) return -ENOMEM; for (i = o = 0; i < in_len; i += utflen, o += unilen) { /* convert from UTF-8 to Unicode */ utflen = utf8_to_utf32(&in[i], in_len - i, &uni); if (utflen < 0) goto conv_err; /* convert from Unicode to nls */ if (uni > MAX_WCHAR_T) goto conv_err; unilen = nls->uni2char(uni, &result[o], in_len - o); if (unilen < 0) goto conv_err; } result[o] = '\0'; *out_len = o; befs_debug(sb, "<--- %s", __func__); return o; conv_err: befs_error(sb, "Name using character set %s contains a character that " "cannot be converted to unicode.", nls->charset); befs_debug(sb, "<--- %s", __func__); kfree(result); return -EILSEQ; } /** * befs_nls2utf - Convert NLS string to utf8 encodeing * @sb: Superblock * @in: Input string buffer in NLS format * @in_len: Length of input string in bytes * @out: The output string in UTF-8 format * @out_len: Length of the output buffer * * Converts input string @in, which is in the format of the loaded NLS map, * into a utf8 string. * * The destination string @out is allocated by this function and the caller is * responsible for freeing it with kfree() * * On return, *@out_len is the length of @out in bytes. * * On success, the return value is the number of utf8 characters written to * the output buffer @out. * * On Failure, a negative number coresponding to the error code is returned. */ static int befs_nls2utf(struct super_block *sb, const char *in, int in_len, char **out, int *out_len) { struct nls_table *nls = BEFS_SB(sb)->nls; int i, o; wchar_t uni; int unilen, utflen; char *result; /* * There are nls characters that will translate to 3-chars-wide UTF-8 * characters, an additional byte is needed to save the final \0 * in special cases */ int maxlen = (3 * in_len) + 1; befs_debug(sb, "---> %s\n", __func__); if (!nls) { befs_error(sb, "%s called with no NLS table loaded.", __func__); return -EINVAL; } *out = result = kmalloc(maxlen, GFP_NOFS); if (!*out) { *out_len = 0; return -ENOMEM; } for (i = o = 0; i < in_len; i += unilen, o += utflen) { /* convert from nls to unicode */ unilen = nls->char2uni(&in[i], in_len - i, &uni); if (unilen < 0) goto conv_err; /* convert from unicode to UTF-8 */ utflen = utf32_to_utf8(uni, &result[o], 3); if (utflen <= 0) goto conv_err; } result[o] = '\0'; *out_len = o; befs_debug(sb, "<--- %s", __func__); return i; conv_err: befs_error(sb, "Name using character set %s contains a character that " "cannot be converted to unicode.", nls->charset); befs_debug(sb, "<--- %s", __func__); kfree(result); return -EILSEQ; } static struct inode *befs_nfs_get_inode(struct super_block *sb, uint64_t ino, uint32_t generation) { /* No need to handle i_generation */ return befs_iget(sb, ino); } /* * Map a NFS file handle to a corresponding dentry */ static struct dentry *befs_fh_to_dentry(struct super_block *sb, struct fid *fid, int fh_len, int fh_type) { return generic_fh_to_dentry(sb, fid, fh_len, fh_type, befs_nfs_get_inode); } /* * Find the parent for a file specified by NFS handle */ static struct dentry *befs_fh_to_parent(struct super_block *sb, struct fid *fid, int fh_len, int fh_type) { return generic_fh_to_parent(sb, fid, fh_len, fh_type, befs_nfs_get_inode); } static struct dentry *befs_get_parent(struct dentry *child) { struct inode *parent; struct befs_inode_info *befs_ino = BEFS_I(d_inode(child)); parent = befs_iget(child->d_sb, (unsigned long)befs_ino->i_parent.start); return d_obtain_alias(parent); } enum { Opt_uid, Opt_gid, Opt_charset, Opt_debug, }; static const struct fs_parameter_spec befs_param_spec[] = { fsparam_uid ("uid", Opt_uid), fsparam_gid ("gid", Opt_gid), fsparam_string ("iocharset", Opt_charset), fsparam_flag ("debug", Opt_debug), {} }; static int befs_parse_param(struct fs_context *fc, struct fs_parameter *param) { struct befs_mount_options *opts = fc->fs_private; int token; struct fs_parse_result result; /* befs ignores all options on remount */ if (fc->purpose == FS_CONTEXT_FOR_RECONFIGURE) return 0; token = fs_parse(fc, befs_param_spec, param, &result); if (token < 0) return token; switch (token) { case Opt_uid: opts->uid = result.uid; opts->use_uid = 1; break; case Opt_gid: opts->gid = result.gid; opts->use_gid = 1; break; case Opt_charset: kfree(opts->iocharset); opts->iocharset = param->string; param->string = NULL; break; case Opt_debug: opts->debug = 1; break; default: return -EINVAL; } return 0; } static int befs_show_options(struct seq_file *m, struct dentry *root) { struct befs_sb_info *befs_sb = BEFS_SB(root->d_sb); struct befs_mount_options *opts = &befs_sb->mount_opts; if (!uid_eq(opts->uid, GLOBAL_ROOT_UID)) seq_printf(m, ",uid=%u", from_kuid_munged(&init_user_ns, opts->uid)); if (!gid_eq(opts->gid, GLOBAL_ROOT_GID)) seq_printf(m, ",gid=%u", from_kgid_munged(&init_user_ns, opts->gid)); if (opts->iocharset) seq_printf(m, ",charset=%s", opts->iocharset); if (opts->debug) seq_puts(m, ",debug"); return 0; } /* This function has the responsibiltiy of getting the * filesystem ready for unmounting. * Basically, we free everything that we allocated in * befs_read_inode */ static void befs_put_super(struct super_block *sb) { kfree(BEFS_SB(sb)->mount_opts.iocharset); BEFS_SB(sb)->mount_opts.iocharset = NULL; unload_nls(BEFS_SB(sb)->nls); kfree(sb->s_fs_info); sb->s_fs_info = NULL; } /* * Copy the parsed options into the sbi mount_options member */ static void befs_set_options(struct befs_sb_info *sbi, struct befs_mount_options *opts) { sbi->mount_opts.uid = opts->uid; sbi->mount_opts.gid = opts->gid; sbi->mount_opts.use_uid = opts->use_uid; sbi->mount_opts.use_gid = opts->use_gid; sbi->mount_opts.debug = opts->debug; sbi->mount_opts.iocharset = opts->iocharset; opts->iocharset = NULL; } /* Allocate private field of the superblock, fill it. * * Finish filling the public superblock fields * Make the root directory * Load a set of NLS translations if needed. */ static int befs_fill_super(struct super_block *sb, struct fs_context *fc) { struct buffer_head *bh; struct befs_sb_info *befs_sb; befs_super_block *disk_sb; struct inode *root; long ret = -EINVAL; const unsigned long sb_block = 0; const off_t x86_sb_off = 512; int blocksize; struct befs_mount_options *parsed_opts = fc->fs_private; int silent = fc->sb_flags & SB_SILENT; sb->s_fs_info = kzalloc(sizeof(*befs_sb), GFP_KERNEL); if (sb->s_fs_info == NULL) goto unacquire_none; befs_sb = BEFS_SB(sb); befs_set_options(befs_sb, parsed_opts); befs_debug(sb, "---> %s", __func__); if (!sb_rdonly(sb)) { befs_warning(sb, "No write support. Marking filesystem read-only"); sb->s_flags |= SB_RDONLY; } /* * Set dummy blocksize to read super block. * Will be set to real fs blocksize later. * * Linux 2.4.10 and later refuse to read blocks smaller than * the logical block size for the device. But we also need to read at * least 1k to get the second 512 bytes of the volume. */ blocksize = sb_min_blocksize(sb, 1024); if (!blocksize) { if (!silent) befs_error(sb, "unable to set blocksize"); goto unacquire_priv_sbp; } bh = sb_bread(sb, sb_block); if (!bh) { if (!silent) befs_error(sb, "unable to read superblock"); goto unacquire_priv_sbp; } /* account for offset of super block on x86 */ disk_sb = (befs_super_block *) bh->b_data; if ((disk_sb->magic1 == BEFS_SUPER_MAGIC1_LE) || (disk_sb->magic1 == BEFS_SUPER_MAGIC1_BE)) { befs_debug(sb, "Using PPC superblock location"); } else { befs_debug(sb, "Using x86 superblock location"); disk_sb = (befs_super_block *) ((void *) bh->b_data + x86_sb_off); } if ((befs_load_sb(sb, disk_sb) != BEFS_OK) || (befs_check_sb(sb) != BEFS_OK)) goto unacquire_bh; befs_dump_super_block(sb, disk_sb); brelse(bh); if (befs_sb->num_blocks > ~((sector_t)0)) { if (!silent) befs_error(sb, "blocks count: %llu is larger than the host can use", befs_sb->num_blocks); goto unacquire_priv_sbp; } /* * set up enough so that it can read an inode * Fill in kernel superblock fields from private sb */ sb->s_magic = BEFS_SUPER_MAGIC; /* Set real blocksize of fs */ sb_set_blocksize(sb, (ulong) befs_sb->block_size); sb->s_op = &befs_sops; sb->s_export_op = &befs_export_operations; sb->s_time_min = 0; sb->s_time_max = 0xffffffffffffll; root = befs_iget(sb, iaddr2blockno(sb, &(befs_sb->root_dir))); if (IS_ERR(root)) { ret = PTR_ERR(root); goto unacquire_priv_sbp; } sb->s_root = d_make_root(root); if (!sb->s_root) { if (!silent) befs_error(sb, "get root inode failed"); goto unacquire_priv_sbp; } /* load nls library */ if (befs_sb->mount_opts.iocharset) { befs_debug(sb, "Loading nls: %s", befs_sb->mount_opts.iocharset); befs_sb->nls = load_nls(befs_sb->mount_opts.iocharset); if (!befs_sb->nls) { befs_warning(sb, "Cannot load nls %s" " loading default nls", befs_sb->mount_opts.iocharset); befs_sb->nls = load_nls_default(); } /* load default nls if none is specified in mount options */ } else { befs_debug(sb, "Loading default nls"); befs_sb->nls = load_nls_default(); } return 0; unacquire_bh: brelse(bh); unacquire_priv_sbp: kfree(befs_sb->mount_opts.iocharset); kfree(sb->s_fs_info); sb->s_fs_info = NULL; unacquire_none: return ret; } static int befs_reconfigure(struct fs_context *fc) { sync_filesystem(fc->root->d_sb); if (!(fc->sb_flags & SB_RDONLY)) return -EINVAL; return 0; } static int befs_statfs(struct dentry *dentry, struct kstatfs *buf) { struct super_block *sb = dentry->d_sb; u64 id = huge_encode_dev(sb->s_bdev->bd_dev); befs_debug(sb, "---> %s", __func__); buf->f_type = BEFS_SUPER_MAGIC; buf->f_bsize = sb->s_blocksize; buf->f_blocks = BEFS_SB(sb)->num_blocks; buf->f_bfree = BEFS_SB(sb)->num_blocks - BEFS_SB(sb)->used_blocks; buf->f_bavail = buf->f_bfree; buf->f_files = 0; /* UNKNOWN */ buf->f_ffree = 0; /* UNKNOWN */ buf->f_fsid = u64_to_fsid(id); buf->f_namelen = BEFS_NAME_LEN; befs_debug(sb, "<--- %s", __func__); return 0; } static int befs_get_tree(struct fs_context *fc) { return get_tree_bdev(fc, befs_fill_super); } static const struct fs_context_operations befs_context_ops = { .parse_param = befs_parse_param, .get_tree = befs_get_tree, .reconfigure = befs_reconfigure, .free = befs_free_fc, }; static int befs_init_fs_context(struct fs_context *fc) { struct befs_mount_options *opts; opts = kzalloc(sizeof(*opts), GFP_KERNEL); if (!opts) return -ENOMEM; /* Initialize options */ opts->uid = GLOBAL_ROOT_UID; opts->gid = GLOBAL_ROOT_GID; fc->fs_private = opts; fc->ops = &befs_context_ops; return 0; } static void befs_free_fc(struct fs_context *fc) { struct befs_mount_options *opts = fc->fs_private; kfree(opts->iocharset); kfree(fc->fs_private); } static struct file_system_type befs_fs_type = { .owner = THIS_MODULE, .name = "befs", .kill_sb = kill_block_super, .fs_flags = FS_REQUIRES_DEV, .init_fs_context = befs_init_fs_context, .parameters = befs_param_spec, }; MODULE_ALIAS_FS("befs"); static int __init init_befs_fs(void) { int err; pr_info("version: %s\n", BEFS_VERSION); err = befs_init_inodecache(); if (err) goto unacquire_none; err = register_filesystem(&befs_fs_type); if (err) goto unacquire_inodecache; return 0; unacquire_inodecache: befs_destroy_inodecache(); unacquire_none: return err; } static void __exit exit_befs_fs(void) { befs_destroy_inodecache(); unregister_filesystem(&befs_fs_type); } /* * Macros that typecheck the init and exit functions, * ensures that they are called at init and cleanup, * and eliminates warnings about unused functions. */ module_init(init_befs_fs) module_exit(exit_befs_fs)
44 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 // SPDX-License-Identifier: GPL-2.0 /* * procfs-based user access to knfsd statistics * * /proc/net/rpc/nfsd * * Format: * rc <hits> <misses> <nocache> * Statistsics for the reply cache * fh <stale> <deprecated filehandle cache stats> * statistics for filehandle lookup * io <bytes-read> <bytes-written> * statistics for IO throughput * th <threads> <deprecated thread usage histogram stats> * number of threads * ra <deprecated ra-cache stats> * * plus generic RPC stats (see net/sunrpc/stats.c) * * Copyright (C) 1995, 1996, 1997 Olaf Kirch <okir@monad.swb.de> */ #include <linux/seq_file.h> #include <linux/module.h> #include <linux/sunrpc/stats.h> #include <net/net_namespace.h> #include "nfsd.h" static int nfsd_show(struct seq_file *seq, void *v) { struct net *net = pde_data(file_inode(seq->file)); struct nfsd_net *nn = net_generic(net, nfsd_net_id); int i; seq_printf(seq, "rc %lld %lld %lld\nfh %lld 0 0 0 0\nio %lld %lld\n", percpu_counter_sum_positive(&nn->counter[NFSD_STATS_RC_HITS]), percpu_counter_sum_positive(&nn->counter[NFSD_STATS_RC_MISSES]), percpu_counter_sum_positive(&nn->counter[NFSD_STATS_RC_NOCACHE]), percpu_counter_sum_positive(&nn->counter[NFSD_STATS_FH_STALE]), percpu_counter_sum_positive(&nn->counter[NFSD_STATS_IO_READ]), percpu_counter_sum_positive(&nn->counter[NFSD_STATS_IO_WRITE])); /* thread usage: */ seq_printf(seq, "th %u 0", atomic_read(&nfsd_th_cnt)); /* deprecated thread usage histogram stats */ for (i = 0; i < 10; i++) seq_puts(seq, " 0.000"); /* deprecated ra-cache stats */ seq_puts(seq, "\nra 0 0 0 0 0 0 0 0 0 0 0 0\n"); /* show my rpc info */ svc_seq_show(seq, &nn->nfsd_svcstats); #ifdef CONFIG_NFSD_V4 /* Show count for individual nfsv4 operations */ /* Writing operation numbers 0 1 2 also for maintaining uniformity */ seq_printf(seq, "proc4ops %u", LAST_NFS4_OP + 1); for (i = 0; i <= LAST_NFS4_OP; i++) { seq_printf(seq, " %lld", percpu_counter_sum_positive(&nn->counter[NFSD_STATS_NFS4_OP(i)])); } seq_printf(seq, "\nwdeleg_getattr %lld", percpu_counter_sum_positive(&nn->counter[NFSD_STATS_WDELEG_GETATTR])); seq_putc(seq, '\n'); #endif return 0; } DEFINE_PROC_SHOW_ATTRIBUTE(nfsd); struct proc_dir_entry *nfsd_proc_stat_init(struct net *net) { struct nfsd_net *nn = net_generic(net, nfsd_net_id); return svc_proc_register(net, &nn->nfsd_svcstats, &nfsd_proc_ops); } void nfsd_proc_stat_shutdown(struct net *net) { svc_proc_unregister(net, "nfsd"); }
44 44 44 44 44 43 44 44 44 44 44 44 44 44 44 43 44 43 2 2 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282 2283 2284 2285 2286 2287 2288 2289 2290 2291 2292 2293 2294 2295 2296 2297 2298 2299 2300 2301 2302 2303 2304 2305 2306 2307 2308 2309 2310 2311 2312 2313 2314 2315 2316 2317 2318 2319 2320 2321 2322 2323 2324 2325 2326 2327 2328 2329 2330 2331 2332 2333 2334 2335 2336 2337 2338 2339 2340 2341 2342 2343 2344 2345 2346 2347 2348 2349 2350 2351 2352 2353 2354 2355 2356 2357 2358 2359 2360 2361 2362 2363 2364 2365 2366 2367 2368 2369 2370 2371 2372 2373 2374 2375 2376 2377 2378 2379 2380 2381 2382 2383 2384 2385 2386 2387 2388 2389 2390 2391 2392 2393 2394 2395 2396 2397 2398 2399 2400 2401 2402 2403 2404 2405 2406 2407 2408 2409 2410 2411 2412 2413 2414 2415 2416 2417 2418 2419 2420 2421 2422 2423 2424 2425 2426 2427 2428 2429 2430 2431 2432 2433 2434 2435 2436 2437 2438 2439 2440 2441 2442 2443 2444 2445 2446 2447 2448 2449 2450 2451 2452 2453 2454 2455 2456 2457 2458 2459 2460 2461 2462 2463 2464 2465 2466 2467 2468 2469 2470 2471 2472 2473 2474 2475 2476 2477 2478 2479 2480 2481 2482 2483 2484 2485 2486 2487 2488 2489 2490 2491 2492 2493 2494 2495 2496 2497 2498 2499 2500 2501 2502 2503 2504 2505 2506 2507 2508 2509 2510 2511 2512 2513 2514 2515 2516 2517 2518 2519 2520 2521 2522 2523 2524 2525 2526 2527 2528 2529 2530 2531 2532 2533 2534 2535 2536 2537 2538 2539 2540 2541 2542 2543 2544 2545 2546 2547 2548 2549 2550 2551 2552 2553 2554 2555 2556 2557 2558 2559 2560 2561 2562 2563 2564 2565 2566 2567 2568 2569 2570 2571 2572 2573 2574 2575 2576 2577 2578 2579 2580 2581 2582 2583 2584 2585 2586 2587 2588 2589 2590 2591 2592 2593 2594 2595 2596 2597 2598 2599 2600 2601 2602 2603 2604 2605 2606 2607 2608 2609 2610 2611 2612 2613 2614 2615 2616 2617 2618 2619 2620 2621 2622 2623 2624 2625 2626 2627 2628 2629 2630 2631 2632 2633 2634 2635 2636 2637 2638 2639 2640 2641 2642 2643 2644 2645 2646 2647 2648 2649 2650 2651 2652 2653 2654 2655 2656 2657 2658 2659 2660 2661 2662 2663 2664 2665 2666 2667 2668 2669 2670 2671 2672 2673 2674 2675 2676 2677 2678 2679 2680 2681 2682 2683 2684 2685 2686 2687 2688 2689 2690 2691 2692 2693 2694 2695 2696 2697 2698 2699 2700 2701 2702 2703 2704 2705 2706 2707 2708 2709 2710 2711 2712 2713 2714 2715 2716 2717 2718 2719 2720 2721 2722 2723 2724 2725 2726 2727 2728 2729 2730 2731 2732 2733 2734 2735 2736 2737 2738 2739 2740 2741 2742 2743 2744 2745 2746 2747 2748 2749 2750 2751 2752 2753 2754 2755 2756 2757 2758 2759 2760 2761 2762 2763 2764 2765 2766 2767 2768 2769 2770 2771 2772 2773 2774 2775 2776 2777 2778 2779 2780 2781 2782 2783 2784 2785 2786 2787 2788 2789 2790 2791 2792 2793 2794 2795 2796 2797 2798 2799 2800 2801 2802 2803 2804 2805 2806 2807 2808 2809 2810 2811 2812 2813 2814 2815 2816 2817 2818 2819 2820 2821 2822 2823 2824 2825 2826 2827 2828 2829 2830 2831 2832 2833 2834 2835 2836 2837 2838 2839 2840 2841 2842 2843 2844 2845 2846 2847 2848 2849 2850 2851 2852 2853 2854 2855 2856 2857 2858 2859 2860 2861 2862 2863 2864 2865 2866 2867 2868 2869 2870 2871 2872 2873 2874 2875 2876 2877 2878 2879 2880 2881 2882 2883 2884 2885 2886 2887 2888 2889 2890 2891 2892 2893 2894 2895 2896 2897 2898 2899 2900 2901 2902 2903 2904 2905 2906 2907 2908 2909 2910 2911 2912 2913 2914 2915 2916 2917 2918 2919 2920 2921 2922 2923 2924 2925 2926 2927 2928 2929 2930 2931 2932 2933 2934 2935 2936 2937 2938 2939 2940 2941 2942 2943 2944 2945 2946 2947 2948 2949 2950 2951 2952 2953 2954 2955 2956 2957 2958 2959 2960 2961 2962 2963 2964 2965 2966 2967 2968 2969 2970 2971 2972 2973 2974 2975 2976 2977 2978 2979 2980 2981 2982 2983 2984 2985 2986 2987 2988 2989 2990 2991 2992 2993 2994 2995 2996 2997 2998 2999 3000 3001 3002 3003 3004 3005 3006 3007 3008 3009 3010 3011 3012 3013 3014 3015 3016 3017 3018 3019 3020 3021 3022 3023 3024 3025 3026 3027 3028 3029 3030 3031 3032 3033 3034 3035 3036 3037 3038 3039 3040 3041 3042 3043 3044 3045 3046 3047 3048 3049 3050 3051 3052 3053 3054 3055 3056 3057 3058 3059 3060 3061 3062 3063 3064 3065 3066 3067 3068 3069 3070 3071 3072 3073 3074 3075 3076 3077 3078 3079 3080 3081 3082 3083 3084 3085 3086 3087 3088 3089 3090 3091 3092 3093 3094 3095 3096 3097 3098 3099 3100 3101 3102 3103 3104 3105 3106 3107 3108 3109 3110 3111 3112 3113 3114 3115 3116 3117 3118 3119 3120 3121 3122 3123 3124 3125 3126 3127 3128 3129 3130 3131 3132 3133 3134 3135 3136 3137 3138 3139 3140 3141 3142 3143 3144 3145 3146 3147 3148 3149 3150 3151 3152 3153 3154 3155 3156 3157 3158 3159 3160 3161 3162 3163 3164 3165 3166 3167 3168 3169 3170 3171 3172 3173 3174 3175 3176 3177 3178 3179 3180 3181 3182 3183 3184 3185 3186 3187 3188 3189 3190 3191 3192 3193 3194 3195 3196 3197 3198 3199 3200 3201 3202 3203 3204 3205 3206 3207 3208 3209 3210 3211 3212 3213 3214 3215 3216 3217 3218 3219 3220 3221 3222 3223 3224 3225 3226 3227 3228 3229 3230 3231 3232 3233 3234 3235 3236 3237 3238 3239 3240 3241 3242 3243 3244 3245 3246 3247 3248 3249 3250 3251 3252 3253 3254 3255 3256 3257 3258 3259 3260 3261 3262 3263 3264 3265 3266 3267 3268 3269 3270 3271 3272 3273 3274 3275 3276 3277 3278 3279 3280 3281 3282 3283 3284 3285 3286 3287 3288 3289 3290 3291 3292 3293 3294 3295 3296 3297 3298 3299 3300 3301 3302 3303 3304 3305 3306 3307 3308 3309 3310 3311 3312 3313 3314 3315 3316 3317 3318 3319 3320 3321 3322 3323 3324 3325 3326 3327 3328 3329 3330 3331 3332 3333 3334 3335 3336 3337 3338 3339 3340 3341 3342 3343 3344 3345 3346 3347 3348 3349 3350 3351 3352 3353 3354 3355 3356 3357 3358 3359 3360 3361 3362 3363 3364 3365 3366 3367 3368 3369 3370 3371 3372 3373 3374 3375 3376 3377 3378 3379 3380 3381 3382 3383 3384 3385 3386 3387 3388 3389 3390 3391 3392 3393 3394 3395 3396 3397 3398 3399 3400 3401 3402 3403 3404 3405 3406 3407 3408 3409 3410 3411 3412 3413 3414 3415 3416 3417 3418 3419 3420 3421 3422 3423 3424 3425 3426 3427 3428 3429 3430 3431 3432 3433 3434 3435 3436 3437 3438 3439 3440 3441 3442 3443 3444 3445 3446 3447 3448 3449 3450 3451 3452 3453 3454 3455 3456 3457 3458 3459 3460 3461 3462 3463 3464 3465 3466 3467 3468 3469 3470 3471 3472 3473 3474 3475 3476 3477 3478 3479 3480 3481 3482 3483 3484 3485 3486 3487 3488 3489 3490 3491 3492 3493 3494 3495 3496 3497 3498 3499 3500 3501 3502 3503 3504 3505 3506 3507 3508 3509 3510 3511 3512 3513 3514 3515 3516 3517 3518 3519 3520 3521 3522 3523 3524 3525 3526 3527 3528 3529 3530 3531 3532 3533 3534 3535 3536 3537 3538 3539 3540 3541 3542 3543 3544 3545 3546 3547 3548 3549 3550 3551 3552 3553 3554 3555 3556 3557 3558 3559 3560 3561 3562 3563 3564 3565 3566 3567 3568 3569 3570 3571 3572 3573 3574 3575 3576 3577 3578 3579 3580 3581 3582 3583 3584 3585 3586 3587 3588 3589 3590 3591 3592 3593 3594 3595 3596 3597 3598 3599 3600 3601 3602 3603 3604 3605 3606 3607 3608 3609 3610 3611 3612 3613 3614 3615 3616 3617 3618 3619 3620 3621 3622 3623 3624 3625 3626 3627 3628 3629 3630 3631 3632 3633 3634 3635 3636 3637 3638 3639 3640 3641 3642 3643 3644 3645 3646 3647 3648 3649 3650 3651 3652 3653 3654 3655 3656 3657 3658 3659 3660 3661 3662 3663 3664 3665 3666 3667 3668 3669 3670 3671 3672 3673 3674 3675 3676 3677 3678 3679 3680 3681 3682 3683 3684 3685 3686 3687 3688 3689 3690 3691 3692 3693 3694 3695 3696 3697 3698 3699 3700 3701 3702 3703 3704 3705 3706 3707 3708 3709 3710 3711 3712 3713 3714 3715 3716 3717 3718 3719 3720 3721 3722 3723 3724 3725 3726 3727 3728 3729 3730 3731 3732 3733 3734 3735 3736 3737 3738 3739 3740 3741 3742 3743 3744 3745 3746 3747 3748 3749 3750 3751 3752 3753 3754 3755 3756 3757 3758 3759 3760 3761 3762 3763 3764 3765 3766 3767 3768 3769 3770 3771 3772 3773 3774 3775 3776 3777 3778 3779 3780 3781 3782 3783 3784 3785 3786 3787 3788 3789 3790 3791 3792 3793 3794 3795 3796 3797 3798 3799 3800 3801 3802 3803 3804 3805 3806 3807 3808 3809 3810 3811 3812 3813 3814 3815 3816 3817 3818 3819 3820 3821 3822 3823 3824 3825 3826 3827 3828 3829 3830 3831 3832 3833 3834 3835 3836 3837 3838 3839 3840 3841 3842 3843 3844 3845 3846 3847 3848 3849 3850 3851 3852 3853 3854 3855 3856 3857 3858 3859 3860 3861 3862 3863 3864 3865 3866 3867 3868 3869 3870 3871 3872 3873 3874 3875 3876 3877 3878 3879 3880 3881 3882 3883 3884 3885 3886 3887 3888 3889 3890 3891 3892 3893 3894 3895 3896 3897 3898 3899 3900 3901 3902 3903 3904 3905 3906 3907 3908 3909 3910 3911 3912 3913 3914 3915 3916 3917 3918 3919 3920 3921 3922 3923 3924 3925 3926 3927 3928 3929 3930 3931 3932 3933 3934 3935 3936 3937 3938 3939 3940 3941 3942 3943 3944 3945 3946 3947 3948 3949 3950 3951 3952 3953 3954 3955 3956 3957 3958 3959 3960 3961 3962 3963 3964 3965 3966 3967 3968 3969 3970 3971 3972 3973 3974 3975 3976 3977 3978 3979 3980 3981 3982 3983 3984 3985 3986 3987 3988 3989 3990 3991 3992 3993 3994 3995 3996 3997 3998 3999 4000 4001 4002 4003 4004 4005 4006 4007 4008 4009 4010 4011 4012 4013 4014 4015 4016 4017 4018 4019 4020 4021 4022 4023 4024 4025 4026 4027 4028 4029 4030 4031 4032 4033 4034 4035 4036 4037 4038 4039 4040 4041 4042 4043 4044 4045 4046 4047 4048 4049 4050 4051 4052 4053 4054 4055 4056 4057 4058 4059 4060 4061 4062 4063 4064 4065 4066 4067 4068 4069 4070 4071 4072 4073 4074 4075 4076 4077 4078 4079 4080 4081 4082 4083 4084 4085 4086 4087 4088 4089 4090 4091 4092 4093 4094 4095 4096 4097 4098 4099 4100 4101 4102 4103 4104 4105 4106 4107 4108 4109 4110 4111 4112 4113 4114 4115 4116 4117 4118 4119 4120 4121 4122 4123 4124 4125 4126 4127 4128 4129 4130 4131 4132 4133 4134 4135 4136 4137 4138 4139 4140 4141 4142 4143 4144 4145 4146 4147 4148 4149 4150 4151 4152 4153 4154 4155 4156 4157 4158 4159 4160 4161 4162 4163 4164 4165 4166 4167 4168 4169 4170 4171 4172 4173 4174 4175 4176 4177 4178 4179 4180 4181 4182 4183 4184 4185 4186 4187 4188 4189 4190 4191 4192 4193 4194 4195 4196 4197 4198 4199 4200 4201 4202 4203 4204 4205 4206 4207 4208 4209 4210 4211 4212 4213 4214 4215 4216 4217 4218 4219 4220 4221 4222 4223 4224 4225 4226 4227 4228 4229 4230 4231 4232 4233 4234 4235 4236 4237 4238 4239 4240 4241 4242 4243 4244 4245 4246 4247 4248 4249 4250 4251 4252 4253 4254 4255 4256 4257 4258 4259 4260 4261 4262 4263 4264 4265 4266 4267 4268 4269 4270 4271 4272 4273 4274 4275 4276 4277 4278 4279 4280 4281 4282 4283 4284 4285 4286 4287 4288 4289 4290 4291 4292 4293 4294 4295 4296 4297 4298 4299 4300 4301 4302 4303 4304 4305 4306 4307 4308 4309 4310 4311 4312 4313 4314 4315 4316 4317 4318 4319 4320 4321 4322 4323 4324 4325 4326 4327 4328 4329 4330 4331 4332 4333 4334 4335 4336 4337 4338 4339 4340 4341 4342 4343 4344 4345 4346 4347 4348 4349 4350 4351 4352 4353 4354 4355 4356 4357 4358 4359 4360 4361 4362 4363 4364 4365 4366 4367 4368 4369 4370 4371 4372 4373 4374 4375 4376 4377 4378 4379 4380 4381 4382 4383 4384 4385 4386 4387 4388 4389 4390 4391 4392 4393 4394 4395 4396 4397 4398 4399 4400 4401 4402 4403 4404 4405 4406 4407 4408 4409 4410 4411 4412 4413 4414 4415 4416 4417 4418 4419 4420 4421 4422 4423 4424 4425 4426 4427 4428 4429 4430 4431 4432 4433 4434 4435 4436 4437 4438 4439 4440 4441 4442 4443 4444 4445 4446 4447 4448 4449 4450 4451 4452 4453 4454 4455 4456 4457 4458 4459 4460 4461 4462 4463 4464 4465 4466 4467 4468 4469 4470 4471 4472 4473 4474 4475 4476 4477 4478 4479 4480 4481 4482 4483 4484 4485 4486 4487 4488 4489 4490 4491 4492 4493 4494 4495 4496 4497 4498 4499 4500 4501 4502 4503 4504 4505 4506 4507 4508 4509 4510 4511 4512 4513 4514 4515 4516 4517 4518 4519 4520 4521 4522 4523 4524 4525 4526 4527 4528 4529 4530 4531 4532 4533 4534 4535 4536 4537 4538 4539 4540 4541 4542 4543 4544 4545 4546 4547 4548 4549 4550 4551 4552 4553 4554 4555 4556 4557 4558 4559 4560 4561 4562 4563 4564 4565 4566 4567 4568 4569 4570 4571 4572 4573 4574 4575 4576 4577 4578 4579 4580 4581 4582 4583 4584 4585 4586 4587 4588 4589 4590 4591 4592 4593 4594 4595 4596 4597 4598 4599 4600 4601 4602 4603 4604 4605 4606 4607 4608 4609 4610 4611 4612 4613 4614 4615 4616 4617 4618 4619 4620 4621 4622 4623 4624 4625 4626 4627 4628 4629 4630 4631 4632 4633 4634 4635 4636 4637 4638 4639 4640 4641 4642 4643 4644 4645 4646 4647 4648 4649 4650 4651 4652 4653 4654 4655 4656 4657 4658 4659 4660 4661 4662 4663 4664 4665 4666 4667 4668 4669 4670 4671 4672 4673 4674 4675 4676 4677 4678 4679 4680 4681 4682 4683 4684 4685 4686 4687 4688 4689 4690 4691 4692 4693 4694 4695 4696 4697 4698 4699 4700 4701 4702 4703 4704 4705 4706 4707 4708 4709 4710 4711 4712 4713 4714 4715 4716 4717 4718 4719 4720 4721 4722 4723 4724 4725 4726 4727 4728 4729 4730 4731 4732 4733 4734 4735 4736 4737 4738 4739 4740 4741 4742 4743 4744 4745 4746 4747 4748 4749 4750 4751 4752 4753 4754 4755 4756 4757 4758 4759 4760 4761 4762 4763 4764 4765 4766 4767 4768 4769 4770 4771 4772 4773 4774 4775 4776 4777 4778 4779 4780 4781 4782 4783 4784 4785 4786 4787 4788 4789 4790 4791 4792 4793 4794 4795 4796 4797 4798 4799 4800 4801 4802 4803 4804 4805 4806 4807 4808 4809 4810 4811 4812 4813 4814 4815 4816 4817 4818 4819 4820 4821 4822 4823 4824 4825 4826 4827 4828 4829 4830 4831 4832 4833 4834 4835 4836 4837 4838 4839 4840 4841 4842 4843 4844 4845 4846 4847 4848 4849 4850 4851 4852 4853 4854 4855 4856 4857 4858 4859 4860 4861 4862 4863 4864 4865 4866 4867 4868 4869 4870 4871 4872 4873 4874 4875 4876 4877 4878 4879 4880 4881 4882 4883 4884 4885 4886 4887 4888 4889 4890 4891 4892 4893 4894 4895 4896 4897 4898 4899 4900 4901 4902 4903 4904 4905 4906 4907 4908 4909 4910 4911 4912 4913 4914 4915 4916 4917 4918 4919 4920 4921 4922 4923 4924 4925 4926 4927 4928 4929 4930 4931 4932 4933 4934 4935 4936 4937 4938 4939 4940 4941 4942 4943 4944 4945 4946 4947 4948 4949 4950 4951 4952 4953 4954 4955 4956 4957 4958 4959 4960 4961 4962 4963 4964 4965 4966 4967 4968 4969 4970 4971 4972 4973 4974 4975 4976 4977 4978 4979 4980 4981 4982 4983 4984 4985 4986 4987 4988 4989 4990 4991 4992 4993 4994 4995 4996 4997 4998 4999 5000 5001 5002 5003 5004 5005 5006 5007 5008 5009 5010 5011 5012 5013 5014 5015 5016 5017 5018 5019 5020 5021 5022 5023 5024 5025 5026 5027 5028 5029 5030 5031 5032 5033 5034 5035 5036 5037 5038 5039 5040 5041 5042 5043 5044 5045 5046 5047 5048 5049 5050 5051 5052 5053 5054 5055 5056 5057 5058 5059 5060 5061 5062 5063 5064 5065 5066 5067 5068 5069 5070 5071 5072 5073 5074 5075 5076 5077 5078 5079 5080 5081 5082 5083 5084 5085 5086 5087 5088 5089 5090 5091 5092 5093 5094 5095 5096 5097 5098 5099 5100 5101 5102 5103 5104 5105 5106 5107 5108 5109 5110 5111 5112 5113 5114 5115 5116 5117 5118 5119 5120 5121 5122 5123 5124 5125 5126 5127 5128 5129 5130 5131 5132 5133 5134 5135 5136 5137 5138 5139 5140 5141 5142 5143 5144 5145 5146 5147 5148 5149 5150 5151 5152 5153 5154 5155 5156 5157 5158 5159 5160 5161 5162 5163 5164 5165 5166 5167 5168 5169 5170 5171 5172 5173 5174 5175 5176 5177 5178 5179 5180 5181 5182 5183 5184 5185 5186 5187 5188 5189 5190 5191 5192 5193 5194 5195 5196 5197 5198 5199 5200 5201 5202 5203 5204 5205 5206 5207 5208 5209 5210 5211 5212 5213 5214 5215 5216 5217 5218 5219 5220 5221 5222 5223 5224 5225 5226 5227 5228 5229 5230 5231 5232 5233 5234 5235 5236 5237 5238 5239 5240 5241 5242 5243 5244 5245 5246 5247 5248 5249 5250 5251 5252 5253 5254 5255 5256 5257 5258 5259 5260 5261 5262 5263 5264 5265 5266 5267 5268 5269 5270 5271 5272 5273 5274 5275 5276 5277 5278 5279 5280 5281 5282 5283 5284 5285 5286 5287 5288 5289 5290 5291 5292 5293 5294 5295 5296 5297 5298 5299 5300 5301 5302 5303 5304 5305 5306 5307 5308 5309 5310 5311 5312 5313 5314 5315 5316 5317 5318 5319 5320 5321 5322 5323 5324 5325 5326 5327 5328 5329 5330 5331 5332 5333 5334 5335 5336 5337 5338 5339 5340 5341 5342 5343 5344 5345 5346 5347 5348 5349 5350 5351 5352 5353 5354 5355 5356 5357 5358 5359 5360 5361 5362 5363 5364 5365 5366 5367 5368 5369 5370 5371 5372 5373 5374 5375 5376 5377 5378 5379 5380 5381 5382 5383 5384 5385 5386 5387 5388 5389 5390 5391 5392 5393 5394 5395 5396 5397 5398 5399 5400 5401 5402 5403 5404 5405 5406 5407 5408 5409 5410 5411 5412 5413 5414 5415 5416 5417 5418 5419 5420 5421 5422 5423 5424 5425 5426 5427 5428 5429 5430 5431 5432 5433 5434 5435 5436 5437 5438 5439 5440 5441 5442 5443 5444 5445 5446 5447 5448 5449 5450 5451 5452 5453 5454 5455 5456 5457 5458 5459 5460 5461 5462 5463 5464 5465 5466 5467 5468 5469 5470 5471 5472 5473 5474 5475 5476 5477 5478 5479 5480 5481 5482 5483 5484 5485 5486 5487 5488 5489 5490 5491 5492 5493 5494 5495 5496 5497 5498 5499 5500 5501 5502 5503 5504 5505 5506 5507 5508 5509 5510 5511 5512 5513 5514 5515 5516 5517 5518 5519 5520 5521 5522 5523 5524 5525 5526 5527 5528 5529 5530 5531 5532 5533 5534 5535 5536 5537 5538 5539 5540 5541 5542 5543 5544 5545 5546 5547 5548 5549 5550 5551 5552 5553 5554 5555 5556 5557 5558 5559 5560 5561 5562 5563 5564 5565 5566 5567 5568 5569 5570 5571 5572 5573 5574 5575 5576 5577 5578 5579 5580 5581 5582 5583 5584 5585 5586 5587 5588 5589 5590 5591 5592 5593 5594 5595 5596 5597 5598 5599 5600 5601 5602 5603 5604 5605 5606 5607 5608 5609 5610 5611 5612 5613 5614 5615 5616 5617 5618 5619 5620 5621 5622 5623 5624 5625 5626 5627 5628 5629 5630 5631 5632 5633 5634 5635 5636 5637 5638 5639 5640 5641 5642 5643 5644 5645 5646 5647 5648 5649 5650 5651 5652 5653 5654 5655 5656 5657 5658 5659 5660 5661 5662 5663 5664 5665 5666 5667 5668 5669 5670 5671 5672 5673 5674 5675 5676 5677 5678 5679 5680 5681 5682 5683 5684 5685 5686 5687 5688 5689 5690 5691 5692 5693 5694 5695 5696 5697 5698 5699 5700 5701 5702 5703 5704 5705 5706 5707 5708 5709 5710 5711 5712 5713 5714 5715 5716 5717 5718 5719 5720 5721 5722 5723 5724 5725 5726 5727 5728 5729 5730 5731 5732 5733 5734 5735 5736 5737 5738 5739 5740 5741 5742 5743 5744 5745 5746 5747 5748 5749 5750 5751 5752 5753 5754 5755 5756 5757 5758 5759 5760 5761 5762 5763 5764 5765 5766 5767 5768 5769 5770 5771 5772 5773 5774 5775 5776 5777 5778 5779 5780 5781 5782 5783 5784 5785 5786 5787 5788 5789 5790 5791 5792 5793 5794 5795 5796 5797 5798 5799 5800 5801 5802 5803 5804 5805 5806 5807 5808 5809 5810 5811 5812 5813 5814 5815 5816 5817 5818 5819 5820 5821 5822 5823 5824 5825 5826 5827 5828 5829 5830 5831 5832 5833 5834 5835 5836 5837 5838 5839 5840 5841 5842 5843 5844 5845 5846 5847 5848 5849 5850 5851 5852 5853 5854 5855 5856 5857 5858 5859 5860 5861 5862 5863 5864 5865 5866 5867 5868 5869 5870 5871 5872 5873 5874 5875 5876 5877 5878 5879 5880 5881 5882 5883 5884 5885 5886 5887 5888 5889 5890 5891 5892 5893 5894 5895 5896 5897 5898 5899 5900 5901 5902 5903 5904 5905 5906 5907 5908 5909 5910 5911 5912 5913 5914 5915 5916 5917 5918 5919 5920 5921 5922 5923 5924 5925 5926 5927 5928 5929 5930 5931 5932 5933 5934 5935 5936 5937 5938 5939 5940 5941 5942 5943 5944 5945 5946 5947 5948 5949 5950 5951 5952 5953 5954 5955 5956 5957 5958 5959 5960 5961 5962 5963 5964 5965 5966 5967 5968 5969 5970 5971 5972 5973 5974 5975 5976 5977 5978 5979 5980 5981 5982 5983 5984 5985 5986 5987 5988 5989 5990 5991 5992 5993 5994 5995 5996 5997 5998 5999 6000 6001 6002 6003 6004 6005 6006 6007 6008 6009 6010 6011 6012 6013 6014 6015 6016 6017 6018 6019 6020 6021 6022 6023 6024 6025 6026 6027 6028 6029 6030 6031 6032 6033 6034 6035 6036 6037 6038 6039 6040 6041 6042 6043 6044 6045 6046 6047 6048 6049 6050 6051 6052 6053 6054 6055 6056 6057 6058 6059 6060 6061 6062 6063 6064 6065 6066 6067 6068 6069 6070 6071 6072 6073 6074 6075 6076 6077 6078 6079 6080 6081 6082 6083 6084 6085 6086 6087 6088 6089 6090 6091 6092 6093 6094 6095 6096 6097 6098 6099 6100 6101 6102 6103 6104 6105 6106 6107 6108 6109 6110 6111 6112 6113 6114 6115 6116 6117 6118 6119 6120 6121 6122 6123 6124 6125 6126 6127 6128 6129 6130 6131 6132 6133 6134 6135 6136 6137 6138 6139 6140 6141 6142 6143 6144 6145 6146 6147 6148 6149 6150 6151 6152 6153 6154 6155 6156 6157 6158 6159 6160 6161 6162 6163 6164 6165 6166 6167 6168 6169 6170 6171 6172 6173 6174 6175 6176 6177 6178 6179 6180 6181 6182 6183 6184 6185 6186 6187 6188 6189 6190 6191 6192 6193 6194 6195 6196 6197 6198 6199 6200 6201 6202 6203 6204 6205 6206 6207 6208 6209 6210 6211 6212 6213 6214 6215 6216 6217 6218 6219 6220 6221 6222 6223 6224 6225 6226 6227 6228 6229 6230 6231 6232 6233 6234 6235 6236 6237 6238 6239 6240 6241 6242 6243 6244 6245 6246 6247 6248 6249 6250 6251 6252 6253 6254 6255 6256 6257 6258 6259 6260 6261 6262 6263 6264 6265 6266 6267 6268 6269 6270 6271 6272 6273 6274 6275 6276 6277 6278 6279 6280 6281 6282 6283 6284 6285 6286 6287 6288 6289 6290 6291 6292 6293 6294 6295 6296 6297 6298 6299 6300 6301 6302 6303 6304 6305 6306 6307 6308 6309 6310 6311 6312 6313 6314 6315 6316 6317 6318 6319 6320 6321 6322 6323 6324 6325 6326 6327 6328 6329 6330 6331 6332 6333 6334 6335 6336 6337 6338 6339 6340 6341 6342 6343 6344 6345 6346 6347 6348 6349 6350 6351 6352 6353 6354 6355 6356 6357 6358 6359 6360 6361 6362 6363 6364 6365 6366 6367 6368 6369 6370 6371 6372 6373 6374 6375 6376 6377 6378 6379 6380 6381 6382 6383 6384 6385 6386 6387 6388 6389 6390 6391 6392 6393 6394 6395 6396 6397 6398 6399 6400 6401 6402 6403 6404 6405 6406 6407 6408 6409 6410 6411 6412 6413 6414 6415 6416 6417 6418 6419 6420 6421 6422 6423 6424 6425 6426 6427 6428 6429 6430 6431 6432 6433 6434 6435 6436 6437 6438 6439 6440 6441 6442 6443 6444 6445 6446 6447 6448 6449 6450 6451 6452 6453 6454 6455 6456 6457 6458 6459 6460 6461 6462 6463 6464 6465 6466 6467 6468 6469 6470 6471 6472 6473 6474 6475 6476 6477 6478 6479 6480 6481 6482 6483 6484 6485 6486 6487 6488 6489 6490 6491 6492 6493 6494 6495 6496 6497 6498 6499 6500 6501 6502 6503 6504 6505 6506 6507 6508 6509 6510 6511 6512 6513 6514 6515 6516 6517 6518 6519 6520 6521 6522 6523 6524 6525 6526 6527 6528 6529 6530 6531 6532 6533 6534 6535 6536 6537 6538 6539 6540 6541 6542 6543 6544 6545 6546 6547 6548 6549 6550 6551 6552 6553 6554 6555 6556 6557 6558 6559 6560 6561 6562 6563 6564 6565 6566 6567 6568 6569 6570 6571 6572 6573 6574 6575 6576 6577 6578 6579 6580 6581 6582 6583 6584 6585 6586 6587 6588 6589 6590 6591 6592 6593 6594 6595 6596 6597 6598 6599 6600 6601 6602 6603 6604 6605 6606 6607 6608 6609 6610 6611 6612 6613 6614 6615 6616 6617 6618 6619 6620 6621 6622 6623 6624 6625 6626 6627 6628 6629 6630 6631 6632 6633 6634 6635 6636 6637 6638 6639 6640 6641 6642 6643 6644 6645 6646 6647 6648 6649 6650 6651 6652 6653 6654 6655 6656 6657 6658 6659 6660 6661 6662 6663 6664 6665 6666 6667 6668 6669 6670 6671 6672 6673 6674 6675 6676 6677 6678 6679 6680 6681 6682 6683 6684 6685 6686 6687 6688 6689 6690 6691 6692 6693 6694 6695 6696 6697 6698 6699 6700 6701 6702 6703 6704 6705 6706 6707 6708 6709 6710 6711 6712 6713 6714 6715 6716 6717 6718 6719 6720 6721 6722 6723 6724 6725 6726 6727 6728 6729 6730 6731 6732 6733 6734 6735 6736 6737 6738 6739 6740 6741 6742 6743 6744 6745 6746 6747 6748 6749 6750 6751 6752 6753 6754 6755 6756 6757 6758 6759 6760 6761 6762 6763 6764 6765 6766 6767 6768 6769 6770 6771 6772 6773 6774 6775 6776 6777 6778 6779 6780 6781 6782 6783 6784 6785 6786 6787 6788 6789 6790 6791 6792 6793 6794 6795 6796 6797 6798 6799 6800 6801 6802 6803 6804 6805 6806 6807 6808 6809 6810 6811 6812 6813 6814 6815 6816 6817 6818 6819 6820 6821 6822 6823 6824 6825 6826 6827 6828 6829 6830 6831 6832 6833 6834 6835 6836 6837 6838 6839 6840 6841 6842 6843 6844 6845 6846 6847 6848 6849 6850 6851 6852 6853 6854 6855 6856 6857 6858 6859 6860 6861 6862 6863 6864 6865 6866 6867 6868 6869 6870 6871 6872 6873 6874 6875 6876 6877 6878 6879 6880 6881 6882 6883 6884 6885 6886 6887 6888 6889 6890 6891 6892 6893 6894 6895 6896 6897 6898 6899 6900 6901 6902 6903 6904 6905 6906 6907 6908 6909 6910 6911 6912 6913 6914 6915 6916 6917 6918 6919 6920 6921 6922 6923 6924 6925 6926 6927 6928 6929 6930 6931 6932 6933 6934 6935 6936 6937 6938 6939 6940 6941 6942 6943 6944 6945 6946 6947 6948 6949 6950 6951 6952 6953 6954 6955 6956 6957 6958 6959 6960 6961 6962 6963 6964 6965 6966 6967 6968 6969 6970 6971 6972 6973 6974 6975 6976 6977 6978 6979 6980 6981 6982 6983 6984 6985 6986 6987 6988 6989 6990 6991 6992 6993 6994 6995 6996 6997 6998 6999 7000 7001 7002 7003 7004 7005 7006 7007 7008 7009 7010 7011 7012 7013 7014 7015 7016 7017 7018 7019 7020 7021 7022 7023 7024 7025 7026 7027 7028 7029 7030 7031 7032 7033 7034 7035 7036 7037 7038 7039 7040 7041 7042 7043 7044 7045 7046 7047 7048 7049 7050 7051 7052 7053 7054 7055 7056 7057 7058 7059 7060 7061 7062 7063 7064 7065 7066 7067 7068 7069 7070 7071 7072 7073 7074 7075 7076 7077 7078 7079 7080 7081 7082 7083 7084 7085 7086 7087 7088 7089 7090 7091 7092 7093 7094 7095 7096 7097 7098 7099 7100 7101 7102 7103 7104 7105 7106 7107 7108 7109 7110 7111 7112 7113 7114 7115 7116 7117 7118 7119 7120 7121 7122 7123 7124 7125 7126 7127 7128 7129 7130 7131 7132 7133 7134 7135 7136 7137 7138 7139 7140 7141 7142 7143 7144 7145 7146 7147 7148 7149 7150 7151 7152 7153 7154 7155 7156 7157 7158 7159 7160 7161 7162 7163 7164 7165 7166 7167 7168 7169 7170 7171 7172 7173 7174 7175 7176 7177 7178 7179 7180 7181 7182 7183 7184 7185 7186 7187 7188 7189 7190 7191 7192 7193 7194 7195 7196 7197 7198 7199 7200 7201 7202 7203 7204 7205 7206 7207 7208 7209 7210 7211 7212 7213 7214 7215 7216 7217 7218 7219 7220 7221 7222 7223 7224 7225 7226 7227 7228 7229 7230 7231 7232 7233 7234 7235 7236 7237 7238 7239 7240 7241 7242 7243 7244 7245 7246 7247 7248 7249 7250 7251 7252 7253 7254 7255 7256 7257 7258 7259 7260 7261 7262 7263 7264 7265 7266 7267 7268 7269 7270 7271 7272 7273 7274 7275 7276 7277 7278 7279 7280 7281 7282 7283 7284 7285 7286 7287 7288 7289 7290 7291 7292 7293 7294 7295 7296 7297 7298 7299 7300 7301 7302 7303 7304 7305 7306 7307 7308 7309 7310 7311 7312 7313 7314 7315 7316 7317 7318 7319 7320 7321 7322 7323 7324 7325 7326 7327 7328 7329 7330 7331 7332 7333 7334 7335 7336 7337 7338 7339 7340 7341 7342 7343 7344 7345 7346 7347 7348 7349 7350 7351 7352 7353 7354 7355 7356 7357 7358 7359 7360 7361 7362 7363 7364 7365 7366 7367 7368 7369 7370 7371 7372 7373 7374 7375 7376 7377 7378 7379 7380 7381 7382 7383 7384 7385 7386 7387 7388 7389 7390 7391 7392 7393 7394 7395 7396 7397 7398 7399 7400 7401 7402 7403 7404 7405 7406 7407 7408 7409 7410 7411 7412 7413 7414 7415 7416 7417 7418 7419 7420 7421 7422 7423 7424 7425 7426 7427 7428 7429 7430 7431 7432 7433 7434 7435 7436 7437 7438 7439 7440 7441 7442 7443 7444 7445 7446 7447 7448 7449 7450 7451 7452 7453 7454 7455 7456 7457 7458 7459 7460 7461 7462 7463 7464 7465 7466 7467 7468 7469 7470 7471 7472 7473 7474 7475 7476 7477 7478 7479 7480 7481 7482 7483 7484 7485 7486 7487 7488 7489 7490 7491 7492 7493 7494 7495 7496 7497 7498 7499 7500 7501 7502 7503 7504 7505 7506 7507 7508 7509 7510 7511 7512 7513 7514 7515 7516 7517 7518 7519 7520 7521 7522 7523 7524 7525 7526 7527 7528 7529 7530 7531 7532 7533 7534 7535 7536 7537 7538 7539 7540 7541 7542 7543 7544 7545 7546 7547 7548 7549 7550 7551 7552 7553 7554 7555 7556 7557 7558 7559 7560 7561 7562 7563 7564 7565 7566 7567 7568 7569 7570 7571 7572 7573 7574 7575 7576 7577 7578 7579 7580 7581 7582 7583 7584 7585 7586 7587 7588 7589 7590 7591 7592 7593 7594 7595 7596 7597 7598 7599 7600 7601 7602 7603 7604 7605 7606 7607 7608 7609 7610 7611 7612 7613 7614 7615 7616 7617 7618 7619 7620 7621 7622 7623 7624 7625 7626 7627 7628 7629 7630 7631 7632 7633 7634 7635 7636 7637 7638 7639 7640 7641 7642 7643 7644 7645 7646 7647 7648 7649 7650 7651 7652 7653 7654 7655 7656 7657 7658 7659 7660 7661 7662 7663 7664 7665 7666 7667 7668 7669 7670 7671 7672 7673 7674 7675 7676 7677 7678 7679 7680 7681 7682 7683 7684 7685 7686 7687 7688 7689 7690 7691 7692 7693 7694 7695 7696 7697 7698 7699 7700 7701 7702 7703 7704 7705 7706 7707 7708 7709 7710 7711 7712 7713 7714 7715 7716 7717 7718 7719 7720 7721 7722 7723 7724 7725 7726 7727 7728 7729 7730 7731 7732 7733 7734 7735 7736 7737 7738 7739 7740 7741 7742 7743 7744 7745 7746 7747 7748 7749 7750 7751 7752 7753 7754 7755 7756 7757 7758 7759 7760 7761 7762 7763 7764 7765 7766 7767 7768 7769 7770 7771 7772 7773 7774 7775 7776 7777 7778 7779 7780 7781 7782 7783 7784 7785 7786 7787 7788 7789 7790 7791 7792 7793 7794 7795 7796 7797 7798 7799 7800 7801 7802 7803 7804 7805 7806 7807 7808 7809 7810 7811 7812 7813 7814 7815 7816 7817 7818 7819 7820 7821 7822 7823 7824 7825 7826 7827 7828 7829 7830 7831 7832 7833 7834 7835 7836 7837 7838 7839 7840 7841 7842 7843 7844 7845 7846 7847 7848 7849 7850 7851 7852 7853 7854 7855 7856 7857 7858 7859 7860 7861 7862 7863 7864 7865 7866 7867 7868 7869 7870 7871 7872 7873 7874 7875 7876 7877 7878 7879 7880 7881 7882 7883 7884 7885 7886 7887 7888 7889 7890 7891 7892 7893 7894 7895 7896 7897 7898 7899 7900 7901 7902 7903 7904 7905 7906 7907 7908 7909 7910 7911 7912 7913 7914 7915 7916 7917 7918 7919 7920 7921 7922 7923 7924 7925 7926 7927 7928 7929 7930 7931 7932 7933 7934 7935 7936 7937 7938 7939 7940 7941 7942 7943 7944 7945 7946 7947 7948 7949 7950 7951 7952 7953 7954 7955 7956 7957 7958 7959 7960 7961 7962 7963 7964 7965 7966 7967 7968 7969 7970 7971 7972 7973 7974 7975 7976 7977 7978 7979 7980 7981 7982 7983 7984 7985 7986 7987 7988 7989 7990 7991 7992 7993 7994 7995 7996 7997 7998 7999 8000 8001 8002 8003 8004 8005 8006 8007 8008 8009 8010 8011 8012 8013 8014 8015 8016 8017 8018 8019 8020 8021 8022 8023 8024 8025 8026 8027 8028 8029 8030 8031 8032 8033 8034 8035 8036 8037 8038 8039 8040 8041 8042 8043 8044 8045 8046 8047 8048 8049 8050 8051 8052 8053 8054 8055 8056 8057 8058 8059 8060 8061 8062 8063 8064 8065 8066 8067 8068 8069 8070 8071 8072 8073 8074 8075 8076 8077 8078 8079 8080 8081 8082 8083 8084 8085 8086 8087 8088 8089 8090 8091 8092 8093 8094 8095 8096 8097 8098 8099 8100 8101 8102 8103 8104 8105 8106 8107 8108 8109 8110 8111 8112 8113 8114 8115 8116 8117 8118 8119 8120 8121 8122 8123 8124 8125 8126 8127 8128 8129 8130 8131 8132 8133 8134 8135 8136 8137 8138 8139 8140 8141 8142 8143 8144 8145 8146 8147 8148 8149 8150 8151 8152 8153 8154 8155 8156 8157 8158 8159 8160 8161 8162 8163 8164 8165 8166 8167 8168 8169 8170 8171 8172 8173 8174 8175 8176 8177 8178 8179 8180 8181 8182 8183 8184 8185 8186 8187 8188 8189 8190 8191 8192 8193 8194 8195 8196 8197 8198 8199 8200 8201 8202 8203 8204 8205 8206 8207 8208 8209 8210 8211 8212 8213 8214 8215 8216 8217 8218 8219 8220 8221 8222 8223 8224 8225 8226 8227 8228 8229 8230 8231 8232 8233 8234 8235 8236 8237 8238 8239 8240 8241 8242 8243 8244 8245 8246 8247 8248 8249 8250 8251 8252 8253 8254 8255 8256 8257 8258 8259 8260 8261 8262 8263 8264 8265 8266 8267 8268 8269 8270 8271 8272 8273 8274 8275 8276 8277 8278 8279 8280 8281 8282 8283 8284 8285 8286 8287 8288 8289 8290 8291 8292 8293 8294 8295 8296 8297 8298 8299 8300 8301 8302 8303 8304 8305 8306 8307 8308 8309 8310 8311 8312 8313 8314 8315 8316 8317 8318 8319 8320 8321 8322 8323 8324 8325 8326 8327 8328 8329 8330 8331 8332 8333 8334 8335 8336 8337 8338 8339 8340 8341 8342 8343 8344 8345 8346 8347 8348 8349 8350 8351 8352 8353 8354 8355 8356 8357 8358 8359 8360 8361 8362 8363 8364 8365 8366 8367 8368 8369 8370 8371 8372 8373 8374 8375 8376 8377 8378 8379 8380 8381 8382 8383 8384 8385 8386 8387 8388 8389 8390 8391 8392 8393 8394 8395 8396 8397 8398 8399 8400 8401 8402 8403 8404 8405 8406 8407 8408 8409 8410 8411 8412 8413 8414 8415 8416 8417 8418 8419 8420 8421 8422 8423 8424 8425 8426 8427 8428 8429 8430 8431 8432 8433 8434 8435 8436 8437 8438 8439 8440 8441 8442 8443 8444 8445 8446 8447 8448 8449 8450 8451 8452 8453 8454 8455 8456 8457 8458 8459 8460 8461 8462 8463 8464 8465 8466 8467 8468 8469 8470 8471 8472 8473 8474 8475 8476 8477 8478 8479 8480 8481 8482 8483 8484 8485 8486 8487 8488 8489 8490 8491 8492 8493 8494 8495 8496 8497 8498 8499 8500 8501 8502 8503 8504 8505 8506 8507 8508 8509 8510 8511 8512 8513 8514 8515 8516 8517 8518 8519 8520 8521 8522 8523 8524 8525 8526 8527 8528 8529 8530 8531 8532 8533 8534 8535 8536 8537 8538 8539 8540 8541 8542 8543 8544 8545 8546 8547 8548 8549 8550 8551 8552 8553 8554 8555 8556 8557 8558 8559 8560 8561 8562 8563 8564 8565 8566 8567 8568 8569 8570 8571 8572 8573 8574 8575 8576 8577 8578 8579 8580 8581 8582 8583 8584 8585 8586 8587 8588 8589 8590 8591 8592 8593 8594 8595 8596 8597 8598 8599 8600 8601 8602 8603 8604 8605 8606 8607 8608 8609 8610 8611 8612 8613 8614 8615 8616 8617 8618 8619 8620 8621 8622 8623 8624 8625 8626 8627 8628 8629 8630 8631 8632 8633 8634 8635 8636 8637 8638 8639 8640 8641 8642 8643 8644 8645 8646 8647 8648 8649 8650 8651 8652 8653 8654 8655 8656 8657 8658 8659 8660 8661 8662 8663 8664 8665 8666 8667 8668 8669 8670 8671 8672 8673 8674 8675 8676 8677 8678 8679 8680 8681 8682 8683 8684 8685 8686 8687 8688 8689 8690 8691 8692 8693 8694 8695 8696 8697 8698 8699 8700 8701 8702 8703 8704 8705 8706 8707 8708 8709 8710 8711 8712 8713 8714 8715 8716 8717 8718 8719 8720 8721 8722 8723 8724 8725 8726 8727 8728 8729 8730 8731 8732 8733 8734 8735 8736 8737 8738 8739 8740 8741 8742 8743 8744 8745 8746 8747 8748 8749 8750 8751 8752 8753 8754 8755 8756 8757 8758 8759 8760 8761 8762 8763 8764 8765 8766 8767 8768 8769 8770 8771 8772 8773 8774 8775 8776 8777 8778 8779 8780 8781 8782 8783 8784 8785 8786 8787 8788 8789 8790 8791 8792 8793 8794 8795 8796 8797 8798 8799 8800 8801 8802 8803 8804 8805 8806 8807 8808 8809 8810 8811 8812 8813 8814 8815 8816 8817 8818 8819 8820 8821 8822 8823 8824 8825 8826 8827 8828 8829 8830 8831 8832 8833 8834 8835 8836 8837 8838 8839 8840 8841 8842 8843 8844 8845 8846 8847 8848 8849 8850 8851 8852 8853 8854 8855 8856 8857 8858 8859 8860 8861 8862 8863 8864 8865 8866 8867 8868 8869 8870 8871 8872 8873 8874 8875 8876 8877 8878 8879 8880 8881 8882 8883 8884 8885 8886 8887 8888 8889 8890 8891 8892 8893 8894 8895 8896 8897 8898 8899 8900 8901 8902 8903 8904 8905 8906 8907 8908 8909 8910 8911 8912 8913 8914 8915 8916 8917 8918 8919 8920 8921 8922 8923 8924 8925 8926 8927 8928 8929 8930 8931 8932 8933 8934 8935 8936 8937 8938 8939 8940 8941 8942 8943 8944 8945 8946 8947 8948 8949 8950 8951 8952 8953 8954 8955 8956 8957 8958 8959 8960 8961 8962 8963 8964 8965 8966 8967 8968 8969 8970 8971 8972 8973 8974 8975 8976 8977 8978 8979 8980 8981 8982 8983 8984 8985 8986 8987 8988 8989 8990 8991 8992 8993 8994 8995 8996 8997 8998 8999 9000 9001 9002 9003 9004 9005 9006 9007 9008 9009 9010 9011 9012 9013 9014 9015 9016 9017 9018 9019 9020 9021 9022 9023 9024 9025 9026 9027 9028 9029 9030 9031 9032 9033 9034 9035 9036 9037 9038 9039 9040 9041 9042 9043 9044 9045 9046 9047 9048 9049 9050 9051 9052 9053 9054 9055 9056 9057 9058 9059 9060 9061 9062 9063 9064 9065 9066 9067 9068 9069 9070 9071 9072 9073 9074 9075 9076 9077 9078 9079 9080 9081 9082 9083 9084 9085 9086 9087 9088 9089 9090 9091 9092 9093 9094 9095 9096 9097 9098 9099 9100 9101 9102 9103 9104 9105 9106 9107 9108 9109 9110 9111 9112 9113 9114 9115 9116 9117 9118 9119 9120 9121 9122 9123 9124 9125 9126 9127 9128 9129 9130 9131 9132 9133 9134 9135 9136 9137 9138 9139 9140 9141 9142 9143 9144 9145 9146 9147 9148 9149 9150 9151 9152 9153 9154 9155 9156 9157 9158 9159 9160 9161 9162 9163 9164 9165 9166 9167 9168 9169 9170 9171 9172 9173 9174 9175 9176 9177 9178 9179 9180 9181 9182 9183 9184 9185 9186 9187 9188 9189 9190 9191 9192 9193 9194 9195 9196 9197 9198 9199 9200 9201 9202 9203 9204 9205 9206 9207 9208 9209 9210 9211 9212 9213 9214 9215 9216 9217 9218 9219 9220 9221 9222 9223 9224 9225 9226 9227 9228 9229 9230 9231 9232 9233 9234 9235 9236 9237 9238 9239 9240 9241 9242 9243 9244 9245 9246 9247 9248 9249 9250 9251 9252 9253 9254 9255 9256 9257 9258 9259 9260 9261 9262 9263 9264 9265 9266 9267 9268 9269 9270 9271 9272 9273 9274 9275 9276 9277 9278 9279 9280 9281 9282 9283 9284 9285 9286 9287 9288 9289 9290 9291 9292 9293 9294 9295 9296 9297 9298 9299 9300 9301 9302 9303 9304 9305 9306 9307 9308 9309 9310 9311 9312 9313 9314 9315 9316 9317 9318 9319 9320 9321 9322 9323 9324 9325 9326 9327 9328 9329 9330 9331 9332 9333 9334 9335 9336 9337 9338 9339 9340 9341 9342 9343 9344 9345 9346 9347 9348 9349 9350 9351 9352 9353 9354 9355 9356 9357 9358 9359 9360 9361 9362 9363 9364 9365 9366 9367 9368 9369 9370 9371 9372 9373 9374 9375 9376 9377 9378 9379 9380 9381 9382 9383 9384 9385 9386 9387 9388 9389 9390 9391 9392 9393 9394 9395 9396 9397 9398 9399 9400 9401 9402 9403 9404 9405 9406 9407 9408 9409 9410 9411 9412 9413 9414 9415 9416 9417 9418 9419 9420 9421 9422 9423 9424 9425 9426 9427 9428 9429 9430 9431 9432 9433 9434 9435 9436 9437 9438 9439 9440 9441 9442 9443 9444 9445 9446 9447 9448 9449 9450 9451 9452 9453 9454 9455 9456 9457 9458 9459 9460 9461 9462 9463 9464 9465 9466 9467 9468 9469 9470 9471 9472 9473 9474 9475 9476 9477 9478 9479 9480 9481 9482 9483 9484 9485 9486 9487 9488 9489 9490 9491 9492 9493 9494 9495 9496 9497 9498 9499 9500 9501 9502 9503 9504 9505 9506 9507 9508 9509 9510 9511 9512 9513 9514 9515 9516 9517 9518 9519 9520 9521 9522 9523 9524 9525 9526 9527 9528 9529 9530 9531 9532 9533 9534 9535 9536 9537 9538 9539 9540 9541 9542 9543 9544 9545 9546 9547 9548 9549 9550 9551 9552 9553 9554 9555 9556 9557 9558 9559 9560 9561 9562 9563 9564 9565 9566 9567 9568 9569 9570 9571 9572 9573 9574 9575 9576 9577 9578 9579 9580 9581 9582 9583 9584 9585 9586 9587 9588 9589 9590 9591 9592 9593 9594 9595 9596 9597 9598 9599 9600 9601 9602 9603 9604 9605 9606 9607 9608 9609 9610 9611 9612 9613 9614 9615 9616 9617 9618 9619 9620 9621 9622 9623 9624 9625 9626 9627 9628 9629 9630 9631 9632 9633 9634 9635 9636 9637 9638 9639 9640 9641 9642 9643 9644 9645 9646 9647 9648 9649 9650 9651 9652 9653 9654 9655 9656 9657 9658 9659 9660 9661 9662 9663 9664 9665 9666 9667 9668 9669 9670 9671 9672 9673 9674 9675 9676 9677 9678 9679 9680 9681 9682 9683 9684 9685 9686 9687 9688 9689 9690 9691 9692 9693 9694 9695 9696 9697 9698 9699 9700 9701 9702 9703 9704 9705 9706 9707 9708 9709 9710 9711 9712 9713 9714 9715 9716 9717 9718 9719 9720 9721 9722 9723 9724 9725 9726 9727 9728 9729 9730 9731 9732 9733 9734 9735 9736 9737 9738 9739 9740 9741 9742 9743 9744 9745 9746 9747 9748 9749 9750 9751 9752 9753 9754 9755 9756 9757 9758 9759 9760 9761 9762 9763 9764 9765 9766 9767 9768 9769 9770 9771 9772 9773 9774 9775 9776 9777 9778 9779 9780 9781 9782 9783 9784 9785 9786 9787 9788 9789 9790 9791 9792 9793 9794 9795 9796 9797 9798 9799 9800 9801 9802 9803 9804 9805 9806 9807 9808 9809 9810 9811 9812 9813 9814 9815 9816 9817 9818 9819 9820 9821 9822 9823 9824 9825 9826 9827 9828 9829 9830 9831 9832 9833 9834 9835 9836 9837 9838 9839 9840 9841 9842 9843 9844 9845 9846 9847 9848 9849 9850 9851 9852 9853 9854 9855 9856 9857 9858 9859 9860 9861 9862 9863 9864 9865 9866 9867 9868 9869 9870 9871 9872 9873 9874 9875 9876 9877 9878 9879 9880 9881 9882 9883 9884 9885 9886 9887 9888 9889 9890 9891 9892 9893 9894 9895 9896 9897 9898 9899 9900 9901 9902 9903 9904 9905 9906 9907 9908 9909 9910 9911 9912 9913 9914 9915 9916 9917 9918 9919 9920 9921 9922 9923 9924 9925 9926 9927 9928 9929 9930 9931 9932 9933 9934 9935 9936 9937 9938 9939 9940 9941 9942 9943 9944 9945 9946 9947 9948 9949 9950 9951 9952 9953 9954 9955 9956 9957 9958 9959 9960 9961 9962 9963 9964 9965 9966 9967 9968 9969 9970 9971 9972 9973 9974 9975 9976 9977 9978 9979 9980 9981 9982 9983 9984 9985 9986 9987 9988 9989 9990 9991 9992 9993 9994 9995 9996 9997 9998 9999 10000 10001 10002 10003 10004 10005 10006 10007 10008 10009 10010 10011 10012 10013 10014 10015 10016 10017 10018 10019 10020 10021 10022 10023 10024 10025 10026 10027 10028 10029 10030 10031 10032 10033 10034 10035 10036 10037 10038 10039 10040 10041 10042 10043 10044 10045 10046 10047 10048 10049 10050 10051 10052 10053 10054 10055 10056 10057 10058 10059 10060 10061 10062 10063 10064 10065 10066 10067 10068 10069 10070 10071 10072 10073 10074 10075 10076 10077 10078 10079 10080 10081 10082 10083 10084 10085 10086 10087 10088 10089 10090 10091 10092 10093 10094 10095 10096 10097 10098 10099 10100 10101 10102 10103 10104 10105 10106 10107 10108 10109 10110 10111 10112 10113 10114 10115 10116 10117 10118 10119 10120 10121 10122 10123 10124 10125 10126 10127 10128 10129 10130 10131 10132 10133 10134 10135 10136 10137 10138 10139 10140 10141 10142 10143 10144 10145 10146 10147 10148 10149 10150 10151 10152 10153 10154 10155 10156 10157 10158 10159 10160 10161 10162 10163 10164 10165 10166 10167 10168 10169 10170 10171 10172 10173 10174 10175 10176 10177 10178 10179 10180 10181 10182 10183 10184 10185 10186 10187 10188 10189 10190 10191 10192 10193 10194 10195 10196 10197 10198 10199 10200 10201 10202 10203 10204 10205 10206 10207 10208 10209 10210 10211 10212 10213 10214 10215 10216 10217 10218 10219 10220 10221 10222 10223 10224 10225 10226 10227 10228 10229 10230 10231 10232 10233 10234 10235 10236 10237 10238 10239 10240 10241 10242 10243 10244 10245 10246 10247 10248 10249 10250 10251 10252 10253 10254 10255 10256 10257 10258 10259 10260 10261 10262 10263 10264 10265 10266 10267 10268 10269 10270 10271 10272 10273 10274 10275 10276 10277 10278 10279 10280 10281 10282 10283 10284 10285 10286 10287 10288 10289 10290 10291 10292 10293 10294 10295 10296 10297 10298 10299 10300 10301 10302 10303 10304 10305 10306 10307 10308 10309 10310 10311 10312 10313 10314 10315 10316 10317 10318 10319 10320 10321 10322 10323 10324 10325 10326 10327 10328 10329 10330 10331 10332 10333 10334 10335 10336 10337 10338 10339 10340 10341 10342 10343 10344 10345 10346 10347 10348 10349 10350 10351 10352 10353 10354 10355 10356 10357 10358 10359 10360 10361 10362 10363 10364 10365 10366 10367 10368 10369 10370 10371 10372 10373 10374 10375 10376 10377 10378 10379 10380 10381 10382 10383 10384 10385 10386 10387 10388 10389 10390 10391 10392 10393 10394 10395 10396 10397 10398 10399 10400 10401 10402 10403 10404 10405 10406 10407 10408 10409 10410 10411 10412 10413 10414 10415 10416 10417 10418 10419 10420 10421 10422 10423 10424 10425 10426 10427 10428 10429 10430 10431 10432 10433 10434 10435 10436 10437 10438 10439 10440 10441 10442 10443 10444 10445 10446 10447 10448 10449 10450 10451 10452 10453 10454 10455 10456 10457 10458 10459 10460 10461 10462 10463 10464 10465 10466 10467 10468 10469 10470 10471 10472 10473 10474 10475 10476 10477 10478 10479 10480 10481 10482 10483 10484 10485 10486 10487 10488 10489 10490 10491 10492 10493 10494 10495 10496 10497 10498 10499 10500 10501 10502 10503 10504 10505 10506 10507 10508 10509 10510 10511 10512 10513 10514 10515 10516 10517 10518 10519 10520 10521 10522 10523 10524 10525 10526 10527 10528 10529 10530 10531 10532 10533 10534 10535 10536 10537 10538 10539 10540 10541 10542 10543 10544 10545 10546 10547 10548 10549 10550 10551 10552 10553 10554 10555 10556 10557 10558 10559 10560 10561 10562 10563 10564 10565 10566 10567 10568 10569 10570 10571 10572 10573 10574 10575 10576 10577 10578 10579 10580 10581 10582 10583 10584 10585 10586 10587 10588 10589 10590 10591 10592 10593 10594 10595 10596 10597 10598 10599 10600 10601 10602 10603 10604 10605 10606 10607 10608 10609 10610 10611 10612 10613 10614 10615 10616 10617 10618 10619 10620 10621 10622 10623 10624 10625 10626 10627 10628 10629 10630 10631 10632 10633 10634 10635 10636 10637 10638 10639 10640 10641 10642 10643 10644 10645 10646 10647 10648 10649 10650 10651 10652 10653 10654 10655 10656 10657 10658 10659 10660 10661 10662 10663 10664 10665 10666 10667 10668 10669 10670 10671 10672 10673 10674 10675 10676 10677 10678 10679 10680 10681 10682 10683 10684 10685 10686 10687 10688 10689 10690 10691 10692 10693 10694 10695 10696 10697 10698 10699 10700 10701 10702 10703 10704 10705 10706 10707 10708 10709 10710 10711 10712 10713 10714 10715 10716 10717 10718 10719 10720 10721 10722 10723 10724 10725 10726 10727 10728 10729 10730 10731 10732 10733 10734 10735 10736 10737 10738 10739 10740 10741 10742 10743 10744 10745 10746 10747 10748 10749 10750 10751 10752 10753 10754 10755 10756 10757 10758 10759 10760 10761 10762 10763 10764 10765 10766 10767 10768 10769 10770 10771 10772 10773 10774 10775 10776 10777 10778 10779 10780 10781 10782 10783 10784 10785 10786 10787 10788 10789 10790 10791 10792 10793 10794 10795 10796 10797 10798 10799 10800 10801 10802 10803 10804 10805 10806 10807 10808 10809 10810 10811 10812 10813 10814 10815 10816 10817 10818 10819 10820 10821 10822 10823 10824 10825 10826 10827 10828 10829 10830 10831 10832 10833 10834 10835 10836 10837 10838 10839 10840 10841 10842 10843 10844 10845 10846 10847 10848 10849 10850 10851 10852 10853 10854 10855 10856 10857 10858 10859 10860 10861 10862 10863 10864 10865 10866 10867 10868 10869 10870 10871 10872 10873 10874 10875 10876 10877 10878 10879 10880 10881 10882 10883 10884 10885 10886 10887 10888 10889 10890 10891 10892 10893 10894 10895 10896 10897 10898 10899 10900 10901 10902 10903 10904 10905 10906 10907 10908 10909 10910 10911 10912 10913 10914 10915 10916 10917 10918 10919 10920 10921 10922 10923 10924 10925 10926 10927 10928 10929 10930 10931 10932 10933 10934 10935 10936 10937 10938 10939 10940 10941 10942 10943 10944 10945 10946 10947 10948 10949 10950 10951 10952 10953 10954 10955 10956 10957 10958 10959 10960 10961 10962 10963 10964 10965 10966 10967 10968 10969 10970 10971 10972 10973 10974 10975 10976 10977 10978 10979 10980 10981 10982 10983 10984 10985 10986 10987 10988 10989 10990 10991 10992 10993 10994 10995 10996 10997 10998 10999 11000 11001 11002 11003 11004 11005 11006 11007 11008 11009 11010 11011 11012 11013 11014 11015 11016 11017 11018 11019 11020 11021 11022 11023 11024 11025 11026 11027 11028 11029 11030 11031 11032 11033 11034 11035 11036 11037 11038 11039 11040 11041 11042 11043 11044 11045 11046 11047 11048 11049 11050 11051 11052 11053 11054 11055 11056 11057 11058 11059 11060 11061 11062 11063 11064 11065 11066 11067 11068 11069 11070 11071 11072 11073 11074 11075 11076 11077 11078 11079 11080 11081 11082 11083 11084 11085 11086 11087 11088 11089 11090 11091 11092 11093 11094 11095 11096 11097 11098 11099 11100 11101 11102 11103 11104 11105 11106 11107 11108 11109 11110 11111 11112 11113 11114 11115 11116 11117 11118 11119 11120 11121 11122 11123 11124 11125 11126 11127 11128 11129 11130 11131 11132 11133 11134 11135 11136 11137 11138 11139 11140 11141 11142 11143 11144 11145 11146 11147 11148 11149 11150 11151 11152 11153 11154 11155 11156 11157 11158 11159 11160 11161 11162 11163 11164 11165 11166 11167 11168 11169 11170 11171 11172 11173 11174 11175 11176 11177 11178 11179 11180 11181 11182 11183 11184 11185 11186 11187 11188 11189 11190 11191 11192 11193 11194 11195 11196 11197 11198 11199 11200 11201 11202 11203 11204 11205 11206 11207 11208 11209 11210 11211 11212 11213 11214 11215 11216 11217 11218 11219 11220 11221 11222 11223 11224 11225 11226 11227 11228 11229 11230 11231 11232 11233 11234 11235 11236 11237 11238 11239 11240 11241 11242 11243 11244 11245 11246 11247 11248 11249 11250 11251 11252 11253 11254 11255 11256 11257 11258 11259 11260 11261 11262 11263 11264 11265 11266 11267 11268 11269 11270 11271 11272 11273 11274 11275 11276 11277 11278 11279 11280 11281 11282 11283 11284 11285 11286 11287 11288 11289 11290 11291 11292 11293 11294 11295 11296 11297 11298 11299 11300 11301 11302 11303 11304 11305 11306 11307 11308 11309 11310 11311 11312 11313 11314 11315 11316 11317 11318 11319 11320 11321 11322 11323 11324 11325 11326 11327 11328 11329 11330 11331 11332 11333 11334 11335 11336 11337 11338 11339 11340 11341 11342 11343 11344 11345 11346 11347 11348 11349 11350 11351 11352 11353 11354 11355 11356 11357 11358 11359 11360 11361 11362 11363 11364 11365 11366 11367 11368 11369 11370 11371 11372 11373 11374 11375 11376 11377 11378 11379 11380 11381 11382 11383 11384 11385 11386 11387 11388 11389 11390 11391 11392 11393 11394 11395 11396 11397 11398 11399 11400 11401 11402 11403 11404 11405 11406 11407 11408 11409 11410 11411 11412 11413 11414 11415 11416 11417 11418 11419 11420 11421 11422 11423 11424 11425 11426 11427 11428 11429 11430 11431 11432 11433 11434 11435 11436 11437 11438 11439 11440 11441 11442 11443 11444 11445 11446 11447 11448 11449 11450 11451 11452 11453 11454 11455 11456 11457 11458 11459 11460 11461 11462 11463 11464 11465 11466 11467 11468 11469 11470 11471 11472 11473 11474 11475 11476 11477 11478 11479 11480 11481 11482 11483 11484 11485 11486 11487 11488 11489 11490 11491 11492 11493 11494 11495 11496 11497 11498 11499 11500 11501 11502 11503 11504 11505 11506 11507 11508 11509 11510 11511 11512 11513 11514 11515 11516 11517 11518 11519 11520 11521 11522 11523 11524 11525 11526 11527 11528 11529 11530 11531 11532 11533 11534 11535 11536 11537 11538 11539 11540 11541 11542 11543 11544 11545 11546 11547 11548 11549 11550 11551 11552 11553 11554 11555 11556 11557 11558 11559 11560 11561 11562 11563 11564 11565 11566 11567 11568 11569 11570 11571 11572 11573 11574 11575 11576 11577 11578 11579 11580 11581 11582 11583 11584 11585 11586 11587 11588 11589 11590 11591 11592 11593 11594 11595 11596 11597 11598 11599 11600 11601 11602 11603 11604 11605 11606 11607 11608 11609 11610 11611 11612 11613 11614 11615 11616 11617 11618 11619 11620 11621 11622 11623 11624 11625 11626 11627 11628 11629 11630 11631 11632 11633 11634 11635 11636 11637 11638 11639 11640 11641 11642 11643 11644 11645 11646 11647 11648 11649 11650 11651 11652 11653 11654 11655 11656 11657 11658 11659 11660 11661 11662 11663 11664 11665 11666 11667 11668 11669 11670 11671 11672 11673 11674 11675 11676 11677 11678 11679 11680 11681 11682 11683 11684 11685 11686 11687 11688 11689 11690 11691 11692 11693 11694 11695 11696 11697 11698 11699 11700 11701 11702 11703 11704 11705 11706 11707 11708 11709 11710 11711 11712 11713 11714 11715 11716 11717 11718 11719 11720 11721 11722 11723 11724 11725 11726 11727 11728 11729 11730 11731 11732 11733 11734 11735 11736 11737 11738 11739 11740 11741 11742 11743 11744 11745 11746 11747 11748 11749 11750 11751 11752 11753 11754 11755 11756 11757 11758 11759 11760 11761 11762 11763 11764 11765 11766 11767 11768 11769 11770 11771 11772 11773 11774 11775 11776 11777 11778 11779 11780 11781 11782 11783 11784 11785 11786 11787 // SPDX-License-Identifier: GPL-2.0 /* * ring buffer based function tracer * * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com> * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com> * * Originally taken from the RT patch by: * Arnaldo Carvalho de Melo <acme@redhat.com> * * Based on code from the latency_tracer, that is: * Copyright (C) 2004-2006 Ingo Molnar * Copyright (C) 2004 Nadia Yvette Chambers */ #include <linux/ring_buffer.h> #include <linux/utsname.h> #include <linux/stacktrace.h> #include <linux/writeback.h> #include <linux/kallsyms.h> #include <linux/security.h> #include <linux/seq_file.h> #include <linux/irqflags.h> #include <linux/syscalls.h> #include <linux/debugfs.h> #include <linux/tracefs.h> #include <linux/pagemap.h> #include <linux/hardirq.h> #include <linux/linkage.h> #include <linux/uaccess.h> #include <linux/cleanup.h> #include <linux/vmalloc.h> #include <linux/ftrace.h> #include <linux/module.h> #include <linux/percpu.h> #include <linux/splice.h> #include <linux/kdebug.h> #include <linux/string.h> #include <linux/mount.h> #include <linux/rwsem.h> #include <linux/slab.h> #include <linux/ctype.h> #include <linux/init.h> #include <linux/panic_notifier.h> #include <linux/poll.h> #include <linux/nmi.h> #include <linux/fs.h> #include <linux/trace.h> #include <linux/sched/clock.h> #include <linux/sched/rt.h> #include <linux/fsnotify.h> #include <linux/irq_work.h> #include <linux/workqueue.h> #include <linux/sort.h> #include <linux/io.h> /* vmap_page_range() */ #include <linux/fs_context.h> #include <asm/setup.h> /* COMMAND_LINE_SIZE */ #include "trace.h" #include "trace_output.h" #ifdef CONFIG_FTRACE_STARTUP_TEST /* * We need to change this state when a selftest is running. * A selftest will lurk into the ring-buffer to count the * entries inserted during the selftest although some concurrent * insertions into the ring-buffer such as trace_printk could occurred * at the same time, giving false positive or negative results. */ static bool __read_mostly tracing_selftest_running; /* * If boot-time tracing including tracers/events via kernel cmdline * is running, we do not want to run SELFTEST. */ bool __read_mostly tracing_selftest_disabled; void __init disable_tracing_selftest(const char *reason) { if (!tracing_selftest_disabled) { tracing_selftest_disabled = true; pr_info("Ftrace startup test is disabled due to %s\n", reason); } } #else #define tracing_selftest_running 0 #define tracing_selftest_disabled 0 #endif /* Pipe tracepoints to printk */ static struct trace_iterator *tracepoint_print_iter; int tracepoint_printk; static bool tracepoint_printk_stop_on_boot __initdata; static bool traceoff_after_boot __initdata; static DEFINE_STATIC_KEY_FALSE(tracepoint_printk_key); /* Store tracers and their flags per instance */ struct tracers { struct list_head list; struct tracer *tracer; struct tracer_flags *flags; }; /* * To prevent the comm cache from being overwritten when no * tracing is active, only save the comm when a trace event * occurred. */ DEFINE_PER_CPU(bool, trace_taskinfo_save); /* * Kill all tracing for good (never come back). * It is initialized to 1 but will turn to zero if the initialization * of the tracer is successful. But that is the only place that sets * this back to zero. */ static int tracing_disabled = 1; cpumask_var_t __read_mostly tracing_buffer_mask; #define MAX_TRACER_SIZE 100 /* * ftrace_dump_on_oops - variable to dump ftrace buffer on oops * * If there is an oops (or kernel panic) and the ftrace_dump_on_oops * is set, then ftrace_dump is called. This will output the contents * of the ftrace buffers to the console. This is very useful for * capturing traces that lead to crashes and outputting it to a * serial console. * * It is default off, but you can enable it with either specifying * "ftrace_dump_on_oops" in the kernel command line, or setting * /proc/sys/kernel/ftrace_dump_on_oops * Set 1 if you want to dump buffers of all CPUs * Set 2 if you want to dump the buffer of the CPU that triggered oops * Set instance name if you want to dump the specific trace instance * Multiple instance dump is also supported, and instances are separated * by commas. */ /* Set to string format zero to disable by default */ static char ftrace_dump_on_oops[MAX_TRACER_SIZE] = "0"; /* When set, tracing will stop when a WARN*() is hit */ static int __disable_trace_on_warning; int tracepoint_printk_sysctl(const struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos); static const struct ctl_table trace_sysctl_table[] = { { .procname = "ftrace_dump_on_oops", .data = &ftrace_dump_on_oops, .maxlen = MAX_TRACER_SIZE, .mode = 0644, .proc_handler = proc_dostring, }, { .procname = "traceoff_on_warning", .data = &__disable_trace_on_warning, .maxlen = sizeof(__disable_trace_on_warning), .mode = 0644, .proc_handler = proc_dointvec, }, { .procname = "tracepoint_printk", .data = &tracepoint_printk, .maxlen = sizeof(tracepoint_printk), .mode = 0644, .proc_handler = tracepoint_printk_sysctl, }, }; static int __init init_trace_sysctls(void) { register_sysctl_init("kernel", trace_sysctl_table); return 0; } subsys_initcall(init_trace_sysctls); #ifdef CONFIG_TRACE_EVAL_MAP_FILE /* Map of enums to their values, for "eval_map" file */ struct trace_eval_map_head { struct module *mod; unsigned long length; }; union trace_eval_map_item; struct trace_eval_map_tail { /* * "end" is first and points to NULL as it must be different * than "mod" or "eval_string" */ union trace_eval_map_item *next; const char *end; /* points to NULL */ }; static DEFINE_MUTEX(trace_eval_mutex); /* * The trace_eval_maps are saved in an array with two extra elements, * one at the beginning, and one at the end. The beginning item contains * the count of the saved maps (head.length), and the module they * belong to if not built in (head.mod). The ending item contains a * pointer to the next array of saved eval_map items. */ union trace_eval_map_item { struct trace_eval_map map; struct trace_eval_map_head head; struct trace_eval_map_tail tail; }; static union trace_eval_map_item *trace_eval_maps; #endif /* CONFIG_TRACE_EVAL_MAP_FILE */ int tracing_set_tracer(struct trace_array *tr, const char *buf); static void ftrace_trace_userstack(struct trace_array *tr, struct trace_buffer *buffer, unsigned int trace_ctx); static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata; static char *default_bootup_tracer; static bool allocate_snapshot; static bool snapshot_at_boot; static char boot_instance_info[COMMAND_LINE_SIZE] __initdata; static int boot_instance_index; static char boot_snapshot_info[COMMAND_LINE_SIZE] __initdata; static int boot_snapshot_index; static int __init set_cmdline_ftrace(char *str) { strscpy(bootup_tracer_buf, str, MAX_TRACER_SIZE); default_bootup_tracer = bootup_tracer_buf; /* We are using ftrace early, expand it */ trace_set_ring_buffer_expanded(NULL); return 1; } __setup("ftrace=", set_cmdline_ftrace); int ftrace_dump_on_oops_enabled(void) { if (!strcmp("0", ftrace_dump_on_oops)) return 0; else return 1; } static int __init set_ftrace_dump_on_oops(char *str) { if (!*str) { strscpy(ftrace_dump_on_oops, "1", MAX_TRACER_SIZE); return 1; } if (*str == ',') { strscpy(ftrace_dump_on_oops, "1", MAX_TRACER_SIZE); strscpy(ftrace_dump_on_oops + 1, str, MAX_TRACER_SIZE - 1); return 1; } if (*str++ == '=') { strscpy(ftrace_dump_on_oops, str, MAX_TRACER_SIZE); return 1; } return 0; } __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops); static int __init stop_trace_on_warning(char *str) { if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0)) __disable_trace_on_warning = 1; return 1; } __setup("traceoff_on_warning", stop_trace_on_warning); static int __init boot_alloc_snapshot(char *str) { char *slot = boot_snapshot_info + boot_snapshot_index; int left = sizeof(boot_snapshot_info) - boot_snapshot_index; int ret; if (str[0] == '=') { str++; if (strlen(str) >= left) return -1; ret = snprintf(slot, left, "%s\t", str); boot_snapshot_index += ret; } else { allocate_snapshot = true; /* We also need the main ring buffer expanded */ trace_set_ring_buffer_expanded(NULL); } return 1; } __setup("alloc_snapshot", boot_alloc_snapshot); static int __init boot_snapshot(char *str) { snapshot_at_boot = true; boot_alloc_snapshot(str); return 1; } __setup("ftrace_boot_snapshot", boot_snapshot); static int __init boot_instance(char *str) { char *slot = boot_instance_info + boot_instance_index; int left = sizeof(boot_instance_info) - boot_instance_index; int ret; if (strlen(str) >= left) return -1; ret = snprintf(slot, left, "%s\t", str); boot_instance_index += ret; return 1; } __setup("trace_instance=", boot_instance); static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata; static int __init set_trace_boot_options(char *str) { strscpy(trace_boot_options_buf, str, MAX_TRACER_SIZE); return 1; } __setup("trace_options=", set_trace_boot_options); static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata; static char *trace_boot_clock __initdata; static int __init set_trace_boot_clock(char *str) { strscpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE); trace_boot_clock = trace_boot_clock_buf; return 1; } __setup("trace_clock=", set_trace_boot_clock); static int __init set_tracepoint_printk(char *str) { /* Ignore the "tp_printk_stop_on_boot" param */ if (*str == '_') return 0; if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0)) tracepoint_printk = 1; return 1; } __setup("tp_printk", set_tracepoint_printk); static int __init set_tracepoint_printk_stop(char *str) { tracepoint_printk_stop_on_boot = true; return 1; } __setup("tp_printk_stop_on_boot", set_tracepoint_printk_stop); static int __init set_traceoff_after_boot(char *str) { traceoff_after_boot = true; return 1; } __setup("traceoff_after_boot", set_traceoff_after_boot); unsigned long long ns2usecs(u64 nsec) { nsec += 500; do_div(nsec, 1000); return nsec; } static void trace_process_export(struct trace_export *export, struct ring_buffer_event *event, int flag) { struct trace_entry *entry; unsigned int size = 0; if (export->flags & flag) { entry = ring_buffer_event_data(event); size = ring_buffer_event_length(event); export->write(export, entry, size); } } static DEFINE_MUTEX(ftrace_export_lock); static struct trace_export __rcu *ftrace_exports_list __read_mostly; static DEFINE_STATIC_KEY_FALSE(trace_function_exports_enabled); static DEFINE_STATIC_KEY_FALSE(trace_event_exports_enabled); static DEFINE_STATIC_KEY_FALSE(trace_marker_exports_enabled); static inline void ftrace_exports_enable(struct trace_export *export) { if (export->flags & TRACE_EXPORT_FUNCTION) static_branch_inc(&trace_function_exports_enabled); if (export->flags & TRACE_EXPORT_EVENT) static_branch_inc(&trace_event_exports_enabled); if (export->flags & TRACE_EXPORT_MARKER) static_branch_inc(&trace_marker_exports_enabled); } static inline void ftrace_exports_disable(struct trace_export *export) { if (export->flags & TRACE_EXPORT_FUNCTION) static_branch_dec(&trace_function_exports_enabled); if (export->flags & TRACE_EXPORT_EVENT) static_branch_dec(&trace_event_exports_enabled); if (export->flags & TRACE_EXPORT_MARKER) static_branch_dec(&trace_marker_exports_enabled); } static void ftrace_exports(struct ring_buffer_event *event, int flag) { struct trace_export *export; guard(preempt_notrace)(); export = rcu_dereference_raw_check(ftrace_exports_list); while (export) { trace_process_export(export, event, flag); export = rcu_dereference_raw_check(export->next); } } static inline void add_trace_export(struct trace_export **list, struct trace_export *export) { rcu_assign_pointer(export->next, *list); /* * We are entering export into the list but another * CPU might be walking that list. We need to make sure * the export->next pointer is valid before another CPU sees * the export pointer included into the list. */ rcu_assign_pointer(*list, export); } static inline int rm_trace_export(struct trace_export **list, struct trace_export *export) { struct trace_export **p; for (p = list; *p != NULL; p = &(*p)->next) if (*p == export) break; if (*p != export) return -1; rcu_assign_pointer(*p, (*p)->next); return 0; } static inline void add_ftrace_export(struct trace_export **list, struct trace_export *export) { ftrace_exports_enable(export); add_trace_export(list, export); } static inline int rm_ftrace_export(struct trace_export **list, struct trace_export *export) { int ret; ret = rm_trace_export(list, export); ftrace_exports_disable(export); return ret; } int register_ftrace_export(struct trace_export *export) { if (WARN_ON_ONCE(!export->write)) return -1; guard(mutex)(&ftrace_export_lock); add_ftrace_export(&ftrace_exports_list, export); return 0; } EXPORT_SYMBOL_GPL(register_ftrace_export); int unregister_ftrace_export(struct trace_export *export) { guard(mutex)(&ftrace_export_lock); return rm_ftrace_export(&ftrace_exports_list, export); } EXPORT_SYMBOL_GPL(unregister_ftrace_export); /* trace_flags holds trace_options default values */ #define TRACE_DEFAULT_FLAGS \ (FUNCTION_DEFAULT_FLAGS | FPROFILE_DEFAULT_FLAGS | \ TRACE_ITER(PRINT_PARENT) | TRACE_ITER(PRINTK) | \ TRACE_ITER(ANNOTATE) | TRACE_ITER(CONTEXT_INFO) | \ TRACE_ITER(RECORD_CMD) | TRACE_ITER(OVERWRITE) | \ TRACE_ITER(IRQ_INFO) | TRACE_ITER(MARKERS) | \ TRACE_ITER(HASH_PTR) | TRACE_ITER(TRACE_PRINTK) | \ TRACE_ITER(COPY_MARKER)) /* trace_options that are only supported by global_trace */ #define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER(PRINTK) | \ TRACE_ITER(PRINTK_MSGONLY) | TRACE_ITER(RECORD_CMD) | \ TRACE_ITER(PROF_TEXT_OFFSET) | FPROFILE_DEFAULT_FLAGS) /* trace_flags that are default zero for instances */ #define ZEROED_TRACE_FLAGS \ (TRACE_ITER(EVENT_FORK) | TRACE_ITER(FUNC_FORK) | TRACE_ITER(TRACE_PRINTK) | \ TRACE_ITER(COPY_MARKER)) /* * The global_trace is the descriptor that holds the top-level tracing * buffers for the live tracing. */ static struct trace_array global_trace = { .trace_flags = TRACE_DEFAULT_FLAGS, }; static struct trace_array *printk_trace = &global_trace; /* List of trace_arrays interested in the top level trace_marker */ static LIST_HEAD(marker_copies); static __always_inline bool printk_binsafe(struct trace_array *tr) { /* * The binary format of traceprintk can cause a crash if used * by a buffer from another boot. Force the use of the * non binary version of trace_printk if the trace_printk * buffer is a boot mapped ring buffer. */ return !(tr->flags & TRACE_ARRAY_FL_BOOT); } static void update_printk_trace(struct trace_array *tr) { if (printk_trace == tr) return; printk_trace->trace_flags &= ~TRACE_ITER(TRACE_PRINTK); printk_trace = tr; tr->trace_flags |= TRACE_ITER(TRACE_PRINTK); } /* Returns true if the status of tr changed */ static bool update_marker_trace(struct trace_array *tr, int enabled) { lockdep_assert_held(&event_mutex); if (enabled) { if (!list_empty(&tr->marker_list)) return false; list_add_rcu(&tr->marker_list, &marker_copies); tr->trace_flags |= TRACE_ITER(COPY_MARKER); return true; } if (list_empty(&tr->marker_list)) return false; list_del_init(&tr->marker_list); tr->trace_flags &= ~TRACE_ITER(COPY_MARKER); return true; } void trace_set_ring_buffer_expanded(struct trace_array *tr) { if (!tr) tr = &global_trace; tr->ring_buffer_expanded = true; } LIST_HEAD(ftrace_trace_arrays); int trace_array_get(struct trace_array *this_tr) { struct trace_array *tr; guard(mutex)(&trace_types_lock); list_for_each_entry(tr, &ftrace_trace_arrays, list) { if (tr == this_tr) { tr->ref++; return 0; } } return -ENODEV; } static void __trace_array_put(struct trace_array *this_tr) { WARN_ON(!this_tr->ref); this_tr->ref--; } /** * trace_array_put - Decrement the reference counter for this trace array. * @this_tr : pointer to the trace array * * NOTE: Use this when we no longer need the trace array returned by * trace_array_get_by_name(). This ensures the trace array can be later * destroyed. * */ void trace_array_put(struct trace_array *this_tr) { if (!this_tr) return; guard(mutex)(&trace_types_lock); __trace_array_put(this_tr); } EXPORT_SYMBOL_GPL(trace_array_put); int tracing_check_open_get_tr(struct trace_array *tr) { int ret; ret = security_locked_down(LOCKDOWN_TRACEFS); if (ret) return ret; if (tracing_disabled) return -ENODEV; if (tr && trace_array_get(tr) < 0) return -ENODEV; return 0; } /** * trace_find_filtered_pid - check if a pid exists in a filtered_pid list * @filtered_pids: The list of pids to check * @search_pid: The PID to find in @filtered_pids * * Returns true if @search_pid is found in @filtered_pids, and false otherwise. */ bool trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid) { return trace_pid_list_is_set(filtered_pids, search_pid); } /** * trace_ignore_this_task - should a task be ignored for tracing * @filtered_pids: The list of pids to check * @filtered_no_pids: The list of pids not to be traced * @task: The task that should be ignored if not filtered * * Checks if @task should be traced or not from @filtered_pids. * Returns true if @task should *NOT* be traced. * Returns false if @task should be traced. */ bool trace_ignore_this_task(struct trace_pid_list *filtered_pids, struct trace_pid_list *filtered_no_pids, struct task_struct *task) { /* * If filtered_no_pids is not empty, and the task's pid is listed * in filtered_no_pids, then return true. * Otherwise, if filtered_pids is empty, that means we can * trace all tasks. If it has content, then only trace pids * within filtered_pids. */ return (filtered_pids && !trace_find_filtered_pid(filtered_pids, task->pid)) || (filtered_no_pids && trace_find_filtered_pid(filtered_no_pids, task->pid)); } /** * trace_filter_add_remove_task - Add or remove a task from a pid_list * @pid_list: The list to modify * @self: The current task for fork or NULL for exit * @task: The task to add or remove * * If adding a task, if @self is defined, the task is only added if @self * is also included in @pid_list. This happens on fork and tasks should * only be added when the parent is listed. If @self is NULL, then the * @task pid will be removed from the list, which would happen on exit * of a task. */ void trace_filter_add_remove_task(struct trace_pid_list *pid_list, struct task_struct *self, struct task_struct *task) { if (!pid_list) return; /* For forks, we only add if the forking task is listed */ if (self) { if (!trace_find_filtered_pid(pid_list, self->pid)) return; } /* "self" is set for forks, and NULL for exits */ if (self) trace_pid_list_set(pid_list, task->pid); else trace_pid_list_clear(pid_list, task->pid); } /** * trace_pid_next - Used for seq_file to get to the next pid of a pid_list * @pid_list: The pid list to show * @v: The last pid that was shown (+1 the actual pid to let zero be displayed) * @pos: The position of the file * * This is used by the seq_file "next" operation to iterate the pids * listed in a trace_pid_list structure. * * Returns the pid+1 as we want to display pid of zero, but NULL would * stop the iteration. */ void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos) { long pid = (unsigned long)v; unsigned int next; (*pos)++; /* pid already is +1 of the actual previous bit */ if (trace_pid_list_next(pid_list, pid, &next) < 0) return NULL; pid = next; /* Return pid + 1 to allow zero to be represented */ return (void *)(pid + 1); } /** * trace_pid_start - Used for seq_file to start reading pid lists * @pid_list: The pid list to show * @pos: The position of the file * * This is used by seq_file "start" operation to start the iteration * of listing pids. * * Returns the pid+1 as we want to display pid of zero, but NULL would * stop the iteration. */ void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos) { unsigned long pid; unsigned int first; loff_t l = 0; if (trace_pid_list_first(pid_list, &first) < 0) return NULL; pid = first; /* Return pid + 1 so that zero can be the exit value */ for (pid++; pid && l < *pos; pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l)) ; return (void *)pid; } /** * trace_pid_show - show the current pid in seq_file processing * @m: The seq_file structure to write into * @v: A void pointer of the pid (+1) value to display * * Can be directly used by seq_file operations to display the current * pid value. */ int trace_pid_show(struct seq_file *m, void *v) { unsigned long pid = (unsigned long)v - 1; seq_printf(m, "%lu\n", pid); return 0; } /* 128 should be much more than enough */ #define PID_BUF_SIZE 127 int trace_pid_write(struct trace_pid_list *filtered_pids, struct trace_pid_list **new_pid_list, const char __user *ubuf, size_t cnt) { struct trace_pid_list *pid_list; struct trace_parser parser; unsigned long val; int nr_pids = 0; ssize_t read = 0; ssize_t ret; loff_t pos; pid_t pid; if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1)) return -ENOMEM; /* * Always recreate a new array. The write is an all or nothing * operation. Always create a new array when adding new pids by * the user. If the operation fails, then the current list is * not modified. */ pid_list = trace_pid_list_alloc(); if (!pid_list) { trace_parser_put(&parser); return -ENOMEM; } if (filtered_pids) { /* copy the current bits to the new max */ ret = trace_pid_list_first(filtered_pids, &pid); while (!ret) { ret = trace_pid_list_set(pid_list, pid); if (ret < 0) goto out; ret = trace_pid_list_next(filtered_pids, pid + 1, &pid); nr_pids++; } } ret = 0; while (cnt > 0) { pos = 0; ret = trace_get_user(&parser, ubuf, cnt, &pos); if (ret < 0) break; read += ret; ubuf += ret; cnt -= ret; if (!trace_parser_loaded(&parser)) break; ret = -EINVAL; if (kstrtoul(parser.buffer, 0, &val)) break; pid = (pid_t)val; if (trace_pid_list_set(pid_list, pid) < 0) { ret = -1; break; } nr_pids++; trace_parser_clear(&parser); ret = 0; } out: trace_parser_put(&parser); if (ret < 0) { trace_pid_list_free(pid_list); return ret; } if (!nr_pids) { /* Cleared the list of pids */ trace_pid_list_free(pid_list); pid_list = NULL; } *new_pid_list = pid_list; return read; } static u64 buffer_ftrace_now(struct array_buffer *buf, int cpu) { u64 ts; /* Early boot up does not have a buffer yet */ if (!buf->buffer) return trace_clock_local(); ts = ring_buffer_time_stamp(buf->buffer); ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts); return ts; } u64 ftrace_now(int cpu) { return buffer_ftrace_now(&global_trace.array_buffer, cpu); } /** * tracing_is_enabled - Show if global_trace has been enabled * * Shows if the global trace has been enabled or not. It uses the * mirror flag "buffer_disabled" to be used in fast paths such as for * the irqsoff tracer. But it may be inaccurate due to races. If you * need to know the accurate state, use tracing_is_on() which is a little * slower, but accurate. */ int tracing_is_enabled(void) { /* * For quick access (irqsoff uses this in fast path), just * return the mirror variable of the state of the ring buffer. * It's a little racy, but we don't really care. */ return !global_trace.buffer_disabled; } /* * trace_buf_size is the size in bytes that is allocated * for a buffer. Note, the number of bytes is always rounded * to page size. * * This number is purposely set to a low number of 16384. * If the dump on oops happens, it will be much appreciated * to not have to wait for all that output. Anyway this can be * boot time and run time configurable. */ #define TRACE_BUF_SIZE_DEFAULT 1441792UL /* 16384 * 88 (sizeof(entry)) */ static unsigned long trace_buf_size = TRACE_BUF_SIZE_DEFAULT; /* trace_types holds a link list of available tracers. */ static struct tracer *trace_types __read_mostly; /* * trace_types_lock is used to protect the trace_types list. */ DEFINE_MUTEX(trace_types_lock); /* * serialize the access of the ring buffer * * ring buffer serializes readers, but it is low level protection. * The validity of the events (which returns by ring_buffer_peek() ..etc) * are not protected by ring buffer. * * The content of events may become garbage if we allow other process consumes * these events concurrently: * A) the page of the consumed events may become a normal page * (not reader page) in ring buffer, and this page will be rewritten * by events producer. * B) The page of the consumed events may become a page for splice_read, * and this page will be returned to system. * * These primitives allow multi process access to different cpu ring buffer * concurrently. * * These primitives don't distinguish read-only and read-consume access. * Multi read-only access are also serialized. */ #ifdef CONFIG_SMP static DECLARE_RWSEM(all_cpu_access_lock); static DEFINE_PER_CPU(struct mutex, cpu_access_lock); static inline void trace_access_lock(int cpu) { if (cpu == RING_BUFFER_ALL_CPUS) { /* gain it for accessing the whole ring buffer. */ down_write(&all_cpu_access_lock); } else { /* gain it for accessing a cpu ring buffer. */ /* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */ down_read(&all_cpu_access_lock); /* Secondly block other access to this @cpu ring buffer. */ mutex_lock(&per_cpu(cpu_access_lock, cpu)); } } static inline void trace_access_unlock(int cpu) { if (cpu == RING_BUFFER_ALL_CPUS) { up_write(&all_cpu_access_lock); } else { mutex_unlock(&per_cpu(cpu_access_lock, cpu)); up_read(&all_cpu_access_lock); } } static inline void trace_access_lock_init(void) { int cpu; for_each_possible_cpu(cpu) mutex_init(&per_cpu(cpu_access_lock, cpu)); } #else static DEFINE_MUTEX(access_lock); static inline void trace_access_lock(int cpu) { (void)cpu; mutex_lock(&access_lock); } static inline void trace_access_unlock(int cpu) { (void)cpu; mutex_unlock(&access_lock); } static inline void trace_access_lock_init(void) { } #endif #ifdef CONFIG_STACKTRACE static void __ftrace_trace_stack(struct trace_array *tr, struct trace_buffer *buffer, unsigned int trace_ctx, int skip, struct pt_regs *regs); static inline void ftrace_trace_stack(struct trace_array *tr, struct trace_buffer *buffer, unsigned int trace_ctx, int skip, struct pt_regs *regs); #else static inline void __ftrace_trace_stack(struct trace_array *tr, struct trace_buffer *buffer, unsigned int trace_ctx, int skip, struct pt_regs *regs) { } static inline void ftrace_trace_stack(struct trace_array *tr, struct trace_buffer *buffer, unsigned long trace_ctx, int skip, struct pt_regs *regs) { } #endif static __always_inline void trace_event_setup(struct ring_buffer_event *event, int type, unsigned int trace_ctx) { struct trace_entry *ent = ring_buffer_event_data(event); tracing_generic_entry_update(ent, type, trace_ctx); } static __always_inline struct ring_buffer_event * __trace_buffer_lock_reserve(struct trace_buffer *buffer, int type, unsigned long len, unsigned int trace_ctx) { struct ring_buffer_event *event; event = ring_buffer_lock_reserve(buffer, len); if (event != NULL) trace_event_setup(event, type, trace_ctx); return event; } void tracer_tracing_on(struct trace_array *tr) { if (tr->array_buffer.buffer) ring_buffer_record_on(tr->array_buffer.buffer); /* * This flag is looked at when buffers haven't been allocated * yet, or by some tracers (like irqsoff), that just want to * know if the ring buffer has been disabled, but it can handle * races of where it gets disabled but we still do a record. * As the check is in the fast path of the tracers, it is more * important to be fast than accurate. */ tr->buffer_disabled = 0; } /** * tracing_on - enable tracing buffers * * This function enables tracing buffers that may have been * disabled with tracing_off. */ void tracing_on(void) { tracer_tracing_on(&global_trace); } EXPORT_SYMBOL_GPL(tracing_on); static __always_inline void __buffer_unlock_commit(struct trace_buffer *buffer, struct ring_buffer_event *event) { __this_cpu_write(trace_taskinfo_save, true); /* If this is the temp buffer, we need to commit fully */ if (this_cpu_read(trace_buffered_event) == event) { /* Length is in event->array[0] */ ring_buffer_write(buffer, event->array[0], &event->array[1]); /* Release the temp buffer */ this_cpu_dec(trace_buffered_event_cnt); /* ring_buffer_unlock_commit() enables preemption */ preempt_enable_notrace(); } else ring_buffer_unlock_commit(buffer); } int __trace_array_puts(struct trace_array *tr, unsigned long ip, const char *str, int size) { struct ring_buffer_event *event; struct trace_buffer *buffer; struct print_entry *entry; unsigned int trace_ctx; int alloc; if (!(tr->trace_flags & TRACE_ITER(PRINTK))) return 0; if (unlikely(tracing_selftest_running && tr == &global_trace)) return 0; if (unlikely(tracing_disabled)) return 0; alloc = sizeof(*entry) + size + 2; /* possible \n added */ trace_ctx = tracing_gen_ctx(); buffer = tr->array_buffer.buffer; guard(ring_buffer_nest)(buffer); event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc, trace_ctx); if (!event) return 0; entry = ring_buffer_event_data(event); entry->ip = ip; memcpy(&entry->buf, str, size); /* Add a newline if necessary */ if (entry->buf[size - 1] != '\n') { entry->buf[size] = '\n'; entry->buf[size + 1] = '\0'; } else entry->buf[size] = '\0'; __buffer_unlock_commit(buffer, event); ftrace_trace_stack(tr, buffer, trace_ctx, 4, NULL); return size; } EXPORT_SYMBOL_GPL(__trace_array_puts); /** * __trace_puts - write a constant string into the trace buffer. * @ip: The address of the caller * @str: The constant string to write * @size: The size of the string. */ int __trace_puts(unsigned long ip, const char *str, int size) { return __trace_array_puts(printk_trace, ip, str, size); } EXPORT_SYMBOL_GPL(__trace_puts); /** * __trace_bputs - write the pointer to a constant string into trace buffer * @ip: The address of the caller * @str: The constant string to write to the buffer to */ int __trace_bputs(unsigned long ip, const char *str) { struct trace_array *tr = READ_ONCE(printk_trace); struct ring_buffer_event *event; struct trace_buffer *buffer; struct bputs_entry *entry; unsigned int trace_ctx; int size = sizeof(struct bputs_entry); if (!printk_binsafe(tr)) return __trace_puts(ip, str, strlen(str)); if (!(tr->trace_flags & TRACE_ITER(PRINTK))) return 0; if (unlikely(tracing_selftest_running || tracing_disabled)) return 0; trace_ctx = tracing_gen_ctx(); buffer = tr->array_buffer.buffer; guard(ring_buffer_nest)(buffer); event = __trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size, trace_ctx); if (!event) return 0; entry = ring_buffer_event_data(event); entry->ip = ip; entry->str = str; __buffer_unlock_commit(buffer, event); ftrace_trace_stack(tr, buffer, trace_ctx, 4, NULL); return 1; } EXPORT_SYMBOL_GPL(__trace_bputs); #ifdef CONFIG_TRACER_SNAPSHOT static void tracing_snapshot_instance_cond(struct trace_array *tr, void *cond_data) { struct tracer *tracer = tr->current_trace; unsigned long flags; if (in_nmi()) { trace_array_puts(tr, "*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n"); trace_array_puts(tr, "*** snapshot is being ignored ***\n"); return; } if (!tr->allocated_snapshot) { trace_array_puts(tr, "*** SNAPSHOT NOT ALLOCATED ***\n"); trace_array_puts(tr, "*** stopping trace here! ***\n"); tracer_tracing_off(tr); return; } /* Note, snapshot can not be used when the tracer uses it */ if (tracer->use_max_tr) { trace_array_puts(tr, "*** LATENCY TRACER ACTIVE ***\n"); trace_array_puts(tr, "*** Can not use snapshot (sorry) ***\n"); return; } if (tr->mapped) { trace_array_puts(tr, "*** BUFFER MEMORY MAPPED ***\n"); trace_array_puts(tr, "*** Can not use snapshot (sorry) ***\n"); return; } local_irq_save(flags); update_max_tr(tr, current, smp_processor_id(), cond_data); local_irq_restore(flags); } void tracing_snapshot_instance(struct trace_array *tr) { tracing_snapshot_instance_cond(tr, NULL); } /** * tracing_snapshot - take a snapshot of the current buffer. * * This causes a swap between the snapshot buffer and the current live * tracing buffer. You can use this to take snapshots of the live * trace when some condition is triggered, but continue to trace. * * Note, make sure to allocate the snapshot with either * a tracing_snapshot_alloc(), or by doing it manually * with: echo 1 > /sys/kernel/tracing/snapshot * * If the snapshot buffer is not allocated, it will stop tracing. * Basically making a permanent snapshot. */ void tracing_snapshot(void) { struct trace_array *tr = &global_trace; tracing_snapshot_instance(tr); } EXPORT_SYMBOL_GPL(tracing_snapshot); /** * tracing_snapshot_cond - conditionally take a snapshot of the current buffer. * @tr: The tracing instance to snapshot * @cond_data: The data to be tested conditionally, and possibly saved * * This is the same as tracing_snapshot() except that the snapshot is * conditional - the snapshot will only happen if the * cond_snapshot.update() implementation receiving the cond_data * returns true, which means that the trace array's cond_snapshot * update() operation used the cond_data to determine whether the * snapshot should be taken, and if it was, presumably saved it along * with the snapshot. */ void tracing_snapshot_cond(struct trace_array *tr, void *cond_data) { tracing_snapshot_instance_cond(tr, cond_data); } EXPORT_SYMBOL_GPL(tracing_snapshot_cond); /** * tracing_cond_snapshot_data - get the user data associated with a snapshot * @tr: The tracing instance * * When the user enables a conditional snapshot using * tracing_snapshot_cond_enable(), the user-defined cond_data is saved * with the snapshot. This accessor is used to retrieve it. * * Should not be called from cond_snapshot.update(), since it takes * the tr->max_lock lock, which the code calling * cond_snapshot.update() has already done. * * Returns the cond_data associated with the trace array's snapshot. */ void *tracing_cond_snapshot_data(struct trace_array *tr) { void *cond_data = NULL; local_irq_disable(); arch_spin_lock(&tr->max_lock); if (tr->cond_snapshot) cond_data = tr->cond_snapshot->cond_data; arch_spin_unlock(&tr->max_lock); local_irq_enable(); return cond_data; } EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data); static int resize_buffer_duplicate_size(struct array_buffer *trace_buf, struct array_buffer *size_buf, int cpu_id); static void set_buffer_entries(struct array_buffer *buf, unsigned long val); int tracing_alloc_snapshot_instance(struct trace_array *tr) { int order; int ret; if (!tr->allocated_snapshot) { /* Make the snapshot buffer have the same order as main buffer */ order = ring_buffer_subbuf_order_get(tr->array_buffer.buffer); ret = ring_buffer_subbuf_order_set(tr->max_buffer.buffer, order); if (ret < 0) return ret; /* allocate spare buffer */ ret = resize_buffer_duplicate_size(&tr->max_buffer, &tr->array_buffer, RING_BUFFER_ALL_CPUS); if (ret < 0) return ret; tr->allocated_snapshot = true; } return 0; } static void free_snapshot(struct trace_array *tr) { /* * We don't free the ring buffer. instead, resize it because * The max_tr ring buffer has some state (e.g. ring->clock) and * we want preserve it. */ ring_buffer_subbuf_order_set(tr->max_buffer.buffer, 0); ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS); set_buffer_entries(&tr->max_buffer, 1); tracing_reset_online_cpus(&tr->max_buffer); tr->allocated_snapshot = false; } static int tracing_arm_snapshot_locked(struct trace_array *tr) { int ret; lockdep_assert_held(&trace_types_lock); spin_lock(&tr->snapshot_trigger_lock); if (tr->snapshot == UINT_MAX || tr->mapped) { spin_unlock(&tr->snapshot_trigger_lock); return -EBUSY; } tr->snapshot++; spin_unlock(&tr->snapshot_trigger_lock); ret = tracing_alloc_snapshot_instance(tr); if (ret) { spin_lock(&tr->snapshot_trigger_lock); tr->snapshot--; spin_unlock(&tr->snapshot_trigger_lock); } return ret; } int tracing_arm_snapshot(struct trace_array *tr) { guard(mutex)(&trace_types_lock); return tracing_arm_snapshot_locked(tr); } void tracing_disarm_snapshot(struct trace_array *tr) { spin_lock(&tr->snapshot_trigger_lock); if (!WARN_ON(!tr->snapshot)) tr->snapshot--; spin_unlock(&tr->snapshot_trigger_lock); } /** * tracing_alloc_snapshot - allocate snapshot buffer. * * This only allocates the snapshot buffer if it isn't already * allocated - it doesn't also take a snapshot. * * This is meant to be used in cases where the snapshot buffer needs * to be set up for events that can't sleep but need to be able to * trigger a snapshot. */ int tracing_alloc_snapshot(void) { struct trace_array *tr = &global_trace; int ret; ret = tracing_alloc_snapshot_instance(tr); WARN_ON(ret < 0); return ret; } EXPORT_SYMBOL_GPL(tracing_alloc_snapshot); /** * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer. * * This is similar to tracing_snapshot(), but it will allocate the * snapshot buffer if it isn't already allocated. Use this only * where it is safe to sleep, as the allocation may sleep. * * This causes a swap between the snapshot buffer and the current live * tracing buffer. You can use this to take snapshots of the live * trace when some condition is triggered, but continue to trace. */ void tracing_snapshot_alloc(void) { int ret; ret = tracing_alloc_snapshot(); if (ret < 0) return; tracing_snapshot(); } EXPORT_SYMBOL_GPL(tracing_snapshot_alloc); /** * tracing_snapshot_cond_enable - enable conditional snapshot for an instance * @tr: The tracing instance * @cond_data: User data to associate with the snapshot * @update: Implementation of the cond_snapshot update function * * Check whether the conditional snapshot for the given instance has * already been enabled, or if the current tracer is already using a * snapshot; if so, return -EBUSY, else create a cond_snapshot and * save the cond_data and update function inside. * * Returns 0 if successful, error otherwise. */ int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data, cond_update_fn_t update) { struct cond_snapshot *cond_snapshot __free(kfree) = kzalloc(sizeof(*cond_snapshot), GFP_KERNEL); int ret; if (!cond_snapshot) return -ENOMEM; cond_snapshot->cond_data = cond_data; cond_snapshot->update = update; guard(mutex)(&trace_types_lock); if (tr->current_trace->use_max_tr) return -EBUSY; /* * The cond_snapshot can only change to NULL without the * trace_types_lock. We don't care if we race with it going * to NULL, but we want to make sure that it's not set to * something other than NULL when we get here, which we can * do safely with only holding the trace_types_lock and not * having to take the max_lock. */ if (tr->cond_snapshot) return -EBUSY; ret = tracing_arm_snapshot_locked(tr); if (ret) return ret; local_irq_disable(); arch_spin_lock(&tr->max_lock); tr->cond_snapshot = no_free_ptr(cond_snapshot); arch_spin_unlock(&tr->max_lock); local_irq_enable(); return 0; } EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable); /** * tracing_snapshot_cond_disable - disable conditional snapshot for an instance * @tr: The tracing instance * * Check whether the conditional snapshot for the given instance is * enabled; if so, free the cond_snapshot associated with it, * otherwise return -EINVAL. * * Returns 0 if successful, error otherwise. */ int tracing_snapshot_cond_disable(struct trace_array *tr) { int ret = 0; local_irq_disable(); arch_spin_lock(&tr->max_lock); if (!tr->cond_snapshot) ret = -EINVAL; else { kfree(tr->cond_snapshot); tr->cond_snapshot = NULL; } arch_spin_unlock(&tr->max_lock); local_irq_enable(); tracing_disarm_snapshot(tr); return ret; } EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable); #else void tracing_snapshot(void) { WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used"); } EXPORT_SYMBOL_GPL(tracing_snapshot); void tracing_snapshot_cond(struct trace_array *tr, void *cond_data) { WARN_ONCE(1, "Snapshot feature not enabled, but internal conditional snapshot used"); } EXPORT_SYMBOL_GPL(tracing_snapshot_cond); int tracing_alloc_snapshot(void) { WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used"); return -ENODEV; } EXPORT_SYMBOL_GPL(tracing_alloc_snapshot); void tracing_snapshot_alloc(void) { /* Give warning */ tracing_snapshot(); } EXPORT_SYMBOL_GPL(tracing_snapshot_alloc); void *tracing_cond_snapshot_data(struct trace_array *tr) { return NULL; } EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data); int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data, cond_update_fn_t update) { return -ENODEV; } EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable); int tracing_snapshot_cond_disable(struct trace_array *tr) { return false; } EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable); #define free_snapshot(tr) do { } while (0) #define tracing_arm_snapshot_locked(tr) ({ -EBUSY; }) #endif /* CONFIG_TRACER_SNAPSHOT */ void tracer_tracing_off(struct trace_array *tr) { if (tr->array_buffer.buffer) ring_buffer_record_off(tr->array_buffer.buffer); /* * This flag is looked at when buffers haven't been allocated * yet, or by some tracers (like irqsoff), that just want to * know if the ring buffer has been disabled, but it can handle * races of where it gets disabled but we still do a record. * As the check is in the fast path of the tracers, it is more * important to be fast than accurate. */ tr->buffer_disabled = 1; } /** * tracer_tracing_disable() - temporary disable the buffer from write * @tr: The trace array to disable its buffer for * * Expects trace_tracing_enable() to re-enable tracing. * The difference between this and tracer_tracing_off() is that this * is a counter and can nest, whereas, tracer_tracing_off() can * be called multiple times and a single trace_tracing_on() will * enable it. */ void tracer_tracing_disable(struct trace_array *tr) { if (WARN_ON_ONCE(!tr->array_buffer.buffer)) return; ring_buffer_record_disable(tr->array_buffer.buffer); } /** * tracer_tracing_enable() - counter part of tracer_tracing_disable() * @tr: The trace array that had tracer_tracincg_disable() called on it * * This is called after tracer_tracing_disable() has been called on @tr, * when it's safe to re-enable tracing. */ void tracer_tracing_enable(struct trace_array *tr) { if (WARN_ON_ONCE(!tr->array_buffer.buffer)) return; ring_buffer_record_enable(tr->array_buffer.buffer); } /** * tracing_off - turn off tracing buffers * * This function stops the tracing buffers from recording data. * It does not disable any overhead the tracers themselves may * be causing. This function simply causes all recording to * the ring buffers to fail. */ void tracing_off(void) { tracer_tracing_off(&global_trace); } EXPORT_SYMBOL_GPL(tracing_off); void disable_trace_on_warning(void) { if (__disable_trace_on_warning) { trace_array_printk_buf(global_trace.array_buffer.buffer, _THIS_IP_, "Disabling tracing due to warning\n"); tracing_off(); } } /** * tracer_tracing_is_on - show real state of ring buffer enabled * @tr : the trace array to know if ring buffer is enabled * * Shows real state of the ring buffer if it is enabled or not. */ bool tracer_tracing_is_on(struct trace_array *tr) { if (tr->array_buffer.buffer) return ring_buffer_record_is_set_on(tr->array_buffer.buffer); return !tr->buffer_disabled; } /** * tracing_is_on - show state of ring buffers enabled */ int tracing_is_on(void) { return tracer_tracing_is_on(&global_trace); } EXPORT_SYMBOL_GPL(tracing_is_on); static int __init set_buf_size(char *str) { unsigned long buf_size; if (!str) return 0; buf_size = memparse(str, &str); /* * nr_entries can not be zero and the startup * tests require some buffer space. Therefore * ensure we have at least 4096 bytes of buffer. */ trace_buf_size = max(4096UL, buf_size); return 1; } __setup("trace_buf_size=", set_buf_size); static int __init set_tracing_thresh(char *str) { unsigned long threshold; int ret; if (!str) return 0; ret = kstrtoul(str, 0, &threshold); if (ret < 0) return 0; tracing_thresh = threshold * 1000; return 1; } __setup("tracing_thresh=", set_tracing_thresh); unsigned long nsecs_to_usecs(unsigned long nsecs) { return nsecs / 1000; } /* * TRACE_FLAGS is defined as a tuple matching bit masks with strings. * It uses C(a, b) where 'a' is the eval (enum) name and 'b' is the string that * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list * of strings in the order that the evals (enum) were defined. */ #undef C #define C(a, b) b /* These must match the bit positions in trace_iterator_flags */ static const char *trace_options[] = { TRACE_FLAGS NULL }; static struct { u64 (*func)(void); const char *name; int in_ns; /* is this clock in nanoseconds? */ } trace_clocks[] = { { trace_clock_local, "local", 1 }, { trace_clock_global, "global", 1 }, { trace_clock_counter, "counter", 0 }, { trace_clock_jiffies, "uptime", 0 }, { trace_clock, "perf", 1 }, { ktime_get_mono_fast_ns, "mono", 1 }, { ktime_get_raw_fast_ns, "mono_raw", 1 }, { ktime_get_boot_fast_ns, "boot", 1 }, { ktime_get_tai_fast_ns, "tai", 1 }, ARCH_TRACE_CLOCKS }; bool trace_clock_in_ns(struct trace_array *tr) { if (trace_clocks[tr->clock_id].in_ns) return true; return false; } /* * trace_parser_get_init - gets the buffer for trace parser */ int trace_parser_get_init(struct trace_parser *parser, int size) { memset(parser, 0, sizeof(*parser)); parser->buffer = kmalloc(size, GFP_KERNEL); if (!parser->buffer) return 1; parser->size = size; return 0; } /* * trace_parser_put - frees the buffer for trace parser */ void trace_parser_put(struct trace_parser *parser) { kfree(parser->buffer); parser->buffer = NULL; } /* * trace_get_user - reads the user input string separated by space * (matched by isspace(ch)) * * For each string found the 'struct trace_parser' is updated, * and the function returns. * * Returns number of bytes read. * * See kernel/trace/trace.h for 'struct trace_parser' details. */ int trace_get_user(struct trace_parser *parser, const char __user *ubuf, size_t cnt, loff_t *ppos) { char ch; size_t read = 0; ssize_t ret; if (!*ppos) trace_parser_clear(parser); ret = get_user(ch, ubuf++); if (ret) goto fail; read++; cnt--; /* * The parser is not finished with the last write, * continue reading the user input without skipping spaces. */ if (!parser->cont) { /* skip white space */ while (cnt && isspace(ch)) { ret = get_user(ch, ubuf++); if (ret) goto fail; read++; cnt--; } parser->idx = 0; /* only spaces were written */ if (isspace(ch) || !ch) { *ppos += read; return read; } } /* read the non-space input */ while (cnt && !isspace(ch) && ch) { if (parser->idx < parser->size - 1) parser->buffer[parser->idx++] = ch; else { ret = -EINVAL; goto fail; } ret = get_user(ch, ubuf++); if (ret) goto fail; read++; cnt--; } /* We either got finished input or we have to wait for another call. */ if (isspace(ch) || !ch) { parser->buffer[parser->idx] = 0; parser->cont = false; } else if (parser->idx < parser->size - 1) { parser->cont = true; parser->buffer[parser->idx++] = ch; /* Make sure the parsed string always terminates with '\0'. */ parser->buffer[parser->idx] = 0; } else { ret = -EINVAL; goto fail; } *ppos += read; return read; fail: trace_parser_fail(parser); return ret; } /* TODO add a seq_buf_to_buffer() */ static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt) { int len; if (trace_seq_used(s) <= s->readpos) return -EBUSY; len = trace_seq_used(s) - s->readpos; if (cnt > len) cnt = len; memcpy(buf, s->buffer + s->readpos, cnt); s->readpos += cnt; return cnt; } unsigned long __read_mostly tracing_thresh; #ifdef CONFIG_TRACER_MAX_TRACE static const struct file_operations tracing_max_lat_fops; #ifdef LATENCY_FS_NOTIFY static struct workqueue_struct *fsnotify_wq; static void latency_fsnotify_workfn(struct work_struct *work) { struct trace_array *tr = container_of(work, struct trace_array, fsnotify_work); fsnotify_inode(tr->d_max_latency->d_inode, FS_MODIFY); } static void latency_fsnotify_workfn_irq(struct irq_work *iwork) { struct trace_array *tr = container_of(iwork, struct trace_array, fsnotify_irqwork); queue_work(fsnotify_wq, &tr->fsnotify_work); } static void trace_create_maxlat_file(struct trace_array *tr, struct dentry *d_tracer) { INIT_WORK(&tr->fsnotify_work, latency_fsnotify_workfn); init_irq_work(&tr->fsnotify_irqwork, latency_fsnotify_workfn_irq); tr->d_max_latency = trace_create_file("tracing_max_latency", TRACE_MODE_WRITE, d_tracer, tr, &tracing_max_lat_fops); } __init static int latency_fsnotify_init(void) { fsnotify_wq = alloc_workqueue("tr_max_lat_wq", WQ_UNBOUND | WQ_HIGHPRI, 0); if (!fsnotify_wq) { pr_err("Unable to allocate tr_max_lat_wq\n"); return -ENOMEM; } return 0; } late_initcall_sync(latency_fsnotify_init); void latency_fsnotify(struct trace_array *tr) { if (!fsnotify_wq) return; /* * We cannot call queue_work(&tr->fsnotify_work) from here because it's * possible that we are called from __schedule() or do_idle(), which * could cause a deadlock. */ irq_work_queue(&tr->fsnotify_irqwork); } #else /* !LATENCY_FS_NOTIFY */ #define trace_create_maxlat_file(tr, d_tracer) \ trace_create_file("tracing_max_latency", TRACE_MODE_WRITE, \ d_tracer, tr, &tracing_max_lat_fops) #endif /* * Copy the new maximum trace into the separate maximum-trace * structure. (this way the maximum trace is permanently saved, * for later retrieval via /sys/kernel/tracing/tracing_max_latency) */ static void __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu) { struct array_buffer *trace_buf = &tr->array_buffer; struct array_buffer *max_buf = &tr->max_buffer; struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu); struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu); max_buf->cpu = cpu; max_buf->time_start = data->preempt_timestamp; max_data->saved_latency = tr->max_latency; max_data->critical_start = data->critical_start; max_data->critical_end = data->critical_end; strscpy(max_data->comm, tsk->comm); max_data->pid = tsk->pid; /* * If tsk == current, then use current_uid(), as that does not use * RCU. The irq tracer can be called out of RCU scope. */ if (tsk == current) max_data->uid = current_uid(); else max_data->uid = task_uid(tsk); max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO; max_data->policy = tsk->policy; max_data->rt_priority = tsk->rt_priority; /* record this tasks comm */ tracing_record_cmdline(tsk); latency_fsnotify(tr); } /** * update_max_tr - snapshot all trace buffers from global_trace to max_tr * @tr: tracer * @tsk: the task with the latency * @cpu: The cpu that initiated the trace. * @cond_data: User data associated with a conditional snapshot * * Flip the buffers between the @tr and the max_tr and record information * about which task was the cause of this latency. */ void update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu, void *cond_data) { if (tr->stop_count) return; WARN_ON_ONCE(!irqs_disabled()); if (!tr->allocated_snapshot) { /* Only the nop tracer should hit this when disabling */ WARN_ON_ONCE(tr->current_trace != &nop_trace); return; } arch_spin_lock(&tr->max_lock); /* Inherit the recordable setting from array_buffer */ if (ring_buffer_record_is_set_on(tr->array_buffer.buffer)) ring_buffer_record_on(tr->max_buffer.buffer); else ring_buffer_record_off(tr->max_buffer.buffer); #ifdef CONFIG_TRACER_SNAPSHOT if (tr->cond_snapshot && !tr->cond_snapshot->update(tr, cond_data)) { arch_spin_unlock(&tr->max_lock); return; } #endif swap(tr->array_buffer.buffer, tr->max_buffer.buffer); __update_max_tr(tr, tsk, cpu); arch_spin_unlock(&tr->max_lock); /* Any waiters on the old snapshot buffer need to wake up */ ring_buffer_wake_waiters(tr->array_buffer.buffer, RING_BUFFER_ALL_CPUS); } /** * update_max_tr_single - only copy one trace over, and reset the rest * @tr: tracer * @tsk: task with the latency * @cpu: the cpu of the buffer to copy. * * Flip the trace of a single CPU buffer between the @tr and the max_tr. */ void update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu) { int ret; if (tr->stop_count) return; WARN_ON_ONCE(!irqs_disabled()); if (!tr->allocated_snapshot) { /* Only the nop tracer should hit this when disabling */ WARN_ON_ONCE(tr->current_trace != &nop_trace); return; } arch_spin_lock(&tr->max_lock); ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->array_buffer.buffer, cpu); if (ret == -EBUSY) { /* * We failed to swap the buffer due to a commit taking * place on this CPU. We fail to record, but we reset * the max trace buffer (no one writes directly to it) * and flag that it failed. * Another reason is resize is in progress. */ trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_, "Failed to swap buffers due to commit or resize in progress\n"); } WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY); __update_max_tr(tr, tsk, cpu); arch_spin_unlock(&tr->max_lock); } #endif /* CONFIG_TRACER_MAX_TRACE */ struct pipe_wait { struct trace_iterator *iter; int wait_index; }; static bool wait_pipe_cond(void *data) { struct pipe_wait *pwait = data; struct trace_iterator *iter = pwait->iter; if (atomic_read_acquire(&iter->wait_index) != pwait->wait_index) return true; return iter->closed; } static int wait_on_pipe(struct trace_iterator *iter, int full) { struct pipe_wait pwait; int ret; /* Iterators are static, they should be filled or empty */ if (trace_buffer_iter(iter, iter->cpu_file)) return 0; pwait.wait_index = atomic_read_acquire(&iter->wait_index); pwait.iter = iter; ret = ring_buffer_wait(iter->array_buffer->buffer, iter->cpu_file, full, wait_pipe_cond, &pwait); #ifdef CONFIG_TRACER_MAX_TRACE /* * Make sure this is still the snapshot buffer, as if a snapshot were * to happen, this would now be the main buffer. */ if (iter->snapshot) iter->array_buffer = &iter->tr->max_buffer; #endif return ret; } #ifdef CONFIG_FTRACE_STARTUP_TEST static bool selftests_can_run; struct trace_selftests { struct list_head list; struct tracer *type; }; static LIST_HEAD(postponed_selftests); static int save_selftest(struct tracer *type) { struct trace_selftests *selftest; selftest = kmalloc(sizeof(*selftest), GFP_KERNEL); if (!selftest) return -ENOMEM; selftest->type = type; list_add(&selftest->list, &postponed_selftests); return 0; } static int run_tracer_selftest(struct tracer *type) { struct trace_array *tr = &global_trace; struct tracer_flags *saved_flags = tr->current_trace_flags; struct tracer *saved_tracer = tr->current_trace; int ret; if (!type->selftest || tracing_selftest_disabled) return 0; /* * If a tracer registers early in boot up (before scheduling is * initialized and such), then do not run its selftests yet. * Instead, run it a little later in the boot process. */ if (!selftests_can_run) return save_selftest(type); if (!tracing_is_on()) { pr_warn("Selftest for tracer %s skipped due to tracing disabled\n", type->name); return 0; } /* * Run a selftest on this tracer. * Here we reset the trace buffer, and set the current * tracer to be this tracer. The tracer can then run some * internal tracing to verify that everything is in order. * If we fail, we do not register this tracer. */ tracing_reset_online_cpus(&tr->array_buffer); tr->current_trace = type; tr->current_trace_flags = type->flags ? : type->default_flags; #ifdef CONFIG_TRACER_MAX_TRACE if (type->use_max_tr) { /* If we expanded the buffers, make sure the max is expanded too */ if (tr->ring_buffer_expanded) ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size, RING_BUFFER_ALL_CPUS); tr->allocated_snapshot = true; } #endif /* the test is responsible for initializing and enabling */ pr_info("Testing tracer %s: ", type->name); ret = type->selftest(type, tr); /* the test is responsible for resetting too */ tr->current_trace = saved_tracer; tr->current_trace_flags = saved_flags; if (ret) { printk(KERN_CONT "FAILED!\n"); /* Add the warning after printing 'FAILED' */ WARN_ON(1); return -1; } /* Only reset on passing, to avoid touching corrupted buffers */ tracing_reset_online_cpus(&tr->array_buffer); #ifdef CONFIG_TRACER_MAX_TRACE if (type->use_max_tr) { tr->allocated_snapshot = false; /* Shrink the max buffer again */ if (tr->ring_buffer_expanded) ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS); } #endif printk(KERN_CONT "PASSED\n"); return 0; } static int do_run_tracer_selftest(struct tracer *type) { int ret; /* * Tests can take a long time, especially if they are run one after the * other, as does happen during bootup when all the tracers are * registered. This could cause the soft lockup watchdog to trigger. */ cond_resched(); tracing_selftest_running = true; ret = run_tracer_selftest(type); tracing_selftest_running = false; return ret; } static __init int init_trace_selftests(void) { struct trace_selftests *p, *n; struct tracer *t, **last; int ret; selftests_can_run = true; guard(mutex)(&trace_types_lock); if (list_empty(&postponed_selftests)) return 0; pr_info("Running postponed tracer tests:\n"); tracing_selftest_running = true; list_for_each_entry_safe(p, n, &postponed_selftests, list) { /* This loop can take minutes when sanitizers are enabled, so * lets make sure we allow RCU processing. */ cond_resched(); ret = run_tracer_selftest(p->type); /* If the test fails, then warn and remove from available_tracers */ if (ret < 0) { WARN(1, "tracer: %s failed selftest, disabling\n", p->type->name); last = &trace_types; for (t = trace_types; t; t = t->next) { if (t == p->type) { *last = t->next; break; } last = &t->next; } } list_del(&p->list); kfree(p); } tracing_selftest_running = false; return 0; } core_initcall(init_trace_selftests); #else static inline int do_run_tracer_selftest(struct tracer *type) { return 0; } #endif /* CONFIG_FTRACE_STARTUP_TEST */ static int add_tracer(struct trace_array *tr, struct tracer *t); static void __init apply_trace_boot_options(void); static void free_tracers(struct trace_array *tr) { struct tracers *t, *n; lockdep_assert_held(&trace_types_lock); list_for_each_entry_safe(t, n, &tr->tracers, list) { list_del(&t->list); kfree(t->flags); kfree(t); } } /** * register_tracer - register a tracer with the ftrace system. * @type: the plugin for the tracer * * Register a new plugin tracer. */ int __init register_tracer(struct tracer *type) { struct trace_array *tr; struct tracer *t; int ret = 0; if (!type->name) { pr_info("Tracer must have a name\n"); return -1; } if (strlen(type->name) >= MAX_TRACER_SIZE) { pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE); return -1; } if (security_locked_down(LOCKDOWN_TRACEFS)) { pr_warn("Can not register tracer %s due to lockdown\n", type->name); return -EPERM; } mutex_lock(&trace_types_lock); for (t = trace_types; t; t = t->next) { if (strcmp(type->name, t->name) == 0) { /* already found */ pr_info("Tracer %s already registered\n", type->name); ret = -1; goto out; } } /* store the tracer for __set_tracer_option */ if (type->flags) type->flags->trace = type; ret = do_run_tracer_selftest(type); if (ret < 0) goto out; list_for_each_entry(tr, &ftrace_trace_arrays, list) { ret = add_tracer(tr, type); if (ret < 0) { /* The tracer will still exist but without options */ pr_warn("Failed to create tracer options for %s\n", type->name); break; } } type->next = trace_types; trace_types = type; out: mutex_unlock(&trace_types_lock); if (ret || !default_bootup_tracer) return ret; if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE)) return 0; printk(KERN_INFO "Starting tracer '%s'\n", type->name); /* Do we want this tracer to start on bootup? */ WARN_ON(tracing_set_tracer(&global_trace, type->name) < 0); default_bootup_tracer = NULL; apply_trace_boot_options(); /* disable other selftests, since this will break it. */ disable_tracing_selftest("running a tracer"); return 0; } static void tracing_reset_cpu(struct array_buffer *buf, int cpu) { struct trace_buffer *buffer = buf->buffer; if (!buffer) return; ring_buffer_record_disable(buffer); /* Make sure all commits have finished */ synchronize_rcu(); ring_buffer_reset_cpu(buffer, cpu); ring_buffer_record_enable(buffer); } void tracing_reset_online_cpus(struct array_buffer *buf) { struct trace_buffer *buffer = buf->buffer; if (!buffer) return; ring_buffer_record_disable(buffer); /* Make sure all commits have finished */ synchronize_rcu(); buf->time_start = buffer_ftrace_now(buf, buf->cpu); ring_buffer_reset_online_cpus(buffer); ring_buffer_record_enable(buffer); } static void tracing_reset_all_cpus(struct array_buffer *buf) { struct trace_buffer *buffer = buf->buffer; if (!buffer) return; ring_buffer_record_disable(buffer); /* Make sure all commits have finished */ synchronize_rcu(); buf->time_start = buffer_ftrace_now(buf, buf->cpu); ring_buffer_reset(buffer); ring_buffer_record_enable(buffer); } /* Must have trace_types_lock held */ void tracing_reset_all_online_cpus_unlocked(void) { struct trace_array *tr; lockdep_assert_held(&trace_types_lock); list_for_each_entry(tr, &ftrace_trace_arrays, list) { if (!tr->clear_trace) continue; tr->clear_trace = false; tracing_reset_online_cpus(&tr->array_buffer); #ifdef CONFIG_TRACER_MAX_TRACE tracing_reset_online_cpus(&tr->max_buffer); #endif } } void tracing_reset_all_online_cpus(void) { guard(mutex)(&trace_types_lock); tracing_reset_all_online_cpus_unlocked(); } int is_tracing_stopped(void) { return global_trace.stop_count; } static void tracing_start_tr(struct trace_array *tr) { struct trace_buffer *buffer; if (tracing_disabled) return; guard(raw_spinlock_irqsave)(&tr->start_lock); if (--tr->stop_count) { if (WARN_ON_ONCE(tr->stop_count < 0)) { /* Someone screwed up their debugging */ tr->stop_count = 0; } return; } /* Prevent the buffers from switching */ arch_spin_lock(&tr->max_lock); buffer = tr->array_buffer.buffer; if (buffer) ring_buffer_record_enable(buffer); #ifdef CONFIG_TRACER_MAX_TRACE buffer = tr->max_buffer.buffer; if (buffer) ring_buffer_record_enable(buffer); #endif arch_spin_unlock(&tr->max_lock); } /** * tracing_start - quick start of the tracer * * If tracing is enabled but was stopped by tracing_stop, * this will start the tracer back up. */ void tracing_start(void) { return tracing_start_tr(&global_trace); } static void tracing_stop_tr(struct trace_array *tr) { struct trace_buffer *buffer; guard(raw_spinlock_irqsave)(&tr->start_lock); if (tr->stop_count++) return; /* Prevent the buffers from switching */ arch_spin_lock(&tr->max_lock); buffer = tr->array_buffer.buffer; if (buffer) ring_buffer_record_disable(buffer); #ifdef CONFIG_TRACER_MAX_TRACE buffer = tr->max_buffer.buffer; if (buffer) ring_buffer_record_disable(buffer); #endif arch_spin_unlock(&tr->max_lock); } /** * tracing_stop - quick stop of the tracer * * Light weight way to stop tracing. Use in conjunction with * tracing_start. */ void tracing_stop(void) { return tracing_stop_tr(&global_trace); } /* * Several functions return TRACE_TYPE_PARTIAL_LINE if the trace_seq * overflowed, and TRACE_TYPE_HANDLED otherwise. This helper function * simplifies those functions and keeps them in sync. */ enum print_line_t trace_handle_return(struct trace_seq *s) { return trace_seq_has_overflowed(s) ? TRACE_TYPE_PARTIAL_LINE : TRACE_TYPE_HANDLED; } EXPORT_SYMBOL_GPL(trace_handle_return); static unsigned short migration_disable_value(void) { #if defined(CONFIG_SMP) return current->migration_disabled; #else return 0; #endif } unsigned int tracing_gen_ctx_irq_test(unsigned int irqs_status) { unsigned int trace_flags = irqs_status; unsigned int pc; pc = preempt_count(); if (pc & NMI_MASK) trace_flags |= TRACE_FLAG_NMI; if (pc & HARDIRQ_MASK) trace_flags |= TRACE_FLAG_HARDIRQ; if (in_serving_softirq()) trace_flags |= TRACE_FLAG_SOFTIRQ; if (softirq_count() >> (SOFTIRQ_SHIFT + 1)) trace_flags |= TRACE_FLAG_BH_OFF; if (tif_need_resched()) trace_flags |= TRACE_FLAG_NEED_RESCHED; if (test_preempt_need_resched()) trace_flags |= TRACE_FLAG_PREEMPT_RESCHED; if (IS_ENABLED(CONFIG_ARCH_HAS_PREEMPT_LAZY) && tif_test_bit(TIF_NEED_RESCHED_LAZY)) trace_flags |= TRACE_FLAG_NEED_RESCHED_LAZY; return (trace_flags << 16) | (min_t(unsigned int, pc & 0xff, 0xf)) | (min_t(unsigned int, migration_disable_value(), 0xf)) << 4; } struct ring_buffer_event * trace_buffer_lock_reserve(struct trace_buffer *buffer, int type, unsigned long len, unsigned int trace_ctx) { return __trace_buffer_lock_reserve(buffer, type, len, trace_ctx); } DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event); DEFINE_PER_CPU(int, trace_buffered_event_cnt); static int trace_buffered_event_ref; /** * trace_buffered_event_enable - enable buffering events * * When events are being filtered, it is quicker to use a temporary * buffer to write the event data into if there's a likely chance * that it will not be committed. The discard of the ring buffer * is not as fast as committing, and is much slower than copying * a commit. * * When an event is to be filtered, allocate per cpu buffers to * write the event data into, and if the event is filtered and discarded * it is simply dropped, otherwise, the entire data is to be committed * in one shot. */ void trace_buffered_event_enable(void) { struct ring_buffer_event *event; struct page *page; int cpu; WARN_ON_ONCE(!mutex_is_locked(&event_mutex)); if (trace_buffered_event_ref++) return; for_each_tracing_cpu(cpu) { page = alloc_pages_node(cpu_to_node(cpu), GFP_KERNEL | __GFP_NORETRY, 0); /* This is just an optimization and can handle failures */ if (!page) { pr_err("Failed to allocate event buffer\n"); break; } event = page_address(page); memset(event, 0, sizeof(*event)); per_cpu(trace_buffered_event, cpu) = event; scoped_guard(preempt,) { if (cpu == smp_processor_id() && __this_cpu_read(trace_buffered_event) != per_cpu(trace_buffered_event, cpu)) WARN_ON_ONCE(1); } } } static void enable_trace_buffered_event(void *data) { this_cpu_dec(trace_buffered_event_cnt); } static void disable_trace_buffered_event(void *data) { this_cpu_inc(trace_buffered_event_cnt); } /** * trace_buffered_event_disable - disable buffering events * * When a filter is removed, it is faster to not use the buffered * events, and to commit directly into the ring buffer. Free up * the temp buffers when there are no more users. This requires * special synchronization with current events. */ void trace_buffered_event_disable(void) { int cpu; WARN_ON_ONCE(!mutex_is_locked(&event_mutex)); if (WARN_ON_ONCE(!trace_buffered_event_ref)) return; if (--trace_buffered_event_ref) return; /* For each CPU, set the buffer as used. */ on_each_cpu_mask(tracing_buffer_mask, disable_trace_buffered_event, NULL, true); /* Wait for all current users to finish */ synchronize_rcu(); for_each_tracing_cpu(cpu) { free_page((unsigned long)per_cpu(trace_buffered_event, cpu)); per_cpu(trace_buffered_event, cpu) = NULL; } /* * Wait for all CPUs that potentially started checking if they can use * their event buffer only after the previous synchronize_rcu() call and * they still read a valid pointer from trace_buffered_event. It must be * ensured they don't see cleared trace_buffered_event_cnt else they * could wrongly decide to use the pointed-to buffer which is now freed. */ synchronize_rcu(); /* For each CPU, relinquish the buffer */ on_each_cpu_mask(tracing_buffer_mask, enable_trace_buffered_event, NULL, true); } static struct trace_buffer *temp_buffer; struct ring_buffer_event * trace_event_buffer_lock_reserve(struct trace_buffer **current_rb, struct trace_event_file *trace_file, int type, unsigned long len, unsigned int trace_ctx) { struct ring_buffer_event *entry; struct trace_array *tr = trace_file->tr; int val; *current_rb = tr->array_buffer.buffer; if (!tr->no_filter_buffering_ref && (trace_file->flags & (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED))) { preempt_disable_notrace(); /* * Filtering is on, so try to use the per cpu buffer first. * This buffer will simulate a ring_buffer_event, * where the type_len is zero and the array[0] will * hold the full length. * (see include/linux/ring-buffer.h for details on * how the ring_buffer_event is structured). * * Using a temp buffer during filtering and copying it * on a matched filter is quicker than writing directly * into the ring buffer and then discarding it when * it doesn't match. That is because the discard * requires several atomic operations to get right. * Copying on match and doing nothing on a failed match * is still quicker than no copy on match, but having * to discard out of the ring buffer on a failed match. */ if ((entry = __this_cpu_read(trace_buffered_event))) { int max_len = PAGE_SIZE - struct_size(entry, array, 1); val = this_cpu_inc_return(trace_buffered_event_cnt); /* * Preemption is disabled, but interrupts and NMIs * can still come in now. If that happens after * the above increment, then it will have to go * back to the old method of allocating the event * on the ring buffer, and if the filter fails, it * will have to call ring_buffer_discard_commit() * to remove it. * * Need to also check the unlikely case that the * length is bigger than the temp buffer size. * If that happens, then the reserve is pretty much * guaranteed to fail, as the ring buffer currently * only allows events less than a page. But that may * change in the future, so let the ring buffer reserve * handle the failure in that case. */ if (val == 1 && likely(len <= max_len)) { trace_event_setup(entry, type, trace_ctx); entry->array[0] = len; /* Return with preemption disabled */ return entry; } this_cpu_dec(trace_buffered_event_cnt); } /* __trace_buffer_lock_reserve() disables preemption */ preempt_enable_notrace(); } entry = __trace_buffer_lock_reserve(*current_rb, type, len, trace_ctx); /* * If tracing is off, but we have triggers enabled * we still need to look at the event data. Use the temp_buffer * to store the trace event for the trigger to use. It's recursive * safe and will not be recorded anywhere. */ if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) { *current_rb = temp_buffer; entry = __trace_buffer_lock_reserve(*current_rb, type, len, trace_ctx); } return entry; } EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve); static DEFINE_RAW_SPINLOCK(tracepoint_iter_lock); static DEFINE_MUTEX(tracepoint_printk_mutex); static void output_printk(struct trace_event_buffer *fbuffer) { struct trace_event_call *event_call; struct trace_event_file *file; struct trace_event *event; unsigned long flags; struct trace_iterator *iter = tracepoint_print_iter; /* We should never get here if iter is NULL */ if (WARN_ON_ONCE(!iter)) return; event_call = fbuffer->trace_file->event_call; if (!event_call || !event_call->event.funcs || !event_call->event.funcs->trace) return; file = fbuffer->trace_file; if (test_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags) || (unlikely(file->flags & EVENT_FILE_FL_FILTERED) && !filter_match_preds(file->filter, fbuffer->entry))) return; event = &fbuffer->trace_file->event_call->event; raw_spin_lock_irqsave(&tracepoint_iter_lock, flags); trace_seq_init(&iter->seq); iter->ent = fbuffer->entry; event_call->event.funcs->trace(iter, 0, event); trace_seq_putc(&iter->seq, 0); printk("%s", iter->seq.buffer); raw_spin_unlock_irqrestore(&tracepoint_iter_lock, flags); } int tracepoint_printk_sysctl(const struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos) { int save_tracepoint_printk; int ret; guard(mutex)(&tracepoint_printk_mutex); save_tracepoint_printk = tracepoint_printk; ret = proc_dointvec(table, write, buffer, lenp, ppos); /* * This will force exiting early, as tracepoint_printk * is always zero when tracepoint_printk_iter is not allocated */ if (!tracepoint_print_iter) tracepoint_printk = 0; if (save_tracepoint_printk == tracepoint_printk) return ret; if (tracepoint_printk) static_key_enable(&tracepoint_printk_key.key); else static_key_disable(&tracepoint_printk_key.key); return ret; } void trace_event_buffer_commit(struct trace_event_buffer *fbuffer) { enum event_trigger_type tt = ETT_NONE; struct trace_event_file *file = fbuffer->trace_file; if (__event_trigger_test_discard(file, fbuffer->buffer, fbuffer->event, fbuffer->entry, &tt)) goto discard; if (static_key_false(&tracepoint_printk_key.key)) output_printk(fbuffer); if (static_branch_unlikely(&trace_event_exports_enabled)) ftrace_exports(fbuffer->event, TRACE_EXPORT_EVENT); trace_buffer_unlock_commit_regs(file->tr, fbuffer->buffer, fbuffer->event, fbuffer->trace_ctx, fbuffer->regs); discard: if (tt) event_triggers_post_call(file, tt); } EXPORT_SYMBOL_GPL(trace_event_buffer_commit); /* * Skip 3: * * trace_buffer_unlock_commit_regs() * trace_event_buffer_commit() * trace_event_raw_event_xxx() */ # define STACK_SKIP 3 void trace_buffer_unlock_commit_regs(struct trace_array *tr, struct trace_buffer *buffer, struct ring_buffer_event *event, unsigned int trace_ctx, struct pt_regs *regs) { __buffer_unlock_commit(buffer, event); /* * If regs is not set, then skip the necessary functions. * Note, we can still get here via blktrace, wakeup tracer * and mmiotrace, but that's ok if they lose a function or * two. They are not that meaningful. */ ftrace_trace_stack(tr, buffer, trace_ctx, regs ? 0 : STACK_SKIP, regs); ftrace_trace_userstack(tr, buffer, trace_ctx); } /* * Similar to trace_buffer_unlock_commit_regs() but do not dump stack. */ void trace_buffer_unlock_commit_nostack(struct trace_buffer *buffer, struct ring_buffer_event *event) { __buffer_unlock_commit(buffer, event); } void trace_function(struct trace_array *tr, unsigned long ip, unsigned long parent_ip, unsigned int trace_ctx, struct ftrace_regs *fregs) { struct trace_buffer *buffer = tr->array_buffer.buffer; struct ring_buffer_event *event; struct ftrace_entry *entry; int size = sizeof(*entry); size += FTRACE_REGS_MAX_ARGS * !!fregs * sizeof(long); event = __trace_buffer_lock_reserve(buffer, TRACE_FN, size, trace_ctx); if (!event) return; entry = ring_buffer_event_data(event); entry->ip = ip; entry->parent_ip = parent_ip; #ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API if (fregs) { for (int i = 0; i < FTRACE_REGS_MAX_ARGS; i++) entry->args[i] = ftrace_regs_get_argument(fregs, i); } #endif if (static_branch_unlikely(&trace_function_exports_enabled)) ftrace_exports(event, TRACE_EXPORT_FUNCTION); __buffer_unlock_commit(buffer, event); } #ifdef CONFIG_STACKTRACE /* Allow 4 levels of nesting: normal, softirq, irq, NMI */ #define FTRACE_KSTACK_NESTING 4 #define FTRACE_KSTACK_ENTRIES (SZ_4K / FTRACE_KSTACK_NESTING) struct ftrace_stack { unsigned long calls[FTRACE_KSTACK_ENTRIES]; }; struct ftrace_stacks { struct ftrace_stack stacks[FTRACE_KSTACK_NESTING]; }; static DEFINE_PER_CPU(struct ftrace_stacks, ftrace_stacks); static DEFINE_PER_CPU(int, ftrace_stack_reserve); static void __ftrace_trace_stack(struct trace_array *tr, struct trace_buffer *buffer, unsigned int trace_ctx, int skip, struct pt_regs *regs) { struct ring_buffer_event *event; unsigned int size, nr_entries; struct ftrace_stack *fstack; struct stack_entry *entry; int stackidx; int bit; bit = trace_test_and_set_recursion(_THIS_IP_, _RET_IP_, TRACE_EVENT_START); if (bit < 0) return; /* * Add one, for this function and the call to save_stack_trace() * If regs is set, then these functions will not be in the way. */ #ifndef CONFIG_UNWINDER_ORC if (!regs) skip++; #endif guard(preempt_notrace)(); stackidx = __this_cpu_inc_return(ftrace_stack_reserve) - 1; /* This should never happen. If it does, yell once and skip */ if (WARN_ON_ONCE(stackidx >= FTRACE_KSTACK_NESTING)) goto out; /* * The above __this_cpu_inc_return() is 'atomic' cpu local. An * interrupt will either see the value pre increment or post * increment. If the interrupt happens pre increment it will have * restored the counter when it returns. We just need a barrier to * keep gcc from moving things around. */ barrier(); fstack = this_cpu_ptr(ftrace_stacks.stacks) + stackidx; size = ARRAY_SIZE(fstack->calls); if (regs) { nr_entries = stack_trace_save_regs(regs, fstack->calls, size, skip); } else { nr_entries = stack_trace_save(fstack->calls, size, skip); } #ifdef CONFIG_DYNAMIC_FTRACE /* Mark entry of stack trace as trampoline code */ if (tr->ops && tr->ops->trampoline) { unsigned long tramp_start = tr->ops->trampoline; unsigned long tramp_end = tramp_start + tr->ops->trampoline_size; unsigned long *calls = fstack->calls; for (int i = 0; i < nr_entries; i++) { if (calls[i] >= tramp_start && calls[i] < tramp_end) calls[i] = FTRACE_TRAMPOLINE_MARKER; } } #endif event = __trace_buffer_lock_reserve(buffer, TRACE_STACK, struct_size(entry, caller, nr_entries), trace_ctx); if (!event) goto out; entry = ring_buffer_event_data(event); entry->size = nr_entries; memcpy(&entry->caller, fstack->calls, flex_array_size(entry, caller, nr_entries)); __buffer_unlock_commit(buffer, event); out: /* Again, don't let gcc optimize things here */ barrier(); __this_cpu_dec(ftrace_stack_reserve); trace_clear_recursion(bit); } static inline void ftrace_trace_stack(struct trace_array *tr, struct trace_buffer *buffer, unsigned int trace_ctx, int skip, struct pt_regs *regs) { if (!(tr->trace_flags & TRACE_ITER(STACKTRACE))) return; __ftrace_trace_stack(tr, buffer, trace_ctx, skip, regs); } void __trace_stack(struct trace_array *tr, unsigned int trace_ctx, int skip) { struct trace_buffer *buffer = tr->array_buffer.buffer; if (rcu_is_watching()) { __ftrace_trace_stack(tr, buffer, trace_ctx, skip, NULL); return; } if (WARN_ON_ONCE(IS_ENABLED(CONFIG_GENERIC_ENTRY))) return; /* * When an NMI triggers, RCU is enabled via ct_nmi_enter(), * but if the above rcu_is_watching() failed, then the NMI * triggered someplace critical, and ct_irq_enter() should * not be called from NMI. */ if (unlikely(in_nmi())) return; ct_irq_enter_irqson(); __ftrace_trace_stack(tr, buffer, trace_ctx, skip, NULL); ct_irq_exit_irqson(); } /** * trace_dump_stack - record a stack back trace in the trace buffer * @skip: Number of functions to skip (helper handlers) */ void trace_dump_stack(int skip) { if (tracing_disabled || tracing_selftest_running) return; #ifndef CONFIG_UNWINDER_ORC /* Skip 1 to skip this function. */ skip++; #endif __ftrace_trace_stack(printk_trace, printk_trace->array_buffer.buffer, tracing_gen_ctx(), skip, NULL); } EXPORT_SYMBOL_GPL(trace_dump_stack); #ifdef CONFIG_USER_STACKTRACE_SUPPORT static DEFINE_PER_CPU(int, user_stack_count); static void ftrace_trace_userstack(struct trace_array *tr, struct trace_buffer *buffer, unsigned int trace_ctx) { struct ring_buffer_event *event; struct userstack_entry *entry; if (!(tr->trace_flags & TRACE_ITER(USERSTACKTRACE))) return; /* * NMIs can not handle page faults, even with fix ups. * The save user stack can (and often does) fault. */ if (unlikely(in_nmi())) return; /* * prevent recursion, since the user stack tracing may * trigger other kernel events. */ guard(preempt)(); if (__this_cpu_read(user_stack_count)) return; __this_cpu_inc(user_stack_count); event = __trace_buffer_lock_reserve(buffer, TRACE_USER_STACK, sizeof(*entry), trace_ctx); if (!event) goto out_drop_count; entry = ring_buffer_event_data(event); entry->tgid = current->tgid; memset(&entry->caller, 0, sizeof(entry->caller)); stack_trace_save_user(entry->caller, FTRACE_STACK_ENTRIES); __buffer_unlock_commit(buffer, event); out_drop_count: __this_cpu_dec(user_stack_count); } #else /* CONFIG_USER_STACKTRACE_SUPPORT */ static void ftrace_trace_userstack(struct trace_array *tr, struct trace_buffer *buffer, unsigned int trace_ctx) { } #endif /* !CONFIG_USER_STACKTRACE_SUPPORT */ #endif /* CONFIG_STACKTRACE */ static inline void func_repeats_set_delta_ts(struct func_repeats_entry *entry, unsigned long long delta) { entry->bottom_delta_ts = delta & U32_MAX; entry->top_delta_ts = (delta >> 32); } void trace_last_func_repeats(struct trace_array *tr, struct trace_func_repeats *last_info, unsigned int trace_ctx) { struct trace_buffer *buffer = tr->array_buffer.buffer; struct func_repeats_entry *entry; struct ring_buffer_event *event; u64 delta; event = __trace_buffer_lock_reserve(buffer, TRACE_FUNC_REPEATS, sizeof(*entry), trace_ctx); if (!event) return; delta = ring_buffer_event_time_stamp(buffer, event) - last_info->ts_last_call; entry = ring_buffer_event_data(event); entry->ip = last_info->ip; entry->parent_ip = last_info->parent_ip; entry->count = last_info->count; func_repeats_set_delta_ts(entry, delta); __buffer_unlock_commit(buffer, event); } /* created for use with alloc_percpu */ struct trace_buffer_struct { int nesting; char buffer[4][TRACE_BUF_SIZE]; }; static struct trace_buffer_struct __percpu *trace_percpu_buffer; /* * This allows for lockless recording. If we're nested too deeply, then * this returns NULL. */ static char *get_trace_buf(void) { struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer); if (!trace_percpu_buffer || buffer->nesting >= 4) return NULL; buffer->nesting++; /* Interrupts must see nesting incremented before we use the buffer */ barrier(); return &buffer->buffer[buffer->nesting - 1][0]; } static void put_trace_buf(void) { /* Don't let the decrement of nesting leak before this */ barrier(); this_cpu_dec(trace_percpu_buffer->nesting); } static int alloc_percpu_trace_buffer(void) { struct trace_buffer_struct __percpu *buffers; if (trace_percpu_buffer) return 0; buffers = alloc_percpu(struct trace_buffer_struct); if (MEM_FAIL(!buffers, "Could not allocate percpu trace_printk buffer")) return -ENOMEM; trace_percpu_buffer = buffers; return 0; } static int buffers_allocated; void trace_printk_init_buffers(void) { if (buffers_allocated) return; if (alloc_percpu_trace_buffer()) return; /* trace_printk() is for debug use only. Don't use it in production. */ pr_warn("\n"); pr_warn("**********************************************************\n"); pr_warn("** NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE **\n"); pr_warn("** **\n"); pr_warn("** trace_printk() being used. Allocating extra memory. **\n"); pr_warn("** **\n"); pr_warn("** This means that this is a DEBUG kernel and it is **\n"); pr_warn("** unsafe for production use. **\n"); pr_warn("** **\n"); pr_warn("** If you see this message and you are not debugging **\n"); pr_warn("** the kernel, report this immediately to your vendor! **\n"); pr_warn("** **\n"); pr_warn("** NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE **\n"); pr_warn("**********************************************************\n"); /* Expand the buffers to set size */ tracing_update_buffers(&global_trace); buffers_allocated = 1; /* * trace_printk_init_buffers() can be called by modules. * If that happens, then we need to start cmdline recording * directly here. If the global_trace.buffer is already * allocated here, then this was called by module code. */ if (global_trace.array_buffer.buffer) tracing_start_cmdline_record(); } EXPORT_SYMBOL_GPL(trace_printk_init_buffers); void trace_printk_start_comm(void) { /* Start tracing comms if trace printk is set */ if (!buffers_allocated) return; tracing_start_cmdline_record(); } static void trace_printk_start_stop_comm(int enabled) { if (!buffers_allocated) return; if (enabled) tracing_start_cmdline_record(); else tracing_stop_cmdline_record(); } /** * trace_vbprintk - write binary msg to tracing buffer * @ip: The address of the caller * @fmt: The string format to write to the buffer * @args: Arguments for @fmt */ int trace_vbprintk(unsigned long ip, const char *fmt, va_list args) { struct ring_buffer_event *event; struct trace_buffer *buffer; struct trace_array *tr = READ_ONCE(printk_trace); struct bprint_entry *entry; unsigned int trace_ctx; char *tbuffer; int len = 0, size; if (!printk_binsafe(tr)) return trace_vprintk(ip, fmt, args); if (unlikely(tracing_selftest_running || tracing_disabled)) return 0; /* Don't pollute graph traces with trace_vprintk internals */ pause_graph_tracing(); trace_ctx = tracing_gen_ctx(); guard(preempt_notrace)(); tbuffer = get_trace_buf(); if (!tbuffer) { len = 0; goto out_nobuffer; } len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args); if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0) goto out_put; size = sizeof(*entry) + sizeof(u32) * len; buffer = tr->array_buffer.buffer; scoped_guard(ring_buffer_nest, buffer) { event = __trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size, trace_ctx); if (!event) goto out_put; entry = ring_buffer_event_data(event); entry->ip = ip; entry->fmt = fmt; memcpy(entry->buf, tbuffer, sizeof(u32) * len); __buffer_unlock_commit(buffer, event); ftrace_trace_stack(tr, buffer, trace_ctx, 6, NULL); } out_put: put_trace_buf(); out_nobuffer: unpause_graph_tracing(); return len; } EXPORT_SYMBOL_GPL(trace_vbprintk); static __printf(3, 0) int __trace_array_vprintk(struct trace_buffer *buffer, unsigned long ip, const char *fmt, va_list args) { struct ring_buffer_event *event; int len = 0, size; struct print_entry *entry; unsigned int trace_ctx; char *tbuffer; if (tracing_disabled) return 0; /* Don't pollute graph traces with trace_vprintk internals */ pause_graph_tracing(); trace_ctx = tracing_gen_ctx(); guard(preempt_notrace)(); tbuffer = get_trace_buf(); if (!tbuffer) { len = 0; goto out_nobuffer; } len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args); size = sizeof(*entry) + len + 1; scoped_guard(ring_buffer_nest, buffer) { event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size, trace_ctx); if (!event) goto out; entry = ring_buffer_event_data(event); entry->ip = ip; memcpy(&entry->buf, tbuffer, len + 1); __buffer_unlock_commit(buffer, event); ftrace_trace_stack(printk_trace, buffer, trace_ctx, 6, NULL); } out: put_trace_buf(); out_nobuffer: unpause_graph_tracing(); return len; } int trace_array_vprintk(struct trace_array *tr, unsigned long ip, const char *fmt, va_list args) { if (tracing_selftest_running && tr == &global_trace) return 0; return __trace_array_vprintk(tr->array_buffer.buffer, ip, fmt, args); } /** * trace_array_printk - Print a message to a specific instance * @tr: The instance trace_array descriptor * @ip: The instruction pointer that this is called from. * @fmt: The format to print (printf format) * * If a subsystem sets up its own instance, they have the right to * printk strings into their tracing instance buffer using this * function. Note, this function will not write into the top level * buffer (use trace_printk() for that), as writing into the top level * buffer should only have events that can be individually disabled. * trace_printk() is only used for debugging a kernel, and should not * be ever incorporated in normal use. * * trace_array_printk() can be used, as it will not add noise to the * top level tracing buffer. * * Note, trace_array_init_printk() must be called on @tr before this * can be used. */ int trace_array_printk(struct trace_array *tr, unsigned long ip, const char *fmt, ...) { int ret; va_list ap; if (!tr) return -ENOENT; /* This is only allowed for created instances */ if (tr == &global_trace) return 0; if (!(tr->trace_flags & TRACE_ITER(PRINTK))) return 0; va_start(ap, fmt); ret = trace_array_vprintk(tr, ip, fmt, ap); va_end(ap); return ret; } EXPORT_SYMBOL_GPL(trace_array_printk); /** * trace_array_init_printk - Initialize buffers for trace_array_printk() * @tr: The trace array to initialize the buffers for * * As trace_array_printk() only writes into instances, they are OK to * have in the kernel (unlike trace_printk()). This needs to be called * before trace_array_printk() can be used on a trace_array. */ int trace_array_init_printk(struct trace_array *tr) { if (!tr) return -ENOENT; /* This is only allowed for created instances */ if (tr == &global_trace) return -EINVAL; return alloc_percpu_trace_buffer(); } EXPORT_SYMBOL_GPL(trace_array_init_printk); int trace_array_printk_buf(struct trace_buffer *buffer, unsigned long ip, const char *fmt, ...) { int ret; va_list ap; if (!(printk_trace->trace_flags & TRACE_ITER(PRINTK))) return 0; va_start(ap, fmt); ret = __trace_array_vprintk(buffer, ip, fmt, ap); va_end(ap); return ret; } int trace_vprintk(unsigned long ip, const char *fmt, va_list args) { return trace_array_vprintk(printk_trace, ip, fmt, args); } EXPORT_SYMBOL_GPL(trace_vprintk); static void trace_iterator_increment(struct trace_iterator *iter) { struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu); iter->idx++; if (buf_iter) ring_buffer_iter_advance(buf_iter); } static struct trace_entry * peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts, unsigned long *lost_events) { struct ring_buffer_event *event; struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu); if (buf_iter) { event = ring_buffer_iter_peek(buf_iter, ts); if (lost_events) *lost_events = ring_buffer_iter_dropped(buf_iter) ? (unsigned long)-1 : 0; } else { event = ring_buffer_peek(iter->array_buffer->buffer, cpu, ts, lost_events); } if (event) { iter->ent_size = ring_buffer_event_length(event); return ring_buffer_event_data(event); } iter->ent_size = 0; return NULL; } static struct trace_entry * __find_next_entry(struct trace_iterator *iter, int *ent_cpu, unsigned long *missing_events, u64 *ent_ts) { struct trace_buffer *buffer = iter->array_buffer->buffer; struct trace_entry *ent, *next = NULL; unsigned long lost_events = 0, next_lost = 0; int cpu_file = iter->cpu_file; u64 next_ts = 0, ts; int next_cpu = -1; int next_size = 0; int cpu; /* * If we are in a per_cpu trace file, don't bother by iterating over * all cpu and peek directly. */ if (cpu_file > RING_BUFFER_ALL_CPUS) { if (ring_buffer_empty_cpu(buffer, cpu_file)) return NULL; ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events); if (ent_cpu) *ent_cpu = cpu_file; return ent; } for_each_tracing_cpu(cpu) { if (ring_buffer_empty_cpu(buffer, cpu)) continue; ent = peek_next_entry(iter, cpu, &ts, &lost_events); /* * Pick the entry with the smallest timestamp: */ if (ent && (!next || ts < next_ts)) { next = ent; next_cpu = cpu; next_ts = ts; next_lost = lost_events; next_size = iter->ent_size; } } iter->ent_size = next_size; if (ent_cpu) *ent_cpu = next_cpu; if (ent_ts) *ent_ts = next_ts; if (missing_events) *missing_events = next_lost; return next; } #define STATIC_FMT_BUF_SIZE 128 static char static_fmt_buf[STATIC_FMT_BUF_SIZE]; char *trace_iter_expand_format(struct trace_iterator *iter) { char *tmp; /* * iter->tr is NULL when used with tp_printk, which makes * this get called where it is not safe to call krealloc(). */ if (!iter->tr || iter->fmt == static_fmt_buf) return NULL; tmp = krealloc(iter->fmt, iter->fmt_size + STATIC_FMT_BUF_SIZE, GFP_KERNEL); if (tmp) { iter->fmt_size += STATIC_FMT_BUF_SIZE; iter->fmt = tmp; } return tmp; } /* Returns true if the string is safe to dereference from an event */ static bool trace_safe_str(struct trace_iterator *iter, const char *str) { unsigned long addr = (unsigned long)str; struct trace_event *trace_event; struct trace_event_call *event; /* OK if part of the event data */ if ((addr >= (unsigned long)iter->ent) && (addr < (unsigned long)iter->ent + iter->ent_size)) return true; /* OK if part of the temp seq buffer */ if ((addr >= (unsigned long)iter->tmp_seq.buffer) && (addr < (unsigned long)iter->tmp_seq.buffer + TRACE_SEQ_BUFFER_SIZE)) return true; /* Core rodata can not be freed */ if (is_kernel_rodata(addr)) return true; if (trace_is_tracepoint_string(str)) return true; /* * Now this could be a module event, referencing core module * data, which is OK. */ if (!iter->ent) return false; trace_event = ftrace_find_event(iter->ent->type); if (!trace_event) return false; event = container_of(trace_event, struct trace_event_call, event); if ((event->flags & TRACE_EVENT_FL_DYNAMIC) || !event->module) return false; /* Would rather have rodata, but this will suffice */ if (within_module_core(addr, event->module)) return true; return false; } /** * ignore_event - Check dereferenced fields while writing to the seq buffer * @iter: The iterator that holds the seq buffer and the event being printed * * At boot up, test_event_printk() will flag any event that dereferences * a string with "%s" that does exist in the ring buffer. It may still * be valid, as the string may point to a static string in the kernel * rodata that never gets freed. But if the string pointer is pointing * to something that was allocated, there's a chance that it can be freed * by the time the user reads the trace. This would cause a bad memory * access by the kernel and possibly crash the system. * * This function will check if the event has any fields flagged as needing * to be checked at runtime and perform those checks. * * If it is found that a field is unsafe, it will write into the @iter->seq * a message stating what was found to be unsafe. * * @return: true if the event is unsafe and should be ignored, * false otherwise. */ bool ignore_event(struct trace_iterator *iter) { struct ftrace_event_field *field; struct trace_event *trace_event; struct trace_event_call *event; struct list_head *head; struct trace_seq *seq; const void *ptr; trace_event = ftrace_find_event(iter->ent->type); seq = &iter->seq; if (!trace_event) { trace_seq_printf(seq, "EVENT ID %d NOT FOUND?\n", iter->ent->type); return true; } event = container_of(trace_event, struct trace_event_call, event); if (!(event->flags & TRACE_EVENT_FL_TEST_STR)) return false; head = trace_get_fields(event); if (!head) { trace_seq_printf(seq, "FIELDS FOR EVENT '%s' NOT FOUND?\n", trace_event_name(event)); return true; } /* Offsets are from the iter->ent that points to the raw event */ ptr = iter->ent; list_for_each_entry(field, head, link) { const char *str; bool good; if (!field->needs_test) continue; str = *(const char **)(ptr + field->offset); good = trace_safe_str(iter, str); /* * If you hit this warning, it is likely that the * trace event in question used %s on a string that * was saved at the time of the event, but may not be * around when the trace is read. Use __string(), * __assign_str() and __get_str() helpers in the TRACE_EVENT() * instead. See samples/trace_events/trace-events-sample.h * for reference. */ if (WARN_ONCE(!good, "event '%s' has unsafe pointer field '%s'", trace_event_name(event), field->name)) { trace_seq_printf(seq, "EVENT %s: HAS UNSAFE POINTER FIELD '%s'\n", trace_event_name(event), field->name); return true; } } return false; } const char *trace_event_format(struct trace_iterator *iter, const char *fmt) { const char *p, *new_fmt; char *q; if (WARN_ON_ONCE(!fmt)) return fmt; if (!iter->tr || iter->tr->trace_flags & TRACE_ITER(HASH_PTR)) return fmt; p = fmt; new_fmt = q = iter->fmt; while (*p) { if (unlikely(q - new_fmt + 3 > iter->fmt_size)) { if (!trace_iter_expand_format(iter)) return fmt; q += iter->fmt - new_fmt; new_fmt = iter->fmt; } *q++ = *p++; /* Replace %p with %px */ if (p[-1] == '%') { if (p[0] == '%') { *q++ = *p++; } else if (p[0] == 'p' && !isalnum(p[1])) { *q++ = *p++; *q++ = 'x'; } } } *q = '\0'; return new_fmt; } #define STATIC_TEMP_BUF_SIZE 128 static char static_temp_buf[STATIC_TEMP_BUF_SIZE] __aligned(4); /* Find the next real entry, without updating the iterator itself */ struct trace_entry *trace_find_next_entry(struct trace_iterator *iter, int *ent_cpu, u64 *ent_ts) { /* __find_next_entry will reset ent_size */ int ent_size = iter->ent_size; struct trace_entry *entry; /* * If called from ftrace_dump(), then the iter->temp buffer * will be the static_temp_buf and not created from kmalloc. * If the entry size is greater than the buffer, we can * not save it. Just return NULL in that case. This is only * used to add markers when two consecutive events' time * stamps have a large delta. See trace_print_lat_context() */ if (iter->temp == static_temp_buf && STATIC_TEMP_BUF_SIZE < ent_size) return NULL; /* * The __find_next_entry() may call peek_next_entry(), which may * call ring_buffer_peek() that may make the contents of iter->ent * undefined. Need to copy iter->ent now. */ if (iter->ent && iter->ent != iter->temp) { if ((!iter->temp || iter->temp_size < iter->ent_size) && !WARN_ON_ONCE(iter->temp == static_temp_buf)) { void *temp; temp = kmalloc(iter->ent_size, GFP_KERNEL); if (!temp) return NULL; kfree(iter->temp); iter->temp = temp; iter->temp_size = iter->ent_size; } memcpy(iter->temp, iter->ent, iter->ent_size); iter->ent = iter->temp; } entry = __find_next_entry(iter, ent_cpu, NULL, ent_ts); /* Put back the original ent_size */ iter->ent_size = ent_size; return entry; } /* Find the next real entry, and increment the iterator to the next entry */ void *trace_find_next_entry_inc(struct trace_iterator *iter) { iter->ent = __find_next_entry(iter, &iter->cpu, &iter->lost_events, &iter->ts); if (iter->ent) trace_iterator_increment(iter); return iter->ent ? iter : NULL; } static void trace_consume(struct trace_iterator *iter) { ring_buffer_consume(iter->array_buffer->buffer, iter->cpu, &iter->ts, &iter->lost_events); } static void *s_next(struct seq_file *m, void *v, loff_t *pos) { struct trace_iterator *iter = m->private; int i = (int)*pos; void *ent; WARN_ON_ONCE(iter->leftover); (*pos)++; /* can't go backwards */ if (iter->idx > i) return NULL; if (iter->idx < 0) ent = trace_find_next_entry_inc(iter); else ent = iter; while (ent && iter->idx < i) ent = trace_find_next_entry_inc(iter); iter->pos = *pos; return ent; } void tracing_iter_reset(struct trace_iterator *iter, int cpu) { struct ring_buffer_iter *buf_iter; unsigned long entries = 0; u64 ts; per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = 0; buf_iter = trace_buffer_iter(iter, cpu); if (!buf_iter) return; ring_buffer_iter_reset(buf_iter); /* * We could have the case with the max latency tracers * that a reset never took place on a cpu. This is evident * by the timestamp being before the start of the buffer. */ while (ring_buffer_iter_peek(buf_iter, &ts)) { if (ts >= iter->array_buffer->time_start) break; entries++; ring_buffer_iter_advance(buf_iter); /* This could be a big loop */ cond_resched(); } per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = entries; } /* * The current tracer is copied to avoid a global locking * all around. */ static void *s_start(struct seq_file *m, loff_t *pos) { struct trace_iterator *iter = m->private; struct trace_array *tr = iter->tr; int cpu_file = iter->cpu_file; void *p = NULL; loff_t l = 0; int cpu; mutex_lock(&trace_types_lock); if (unlikely(tr->current_trace != iter->trace)) { /* Close iter->trace before switching to the new current tracer */ if (iter->trace->close) iter->trace->close(iter); iter->trace = tr->current_trace; /* Reopen the new current tracer */ if (iter->trace->open) iter->trace->open(iter); } mutex_unlock(&trace_types_lock); #ifdef CONFIG_TRACER_MAX_TRACE if (iter->snapshot && iter->trace->use_max_tr) return ERR_PTR(-EBUSY); #endif if (*pos != iter->pos) { iter->ent = NULL; iter->cpu = 0; iter->idx = -1; if (cpu_file == RING_BUFFER_ALL_CPUS) { for_each_tracing_cpu(cpu) tracing_iter_reset(iter, cpu); } else tracing_iter_reset(iter, cpu_file); iter->leftover = 0; for (p = iter; p && l < *pos; p = s_next(m, p, &l)) ; } else { /* * If we overflowed the seq_file before, then we want * to just reuse the trace_seq buffer again. */ if (iter->leftover) p = iter; else { l = *pos - 1; p = s_next(m, p, &l); } } trace_event_read_lock(); trace_access_lock(cpu_file); return p; } static void s_stop(struct seq_file *m, void *p) { struct trace_iterator *iter = m->private; #ifdef CONFIG_TRACER_MAX_TRACE if (iter->snapshot && iter->trace->use_max_tr) return; #endif trace_access_unlock(iter->cpu_file); trace_event_read_unlock(); } static void get_total_entries_cpu(struct array_buffer *buf, unsigned long *total, unsigned long *entries, int cpu) { unsigned long count; count = ring_buffer_entries_cpu(buf->buffer, cpu); /* * If this buffer has skipped entries, then we hold all * entries for the trace and we need to ignore the * ones before the time stamp. */ if (per_cpu_ptr(buf->data, cpu)->skipped_entries) { count -= per_cpu_ptr(buf->data, cpu)->skipped_entries; /* total is the same as the entries */ *total = count; } else *total = count + ring_buffer_overrun_cpu(buf->buffer, cpu); *entries = count; } static void get_total_entries(struct array_buffer *buf, unsigned long *total, unsigned long *entries) { unsigned long t, e; int cpu; *total = 0; *entries = 0; for_each_tracing_cpu(cpu) { get_total_entries_cpu(buf, &t, &e, cpu); *total += t; *entries += e; } } unsigned long trace_total_entries_cpu(struct trace_array *tr, int cpu) { unsigned long total, entries; if (!tr) tr = &global_trace; get_total_entries_cpu(&tr->array_buffer, &total, &entries, cpu); return entries; } unsigned long trace_total_entries(struct trace_array *tr) { unsigned long total, entries; if (!tr) tr = &global_trace; get_total_entries(&tr->array_buffer, &total, &entries); return entries; } static void print_lat_help_header(struct seq_file *m) { seq_puts(m, "# _------=> CPU# \n" "# / _-----=> irqs-off/BH-disabled\n" "# | / _----=> need-resched \n" "# || / _---=> hardirq/softirq \n" "# ||| / _--=> preempt-depth \n" "# |||| / _-=> migrate-disable \n" "# ||||| / delay \n" "# cmd pid |||||| time | caller \n" "# \\ / |||||| \\ | / \n"); } static void print_event_info(struct array_buffer *buf, struct seq_file *m) { unsigned long total; unsigned long entries; get_total_entries(buf, &total, &entries); seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu #P:%d\n", entries, total, num_online_cpus()); seq_puts(m, "#\n"); } static void print_func_help_header(struct array_buffer *buf, struct seq_file *m, unsigned int flags) { bool tgid = flags & TRACE_ITER(RECORD_TGID); print_event_info(buf, m); seq_printf(m, "# TASK-PID %s CPU# TIMESTAMP FUNCTION\n", tgid ? " TGID " : ""); seq_printf(m, "# | | %s | | |\n", tgid ? " | " : ""); } static void print_func_help_header_irq(struct array_buffer *buf, struct seq_file *m, unsigned int flags) { bool tgid = flags & TRACE_ITER(RECORD_TGID); static const char space[] = " "; int prec = tgid ? 12 : 2; print_event_info(buf, m); seq_printf(m, "# %.*s _-----=> irqs-off/BH-disabled\n", prec, space); seq_printf(m, "# %.*s / _----=> need-resched\n", prec, space); seq_printf(m, "# %.*s| / _---=> hardirq/softirq\n", prec, space); seq_printf(m, "# %.*s|| / _--=> preempt-depth\n", prec, space); seq_printf(m, "# %.*s||| / _-=> migrate-disable\n", prec, space); seq_printf(m, "# %.*s|||| / delay\n", prec, space); seq_printf(m, "# TASK-PID %.*s CPU# ||||| TIMESTAMP FUNCTION\n", prec, " TGID "); seq_printf(m, "# | | %.*s | ||||| | |\n", prec, " | "); } void print_trace_header(struct seq_file *m, struct trace_iterator *iter) { unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK); struct array_buffer *buf = iter->array_buffer; struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu); struct tracer *type = iter->trace; unsigned long entries; unsigned long total; const char *name = type->name; get_total_entries(buf, &total, &entries); seq_printf(m, "# %s latency trace v1.1.5 on %s\n", name, init_utsname()->release); seq_puts(m, "# -----------------------------------" "---------------------------------\n"); seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |" " (M:%s VP:%d, KP:%d, SP:%d HP:%d", nsecs_to_usecs(data->saved_latency), entries, total, buf->cpu, preempt_model_str(), /* These are reserved for later use */ 0, 0, 0, 0); #ifdef CONFIG_SMP seq_printf(m, " #P:%d)\n", num_online_cpus()); #else seq_puts(m, ")\n"); #endif seq_puts(m, "# -----------------\n"); seq_printf(m, "# | task: %.16s-%d " "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n", data->comm, data->pid, from_kuid_munged(seq_user_ns(m), data->uid), data->nice, data->policy, data->rt_priority); seq_puts(m, "# -----------------\n"); if (data->critical_start) { seq_puts(m, "# => started at: "); seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags); trace_print_seq(m, &iter->seq); seq_puts(m, "\n# => ended at: "); seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags); trace_print_seq(m, &iter->seq); seq_puts(m, "\n#\n"); } seq_puts(m, "#\n"); } static void test_cpu_buff_start(struct trace_iterator *iter) { struct trace_seq *s = &iter->seq; struct trace_array *tr = iter->tr; if (!(tr->trace_flags & TRACE_ITER(ANNOTATE))) return; if (!(iter->iter_flags & TRACE_FILE_ANNOTATE)) return; if (cpumask_available(iter->started) && cpumask_test_cpu(iter->cpu, iter->started)) return; if (per_cpu_ptr(iter->array_buffer->data, iter->cpu)->skipped_entries) return; if (cpumask_available(iter->started)) cpumask_set_cpu(iter->cpu, iter->started); /* Don't print started cpu buffer for the first entry of the trace */ if (iter->idx > 1) trace_seq_printf(s, "##### CPU %u buffer started ####\n", iter->cpu); } #ifdef CONFIG_FTRACE_SYSCALLS static bool is_syscall_event(struct trace_event *event) { return (event->funcs == &enter_syscall_print_funcs) || (event->funcs == &exit_syscall_print_funcs); } #define syscall_buf_size CONFIG_TRACE_SYSCALL_BUF_SIZE_DEFAULT #else static inline bool is_syscall_event(struct trace_event *event) { return false; } #define syscall_buf_size 0 #endif /* CONFIG_FTRACE_SYSCALLS */ static enum print_line_t print_trace_fmt(struct trace_iterator *iter) { struct trace_array *tr = iter->tr; struct trace_seq *s = &iter->seq; unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK); struct trace_entry *entry; struct trace_event *event; entry = iter->ent; test_cpu_buff_start(iter); event = ftrace_find_event(entry->type); if (tr->trace_flags & TRACE_ITER(CONTEXT_INFO)) { if (iter->iter_flags & TRACE_FILE_LAT_FMT) trace_print_lat_context(iter); else trace_print_context(iter); } if (trace_seq_has_overflowed(s)) return TRACE_TYPE_PARTIAL_LINE; if (event) { if (tr->trace_flags & TRACE_ITER(FIELDS)) return print_event_fields(iter, event); /* * For TRACE_EVENT() events, the print_fmt is not * safe to use if the array has delta offsets * Force printing via the fields. */ if ((tr->text_delta)) { /* ftrace and system call events are still OK */ if ((event->type > __TRACE_LAST_TYPE) && !is_syscall_event(event)) return print_event_fields(iter, event); } return event->funcs->trace(iter, sym_flags, event); } trace_seq_printf(s, "Unknown type %d\n", entry->type); return trace_handle_return(s); } static enum print_line_t print_raw_fmt(struct trace_iterator *iter) { struct trace_array *tr = iter->tr; struct trace_seq *s = &iter->seq; struct trace_entry *entry; struct trace_event *event; entry = iter->ent; if (tr->trace_flags & TRACE_ITER(CONTEXT_INFO)) trace_seq_printf(s, "%d %d %llu ", entry->pid, iter->cpu, iter->ts); if (trace_seq_has_overflowed(s)) return TRACE_TYPE_PARTIAL_LINE; event = ftrace_find_event(entry->type); if (event) return event->funcs->raw(iter, 0, event); trace_seq_printf(s, "%d ?\n", entry->type); return trace_handle_return(s); } static enum print_line_t print_hex_fmt(struct trace_iterator *iter) { struct trace_array *tr = iter->tr; struct trace_seq *s = &iter->seq; unsigned char newline = '\n'; struct trace_entry *entry; struct trace_event *event; entry = iter->ent; if (tr->trace_flags & TRACE_ITER(CONTEXT_INFO)) { SEQ_PUT_HEX_FIELD(s, entry->pid); SEQ_PUT_HEX_FIELD(s, iter->cpu); SEQ_PUT_HEX_FIELD(s, iter->ts); if (trace_seq_has_overflowed(s)) return TRACE_TYPE_PARTIAL_LINE; } event = ftrace_find_event(entry->type); if (event) { enum print_line_t ret = event->funcs->hex(iter, 0, event); if (ret != TRACE_TYPE_HANDLED) return ret; } SEQ_PUT_FIELD(s, newline); return trace_handle_return(s); } static enum print_line_t print_bin_fmt(struct trace_iterator *iter) { struct trace_array *tr = iter->tr; struct trace_seq *s = &iter->seq; struct trace_entry *entry; struct trace_event *event; entry = iter->ent; if (tr->trace_flags & TRACE_ITER(CONTEXT_INFO)) { SEQ_PUT_FIELD(s, entry->pid); SEQ_PUT_FIELD(s, iter->cpu); SEQ_PUT_FIELD(s, iter->ts); if (trace_seq_has_overflowed(s)) return TRACE_TYPE_PARTIAL_LINE; } event = ftrace_find_event(entry->type); return event ? event->funcs->binary(iter, 0, event) : TRACE_TYPE_HANDLED; } int trace_empty(struct trace_iterator *iter) { struct ring_buffer_iter *buf_iter; int cpu; /* If we are looking at one CPU buffer, only check that one */ if (iter->cpu_file != RING_BUFFER_ALL_CPUS) { cpu = iter->cpu_file; buf_iter = trace_buffer_iter(iter, cpu); if (buf_iter) { if (!ring_buffer_iter_empty(buf_iter)) return 0; } else { if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu)) return 0; } return 1; } for_each_tracing_cpu(cpu) { buf_iter = trace_buffer_iter(iter, cpu); if (buf_iter) { if (!ring_buffer_iter_empty(buf_iter)) return 0; } else { if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu)) return 0; } } return 1; } /* Called with trace_event_read_lock() held. */ enum print_line_t print_trace_line(struct trace_iterator *iter) { struct trace_array *tr = iter->tr; unsigned long trace_flags = tr->trace_flags; enum print_line_t ret; if (iter->lost_events) { if (iter->lost_events == (unsigned long)-1) trace_seq_printf(&iter->seq, "CPU:%d [LOST EVENTS]\n", iter->cpu); else trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n", iter->cpu, iter->lost_events); if (trace_seq_has_overflowed(&iter->seq)) return TRACE_TYPE_PARTIAL_LINE; } if (iter->trace && iter->trace->print_line) { ret = iter->trace->print_line(iter); if (ret != TRACE_TYPE_UNHANDLED) return ret; } if (iter->ent->type == TRACE_BPUTS && trace_flags & TRACE_ITER(PRINTK) && trace_flags & TRACE_ITER(PRINTK_MSGONLY)) return trace_print_bputs_msg_only(iter); if (iter->ent->type == TRACE_BPRINT && trace_flags & TRACE_ITER(PRINTK) && trace_flags & TRACE_ITER(PRINTK_MSGONLY)) return trace_print_bprintk_msg_only(iter); if (iter->ent->type == TRACE_PRINT && trace_flags & TRACE_ITER(PRINTK) && trace_flags & TRACE_ITER(PRINTK_MSGONLY)) return trace_print_printk_msg_only(iter); if (trace_flags & TRACE_ITER(BIN)) return print_bin_fmt(iter); if (trace_flags & TRACE_ITER(HEX)) return print_hex_fmt(iter); if (trace_flags & TRACE_ITER(RAW)) return print_raw_fmt(iter); return print_trace_fmt(iter); } void trace_latency_header(struct seq_file *m) { struct trace_iterator *iter = m->private; struct trace_array *tr = iter->tr; /* print nothing if the buffers are empty */ if (trace_empty(iter)) return; if (iter->iter_flags & TRACE_FILE_LAT_FMT) print_trace_header(m, iter); if (!(tr->trace_flags & TRACE_ITER(VERBOSE))) print_lat_help_header(m); } void trace_default_header(struct seq_file *m) { struct trace_iterator *iter = m->private; struct trace_array *tr = iter->tr; unsigned long trace_flags = tr->trace_flags; if (!(trace_flags & TRACE_ITER(CONTEXT_INFO))) return; if (iter->iter_flags & TRACE_FILE_LAT_FMT) { /* print nothing if the buffers are empty */ if (trace_empty(iter)) return; print_trace_header(m, iter); if (!(trace_flags & TRACE_ITER(VERBOSE))) print_lat_help_header(m); } else { if (!(trace_flags & TRACE_ITER(VERBOSE))) { if (trace_flags & TRACE_ITER(IRQ_INFO)) print_func_help_header_irq(iter->array_buffer, m, trace_flags); else print_func_help_header(iter->array_buffer, m, trace_flags); } } } static void test_ftrace_alive(struct seq_file *m) { if (!ftrace_is_dead()) return; seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n" "# MAY BE MISSING FUNCTION EVENTS\n"); } #ifdef CONFIG_TRACER_MAX_TRACE static void show_snapshot_main_help(struct seq_file *m) { seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n" "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n" "# Takes a snapshot of the main buffer.\n" "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n" "# (Doesn't have to be '2' works with any number that\n" "# is not a '0' or '1')\n"); } static void show_snapshot_percpu_help(struct seq_file *m) { seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n"); #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n" "# Takes a snapshot of the main buffer for this cpu.\n"); #else seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n" "# Must use main snapshot file to allocate.\n"); #endif seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n" "# (Doesn't have to be '2' works with any number that\n" "# is not a '0' or '1')\n"); } static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { if (iter->tr->allocated_snapshot) seq_puts(m, "#\n# * Snapshot is allocated *\n#\n"); else seq_puts(m, "#\n# * Snapshot is freed *\n#\n"); seq_puts(m, "# Snapshot commands:\n"); if (iter->cpu_file == RING_BUFFER_ALL_CPUS) show_snapshot_main_help(m); else show_snapshot_percpu_help(m); } #else /* Should never be called */ static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { } #endif static int s_show(struct seq_file *m, void *v) { struct trace_iterator *iter = v; int ret; if (iter->ent == NULL) { if (iter->tr) { seq_printf(m, "# tracer: %s\n", iter->trace->name); seq_puts(m, "#\n"); test_ftrace_alive(m); } if (iter->snapshot && trace_empty(iter)) print_snapshot_help(m, iter); else if (iter->trace && iter->trace->print_header) iter->trace->print_header(m); else trace_default_header(m); } else if (iter->leftover) { /* * If we filled the seq_file buffer earlier, we * want to just show it now. */ ret = trace_print_seq(m, &iter->seq); /* ret should this time be zero, but you never know */ iter->leftover = ret; } else { ret = print_trace_line(iter); if (ret == TRACE_TYPE_PARTIAL_LINE) { iter->seq.full = 0; trace_seq_puts(&iter->seq, "[LINE TOO BIG]\n"); } ret = trace_print_seq(m, &iter->seq); /* * If we overflow the seq_file buffer, then it will * ask us for this data again at start up. * Use that instead. * ret is 0 if seq_file write succeeded. * -1 otherwise. */ iter->leftover = ret; } return 0; } /* * Should be used after trace_array_get(), trace_types_lock * ensures that i_cdev was already initialized. */ static inline int tracing_get_cpu(struct inode *inode) { if (inode->i_cdev) /* See trace_create_cpu_file() */ return (long)inode->i_cdev - 1; return RING_BUFFER_ALL_CPUS; } static const struct seq_operations tracer_seq_ops = { .start = s_start, .next = s_next, .stop = s_stop, .show = s_show, }; /* * Note, as iter itself can be allocated and freed in different * ways, this function is only used to free its content, and not * the iterator itself. The only requirement to all the allocations * is that it must zero all fields (kzalloc), as freeing works with * ethier allocated content or NULL. */ static void free_trace_iter_content(struct trace_iterator *iter) { /* The fmt is either NULL, allocated or points to static_fmt_buf */ if (iter->fmt != static_fmt_buf) kfree(iter->fmt); kfree(iter->temp); kfree(iter->buffer_iter); mutex_destroy(&iter->mutex); free_cpumask_var(iter->started); } static struct trace_iterator * __tracing_open(struct inode *inode, struct file *file, bool snapshot) { struct trace_array *tr = inode->i_private; struct trace_iterator *iter; int cpu; if (tracing_disabled) return ERR_PTR(-ENODEV); iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter)); if (!iter) return ERR_PTR(-ENOMEM); iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter), GFP_KERNEL); if (!iter->buffer_iter) goto release; /* * trace_find_next_entry() may need to save off iter->ent. * It will place it into the iter->temp buffer. As most * events are less than 128, allocate a buffer of that size. * If one is greater, then trace_find_next_entry() will * allocate a new buffer to adjust for the bigger iter->ent. * It's not critical if it fails to get allocated here. */ iter->temp = kmalloc(128, GFP_KERNEL); if (iter->temp) iter->temp_size = 128; /* * trace_event_printf() may need to modify given format * string to replace %p with %px so that it shows real address * instead of hash value. However, that is only for the event * tracing, other tracer may not need. Defer the allocation * until it is needed. */ iter->fmt = NULL; iter->fmt_size = 0; mutex_lock(&trace_types_lock); iter->trace = tr->current_trace; if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL)) goto fail; iter->tr = tr; #ifdef CONFIG_TRACER_MAX_TRACE /* Currently only the top directory has a snapshot */ if (tr->current_trace->print_max || snapshot) iter->array_buffer = &tr->max_buffer; else #endif iter->array_buffer = &tr->array_buffer; iter->snapshot = snapshot; iter->pos = -1; iter->cpu_file = tracing_get_cpu(inode); mutex_init(&iter->mutex); /* Notify the tracer early; before we stop tracing. */ if (iter->trace->open) iter->trace->open(iter); /* Annotate start of buffers if we had overruns */ if (ring_buffer_overruns(iter->array_buffer->buffer)) iter->iter_flags |= TRACE_FILE_ANNOTATE; /* Output in nanoseconds only if we are using a clock in nanoseconds. */ if (trace_clocks[tr->clock_id].in_ns) iter->iter_flags |= TRACE_FILE_TIME_IN_NS; /* * If pause-on-trace is enabled, then stop the trace while * dumping, unless this is the "snapshot" file */ if (!iter->snapshot && (tr->trace_flags & TRACE_ITER(PAUSE_ON_TRACE))) { iter->iter_flags |= TRACE_FILE_PAUSE; tracing_stop_tr(tr); } if (iter->cpu_file == RING_BUFFER_ALL_CPUS) { for_each_tracing_cpu(cpu) { iter->buffer_iter[cpu] = ring_buffer_read_start(iter->array_buffer->buffer, cpu, GFP_KERNEL); tracing_iter_reset(iter, cpu); } } else { cpu = iter->cpu_file; iter->buffer_iter[cpu] = ring_buffer_read_start(iter->array_buffer->buffer, cpu, GFP_KERNEL); tracing_iter_reset(iter, cpu); } mutex_unlock(&trace_types_lock); return iter; fail: mutex_unlock(&trace_types_lock); free_trace_iter_content(iter); release: seq_release_private(inode, file); return ERR_PTR(-ENOMEM); } int tracing_open_generic(struct inode *inode, struct file *filp) { int ret; ret = tracing_check_open_get_tr(NULL); if (ret) return ret; filp->private_data = inode->i_private; return 0; } bool tracing_is_disabled(void) { return (tracing_disabled) ? true: false; } /* * Open and update trace_array ref count. * Must have the current trace_array passed to it. */ int tracing_open_generic_tr(struct inode *inode, struct file *filp) { struct trace_array *tr = inode->i_private; int ret; ret = tracing_check_open_get_tr(tr); if (ret) return ret; filp->private_data = inode->i_private; return 0; } /* * The private pointer of the inode is the trace_event_file. * Update the tr ref count associated to it. */ int tracing_open_file_tr(struct inode *inode, struct file *filp) { struct trace_event_file *file = inode->i_private; int ret; ret = tracing_check_open_get_tr(file->tr); if (ret) return ret; guard(mutex)(&event_mutex); /* Fail if the file is marked for removal */ if (file->flags & EVENT_FILE_FL_FREED) { trace_array_put(file->tr); return -ENODEV; } else { event_file_get(file); } filp->private_data = inode->i_private; return 0; } int tracing_release_file_tr(struct inode *inode, struct file *filp) { struct trace_event_file *file = inode->i_private; trace_array_put(file->tr); event_file_put(file); return 0; } int tracing_single_release_file_tr(struct inode *inode, struct file *filp) { tracing_release_file_tr(inode, filp); return single_release(inode, filp); } static int tracing_release(struct inode *inode, struct file *file) { struct trace_array *tr = inode->i_private; struct seq_file *m = file->private_data; struct trace_iterator *iter; int cpu; if (!(file->f_mode & FMODE_READ)) { trace_array_put(tr); return 0; } /* Writes do not use seq_file */ iter = m->private; mutex_lock(&trace_types_lock); for_each_tracing_cpu(cpu) { if (iter->buffer_iter[cpu]) ring_buffer_read_finish(iter->buffer_iter[cpu]); } if (iter->trace && iter->trace->close) iter->trace->close(iter); if (iter->iter_flags & TRACE_FILE_PAUSE) /* reenable tracing if it was previously enabled */ tracing_start_tr(tr); __trace_array_put(tr); mutex_unlock(&trace_types_lock); free_trace_iter_content(iter); seq_release_private(inode, file); return 0; } int tracing_release_generic_tr(struct inode *inode, struct file *file) { struct trace_array *tr = inode->i_private; trace_array_put(tr); return 0; } static int tracing_single_release_tr(struct inode *inode, struct file *file) { struct trace_array *tr = inode->i_private; trace_array_put(tr); return single_release(inode, file); } static int tracing_open(struct inode *inode, struct file *file) { struct trace_array *tr = inode->i_private; struct trace_iterator *iter; int ret; ret = tracing_check_open_get_tr(tr); if (ret) return ret; /* If this file was open for write, then erase contents */ if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) { int cpu = tracing_get_cpu(inode); struct array_buffer *trace_buf = &tr->array_buffer; #ifdef CONFIG_TRACER_MAX_TRACE if (tr->current_trace->print_max) trace_buf = &tr->max_buffer; #endif if (cpu == RING_BUFFER_ALL_CPUS) tracing_reset_online_cpus(trace_buf); else tracing_reset_cpu(trace_buf, cpu); } if (file->f_mode & FMODE_READ) { iter = __tracing_open(inode, file, false); if (IS_ERR(iter)) ret = PTR_ERR(iter); else if (tr->trace_flags & TRACE_ITER(LATENCY_FMT)) iter->iter_flags |= TRACE_FILE_LAT_FMT; } if (ret < 0) trace_array_put(tr); return ret; } /* * Some tracers are not suitable for instance buffers. * A tracer is always available for the global array (toplevel) * or if it explicitly states that it is. */ static bool trace_ok_for_array(struct tracer *t, struct trace_array *tr) { #ifdef CONFIG_TRACER_SNAPSHOT /* arrays with mapped buffer range do not have snapshots */ if (tr->range_addr_start && t->use_max_tr) return false; #endif return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances; } /* Find the next tracer that this trace array may use */ static struct tracer * get_tracer_for_array(struct trace_array *tr, struct tracer *t) { while (t && !trace_ok_for_array(t, tr)) t = t->next; return t; } static void * t_next(struct seq_file *m, void *v, loff_t *pos) { struct trace_array *tr = m->private; struct tracer *t = v; (*pos)++; if (t) t = get_tracer_for_array(tr, t->next); return t; } static void *t_start(struct seq_file *m, loff_t *pos) { struct trace_array *tr = m->private; struct tracer *t; loff_t l = 0; mutex_lock(&trace_types_lock); t = get_tracer_for_array(tr, trace_types); for (; t && l < *pos; t = t_next(m, t, &l)) ; return t; } static void t_stop(struct seq_file *m, void *p) { mutex_unlock(&trace_types_lock); } static int t_show(struct seq_file *m, void *v) { struct tracer *t = v; if (!t) return 0; seq_puts(m, t->name); if (t->next) seq_putc(m, ' '); else seq_putc(m, '\n'); return 0; } static const struct seq_operations show_traces_seq_ops = { .start = t_start, .next = t_next, .stop = t_stop, .show = t_show, }; static int show_traces_open(struct inode *inode, struct file *file) { struct trace_array *tr = inode->i_private; struct seq_file *m; int ret; ret = tracing_check_open_get_tr(tr); if (ret) return ret; ret = seq_open(file, &show_traces_seq_ops); if (ret) { trace_array_put(tr); return ret; } m = file->private_data; m->private = tr; return 0; } static int tracing_seq_release(struct inode *inode, struct file *file) { struct trace_array *tr = inode->i_private; trace_array_put(tr); return seq_release(inode, file); } static ssize_t tracing_write_stub(struct file *filp, const char __user *ubuf, size_t count, loff_t *ppos) { return count; } loff_t tracing_lseek(struct file *file, loff_t offset, int whence) { int ret; if (file->f_mode & FMODE_READ) ret = seq_lseek(file, offset, whence); else file->f_pos = ret = 0; return ret; } static const struct file_operations tracing_fops = { .open = tracing_open, .read = seq_read, .read_iter = seq_read_iter, .splice_read = copy_splice_read, .write = tracing_write_stub, .llseek = tracing_lseek, .release = tracing_release, }; static const struct file_operations show_traces_fops = { .open = show_traces_open, .read = seq_read, .llseek = seq_lseek, .release = tracing_seq_release, }; static ssize_t tracing_cpumask_read(struct file *filp, char __user *ubuf, size_t count, loff_t *ppos) { struct trace_array *tr = file_inode(filp)->i_private; char *mask_str __free(kfree) = NULL; int len; len = snprintf(NULL, 0, "%*pb\n", cpumask_pr_args(tr->tracing_cpumask)) + 1; mask_str = kmalloc(len, GFP_KERNEL); if (!mask_str) return -ENOMEM; len = snprintf(mask_str, len, "%*pb\n", cpumask_pr_args(tr->tracing_cpumask)); if (len >= count) return -EINVAL; return simple_read_from_buffer(ubuf, count, ppos, mask_str, len); } int tracing_set_cpumask(struct trace_array *tr, cpumask_var_t tracing_cpumask_new) { int cpu; if (!tr) return -EINVAL; local_irq_disable(); arch_spin_lock(&tr->max_lock); for_each_tracing_cpu(cpu) { /* * Increase/decrease the disabled counter if we are * about to flip a bit in the cpumask: */ if (cpumask_test_cpu(cpu, tr->tracing_cpumask) && !cpumask_test_cpu(cpu, tracing_cpumask_new)) { ring_buffer_record_disable_cpu(tr->array_buffer.buffer, cpu); #ifdef CONFIG_TRACER_MAX_TRACE ring_buffer_record_disable_cpu(tr->max_buffer.buffer, cpu); #endif } if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) && cpumask_test_cpu(cpu, tracing_cpumask_new)) { ring_buffer_record_enable_cpu(tr->array_buffer.buffer, cpu); #ifdef CONFIG_TRACER_MAX_TRACE ring_buffer_record_enable_cpu(tr->max_buffer.buffer, cpu); #endif } } arch_spin_unlock(&tr->max_lock); local_irq_enable(); cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new); return 0; } static ssize_t tracing_cpumask_write(struct file *filp, const char __user *ubuf, size_t count, loff_t *ppos) { struct trace_array *tr = file_inode(filp)->i_private; cpumask_var_t tracing_cpumask_new; int err; if (count == 0 || count > KMALLOC_MAX_SIZE) return -EINVAL; if (!zalloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL)) return -ENOMEM; err = cpumask_parse_user(ubuf, count, tracing_cpumask_new); if (err) goto err_free; err = tracing_set_cpumask(tr, tracing_cpumask_new); if (err) goto err_free; free_cpumask_var(tracing_cpumask_new); return count; err_free: free_cpumask_var(tracing_cpumask_new); return err; } static const struct file_operations tracing_cpumask_fops = { .open = tracing_open_generic_tr, .read = tracing_cpumask_read, .write = tracing_cpumask_write, .release = tracing_release_generic_tr, .llseek = generic_file_llseek, }; static int tracing_trace_options_show(struct seq_file *m, void *v) { struct tracer_opt *trace_opts; struct trace_array *tr = m->private; struct tracer_flags *flags; u32 tracer_flags; int i; guard(mutex)(&trace_types_lock); for (i = 0; trace_options[i]; i++) { if (tr->trace_flags & (1ULL << i)) seq_printf(m, "%s\n", trace_options[i]); else seq_printf(m, "no%s\n", trace_options[i]); } flags = tr->current_trace_flags; if (!flags || !flags->opts) return 0; tracer_flags = flags->val; trace_opts = flags->opts; for (i = 0; trace_opts[i].name; i++) { if (tracer_flags & trace_opts[i].bit) seq_printf(m, "%s\n", trace_opts[i].name); else seq_printf(m, "no%s\n", trace_opts[i].name); } return 0; } static int __set_tracer_option(struct trace_array *tr, struct tracer_flags *tracer_flags, struct tracer_opt *opts, int neg) { struct tracer *trace = tracer_flags->trace; int ret = 0; if (trace->set_flag) ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg); if (ret) return ret; if (neg) tracer_flags->val &= ~opts->bit; else tracer_flags->val |= opts->bit; return 0; } /* Try to assign a tracer specific option */ static int set_tracer_option(struct trace_array *tr, char *cmp, int neg) { struct tracer_flags *tracer_flags = tr->current_trace_flags; struct tracer_opt *opts = NULL; int i; if (!tracer_flags || !tracer_flags->opts) return 0; for (i = 0; tracer_flags->opts[i].name; i++) { opts = &tracer_flags->opts[i]; if (strcmp(cmp, opts->name) == 0) return __set_tracer_option(tr, tracer_flags, opts, neg); } return -EINVAL; } /* Some tracers require overwrite to stay enabled */ int trace_keep_overwrite(struct tracer *tracer, u64 mask, int set) { if (tracer->enabled && (mask & TRACE_ITER(OVERWRITE)) && !set) return -1; return 0; } int set_tracer_flag(struct trace_array *tr, u64 mask, int enabled) { switch (mask) { case TRACE_ITER(RECORD_TGID): case TRACE_ITER(RECORD_CMD): case TRACE_ITER(TRACE_PRINTK): case TRACE_ITER(COPY_MARKER): lockdep_assert_held(&event_mutex); } /* do nothing if flag is already set */ if (!!(tr->trace_flags & mask) == !!enabled) return 0; /* Give the tracer a chance to approve the change */ if (tr->current_trace->flag_changed) if (tr->current_trace->flag_changed(tr, mask, !!enabled)) return -EINVAL; switch (mask) { case TRACE_ITER(TRACE_PRINTK): if (enabled) { update_printk_trace(tr); } else { /* * The global_trace cannot clear this. * It's flag only gets cleared if another instance sets it. */ if (printk_trace == &global_trace) return -EINVAL; /* * An instance must always have it set. * by default, that's the global_trace instance. */ if (printk_trace == tr) update_printk_trace(&global_trace); } break; case TRACE_ITER(COPY_MARKER): update_marker_trace(tr, enabled); /* update_marker_trace updates the tr->trace_flags */ return 0; } if (enabled) tr->trace_flags |= mask; else tr->trace_flags &= ~mask; switch (mask) { case TRACE_ITER(RECORD_CMD): trace_event_enable_cmd_record(enabled); break; case TRACE_ITER(RECORD_TGID): if (trace_alloc_tgid_map() < 0) { tr->trace_flags &= ~TRACE_ITER(RECORD_TGID); return -ENOMEM; } trace_event_enable_tgid_record(enabled); break; case TRACE_ITER(EVENT_FORK): trace_event_follow_fork(tr, enabled); break; case TRACE_ITER(FUNC_FORK): ftrace_pid_follow_fork(tr, enabled); break; case TRACE_ITER(OVERWRITE): ring_buffer_change_overwrite(tr->array_buffer.buffer, enabled); #ifdef CONFIG_TRACER_MAX_TRACE ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled); #endif break; case TRACE_ITER(PRINTK): trace_printk_start_stop_comm(enabled); trace_printk_control(enabled); break; #if defined(CONFIG_FUNCTION_PROFILER) && defined(CONFIG_FUNCTION_GRAPH_TRACER) case TRACE_GRAPH_GRAPH_TIME: ftrace_graph_graph_time_control(enabled); break; #endif } return 0; } int trace_set_options(struct trace_array *tr, char *option) { char *cmp; int neg = 0; int ret; size_t orig_len = strlen(option); int len; cmp = strstrip(option); len = str_has_prefix(cmp, "no"); if (len) neg = 1; cmp += len; mutex_lock(&event_mutex); mutex_lock(&trace_types_lock); ret = match_string(trace_options, -1, cmp); /* If no option could be set, test the specific tracer options */ if (ret < 0) ret = set_tracer_option(tr, cmp, neg); else ret = set_tracer_flag(tr, 1ULL << ret, !neg); mutex_unlock(&trace_types_lock); mutex_unlock(&event_mutex); /* * If the first trailing whitespace is replaced with '\0' by strstrip, * turn it back into a space. */ if (orig_len > strlen(option)) option[strlen(option)] = ' '; return ret; } static void __init apply_trace_boot_options(void) { char *buf = trace_boot_options_buf; char *option; while (true) { option = strsep(&buf, ","); if (!option) break; if (*option) trace_set_options(&global_trace, option); /* Put back the comma to allow this to be called again */ if (buf) *(buf - 1) = ','; } } static ssize_t tracing_trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos) { struct seq_file *m = filp->private_data; struct trace_array *tr = m->private; char buf[64]; int ret; if (cnt >= sizeof(buf)) return -EINVAL; if (copy_from_user(buf, ubuf, cnt)) return -EFAULT; buf[cnt] = 0; ret = trace_set_options(tr, buf); if (ret < 0) return ret; *ppos += cnt; return cnt; } static int tracing_trace_options_open(struct inode *inode, struct file *file) { struct trace_array *tr = inode->i_private; int ret; ret = tracing_check_open_get_tr(tr); if (ret) return ret; ret = single_open(file, tracing_trace_options_show, inode->i_private); if (ret < 0) trace_array_put(tr); return ret; } static const struct file_operations tracing_iter_fops = { .open = tracing_trace_options_open, .read = seq_read, .llseek = seq_lseek, .release = tracing_single_release_tr, .write = tracing_trace_options_write, }; static const char readme_msg[] = "tracing mini-HOWTO:\n\n" "By default tracefs removes all OTH file permission bits.\n" "When mounting tracefs an optional group id can be specified\n" "which adds the group to every directory and file in tracefs:\n\n" "\t e.g. mount -t tracefs [-o [gid=<gid>]] nodev /sys/kernel/tracing\n\n" "# echo 0 > tracing_on : quick way to disable tracing\n" "# echo 1 > tracing_on : quick way to re-enable tracing\n\n" " Important files:\n" " trace\t\t\t- The static contents of the buffer\n" "\t\t\t To clear the buffer write into this file: echo > trace\n" " trace_pipe\t\t- A consuming read to see the contents of the buffer\n" " current_tracer\t- function and latency tracers\n" " available_tracers\t- list of configured tracers for current_tracer\n" " error_log\t- error log for failed commands (that support it)\n" " buffer_size_kb\t- view and modify size of per cpu buffer\n" " buffer_total_size_kb - view total size of all cpu buffers\n\n" " trace_clock\t\t- change the clock used to order events\n" " local: Per cpu clock but may not be synced across CPUs\n" " global: Synced across CPUs but slows tracing down.\n" " counter: Not a clock, but just an increment\n" " uptime: Jiffy counter from time of boot\n" " perf: Same clock that perf events use\n" #ifdef CONFIG_X86_64 " x86-tsc: TSC cycle counter\n" #endif "\n timestamp_mode\t- view the mode used to timestamp events\n" " delta: Delta difference against a buffer-wide timestamp\n" " absolute: Absolute (standalone) timestamp\n" "\n trace_marker\t\t- Writes into this file writes into the kernel buffer\n" "\n trace_marker_raw\t\t- Writes into this file writes binary data into the kernel buffer\n" " tracing_cpumask\t- Limit which CPUs to trace\n" " instances\t\t- Make sub-buffers with: mkdir instances/foo\n" "\t\t\t Remove sub-buffer with rmdir\n" " trace_options\t\t- Set format or modify how tracing happens\n" "\t\t\t Disable an option by prefixing 'no' to the\n" "\t\t\t option name\n" " saved_cmdlines_size\t- echo command number in here to store comm-pid list\n" #ifdef CONFIG_DYNAMIC_FTRACE "\n available_filter_functions - list of functions that can be filtered on\n" " set_ftrace_filter\t- echo function name in here to only trace these\n" "\t\t\t functions\n" "\t accepts: func_full_name or glob-matching-pattern\n" "\t modules: Can select a group via module\n" "\t Format: :mod:<module-name>\n" "\t example: echo :mod:ext3 > set_ftrace_filter\n" "\t triggers: a command to perform when function is hit\n" "\t Format: <function>:<trigger>[:count]\n" "\t trigger: traceon, traceoff\n" "\t\t enable_event:<system>:<event>\n" "\t\t disable_event:<system>:<event>\n" #ifdef CONFIG_STACKTRACE "\t\t stacktrace\n" #endif #ifdef CONFIG_TRACER_SNAPSHOT "\t\t snapshot\n" #endif "\t\t dump\n" "\t\t cpudump\n" "\t example: echo do_fault:traceoff > set_ftrace_filter\n" "\t echo do_trap:traceoff:3 > set_ftrace_filter\n" "\t The first one will disable tracing every time do_fault is hit\n" "\t The second will disable tracing at most 3 times when do_trap is hit\n" "\t The first time do trap is hit and it disables tracing, the\n" "\t counter will decrement to 2. If tracing is already disabled,\n" "\t the counter will not decrement. It only decrements when the\n" "\t trigger did work\n" "\t To remove trigger without count:\n" "\t echo '!<function>:<trigger> > set_ftrace_filter\n" "\t To remove trigger with a count:\n" "\t echo '!<function>:<trigger>:0 > set_ftrace_filter\n" " set_ftrace_notrace\t- echo function name in here to never trace.\n" "\t accepts: func_full_name, *func_end, func_begin*, *func_middle*\n" "\t modules: Can select a group via module command :mod:\n" "\t Does not accept triggers\n" #endif /* CONFIG_DYNAMIC_FTRACE */ #ifdef CONFIG_FUNCTION_TRACER " set_ftrace_pid\t- Write pid(s) to only function trace those pids\n" "\t\t (function)\n" " set_ftrace_notrace_pid\t- Write pid(s) to not function trace those pids\n" "\t\t (function)\n" #endif #ifdef CONFIG_FUNCTION_GRAPH_TRACER " set_graph_function\t- Trace the nested calls of a function (function_graph)\n" " set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n" " max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n" #endif #ifdef CONFIG_TRACER_SNAPSHOT "\n snapshot\t\t- Like 'trace' but shows the content of the static\n" "\t\t\t snapshot buffer. Read the contents for more\n" "\t\t\t information\n" #endif #ifdef CONFIG_STACK_TRACER " stack_trace\t\t- Shows the max stack trace when active\n" " stack_max_size\t- Shows current max stack size that was traced\n" "\t\t\t Write into this file to reset the max size (trigger a\n" "\t\t\t new trace)\n" #ifdef CONFIG_DYNAMIC_FTRACE " stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n" "\t\t\t traces\n" #endif #endif /* CONFIG_STACK_TRACER */ #ifdef CONFIG_DYNAMIC_EVENTS " dynamic_events\t\t- Create/append/remove/show the generic dynamic events\n" "\t\t\t Write into this file to define/undefine new trace events.\n" #endif #ifdef CONFIG_KPROBE_EVENTS " kprobe_events\t\t- Create/append/remove/show the kernel dynamic events\n" "\t\t\t Write into this file to define/undefine new trace events.\n" #endif #ifdef CONFIG_UPROBE_EVENTS " uprobe_events\t\t- Create/append/remove/show the userspace dynamic events\n" "\t\t\t Write into this file to define/undefine new trace events.\n" #endif #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS) || \ defined(CONFIG_FPROBE_EVENTS) "\t accepts: event-definitions (one definition per line)\n" #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS) "\t Format: p[:[<group>/][<event>]] <place> [<args>]\n" "\t r[maxactive][:[<group>/][<event>]] <place> [<args>]\n" #endif #ifdef CONFIG_FPROBE_EVENTS "\t f[:[<group>/][<event>]] <func-name>[%return] [<args>]\n" "\t t[:[<group>/][<event>]] <tracepoint> [<args>]\n" #endif #ifdef CONFIG_HIST_TRIGGERS "\t s:[synthetic/]<event> <field> [<field>]\n" #endif "\t e[:[<group>/][<event>]] <attached-group>.<attached-event> [<args>] [if <filter>]\n" "\t -:[<group>/][<event>]\n" #ifdef CONFIG_KPROBE_EVENTS "\t place: [<module>:]<symbol>[+<offset>]|<memaddr>\n" "place (kretprobe): [<module>:]<symbol>[+<offset>]%return|<memaddr>\n" #endif #ifdef CONFIG_UPROBE_EVENTS " place (uprobe): <path>:<offset>[%return][(ref_ctr_offset)]\n" #endif "\t args: <name>=fetcharg[:type]\n" "\t fetcharg: (%<register>|$<efield>), @<address>, @<symbol>[+|-<offset>],\n" #ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API "\t $stack<index>, $stack, $retval, $comm, $arg<N>,\n" #ifdef CONFIG_PROBE_EVENTS_BTF_ARGS "\t <argname>[->field[->field|.field...]],\n" #endif #else "\t $stack<index>, $stack, $retval, $comm,\n" #endif "\t +|-[u]<offset>(<fetcharg>), \\imm-value, \\\"imm-string\"\n" "\t kernel return probes support: $retval, $arg<N>, $comm\n" "\t type: s8/16/32/64, u8/16/32/64, x8/16/32/64, char, string, symbol,\n" "\t b<bit-width>@<bit-offset>/<container-size>, ustring,\n" "\t symstr, %pd/%pD, <type>\\[<array-size>\\]\n" #ifdef CONFIG_HIST_TRIGGERS "\t field: <stype> <name>;\n" "\t stype: u8/u16/u32/u64, s8/s16/s32/s64, pid_t,\n" "\t [unsigned] char/int/long\n" #endif "\t efield: For event probes ('e' types), the field is on of the fields\n" "\t of the <attached-group>/<attached-event>.\n" #endif " set_event\t\t- Enables events by name written into it\n" "\t\t\t Can enable module events via: :mod:<module>\n" " events/\t\t- Directory containing all trace event subsystems:\n" " enable\t\t- Write 0/1 to enable/disable tracing of all events\n" " events/<system>/\t- Directory containing all trace events for <system>:\n" " enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n" "\t\t\t events\n" " filter\t\t- If set, only events passing filter are traced\n" " events/<system>/<event>/\t- Directory containing control files for\n" "\t\t\t <event>:\n" " enable\t\t- Write 0/1 to enable/disable tracing of <event>\n" " filter\t\t- If set, only events passing filter are traced\n" " trigger\t\t- If set, a command to perform when event is hit\n" "\t Format: <trigger>[:count][if <filter>]\n" "\t trigger: traceon, traceoff\n" "\t enable_event:<system>:<event>\n" "\t disable_event:<system>:<event>\n" #ifdef CONFIG_HIST_TRIGGERS "\t enable_hist:<system>:<event>\n" "\t disable_hist:<system>:<event>\n" #endif #ifdef CONFIG_STACKTRACE "\t\t stacktrace\n" #endif #ifdef CONFIG_TRACER_SNAPSHOT "\t\t snapshot\n" #endif #ifdef CONFIG_HIST_TRIGGERS "\t\t hist (see below)\n" #endif "\t example: echo traceoff > events/block/block_unplug/trigger\n" "\t echo traceoff:3 > events/block/block_unplug/trigger\n" "\t echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n" "\t events/block/block_unplug/trigger\n" "\t The first disables tracing every time block_unplug is hit.\n" "\t The second disables tracing the first 3 times block_unplug is hit.\n" "\t The third enables the kmalloc event the first 3 times block_unplug\n" "\t is hit and has value of greater than 1 for the 'nr_rq' event field.\n" "\t Like function triggers, the counter is only decremented if it\n" "\t enabled or disabled tracing.\n" "\t To remove a trigger without a count:\n" "\t echo '!<trigger> > <system>/<event>/trigger\n" "\t To remove a trigger with a count:\n" "\t echo '!<trigger>:0 > <system>/<event>/trigger\n" "\t Filters can be ignored when removing a trigger.\n" #ifdef CONFIG_HIST_TRIGGERS " hist trigger\t- If set, event hits are aggregated into a hash table\n" "\t Format: hist:keys=<field1[,field2,...]>\n" "\t [:<var1>=<field|var_ref|numeric_literal>[,<var2>=...]]\n" "\t [:values=<field1[,field2,...]>]\n" "\t [:sort=<field1[,field2,...]>]\n" "\t [:size=#entries]\n" "\t [:pause][:continue][:clear]\n" "\t [:name=histname1]\n" "\t [:nohitcount]\n" "\t [:<handler>.<action>]\n" "\t [if <filter>]\n\n" "\t Note, special fields can be used as well:\n" "\t common_timestamp - to record current timestamp\n" "\t common_cpu - to record the CPU the event happened on\n" "\n" "\t A hist trigger variable can be:\n" "\t - a reference to a field e.g. x=current_timestamp,\n" "\t - a reference to another variable e.g. y=$x,\n" "\t - a numeric literal: e.g. ms_per_sec=1000,\n" "\t - an arithmetic expression: e.g. time_secs=current_timestamp/1000\n" "\n" "\t hist trigger arithmetic expressions support addition(+), subtraction(-),\n" "\t multiplication(*) and division(/) operators. An operand can be either a\n" "\t variable reference, field or numeric literal.\n" "\n" "\t When a matching event is hit, an entry is added to a hash\n" "\t table using the key(s) and value(s) named, and the value of a\n" "\t sum called 'hitcount' is incremented. Keys and values\n" "\t correspond to fields in the event's format description. Keys\n" "\t can be any field, or the special string 'common_stacktrace'.\n" "\t Compound keys consisting of up to two fields can be specified\n" "\t by the 'keys' keyword. Values must correspond to numeric\n" "\t fields. Sort keys consisting of up to two fields can be\n" "\t specified using the 'sort' keyword. The sort direction can\n" "\t be modified by appending '.descending' or '.ascending' to a\n" "\t sort field. The 'size' parameter can be used to specify more\n" "\t or fewer than the default 2048 entries for the hashtable size.\n" "\t If a hist trigger is given a name using the 'name' parameter,\n" "\t its histogram data will be shared with other triggers of the\n" "\t same name, and trigger hits will update this common data.\n\n" "\t Reading the 'hist' file for the event will dump the hash\n" "\t table in its entirety to stdout. If there are multiple hist\n" "\t triggers attached to an event, there will be a table for each\n" "\t trigger in the output. The table displayed for a named\n" "\t trigger will be the same as any other instance having the\n" "\t same name. The default format used to display a given field\n" "\t can be modified by appending any of the following modifiers\n" "\t to the field name, as applicable:\n\n" "\t .hex display a number as a hex value\n" "\t .sym display an address as a symbol\n" "\t .sym-offset display an address as a symbol and offset\n" "\t .execname display a common_pid as a program name\n" "\t .syscall display a syscall id as a syscall name\n" "\t .log2 display log2 value rather than raw number\n" "\t .buckets=size display values in groups of size rather than raw number\n" "\t .usecs display a common_timestamp in microseconds\n" "\t .percent display a number of percentage value\n" "\t .graph display a bar-graph of a value\n\n" "\t The 'pause' parameter can be used to pause an existing hist\n" "\t trigger or to start a hist trigger but not log any events\n" "\t until told to do so. 'continue' can be used to start or\n" "\t restart a paused hist trigger.\n\n" "\t The 'clear' parameter will clear the contents of a running\n" "\t hist trigger and leave its current paused/active state\n" "\t unchanged.\n\n" "\t The 'nohitcount' (or NOHC) parameter will suppress display of\n" "\t raw hitcount in the histogram.\n\n" "\t The enable_hist and disable_hist triggers can be used to\n" "\t have one event conditionally start and stop another event's\n" "\t already-attached hist trigger. The syntax is analogous to\n" "\t the enable_event and disable_event triggers.\n\n" "\t Hist trigger handlers and actions are executed whenever a\n" "\t a histogram entry is added or updated. They take the form:\n\n" "\t <handler>.<action>\n\n" "\t The available handlers are:\n\n" "\t onmatch(matching.event) - invoke on addition or update\n" "\t onmax(var) - invoke if var exceeds current max\n" "\t onchange(var) - invoke action if var changes\n\n" "\t The available actions are:\n\n" "\t trace(<synthetic_event>,param list) - generate synthetic event\n" "\t save(field,...) - save current event fields\n" #ifdef CONFIG_TRACER_SNAPSHOT "\t snapshot() - snapshot the trace buffer\n\n" #endif #ifdef CONFIG_SYNTH_EVENTS " events/synthetic_events\t- Create/append/remove/show synthetic events\n" "\t Write into this file to define/undefine new synthetic events.\n" "\t example: echo 'myevent u64 lat; char name[]; long[] stack' >> synthetic_events\n" #endif #endif ; static ssize_t tracing_readme_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos) { return simple_read_from_buffer(ubuf, cnt, ppos, readme_msg, strlen(readme_msg)); } static const struct file_operations tracing_readme_fops = { .open = tracing_open_generic, .read = tracing_readme_read, .llseek = generic_file_llseek, }; #ifdef CONFIG_TRACE_EVAL_MAP_FILE static union trace_eval_map_item * update_eval_map(union trace_eval_map_item *ptr) { if (!ptr->map.eval_string) { if (ptr->tail.next) { ptr = ptr->tail.next; /* Set ptr to the next real item (skip head) */ ptr++; } else return NULL; } return ptr; } static void *eval_map_next(struct seq_file *m, void *v, loff_t *pos) { union trace_eval_map_item *ptr = v; /* * Paranoid! If ptr points to end, we don't want to increment past it. * This really should never happen. */ (*pos)++; ptr = update_eval_map(ptr); if (WARN_ON_ONCE(!ptr)) return NULL; ptr++; ptr = update_eval_map(ptr); return ptr; } static void *eval_map_start(struct seq_file *m, loff_t *pos) { union trace_eval_map_item *v; loff_t l = 0; mutex_lock(&trace_eval_mutex); v = trace_eval_maps; if (v) v++; while (v && l < *pos) { v = eval_map_next(m, v, &l); } return v; } static void eval_map_stop(struct seq_file *m, void *v) { mutex_unlock(&trace_eval_mutex); } static int eval_map_show(struct seq_file *m, void *v) { union trace_eval_map_item *ptr = v; seq_printf(m, "%s %ld (%s)\n", ptr->map.eval_string, ptr->map.eval_value, ptr->map.system); return 0; } static const struct seq_operations tracing_eval_map_seq_ops = { .start = eval_map_start, .next = eval_map_next, .stop = eval_map_stop, .show = eval_map_show, }; static int tracing_eval_map_open(struct inode *inode, struct file *filp) { int ret; ret = tracing_check_open_get_tr(NULL); if (ret) return ret; return seq_open(filp, &tracing_eval_map_seq_ops); } static const struct file_operations tracing_eval_map_fops = { .open = tracing_eval_map_open, .read = seq_read, .llseek = seq_lseek, .release = seq_release, }; static inline union trace_eval_map_item * trace_eval_jmp_to_tail(union trace_eval_map_item *ptr) { /* Return tail of array given the head */ return ptr + ptr->head.length + 1; } static void trace_insert_eval_map_file(struct module *mod, struct trace_eval_map **start, int len) { struct trace_eval_map **stop; struct trace_eval_map **map; union trace_eval_map_item *map_array; union trace_eval_map_item *ptr; stop = start + len; /* * The trace_eval_maps contains the map plus a head and tail item, * where the head holds the module and length of array, and the * tail holds a pointer to the next list. */ map_array = kmalloc_array(len + 2, sizeof(*map_array), GFP_KERNEL); if (!map_array) { pr_warn("Unable to allocate trace eval mapping\n"); return; } guard(mutex)(&trace_eval_mutex); if (!trace_eval_maps) trace_eval_maps = map_array; else { ptr = trace_eval_maps; for (;;) { ptr = trace_eval_jmp_to_tail(ptr); if (!ptr->tail.next) break; ptr = ptr->tail.next; } ptr->tail.next = map_array; } map_array->head.mod = mod; map_array->head.length = len; map_array++; for (map = start; (unsigned long)map < (unsigned long)stop; map++) { map_array->map = **map; map_array++; } memset(map_array, 0, sizeof(*map_array)); } static void trace_create_eval_file(struct dentry *d_tracer) { trace_create_file("eval_map", TRACE_MODE_READ, d_tracer, NULL, &tracing_eval_map_fops); } #else /* CONFIG_TRACE_EVAL_MAP_FILE */ static inline void trace_create_eval_file(struct dentry *d_tracer) { } static inline void trace_insert_eval_map_file(struct module *mod, struct trace_eval_map **start, int len) { } #endif /* !CONFIG_TRACE_EVAL_MAP_FILE */ static void trace_event_update_with_eval_map(struct module *mod, struct trace_eval_map **start, int len) { struct trace_eval_map **map; /* Always run sanitizer only if btf_type_tag attr exists. */ if (len <= 0) { if (!(IS_ENABLED(CONFIG_DEBUG_INFO_BTF) && IS_ENABLED(CONFIG_PAHOLE_HAS_BTF_TAG) && __has_attribute(btf_type_tag))) return; } map = start; trace_event_update_all(map, len); if (len <= 0) return; trace_insert_eval_map_file(mod, start, len); } static ssize_t tracing_set_trace_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos) { struct trace_array *tr = filp->private_data; char buf[MAX_TRACER_SIZE+2]; int r; scoped_guard(mutex, &trace_types_lock) { r = sprintf(buf, "%s\n", tr->current_trace->name); } return simple_read_from_buffer(ubuf, cnt, ppos, buf, r); } int tracer_init(struct tracer *t, struct trace_array *tr) { tracing_reset_online_cpus(&tr->array_buffer); return t->init(tr); } static void set_buffer_entries(struct array_buffer *buf, unsigned long val) { int cpu; for_each_tracing_cpu(cpu) per_cpu_ptr(buf->data, cpu)->entries = val; } static void update_buffer_entries(struct array_buffer *buf, int cpu) { if (cpu == RING_BUFFER_ALL_CPUS) { set_buffer_entries(buf, ring_buffer_size(buf->buffer, 0)); } else { per_cpu_ptr(buf->data, cpu)->entries = ring_buffer_size(buf->buffer, cpu); } } #ifdef CONFIG_TRACER_MAX_TRACE /* resize @tr's buffer to the size of @size_tr's entries */ static int resize_buffer_duplicate_size(struct array_buffer *trace_buf, struct array_buffer *size_buf, int cpu_id) { int cpu, ret = 0; if (cpu_id == RING_BUFFER_ALL_CPUS) { for_each_tracing_cpu(cpu) { ret = ring_buffer_resize(trace_buf->buffer, per_cpu_ptr(size_buf->data, cpu)->entries, cpu); if (ret < 0) break; per_cpu_ptr(trace_buf->data, cpu)->entries = per_cpu_ptr(size_buf->data, cpu)->entries; } } else { ret = ring_buffer_resize(trace_buf->buffer, per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id); if (ret == 0) per_cpu_ptr(trace_buf->data, cpu_id)->entries = per_cpu_ptr(size_buf->data, cpu_id)->entries; } return ret; } #endif /* CONFIG_TRACER_MAX_TRACE */ static int __tracing_resize_ring_buffer(struct trace_array *tr, unsigned long size, int cpu) { int ret; /* * If kernel or user changes the size of the ring buffer * we use the size that was given, and we can forget about * expanding it later. */ trace_set_ring_buffer_expanded(tr); /* May be called before buffers are initialized */ if (!tr->array_buffer.buffer) return 0; /* Do not allow tracing while resizing ring buffer */ tracing_stop_tr(tr); ret = ring_buffer_resize(tr->array_buffer.buffer, size, cpu); if (ret < 0) goto out_start; #ifdef CONFIG_TRACER_MAX_TRACE if (!tr->allocated_snapshot) goto out; ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu); if (ret < 0) { int r = resize_buffer_duplicate_size(&tr->array_buffer, &tr->array_buffer, cpu); if (r < 0) { /* * AARGH! We are left with different * size max buffer!!!! * The max buffer is our "snapshot" buffer. * When a tracer needs a snapshot (one of the * latency tracers), it swaps the max buffer * with the saved snap shot. We succeeded to * update the size of the main buffer, but failed to * update the size of the max buffer. But when we tried * to reset the main buffer to the original size, we * failed there too. This is very unlikely to * happen, but if it does, warn and kill all * tracing. */ WARN_ON(1); tracing_disabled = 1; } goto out_start; } update_buffer_entries(&tr->max_buffer, cpu); out: #endif /* CONFIG_TRACER_MAX_TRACE */ update_buffer_entries(&tr->array_buffer, cpu); out_start: tracing_start_tr(tr); return ret; } ssize_t tracing_resize_ring_buffer(struct trace_array *tr, unsigned long size, int cpu_id) { guard(mutex)(&trace_types_lock); if (cpu_id != RING_BUFFER_ALL_CPUS) { /* make sure, this cpu is enabled in the mask */ if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) return -EINVAL; } return __tracing_resize_ring_buffer(tr, size, cpu_id); } struct trace_mod_entry { unsigned long mod_addr; char mod_name[MODULE_NAME_LEN]; }; struct trace_scratch { unsigned int clock_id; unsigned long text_addr; unsigned long nr_entries; struct trace_mod_entry entries[]; }; static DEFINE_MUTEX(scratch_mutex); static int cmp_mod_entry(const void *key, const void *pivot) { unsigned long addr = (unsigned long)key; const struct trace_mod_entry *ent = pivot; if (addr < ent[0].mod_addr) return -1; return addr >= ent[1].mod_addr; } /** * trace_adjust_address() - Adjust prev boot address to current address. * @tr: Persistent ring buffer's trace_array. * @addr: Address in @tr which is adjusted. */ unsigned long trace_adjust_address(struct trace_array *tr, unsigned long addr) { struct trace_module_delta *module_delta; struct trace_scratch *tscratch; struct trace_mod_entry *entry; unsigned long raddr; int idx = 0, nr_entries; /* If we don't have last boot delta, return the address */ if (!(tr->flags & TRACE_ARRAY_FL_LAST_BOOT)) return addr; /* tr->module_delta must be protected by rcu. */ guard(rcu)(); tscratch = tr->scratch; /* if there is no tscrach, module_delta must be NULL. */ module_delta = READ_ONCE(tr->module_delta); if (!module_delta || !tscratch->nr_entries || tscratch->entries[0].mod_addr > addr) { raddr = addr + tr->text_delta; return __is_kernel(raddr) || is_kernel_core_data(raddr) || is_kernel_rodata(raddr) ? raddr : addr; } /* Note that entries must be sorted. */ nr_entries = tscratch->nr_entries; if (nr_entries == 1 || tscratch->entries[nr_entries - 1].mod_addr < addr) idx = nr_entries - 1; else { entry = __inline_bsearch((void *)addr, tscratch->entries, nr_entries - 1, sizeof(tscratch->entries[0]), cmp_mod_entry); if (entry) idx = entry - tscratch->entries; } return addr + module_delta->delta[idx]; } #ifdef CONFIG_MODULES static int save_mod(struct module *mod, void *data) { struct trace_array *tr = data; struct trace_scratch *tscratch; struct trace_mod_entry *entry; unsigned int size; tscratch = tr->scratch; if (!tscratch) return -1; size = tr->scratch_size; if (struct_size(tscratch, entries, tscratch->nr_entries + 1) > size) return -1; entry = &tscratch->entries[tscratch->nr_entries]; tscratch->nr_entries++; entry->mod_addr = (unsigned long)mod->mem[MOD_TEXT].base; strscpy(entry->mod_name, mod->name); return 0; } #else static int save_mod(struct module *mod, void *data) { return 0; } #endif static void update_last_data(struct trace_array *tr) { struct trace_module_delta *module_delta; struct trace_scratch *tscratch; if (!(tr->flags & TRACE_ARRAY_FL_BOOT)) return; if (!(tr->flags & TRACE_ARRAY_FL_LAST_BOOT)) return; /* Only if the buffer has previous boot data clear and update it. */ tr->flags &= ~TRACE_ARRAY_FL_LAST_BOOT; /* Reset the module list and reload them */ if (tr->scratch) { struct trace_scratch *tscratch = tr->scratch; tscratch->clock_id = tr->clock_id; memset(tscratch->entries, 0, flex_array_size(tscratch, entries, tscratch->nr_entries)); tscratch->nr_entries = 0; guard(mutex)(&scratch_mutex); module_for_each_mod(save_mod, tr); } /* * Need to clear all CPU buffers as there cannot be events * from the previous boot mixed with events with this boot * as that will cause a confusing trace. Need to clear all * CPU buffers, even for those that may currently be offline. */ tracing_reset_all_cpus(&tr->array_buffer); /* Using current data now */ tr->text_delta = 0; if (!tr->scratch) return; tscratch = tr->scratch; module_delta = READ_ONCE(tr->module_delta); WRITE_ONCE(tr->module_delta, NULL); kfree_rcu(module_delta, rcu); /* Set the persistent ring buffer meta data to this address */ tscratch->text_addr = (unsigned long)_text; } /** * tracing_update_buffers - used by tracing facility to expand ring buffers * @tr: The tracing instance * * To save on memory when the tracing is never used on a system with it * configured in. The ring buffers are set to a minimum size. But once * a user starts to use the tracing facility, then they need to grow * to their default size. * * This function is to be called when a tracer is about to be used. */ int tracing_update_buffers(struct trace_array *tr) { int ret = 0; guard(mutex)(&trace_types_lock); update_last_data(tr); if (!tr->ring_buffer_expanded) ret = __tracing_resize_ring_buffer(tr, trace_buf_size, RING_BUFFER_ALL_CPUS); return ret; } /* * Used to clear out the tracer before deletion of an instance. * Must have trace_types_lock held. */ static void tracing_set_nop(struct trace_array *tr) { if (tr->current_trace == &nop_trace) return; tr->current_trace->enabled--; if (tr->current_trace->reset) tr->current_trace->reset(tr); tr->current_trace = &nop_trace; tr->current_trace_flags = nop_trace.flags; } static bool tracer_options_updated; int tracing_set_tracer(struct trace_array *tr, const char *buf) { struct tracer *trace = NULL; struct tracers *t; #ifdef CONFIG_TRACER_MAX_TRACE bool had_max_tr; #endif int ret; guard(mutex)(&trace_types_lock); update_last_data(tr); if (!tr->ring_buffer_expanded) { ret = __tracing_resize_ring_buffer(tr, trace_buf_size, RING_BUFFER_ALL_CPUS); if (ret < 0) return ret; ret = 0; } list_for_each_entry(t, &tr->tracers, list) { if (strcmp(t->tracer->name, buf) == 0) { trace = t->tracer; break; } } if (!trace) return -EINVAL; if (trace == tr->current_trace) return 0; #ifdef CONFIG_TRACER_SNAPSHOT if (trace->use_max_tr) { local_irq_disable(); arch_spin_lock(&tr->max_lock); ret = tr->cond_snapshot ? -EBUSY : 0; arch_spin_unlock(&tr->max_lock); local_irq_enable(); if (ret) return ret; } #endif /* Some tracers won't work on kernel command line */ if (system_state < SYSTEM_RUNNING && trace->noboot) { pr_warn("Tracer '%s' is not allowed on command line, ignored\n", trace->name); return -EINVAL; } /* Some tracers are only allowed for the top level buffer */ if (!trace_ok_for_array(trace, tr)) return -EINVAL; /* If trace pipe files are being read, we can't change the tracer */ if (tr->trace_ref) return -EBUSY; trace_branch_disable(); tr->current_trace->enabled--; if (tr->current_trace->reset) tr->current_trace->reset(tr); #ifdef CONFIG_TRACER_MAX_TRACE had_max_tr = tr->current_trace->use_max_tr; /* Current trace needs to be nop_trace before synchronize_rcu */ tr->current_trace = &nop_trace; tr->current_trace_flags = nop_trace.flags; if (had_max_tr && !trace->use_max_tr) { /* * We need to make sure that the update_max_tr sees that * current_trace changed to nop_trace to keep it from * swapping the buffers after we resize it. * The update_max_tr is called from interrupts disabled * so a synchronized_sched() is sufficient. */ synchronize_rcu(); free_snapshot(tr); tracing_disarm_snapshot(tr); } if (!had_max_tr && trace->use_max_tr) { ret = tracing_arm_snapshot_locked(tr); if (ret) return ret; } #else tr->current_trace = &nop_trace; #endif tr->current_trace_flags = t->flags ? : t->tracer->flags; if (trace->init) { ret = tracer_init(trace, tr); if (ret) { #ifdef CONFIG_TRACER_MAX_TRACE if (trace->use_max_tr) tracing_disarm_snapshot(tr); #endif tr->current_trace_flags = nop_trace.flags; return ret; } } tr->current_trace = trace; tr->current_trace->enabled++; trace_branch_enable(tr); return 0; } static ssize_t tracing_set_trace_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos) { struct trace_array *tr = filp->private_data; char buf[MAX_TRACER_SIZE+1]; char *name; size_t ret; int err; ret = cnt; if (cnt > MAX_TRACER_SIZE) cnt = MAX_TRACER_SIZE; if (copy_from_user(buf, ubuf, cnt)) return -EFAULT; buf[cnt] = 0; name = strim(buf); err = tracing_set_tracer(tr, name); if (err) return err; *ppos += ret; return ret; } static ssize_t tracing_nsecs_read(unsigned long *ptr, char __user *ubuf, size_t cnt, loff_t *ppos) { char buf[64]; int r; r = snprintf(buf, sizeof(buf), "%ld\n", *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr)); if (r > sizeof(buf)) r = sizeof(buf); return simple_read_from_buffer(ubuf, cnt, ppos, buf, r); } static ssize_t tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf, size_t cnt, loff_t *ppos) { unsigned long val; int ret; ret = kstrtoul_from_user(ubuf, cnt, 10, &val); if (ret) return ret; *ptr = val * 1000; return cnt; } static ssize_t tracing_thresh_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos) { return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos); } static ssize_t tracing_thresh_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos) { struct trace_array *tr = filp->private_data; int ret; guard(mutex)(&trace_types_lock); ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos); if (ret < 0) return ret; if (tr->current_trace->update_thresh) { ret = tr->current_trace->update_thresh(tr); if (ret < 0) return ret; } return cnt; } #ifdef CONFIG_TRACER_MAX_TRACE static ssize_t tracing_max_lat_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos) { struct trace_array *tr = filp->private_data; return tracing_nsecs_read(&tr->max_latency, ubuf, cnt, ppos); } static ssize_t tracing_max_lat_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos) { struct trace_array *tr = filp->private_data; return tracing_nsecs_write(&tr->max_latency, ubuf, cnt, ppos); } #endif static int open_pipe_on_cpu(struct trace_array *tr, int cpu) { if (cpu == RING_BUFFER_ALL_CPUS) { if (cpumask_empty(tr->pipe_cpumask)) { cpumask_setall(tr->pipe_cpumask); return 0; } } else if (!cpumask_test_cpu(cpu, tr->pipe_cpumask)) { cpumask_set_cpu(cpu, tr->pipe_cpumask); return 0; } return -EBUSY; } static void close_pipe_on_cpu(struct trace_array *tr, int cpu) { if (cpu == RING_BUFFER_ALL_CPUS) { WARN_ON(!cpumask_full(tr->pipe_cpumask)); cpumask_clear(tr->pipe_cpumask); } else { WARN_ON(!cpumask_test_cpu(cpu, tr->pipe_cpumask)); cpumask_clear_cpu(cpu, tr->pipe_cpumask); } } static int tracing_open_pipe(struct inode *inode, struct file *filp) { struct trace_array *tr = inode->i_private; struct trace_iterator *iter; int cpu; int ret; ret = tracing_check_open_get_tr(tr); if (ret) return ret; guard(mutex)(&trace_types_lock); cpu = tracing_get_cpu(inode); ret = open_pipe_on_cpu(tr, cpu); if (ret) goto fail_pipe_on_cpu; /* create a buffer to store the information to pass to userspace */ iter = kzalloc(sizeof(*iter), GFP_KERNEL); if (!iter) { ret = -ENOMEM; goto fail_alloc_iter; } trace_seq_init(&iter->seq); iter->trace = tr->current_trace; if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) { ret = -ENOMEM; goto fail; } /* trace pipe does not show start of buffer */ cpumask_setall(iter->started); if (tr->trace_flags & TRACE_ITER(LATENCY_FMT)) iter->iter_flags |= TRACE_FILE_LAT_FMT; /* Output in nanoseconds only if we are using a clock in nanoseconds. */ if (trace_clocks[tr->clock_id].in_ns) iter->iter_flags |= TRACE_FILE_TIME_IN_NS; iter->tr = tr; iter->array_buffer = &tr->array_buffer; iter->cpu_file = cpu; mutex_init(&iter->mutex); filp->private_data = iter; if (iter->trace->pipe_open) iter->trace->pipe_open(iter); nonseekable_open(inode, filp); tr->trace_ref++; return ret; fail: kfree(iter); fail_alloc_iter: close_pipe_on_cpu(tr, cpu); fail_pipe_on_cpu: __trace_array_put(tr); return ret; } static int tracing_release_pipe(struct inode *inode, struct file *file) { struct trace_iterator *iter = file->private_data; struct trace_array *tr = inode->i_private; scoped_guard(mutex, &trace_types_lock) { tr->trace_ref--; if (iter->trace->pipe_close) iter->trace->pipe_close(iter); close_pipe_on_cpu(tr, iter->cpu_file); } free_trace_iter_content(iter); kfree(iter); trace_array_put(tr); return 0; } static __poll_t trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table) { struct trace_array *tr = iter->tr; /* Iterators are static, they should be filled or empty */ if (trace_buffer_iter(iter, iter->cpu_file)) return EPOLLIN | EPOLLRDNORM; if (tr->trace_flags & TRACE_ITER(BLOCK)) /* * Always select as readable when in blocking mode */ return EPOLLIN | EPOLLRDNORM; else return ring_buffer_poll_wait(iter->array_buffer->buffer, iter->cpu_file, filp, poll_table, iter->tr->buffer_percent); } static __poll_t tracing_poll_pipe(struct file *filp, poll_table *poll_table) { struct trace_iterator *iter = filp->private_data; return trace_poll(iter, filp, poll_table); } /* Must be called with iter->mutex held. */ static int tracing_wait_pipe(struct file *filp) { struct trace_iterator *iter = filp->private_data; int ret; while (trace_empty(iter)) { if ((filp->f_flags & O_NONBLOCK)) { return -EAGAIN; } /* * We block until we read something and tracing is disabled. * We still block if tracing is disabled, but we have never * read anything. This allows a user to cat this file, and * then enable tracing. But after we have read something, * we give an EOF when tracing is again disabled. * * iter->pos will be 0 if we haven't read anything. */ if (!tracer_tracing_is_on(iter->tr) && iter->pos) break; mutex_unlock(&iter->mutex); ret = wait_on_pipe(iter, 0); mutex_lock(&iter->mutex); if (ret) return ret; } return 1; } static bool update_last_data_if_empty(struct trace_array *tr) { if (!(tr->flags & TRACE_ARRAY_FL_LAST_BOOT)) return false; if (!ring_buffer_empty(tr->array_buffer.buffer)) return false; /* * If the buffer contains the last boot data and all per-cpu * buffers are empty, reset it from the kernel side. */ update_last_data(tr); return true; } /* * Consumer reader. */ static ssize_t tracing_read_pipe(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos) { struct trace_iterator *iter = filp->private_data; ssize_t sret; /* * Avoid more than one consumer on a single file descriptor * This is just a matter of traces coherency, the ring buffer itself * is protected. */ guard(mutex)(&iter->mutex); /* return any leftover data */ sret = trace_seq_to_user(&iter->seq, ubuf, cnt); if (sret != -EBUSY) return sret; trace_seq_init(&iter->seq); if (iter->trace->read) { sret = iter->trace->read(iter, filp, ubuf, cnt, ppos); if (sret) return sret; } waitagain: if (update_last_data_if_empty(iter->tr)) return 0; sret = tracing_wait_pipe(filp); if (sret <= 0) return sret; /* stop when tracing is finished */ if (trace_empty(iter)) return 0; if (cnt >= TRACE_SEQ_BUFFER_SIZE) cnt = TRACE_SEQ_BUFFER_SIZE - 1; /* reset all but tr, trace, and overruns */ trace_iterator_reset(iter); cpumask_clear(iter->started); trace_seq_init(&iter->seq); trace_event_read_lock(); trace_access_lock(iter->cpu_file); while (trace_find_next_entry_inc(iter) != NULL) { enum print_line_t ret; int save_len = iter->seq.seq.len; ret = print_trace_line(iter); if (ret == TRACE_TYPE_PARTIAL_LINE) { /* * If one print_trace_line() fills entire trace_seq in one shot, * trace_seq_to_user() will returns -EBUSY because save_len == 0, * In this case, we need to consume it, otherwise, loop will peek * this event next time, resulting in an infinite loop. */ if (save_len == 0) { iter->seq.full = 0; trace_seq_puts(&iter->seq, "[LINE TOO BIG]\n"); trace_consume(iter); break; } /* In other cases, don't print partial lines */ iter->seq.seq.len = save_len; break; } if (ret != TRACE_TYPE_NO_CONSUME) trace_consume(iter); if (trace_seq_used(&iter->seq) >= cnt) break; /* * Setting the full flag means we reached the trace_seq buffer * size and we should leave by partial output condition above. * One of the trace_seq_* functions is not used properly. */ WARN_ONCE(iter->seq.full, "full flag set for trace type %d", iter->ent->type); } trace_access_unlock(iter->cpu_file); trace_event_read_unlock(); /* Now copy what we have to the user */ sret = trace_seq_to_user(&iter->seq, ubuf, cnt); if (iter->seq.readpos >= trace_seq_used(&iter->seq)) trace_seq_init(&iter->seq); /* * If there was nothing to send to user, in spite of consuming trace * entries, go back to wait for more entries. */ if (sret == -EBUSY) goto waitagain; return sret; } static void tracing_spd_release_pipe(struct splice_pipe_desc *spd, unsigned int idx) { __free_page(spd->pages[idx]); } static size_t tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter) { size_t count; int save_len; int ret; /* Seq buffer is page-sized, exactly what we need. */ for (;;) { save_len = iter->seq.seq.len; ret = print_trace_line(iter); if (trace_seq_has_overflowed(&iter->seq)) { iter->seq.seq.len = save_len; break; } /* * This should not be hit, because it should only * be set if the iter->seq overflowed. But check it * anyway to be safe. */ if (ret == TRACE_TYPE_PARTIAL_LINE) { iter->seq.seq.len = save_len; break; } count = trace_seq_used(&iter->seq) - save_len; if (rem < count) { rem = 0; iter->seq.seq.len = save_len; break; } if (ret != TRACE_TYPE_NO_CONSUME) trace_consume(iter); rem -= count; if (!trace_find_next_entry_inc(iter)) { rem = 0; iter->ent = NULL; break; } } return rem; } static ssize_t tracing_splice_read_pipe(struct file *filp, loff_t *ppos, struct pipe_inode_info *pipe, size_t len, unsigned int flags) { struct page *pages_def[PIPE_DEF_BUFFERS]; struct partial_page partial_def[PIPE_DEF_BUFFERS]; struct trace_iterator *iter = filp->private_data; struct splice_pipe_desc spd = { .pages = pages_def, .partial = partial_def, .nr_pages = 0, /* This gets updated below. */ .nr_pages_max = PIPE_DEF_BUFFERS, .ops = &default_pipe_buf_ops, .spd_release = tracing_spd_release_pipe, }; ssize_t ret; size_t rem; unsigned int i; if (splice_grow_spd(pipe, &spd)) return -ENOMEM; mutex_lock(&iter->mutex); if (iter->trace->splice_read) { ret = iter->trace->splice_read(iter, filp, ppos, pipe, len, flags); if (ret) goto out_err; } ret = tracing_wait_pipe(filp); if (ret <= 0) goto out_err; if (!iter->ent && !trace_find_next_entry_inc(iter)) { ret = -EFAULT; goto out_err; } trace_event_read_lock(); trace_access_lock(iter->cpu_file); /* Fill as many pages as possible. */ for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) { spd.pages[i] = alloc_page(GFP_KERNEL); if (!spd.pages[i]) break; rem = tracing_fill_pipe_page(rem, iter); /* Copy the data into the page, so we can start over. */ ret = trace_seq_to_buffer(&iter->seq, page_address(spd.pages[i]), min((size_t)trace_seq_used(&iter->seq), (size_t)PAGE_SIZE)); if (ret < 0) { __free_page(spd.pages[i]); break; } spd.partial[i].offset = 0; spd.partial[i].len = ret; trace_seq_init(&iter->seq); } trace_access_unlock(iter->cpu_file); trace_event_read_unlock(); mutex_unlock(&iter->mutex); spd.nr_pages = i; if (i) ret = splice_to_pipe(pipe, &spd); else ret = 0; out: splice_shrink_spd(&spd); return ret; out_err: mutex_unlock(&iter->mutex); goto out; } static ssize_t tracing_syscall_buf_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos) { struct inode *inode = file_inode(filp); struct trace_array *tr = inode->i_private; char buf[64]; int r; r = snprintf(buf, 64, "%d\n", tr->syscall_buf_sz); return simple_read_from_buffer(ubuf, cnt, ppos, buf, r); } static ssize_t tracing_syscall_buf_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos) { struct inode *inode = file_inode(filp); struct trace_array *tr = inode->i_private; unsigned long val; int ret; ret = kstrtoul_from_user(ubuf, cnt, 10, &val); if (ret) return ret; if (val > SYSCALL_FAULT_USER_MAX) val = SYSCALL_FAULT_USER_MAX; tr->syscall_buf_sz = val; *ppos += cnt; return cnt; } static ssize_t tracing_entries_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos) { struct inode *inode = file_inode(filp); struct trace_array *tr = inode->i_private; int cpu = tracing_get_cpu(inode); char buf[64]; int r = 0; ssize_t ret; mutex_lock(&trace_types_lock); if (cpu == RING_BUFFER_ALL_CPUS) { int cpu, buf_size_same; unsigned long size; size = 0; buf_size_same = 1; /* check if all cpu sizes are same */ for_each_tracing_cpu(cpu) { /* fill in the size from first enabled cpu */ if (size == 0) size = per_cpu_ptr(tr->array_buffer.data, cpu)->entries; if (size != per_cpu_ptr(tr->array_buffer.data, cpu)->entries) { buf_size_same = 0; break; } } if (buf_size_same) { if (!tr->ring_buffer_expanded) r = sprintf(buf, "%lu (expanded: %lu)\n", size >> 10, trace_buf_size >> 10); else r = sprintf(buf, "%lu\n", size >> 10); } else r = sprintf(buf, "X\n"); } else r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10); mutex_unlock(&trace_types_lock); ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r); return ret; } static ssize_t tracing_entries_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos) { struct inode *inode = file_inode(filp); struct trace_array *tr = inode->i_private; unsigned long val; int ret; ret = kstrtoul_from_user(ubuf, cnt, 10, &val); if (ret) return ret; /* must have at least 1 entry */ if (!val) return -EINVAL; /* value is in KB */ val <<= 10; ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode)); if (ret < 0) return ret; *ppos += cnt; return cnt; } static ssize_t tracing_total_entries_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos) { struct trace_array *tr = filp->private_data; char buf[64]; int r, cpu; unsigned long size = 0, expanded_size = 0; mutex_lock(&trace_types_lock); for_each_tracing_cpu(cpu) { size += per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10; if (!tr->ring_buffer_expanded) expanded_size += trace_buf_size >> 10; } if (tr->ring_buffer_expanded) r = sprintf(buf, "%lu\n", size); else r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size); mutex_unlock(&trace_types_lock); return simple_read_from_buffer(ubuf, cnt, ppos, buf, r); } #define LAST_BOOT_HEADER ((void *)1) static void *l_next(struct seq_file *m, void *v, loff_t *pos) { struct trace_array *tr = m->private; struct trace_scratch *tscratch = tr->scratch; unsigned int index = *pos; (*pos)++; if (*pos == 1) return LAST_BOOT_HEADER; /* Only show offsets of the last boot data */ if (!tscratch || !(tr->flags & TRACE_ARRAY_FL_LAST_BOOT)) return NULL; /* *pos 0 is for the header, 1 is for the first module */ index--; if (index >= tscratch->nr_entries) return NULL; return &tscratch->entries[index]; } static void *l_start(struct seq_file *m, loff_t *pos) { mutex_lock(&scratch_mutex); return l_next(m, NULL, pos); } static void l_stop(struct seq_file *m, void *p) { mutex_unlock(&scratch_mutex); } static void show_last_boot_header(struct seq_file *m, struct trace_array *tr) { struct trace_scratch *tscratch = tr->scratch; /* * Do not leak KASLR address. This only shows the KASLR address of * the last boot. When the ring buffer is started, the LAST_BOOT * flag gets cleared, and this should only report "current". * Otherwise it shows the KASLR address from the previous boot which * should not be the same as the current boot. */ if (tscratch && (tr->flags & TRACE_ARRAY_FL_LAST_BOOT)) seq_printf(m, "%lx\t[kernel]\n", tscratch->text_addr); else seq_puts(m, "# Current\n"); } static int l_show(struct seq_file *m, void *v) { struct trace_array *tr = m->private; struct trace_mod_entry *entry = v; if (v == LAST_BOOT_HEADER) { show_last_boot_header(m, tr); return 0; } seq_printf(m, "%lx\t%s\n", entry->mod_addr, entry->mod_name); return 0; } static const struct seq_operations last_boot_seq_ops = { .start = l_start, .next = l_next, .stop = l_stop, .show = l_show, }; static int tracing_last_boot_open(struct inode *inode, struct file *file) { struct trace_array *tr = inode->i_private; struct seq_file *m; int ret; ret = tracing_check_open_get_tr(tr); if (ret) return ret; ret = seq_open(file, &last_boot_seq_ops); if (ret) { trace_array_put(tr); return ret; } m = file->private_data; m->private = tr; return 0; } static int tracing_buffer_meta_open(struct inode *inode, struct file *filp) { struct trace_array *tr = inode->i_private; int cpu = tracing_get_cpu(inode); int ret; ret = tracing_check_open_get_tr(tr); if (ret) return ret; ret = ring_buffer_meta_seq_init(filp, tr->array_buffer.buffer, cpu); if (ret < 0) __trace_array_put(tr); return ret; } static ssize_t tracing_free_buffer_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos) { /* * There is no need to read what the user has written, this function * is just to make sure that there is no error when "echo" is used */ *ppos += cnt; return cnt; } static int tracing_free_buffer_release(struct inode *inode, struct file *filp) { struct trace_array *tr = inode->i_private; /* disable tracing ? */ if (tr->trace_flags & TRACE_ITER(STOP_ON_FREE)) tracer_tracing_off(tr); /* resize the ring buffer to 0 */ tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS); trace_array_put(tr); return 0; } #define TRACE_MARKER_MAX_SIZE 4096 static ssize_t write_marker_to_buffer(struct trace_array *tr, const char *buf, size_t cnt, unsigned long ip) { struct ring_buffer_event *event; enum event_trigger_type tt = ETT_NONE; struct trace_buffer *buffer; struct print_entry *entry; int meta_size; ssize_t written; size_t size; meta_size = sizeof(*entry) + 2; /* add '\0' and possible '\n' */ again: size = cnt + meta_size; buffer = tr->array_buffer.buffer; event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size, tracing_gen_ctx()); if (unlikely(!event)) { /* * If the size was greater than what was allowed, then * make it smaller and try again. */ if (size > ring_buffer_max_event_size(buffer)) { cnt = ring_buffer_max_event_size(buffer) - meta_size; /* The above should only happen once */ if (WARN_ON_ONCE(cnt + meta_size == size)) return -EBADF; goto again; } /* Ring buffer disabled, return as if not open for write */ return -EBADF; } entry = ring_buffer_event_data(event); entry->ip = ip; memcpy(&entry->buf, buf, cnt); written = cnt; if (tr->trace_marker_file && !list_empty(&tr->trace_marker_file->triggers)) { /* do not add \n before testing triggers, but add \0 */ entry->buf[cnt] = '\0'; tt = event_triggers_call(tr->trace_marker_file, buffer, entry, event); } if (entry->buf[cnt - 1] != '\n') { entry->buf[cnt] = '\n'; entry->buf[cnt + 1] = '\0'; } else entry->buf[cnt] = '\0'; if (static_branch_unlikely(&trace_marker_exports_enabled)) ftrace_exports(event, TRACE_EXPORT_MARKER); __buffer_unlock_commit(buffer, event); if (tt) event_triggers_post_call(tr->trace_marker_file, tt); return written; } struct trace_user_buf { char *buf; }; static DEFINE_MUTEX(trace_user_buffer_mutex); static struct trace_user_buf_info *trace_user_buffer; /** * trace_user_fault_destroy - free up allocated memory of a trace user buffer * @tinfo: The descriptor to free up * * Frees any data allocated in the trace info dsecriptor. */ void trace_user_fault_destroy(struct trace_user_buf_info *tinfo) { char *buf; int cpu; if (!tinfo || !tinfo->tbuf) return; for_each_possible_cpu(cpu) { buf = per_cpu_ptr(tinfo->tbuf, cpu)->buf; kfree(buf); } free_percpu(tinfo->tbuf); } static int user_fault_buffer_enable(struct trace_user_buf_info *tinfo, size_t size) { char *buf; int cpu; lockdep_assert_held(&trace_user_buffer_mutex); tinfo->tbuf = alloc_percpu(struct trace_user_buf); if (!tinfo->tbuf) return -ENOMEM; tinfo->ref = 1; tinfo->size = size; /* Clear each buffer in case of error */ for_each_possible_cpu(cpu) { per_cpu_ptr(tinfo->tbuf, cpu)->buf = NULL; } for_each_possible_cpu(cpu) { buf = kmalloc_node(size, GFP_KERNEL, cpu_to_node(cpu)); if (!buf) return -ENOMEM; per_cpu_ptr(tinfo->tbuf, cpu)->buf = buf; } return 0; } /* For internal use. Free and reinitialize */ static void user_buffer_free(struct trace_user_buf_info **tinfo) { lockdep_assert_held(&trace_user_buffer_mutex); trace_user_fault_destroy(*tinfo); kfree(*tinfo); *tinfo = NULL; } /* For internal use. Initialize and allocate */ static int user_buffer_init(struct trace_user_buf_info **tinfo, size_t size) { bool alloc = false; int ret; lockdep_assert_held(&trace_user_buffer_mutex); if (!*tinfo) { alloc = true; *tinfo = kzalloc(sizeof(**tinfo), GFP_KERNEL); if (!*tinfo) return -ENOMEM; } ret = user_fault_buffer_enable(*tinfo, size); if (ret < 0 && alloc) user_buffer_free(tinfo); return ret; } /* For internal use, derefrence and free if necessary */ static void user_buffer_put(struct trace_user_buf_info **tinfo) { guard(mutex)(&trace_user_buffer_mutex); if (WARN_ON_ONCE(!*tinfo || !(*tinfo)->ref)) return; if (--(*tinfo)->ref) return; user_buffer_free(tinfo); } /** * trace_user_fault_init - Allocated or reference a per CPU buffer * @tinfo: A pointer to the trace buffer descriptor * @size: The size to allocate each per CPU buffer * * Create a per CPU buffer that can be used to copy from user space * in a task context. When calling trace_user_fault_read(), preemption * must be disabled, and it will enable preemption and copy user * space data to the buffer. If any schedule switches occur, it will * retry until it succeeds without a schedule switch knowing the buffer * is still valid. * * Returns 0 on success, negative on failure. */ int trace_user_fault_init(struct trace_user_buf_info *tinfo, size_t size) { int ret; if (!tinfo) return -EINVAL; guard(mutex)(&trace_user_buffer_mutex); ret = user_buffer_init(&tinfo, size); if (ret < 0) trace_user_fault_destroy(tinfo); return ret; } /** * trace_user_fault_get - up the ref count for the user buffer * @tinfo: A pointer to a pointer to the trace buffer descriptor * * Ups the ref count of the trace buffer. * * Returns the new ref count. */ int trace_user_fault_get(struct trace_user_buf_info *tinfo) { if (!tinfo) return -1; guard(mutex)(&trace_user_buffer_mutex); tinfo->ref++; return tinfo->ref; } /** * trace_user_fault_put - dereference a per cpu trace buffer * @tinfo: The @tinfo that was passed to trace_user_fault_get() * * Decrement the ref count of @tinfo. * * Returns the new refcount (negative on error). */ int trace_user_fault_put(struct trace_user_buf_info *tinfo) { guard(mutex)(&trace_user_buffer_mutex); if (WARN_ON_ONCE(!tinfo || !tinfo->ref)) return -1; --tinfo->ref; return tinfo->ref; } /** * trace_user_fault_read - Read user space into a per CPU buffer * @tinfo: The @tinfo allocated by trace_user_fault_get() * @ptr: The user space pointer to read * @size: The size of user space to read. * @copy_func: Optional function to use to copy from user space * @data: Data to pass to copy_func if it was supplied * * Preemption must be disabled when this is called, and must not * be enabled while using the returned buffer. * This does the copying from user space into a per CPU buffer. * * The @size must not be greater than the size passed in to * trace_user_fault_init(). * * If @copy_func is NULL, trace_user_fault_read() will use copy_from_user(), * otherwise it will call @copy_func. It will call @copy_func with: * * buffer: the per CPU buffer of the @tinfo. * ptr: The pointer @ptr to user space to read * size: The @size of the ptr to read * data: The @data parameter * * It is expected that @copy_func will return 0 on success and non zero * if there was a fault. * * Returns a pointer to the buffer with the content read from @ptr. * Preemption must remain disabled while the caller accesses the * buffer returned by this function. * Returns NULL if there was a fault, or the size passed in is * greater than the size passed to trace_user_fault_init(). */ char *trace_user_fault_read(struct trace_user_buf_info *tinfo, const char __user *ptr, size_t size, trace_user_buf_copy copy_func, void *data) { int cpu = smp_processor_id(); char *buffer = per_cpu_ptr(tinfo->tbuf, cpu)->buf; unsigned int cnt; int trys = 0; int ret; lockdep_assert_preemption_disabled(); /* * It's up to the caller to not try to copy more than it said * it would. */ if (size > tinfo->size) return NULL; /* * This acts similar to a seqcount. The per CPU context switches are * recorded, migration is disabled and preemption is enabled. The * read of the user space memory is copied into the per CPU buffer. * Preemption is disabled again, and if the per CPU context switches count * is still the same, it means the buffer has not been corrupted. * If the count is different, it is assumed the buffer is corrupted * and reading must be tried again. */ do { /* * If for some reason, copy_from_user() always causes a context * switch, this would then cause an infinite loop. * If this task is preempted by another user space task, it * will cause this task to try again. But just in case something * changes where the copying from user space causes another task * to run, prevent this from going into an infinite loop. * 100 tries should be plenty. */ if (WARN_ONCE(trys++ > 100, "Error: Too many tries to read user space")) return NULL; /* Read the current CPU context switch counter */ cnt = nr_context_switches_cpu(cpu); /* * Preemption is going to be enabled, but this task must * remain on this CPU. */ migrate_disable(); /* * Now preemption is being enabled and another task can come in * and use the same buffer and corrupt our data. */ preempt_enable_notrace(); /* Make sure preemption is enabled here */ lockdep_assert_preemption_enabled(); if (copy_func) { ret = copy_func(buffer, ptr, size, data); } else { ret = __copy_from_user(buffer, ptr, size); } preempt_disable_notrace(); migrate_enable(); /* if it faulted, no need to test if the buffer was corrupted */ if (ret) return NULL; /* * Preemption is disabled again, now check the per CPU context * switch counter. If it doesn't match, then another user space * process may have schedule in and corrupted our buffer. In that * case the copying must be retried. */ } while (nr_context_switches_cpu(cpu) != cnt); return buffer; } static ssize_t tracing_mark_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *fpos) { struct trace_array *tr = filp->private_data; ssize_t written = -ENODEV; unsigned long ip; char *buf; if (tracing_disabled) return -EINVAL; if (!(tr->trace_flags & TRACE_ITER(MARKERS))) return -EINVAL; if ((ssize_t)cnt < 0) return -EINVAL; if (cnt > TRACE_MARKER_MAX_SIZE) cnt = TRACE_MARKER_MAX_SIZE; /* Must have preemption disabled while having access to the buffer */ guard(preempt_notrace)(); buf = trace_user_fault_read(trace_user_buffer, ubuf, cnt, NULL, NULL); if (!buf) return -EFAULT; /* The selftests expect this function to be the IP address */ ip = _THIS_IP_; /* The global trace_marker can go to multiple instances */ if (tr == &global_trace) { guard(rcu)(); list_for_each_entry_rcu(tr, &marker_copies, marker_list) { written = write_marker_to_buffer(tr, buf, cnt, ip); if (written < 0) break; } } else { written = write_marker_to_buffer(tr, buf, cnt, ip); } return written; } static ssize_t write_raw_marker_to_buffer(struct trace_array *tr, const char *buf, size_t cnt) { struct ring_buffer_event *event; struct trace_buffer *buffer; struct raw_data_entry *entry; ssize_t written; size_t size; /* cnt includes both the entry->id and the data behind it. */ size = struct_offset(entry, id) + cnt; buffer = tr->array_buffer.buffer; if (size > ring_buffer_max_event_size(buffer)) return -EINVAL; event = __trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size, tracing_gen_ctx()); if (!event) /* Ring buffer disabled, return as if not open for write */ return -EBADF; entry = ring_buffer_event_data(event); unsafe_memcpy(&entry->id, buf, cnt, "id and content already reserved on ring buffer" "'buf' includes the 'id' and the data." "'entry' was allocated with cnt from 'id'."); written = cnt; __buffer_unlock_commit(buffer, event); return written; } static ssize_t tracing_mark_raw_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *fpos) { struct trace_array *tr = filp->private_data; ssize_t written = -ENODEV; char *buf; if (tracing_disabled) return -EINVAL; if (!(tr->trace_flags & TRACE_ITER(MARKERS))) return -EINVAL; /* The marker must at least have a tag id */ if (cnt < sizeof(unsigned int)) return -EINVAL; /* raw write is all or nothing */ if (cnt > TRACE_MARKER_MAX_SIZE) return -EINVAL; /* Must have preemption disabled while having access to the buffer */ guard(preempt_notrace)(); buf = trace_user_fault_read(trace_user_buffer, ubuf, cnt, NULL, NULL); if (!buf) return -EFAULT; /* The global trace_marker_raw can go to multiple instances */ if (tr == &global_trace) { guard(rcu)(); list_for_each_entry_rcu(tr, &marker_copies, marker_list) { written = write_raw_marker_to_buffer(tr, buf, cnt); if (written < 0) break; } } else { written = write_raw_marker_to_buffer(tr, buf, cnt); } return written; } static int tracing_mark_open(struct inode *inode, struct file *filp) { int ret; scoped_guard(mutex, &trace_user_buffer_mutex) { if (!trace_user_buffer) { ret = user_buffer_init(&trace_user_buffer, TRACE_MARKER_MAX_SIZE); if (ret < 0) return ret; } else { trace_user_buffer->ref++; } } stream_open(inode, filp); ret = tracing_open_generic_tr(inode, filp); if (ret < 0) user_buffer_put(&trace_user_buffer); return ret; } static int tracing_mark_release(struct inode *inode, struct file *file) { user_buffer_put(&trace_user_buffer); return tracing_release_generic_tr(inode, file); } static int tracing_clock_show(struct seq_file *m, void *v) { struct trace_array *tr = m->private; int i; for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) seq_printf(m, "%s%s%s%s", i ? " " : "", i == tr->clock_id ? "[" : "", trace_clocks[i].name, i == tr->clock_id ? "]" : ""); seq_putc(m, '\n'); return 0; } int tracing_set_clock(struct trace_array *tr, const char *clockstr) { int i; for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) { if (strcmp(trace_clocks[i].name, clockstr) == 0) break; } if (i == ARRAY_SIZE(trace_clocks)) return -EINVAL; guard(mutex)(&trace_types_lock); tr->clock_id = i; ring_buffer_set_clock(tr->array_buffer.buffer, trace_clocks[i].func); /* * New clock may not be consistent with the previous clock. * Reset the buffer so that it doesn't have incomparable timestamps. */ tracing_reset_online_cpus(&tr->array_buffer); #ifdef CONFIG_TRACER_MAX_TRACE if (tr->max_buffer.buffer) ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func); tracing_reset_online_cpus(&tr->max_buffer); #endif if (tr->scratch && !(tr->flags & TRACE_ARRAY_FL_LAST_BOOT)) { struct trace_scratch *tscratch = tr->scratch; tscratch->clock_id = i; } return 0; } static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *fpos) { struct seq_file *m = filp->private_data; struct trace_array *tr = m->private; char buf[64]; const char *clockstr; int ret; if (cnt >= sizeof(buf)) return -EINVAL; if (copy_from_user(buf, ubuf, cnt)) return -EFAULT; buf[cnt] = 0; clockstr = strstrip(buf); ret = tracing_set_clock(tr, clockstr); if (ret) return ret; *fpos += cnt; return cnt; } static int tracing_clock_open(struct inode *inode, struct file *file) { struct trace_array *tr = inode->i_private; int ret; ret = tracing_check_open_get_tr(tr); if (ret) return ret; ret = single_open(file, tracing_clock_show, inode->i_private); if (ret < 0) trace_array_put(tr); return ret; } static int tracing_time_stamp_mode_show(struct seq_file *m, void *v) { struct trace_array *tr = m->private; guard(mutex)(&trace_types_lock); if (ring_buffer_time_stamp_abs(tr->array_buffer.buffer)) seq_puts(m, "delta [absolute]\n"); else seq_puts(m, "[delta] absolute\n"); return 0; } static int tracing_time_stamp_mode_open(struct inode *inode, struct file *file) { struct trace_array *tr = inode->i_private; int ret; ret = tracing_check_open_get_tr(tr); if (ret) return ret; ret = single_open(file, tracing_time_stamp_mode_show, inode->i_private); if (ret < 0) trace_array_put(tr); return ret; } u64 tracing_event_time_stamp(struct trace_buffer *buffer, struct ring_buffer_event *rbe) { if (rbe == this_cpu_read(trace_buffered_event)) return ring_buffer_time_stamp(buffer); return ring_buffer_event_time_stamp(buffer, rbe); } /* * Set or disable using the per CPU trace_buffer_event when possible. */ int tracing_set_filter_buffering(struct trace_array *tr, bool set) { guard(mutex)(&trace_types_lock); if (set && tr->no_filter_buffering_ref++) return 0; if (!set) { if (WARN_ON_ONCE(!tr->no_filter_buffering_ref)) return -EINVAL; --tr->no_filter_buffering_ref; } return 0; } struct ftrace_buffer_info { struct trace_iterator iter; void *spare; unsigned int spare_cpu; unsigned int spare_size; unsigned int read; }; #ifdef CONFIG_TRACER_SNAPSHOT static int tracing_snapshot_open(struct inode *inode, struct file *file) { struct trace_array *tr = inode->i_private; struct trace_iterator *iter; struct seq_file *m; int ret; ret = tracing_check_open_get_tr(tr); if (ret) return ret; if (file->f_mode & FMODE_READ) { iter = __tracing_open(inode, file, true); if (IS_ERR(iter)) ret = PTR_ERR(iter); } else { /* Writes still need the seq_file to hold the private data */ ret = -ENOMEM; m = kzalloc(sizeof(*m), GFP_KERNEL); if (!m) goto out; iter = kzalloc(sizeof(*iter), GFP_KERNEL); if (!iter) { kfree(m); goto out; } ret = 0; iter->tr = tr; iter->array_buffer = &tr->max_buffer; iter->cpu_file = tracing_get_cpu(inode); m->private = iter; file->private_data = m; } out: if (ret < 0) trace_array_put(tr); return ret; } static void tracing_swap_cpu_buffer(void *tr) { update_max_tr_single((struct trace_array *)tr, current, smp_processor_id()); } static ssize_t tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos) { struct seq_file *m = filp->private_data; struct trace_iterator *iter = m->private; struct trace_array *tr = iter->tr; unsigned long val; int ret; ret = tracing_update_buffers(tr); if (ret < 0) return ret; ret = kstrtoul_from_user(ubuf, cnt, 10, &val); if (ret) return ret; guard(mutex)(&trace_types_lock); if (tr->current_trace->use_max_tr) return -EBUSY; local_irq_disable(); arch_spin_lock(&tr->max_lock); if (tr->cond_snapshot) ret = -EBUSY; arch_spin_unlock(&tr->max_lock); local_irq_enable(); if (ret) return ret; switch (val) { case 0: if (iter->cpu_file != RING_BUFFER_ALL_CPUS) return -EINVAL; if (tr->allocated_snapshot) free_snapshot(tr); break; case 1: /* Only allow per-cpu swap if the ring buffer supports it */ #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP if (iter->cpu_file != RING_BUFFER_ALL_CPUS) return -EINVAL; #endif if (tr->allocated_snapshot) ret = resize_buffer_duplicate_size(&tr->max_buffer, &tr->array_buffer, iter->cpu_file); ret = tracing_arm_snapshot_locked(tr); if (ret) return ret; /* Now, we're going to swap */ if (iter->cpu_file == RING_BUFFER_ALL_CPUS) { local_irq_disable(); update_max_tr(tr, current, smp_processor_id(), NULL); local_irq_enable(); } else { smp_call_function_single(iter->cpu_file, tracing_swap_cpu_buffer, (void *)tr, 1); } tracing_disarm_snapshot(tr); break; default: if (tr->allocated_snapshot) { if (iter->cpu_file == RING_BUFFER_ALL_CPUS) tracing_reset_online_cpus(&tr->max_buffer); else tracing_reset_cpu(&tr->max_buffer, iter->cpu_file); } break; } if (ret >= 0) { *ppos += cnt; ret = cnt; } return ret; } static int tracing_snapshot_release(struct inode *inode, struct file *file) { struct seq_file *m = file->private_data; int ret; ret = tracing_release(inode, file); if (file->f_mode & FMODE_READ) return ret; /* If write only, the seq_file is just a stub */ if (m) kfree(m->private); kfree(m); return 0; } static int tracing_buffers_open(struct inode *inode, struct file *filp); static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf, size_t count, loff_t *ppos); static int tracing_buffers_release(struct inode *inode, struct file *file); static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos, struct pipe_inode_info *pipe, size_t len, unsigned int flags); static int snapshot_raw_open(struct inode *inode, struct file *filp) { struct ftrace_buffer_info *info; int ret; /* The following checks for tracefs lockdown */ ret = tracing_buffers_open(inode, filp); if (ret < 0) return ret; info = filp->private_data; if (info->iter.trace->use_max_tr) { tracing_buffers_release(inode, filp); return -EBUSY; } info->iter.snapshot = true; info->iter.array_buffer = &info->iter.tr->max_buffer; return ret; } #endif /* CONFIG_TRACER_SNAPSHOT */ static const struct file_operations tracing_thresh_fops = { .open = tracing_open_generic, .read = tracing_thresh_read, .write = tracing_thresh_write, .llseek = generic_file_llseek, }; #ifdef CONFIG_TRACER_MAX_TRACE static const struct file_operations tracing_max_lat_fops = { .open = tracing_open_generic_tr, .read = tracing_max_lat_read, .write = tracing_max_lat_write, .llseek = generic_file_llseek, .release = tracing_release_generic_tr, }; #endif static const struct file_operations set_tracer_fops = { .open = tracing_open_generic_tr, .read = tracing_set_trace_read, .write = tracing_set_trace_write, .llseek = generic_file_llseek, .release = tracing_release_generic_tr, }; static const struct file_operations tracing_pipe_fops = { .open = tracing_open_pipe, .poll = tracing_poll_pipe, .read = tracing_read_pipe, .splice_read = tracing_splice_read_pipe, .release = tracing_release_pipe, }; static const struct file_operations tracing_entries_fops = { .open = tracing_open_generic_tr, .read = tracing_entries_read, .write = tracing_entries_write, .llseek = generic_file_llseek, .release = tracing_release_generic_tr, }; static const struct file_operations tracing_syscall_buf_fops = { .open = tracing_open_generic_tr, .read = tracing_syscall_buf_read, .write = tracing_syscall_buf_write, .llseek = generic_file_llseek, .release = tracing_release_generic_tr, }; static const struct file_operations tracing_buffer_meta_fops = { .open = tracing_buffer_meta_open, .read = seq_read, .llseek = seq_lseek, .release = tracing_seq_release, }; static const struct file_operations tracing_total_entries_fops = { .open = tracing_open_generic_tr, .read = tracing_total_entries_read, .llseek = generic_file_llseek, .release = tracing_release_generic_tr, }; static const struct file_operations tracing_free_buffer_fops = { .open = tracing_open_generic_tr, .write = tracing_free_buffer_write, .release = tracing_free_buffer_release, }; static const struct file_operations tracing_mark_fops = { .open = tracing_mark_open, .write = tracing_mark_write, .release = tracing_mark_release, }; static const struct file_operations tracing_mark_raw_fops = { .open = tracing_mark_open, .write = tracing_mark_raw_write, .release = tracing_mark_release, }; static const struct file_operations trace_clock_fops = { .open = tracing_clock_open, .read = seq_read, .llseek = seq_lseek, .release = tracing_single_release_tr, .write = tracing_clock_write, }; static const struct file_operations trace_time_stamp_mode_fops = { .open = tracing_time_stamp_mode_open, .read = seq_read, .llseek = seq_lseek, .release = tracing_single_release_tr, }; static const struct file_operations last_boot_fops = { .open = tracing_last_boot_open, .read = seq_read, .llseek = seq_lseek, .release = tracing_seq_release, }; #ifdef CONFIG_TRACER_SNAPSHOT static const struct file_operations snapshot_fops = { .open = tracing_snapshot_open, .read = seq_read, .write = tracing_snapshot_write, .llseek = tracing_lseek, .release = tracing_snapshot_release, }; static const struct file_operations snapshot_raw_fops = { .open = snapshot_raw_open, .read = tracing_buffers_read, .release = tracing_buffers_release, .splice_read = tracing_buffers_splice_read, }; #endif /* CONFIG_TRACER_SNAPSHOT */ /* * trace_min_max_write - Write a u64 value to a trace_min_max_param struct * @filp: The active open file structure * @ubuf: The userspace provided buffer to read value into * @cnt: The maximum number of bytes to read * @ppos: The current "file" position * * This function implements the write interface for a struct trace_min_max_param. * The filp->private_data must point to a trace_min_max_param structure that * defines where to write the value, the min and the max acceptable values, * and a lock to protect the write. */ static ssize_t trace_min_max_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos) { struct trace_min_max_param *param = filp->private_data; u64 val; int err; if (!param) return -EFAULT; err = kstrtoull_from_user(ubuf, cnt, 10, &val); if (err) return err; if (param->lock) mutex_lock(param->lock); if (param->min && val < *param->min) err = -EINVAL; if (param->max && val > *param->max) err = -EINVAL; if (!err) *param->val = val; if (param->lock) mutex_unlock(param->lock); if (err) return err; return cnt; } /* * trace_min_max_read - Read a u64 value from a trace_min_max_param struct * @filp: The active open file structure * @ubuf: The userspace provided buffer to read value into * @cnt: The maximum number of bytes to read * @ppos: The current "file" position * * This function implements the read interface for a struct trace_min_max_param. * The filp->private_data must point to a trace_min_max_param struct with valid * data. */ static ssize_t trace_min_max_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos) { struct trace_min_max_param *param = filp->private_data; char buf[U64_STR_SIZE]; int len; u64 val; if (!param) return -EFAULT; val = *param->val; if (cnt > sizeof(buf)) cnt = sizeof(buf); len = snprintf(buf, sizeof(buf), "%llu\n", val); return simple_read_from_buffer(ubuf, cnt, ppos, buf, len); } const struct file_operations trace_min_max_fops = { .open = tracing_open_generic, .read = trace_min_max_read, .write = trace_min_max_write, }; #define TRACING_LOG_ERRS_MAX 8 #define TRACING_LOG_LOC_MAX 128 #define CMD_PREFIX " Command: " struct err_info { const char **errs; /* ptr to loc-specific array of err strings */ u8 type; /* index into errs -> specific err string */ u16 pos; /* caret position */ u64 ts; }; struct tracing_log_err { struct list_head list; struct err_info info; char loc[TRACING_LOG_LOC_MAX]; /* err location */ char *cmd; /* what caused err */ }; static DEFINE_MUTEX(tracing_err_log_lock); static struct tracing_log_err *alloc_tracing_log_err(int len) { struct tracing_log_err *err; err = kzalloc(sizeof(*err), GFP_KERNEL); if (!err) return ERR_PTR(-ENOMEM); err->cmd = kzalloc(len, GFP_KERNEL); if (!err->cmd) { kfree(err); return ERR_PTR(-ENOMEM); } return err; } static void free_tracing_log_err(struct tracing_log_err *err) { kfree(err->cmd); kfree(err); } static struct tracing_log_err *get_tracing_log_err(struct trace_array *tr, int len) { struct tracing_log_err *err; char *cmd; if (tr->n_err_log_entries < TRACING_LOG_ERRS_MAX) { err = alloc_tracing_log_err(len); if (PTR_ERR(err) != -ENOMEM) tr->n_err_log_entries++; return err; } cmd = kzalloc(len, GFP_KERNEL); if (!cmd) return ERR_PTR(-ENOMEM); err = list_first_entry(&tr->err_log, struct tracing_log_err, list); kfree(err->cmd); err->cmd = cmd; list_del(&err->list); return err; } /** * err_pos - find the position of a string within a command for error careting * @cmd: The tracing command that caused the error * @str: The string to position the caret at within @cmd * * Finds the position of the first occurrence of @str within @cmd. The * return value can be passed to tracing_log_err() for caret placement * within @cmd. * * Returns the index within @cmd of the first occurrence of @str or 0 * if @str was not found. */ unsigned int err_pos(char *cmd, const char *str) { char *found; if (WARN_ON(!strlen(cmd))) return 0; found = strstr(cmd, str); if (found) return found - cmd; return 0; } /** * tracing_log_err - write an error to the tracing error log * @tr: The associated trace array for the error (NULL for top level array) * @loc: A string describing where the error occurred * @cmd: The tracing command that caused the error * @errs: The array of loc-specific static error strings * @type: The index into errs[], which produces the specific static err string * @pos: The position the caret should be placed in the cmd * * Writes an error into tracing/error_log of the form: * * <loc>: error: <text> * Command: <cmd> * ^ * * tracing/error_log is a small log file containing the last * TRACING_LOG_ERRS_MAX errors (8). Memory for errors isn't allocated * unless there has been a tracing error, and the error log can be * cleared and have its memory freed by writing the empty string in * truncation mode to it i.e. echo > tracing/error_log. * * NOTE: the @errs array along with the @type param are used to * produce a static error string - this string is not copied and saved * when the error is logged - only a pointer to it is saved. See * existing callers for examples of how static strings are typically * defined for use with tracing_log_err(). */ void tracing_log_err(struct trace_array *tr, const char *loc, const char *cmd, const char **errs, u8 type, u16 pos) { struct tracing_log_err *err; int len = 0; if (!tr) tr = &global_trace; len += sizeof(CMD_PREFIX) + 2 * sizeof("\n") + strlen(cmd) + 1; guard(mutex)(&tracing_err_log_lock); err = get_tracing_log_err(tr, len); if (PTR_ERR(err) == -ENOMEM) return; snprintf(err->loc, TRACING_LOG_LOC_MAX, "%s: error: ", loc); snprintf(err->cmd, len, "\n" CMD_PREFIX "%s\n", cmd); err->info.errs = errs; err->info.type = type; err->info.pos = pos; err->info.ts = local_clock(); list_add_tail(&err->list, &tr->err_log); } static void clear_tracing_err_log(struct trace_array *tr) { struct tracing_log_err *err, *next; guard(mutex)(&tracing_err_log_lock); list_for_each_entry_safe(err, next, &tr->err_log, list) { list_del(&err->list); free_tracing_log_err(err); } tr->n_err_log_entries = 0; } static void *tracing_err_log_seq_start(struct seq_file *m, loff_t *pos) { struct trace_array *tr = m->private; mutex_lock(&tracing_err_log_lock); return seq_list_start(&tr->err_log, *pos); } static void *tracing_err_log_seq_next(struct seq_file *m, void *v, loff_t *pos) { struct trace_array *tr = m->private; return seq_list_next(v, &tr->err_log, pos); } static void tracing_err_log_seq_stop(struct seq_file *m, void *v) { mutex_unlock(&tracing_err_log_lock); } static void tracing_err_log_show_pos(struct seq_file *m, u16 pos) { u16 i; for (i = 0; i < sizeof(CMD_PREFIX) - 1; i++) seq_putc(m, ' '); for (i = 0; i < pos; i++) seq_putc(m, ' '); seq_puts(m, "^\n"); } static int tracing_err_log_seq_show(struct seq_file *m, void *v) { struct tracing_log_err *err = v; if (err) { const char *err_text = err->info.errs[err->info.type]; u64 sec = err->info.ts; u32 nsec; nsec = do_div(sec, NSEC_PER_SEC); seq_printf(m, "[%5llu.%06u] %s%s", sec, nsec / 1000, err->loc, err_text); seq_printf(m, "%s", err->cmd); tracing_err_log_show_pos(m, err->info.pos); } return 0; } static const struct seq_operations tracing_err_log_seq_ops = { .start = tracing_err_log_seq_start, .next = tracing_err_log_seq_next, .stop = tracing_err_log_seq_stop, .show = tracing_err_log_seq_show }; static int tracing_err_log_open(struct inode *inode, struct file *file) { struct trace_array *tr = inode->i_private; int ret = 0; ret = tracing_check_open_get_tr(tr); if (ret) return ret; /* If this file was opened for write, then erase contents */ if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) clear_tracing_err_log(tr); if (file->f_mode & FMODE_READ) { ret = seq_open(file, &tracing_err_log_seq_ops); if (!ret) { struct seq_file *m = file->private_data; m->private = tr; } else { trace_array_put(tr); } } return ret; } static ssize_t tracing_err_log_write(struct file *file, const char __user *buffer, size_t count, loff_t *ppos) { return count; } static int tracing_err_log_release(struct inode *inode, struct file *file) { struct trace_array *tr = inode->i_private; trace_array_put(tr); if (file->f_mode & FMODE_READ) seq_release(inode, file); return 0; } static const struct file_operations tracing_err_log_fops = { .open = tracing_err_log_open, .write = tracing_err_log_write, .read = seq_read, .llseek = tracing_lseek, .release = tracing_err_log_release, }; static int tracing_buffers_open(struct inode *inode, struct file *filp) { struct trace_array *tr = inode->i_private; struct ftrace_buffer_info *info; int ret; ret = tracing_check_open_get_tr(tr); if (ret) return ret; info = kvzalloc(sizeof(*info), GFP_KERNEL); if (!info) { trace_array_put(tr); return -ENOMEM; } mutex_lock(&trace_types_lock); info->iter.tr = tr; info->iter.cpu_file = tracing_get_cpu(inode); info->iter.trace = tr->current_trace; info->iter.array_buffer = &tr->array_buffer; info->spare = NULL; /* Force reading ring buffer for first read */ info->read = (unsigned int)-1; filp->private_data = info; tr->trace_ref++; mutex_unlock(&trace_types_lock); ret = nonseekable_open(inode, filp); if (ret < 0) trace_array_put(tr); return ret; } static __poll_t tracing_buffers_poll(struct file *filp, poll_table *poll_table) { struct ftrace_buffer_info *info = filp->private_data; struct trace_iterator *iter = &info->iter; return trace_poll(iter, filp, poll_table); } static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf, size_t count, loff_t *ppos) { struct ftrace_buffer_info *info = filp->private_data; struct trace_iterator *iter = &info->iter; void *trace_data; int page_size; ssize_t ret = 0; ssize_t size; if (!count) return 0; #ifdef CONFIG_TRACER_MAX_TRACE if (iter->snapshot && iter->tr->current_trace->use_max_tr) return -EBUSY; #endif page_size = ring_buffer_subbuf_size_get(iter->array_buffer->buffer); /* Make sure the spare matches the current sub buffer size */ if (info->spare) { if (page_size != info->spare_size) { ring_buffer_free_read_page(iter->array_buffer->buffer, info->spare_cpu, info->spare); info->spare = NULL; } } if (!info->spare) { info->spare = ring_buffer_alloc_read_page(iter->array_buffer->buffer, iter->cpu_file); if (IS_ERR(info->spare)) { ret = PTR_ERR(info->spare); info->spare = NULL; } else { info->spare_cpu = iter->cpu_file; info->spare_size = page_size; } } if (!info->spare) return ret; /* Do we have previous read data to read? */ if (info->read < page_size) goto read; again: trace_access_lock(iter->cpu_file); ret = ring_buffer_read_page(iter->array_buffer->buffer, info->spare, count, iter->cpu_file, 0); trace_access_unlock(iter->cpu_file); if (ret < 0) { if (trace_empty(iter) && !iter->closed) { if (update_last_data_if_empty(iter->tr)) return 0; if ((filp->f_flags & O_NONBLOCK)) return -EAGAIN; ret = wait_on_pipe(iter, 0); if (ret) return ret; goto again; } return 0; } info->read = 0; read: size = page_size - info->read; if (size > count) size = count; trace_data = ring_buffer_read_page_data(info->spare); ret = copy_to_user(ubuf, trace_data + info->read, size); if (ret == size) return -EFAULT; size -= ret; *ppos += size; info->read += size; return size; } static int tracing_buffers_flush(struct file *file, fl_owner_t id) { struct ftrace_buffer_info *info = file->private_data; struct trace_iterator *iter = &info->iter; iter->closed = true; /* Make sure the waiters see the new wait_index */ (void)atomic_fetch_inc_release(&iter->wait_index); ring_buffer_wake_waiters(iter->array_buffer->buffer, iter->cpu_file); return 0; } static int tracing_buffers_release(struct inode *inode, struct file *file) { struct ftrace_buffer_info *info = file->private_data; struct trace_iterator *iter = &info->iter; guard(mutex)(&trace_types_lock); iter->tr->trace_ref--; __trace_array_put(iter->tr); if (info->spare) ring_buffer_free_read_page(iter->array_buffer->buffer, info->spare_cpu, info->spare); kvfree(info); return 0; } struct buffer_ref { struct trace_buffer *buffer; void *page; int cpu; refcount_t refcount; }; static void buffer_ref_release(struct buffer_ref *ref) { if (!refcount_dec_and_test(&ref->refcount)) return; ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page); kfree(ref); } static void buffer_pipe_buf_release(struct pipe_inode_info *pipe, struct pipe_buffer *buf) { struct buffer_ref *ref = (struct buffer_ref *)buf->private; buffer_ref_release(ref); buf->private = 0; } static bool buffer_pipe_buf_get(struct pipe_inode_info *pipe, struct pipe_buffer *buf) { struct buffer_ref *ref = (struct buffer_ref *)buf->private; if (refcount_read(&ref->refcount) > INT_MAX/2) return false; refcount_inc(&ref->refcount); return true; } /* Pipe buffer operations for a buffer. */ static const struct pipe_buf_operations buffer_pipe_buf_ops = { .release = buffer_pipe_buf_release, .get = buffer_pipe_buf_get, }; /* * Callback from splice_to_pipe(), if we need to release some pages * at the end of the spd in case we error'ed out in filling the pipe. */ static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i) { struct buffer_ref *ref = (struct buffer_ref *)spd->partial[i].private; buffer_ref_release(ref); spd->partial[i].private = 0; } static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos, struct pipe_inode_info *pipe, size_t len, unsigned int flags) { struct ftrace_buffer_info *info = file->private_data; struct trace_iterator *iter = &info->iter; struct partial_page partial_def[PIPE_DEF_BUFFERS]; struct page *pages_def[PIPE_DEF_BUFFERS]; struct splice_pipe_desc spd = { .pages = pages_def, .partial = partial_def, .nr_pages_max = PIPE_DEF_BUFFERS, .ops = &buffer_pipe_buf_ops, .spd_release = buffer_spd_release, }; struct buffer_ref *ref; bool woken = false; int page_size; int entries, i; ssize_t ret = 0; #ifdef CONFIG_TRACER_MAX_TRACE if (iter->snapshot && iter->tr->current_trace->use_max_tr) return -EBUSY; #endif page_size = ring_buffer_subbuf_size_get(iter->array_buffer->buffer); if (*ppos & (page_size - 1)) return -EINVAL; if (len & (page_size - 1)) { if (len < page_size) return -EINVAL; len &= (~(page_size - 1)); } if (splice_grow_spd(pipe, &spd)) return -ENOMEM; again: trace_access_lock(iter->cpu_file); entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file); for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= page_size) { struct page *page; int r; ref = kzalloc(sizeof(*ref), GFP_KERNEL); if (!ref) { ret = -ENOMEM; break; } refcount_set(&ref->refcount, 1); ref->buffer = iter->array_buffer->buffer; ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file); if (IS_ERR(ref->page)) { ret = PTR_ERR(ref->page); ref->page = NULL; kfree(ref); break; } ref->cpu = iter->cpu_file; r = ring_buffer_read_page(ref->buffer, ref->page, len, iter->cpu_file, 1); if (r < 0) { ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page); kfree(ref); break; } page = virt_to_page(ring_buffer_read_page_data(ref->page)); spd.pages[i] = page; spd.partial[i].len = page_size; spd.partial[i].offset = 0; spd.partial[i].private = (unsigned long)ref; spd.nr_pages++; *ppos += page_size; entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file); } trace_access_unlock(iter->cpu_file); spd.nr_pages = i; /* did we read anything? */ if (!spd.nr_pages) { if (ret) goto out; if (woken) goto out; ret = -EAGAIN; if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK)) goto out; ret = wait_on_pipe(iter, iter->snapshot ? 0 : iter->tr->buffer_percent); if (ret) goto out; /* No need to wait after waking up when tracing is off */ if (!tracer_tracing_is_on(iter->tr)) goto out; /* Iterate one more time to collect any new data then exit */ woken = true; goto again; } ret = splice_to_pipe(pipe, &spd); out: splice_shrink_spd(&spd); return ret; } static long tracing_buffers_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { struct ftrace_buffer_info *info = file->private_data; struct trace_iterator *iter = &info->iter; int err; if (cmd == TRACE_MMAP_IOCTL_GET_READER) { if (!(file->f_flags & O_NONBLOCK)) { err = ring_buffer_wait(iter->array_buffer->buffer, iter->cpu_file, iter->tr->buffer_percent, NULL, NULL); if (err) return err; } return ring_buffer_map_get_reader(iter->array_buffer->buffer, iter->cpu_file); } else if (cmd) { return -ENOTTY; } /* * An ioctl call with cmd 0 to the ring buffer file will wake up all * waiters */ guard(mutex)(&trace_types_lock); /* Make sure the waiters see the new wait_index */ (void)atomic_fetch_inc_release(&iter->wait_index); ring_buffer_wake_waiters(iter->array_buffer->buffer, iter->cpu_file); return 0; } #ifdef CONFIG_TRACER_MAX_TRACE static int get_snapshot_map(struct trace_array *tr) { int err = 0; /* * Called with mmap_lock held. lockdep would be unhappy if we would now * take trace_types_lock. Instead use the specific * snapshot_trigger_lock. */ spin_lock(&tr->snapshot_trigger_lock); if (tr->snapshot || tr->mapped == UINT_MAX) err = -EBUSY; else tr->mapped++; spin_unlock(&tr->snapshot_trigger_lock); /* Wait for update_max_tr() to observe iter->tr->mapped */ if (tr->mapped == 1) synchronize_rcu(); return err; } static void put_snapshot_map(struct trace_array *tr) { spin_lock(&tr->snapshot_trigger_lock); if (!WARN_ON(!tr->mapped)) tr->mapped--; spin_unlock(&tr->snapshot_trigger_lock); } #else static inline int get_snapshot_map(struct trace_array *tr) { return 0; } static inline void put_snapshot_map(struct trace_array *tr) { } #endif static void tracing_buffers_mmap_close(struct vm_area_struct *vma) { struct ftrace_buffer_info *info = vma->vm_file->private_data; struct trace_iterator *iter = &info->iter; WARN_ON(ring_buffer_unmap(iter->array_buffer->buffer, iter->cpu_file)); put_snapshot_map(iter->tr); } static int tracing_buffers_may_split(struct vm_area_struct *vma, unsigned long addr) { /* * Trace buffer mappings require the complete buffer including * the meta page. Partial mappings are not supported. */ return -EINVAL; } static const struct vm_operations_struct tracing_buffers_vmops = { .close = tracing_buffers_mmap_close, .may_split = tracing_buffers_may_split, }; static int tracing_buffers_mmap(struct file *filp, struct vm_area_struct *vma) { struct ftrace_buffer_info *info = filp->private_data; struct trace_iterator *iter = &info->iter; int ret = 0; /* A memmap'ed and backup buffers are not supported for user space mmap */ if (iter->tr->flags & (TRACE_ARRAY_FL_MEMMAP | TRACE_ARRAY_FL_VMALLOC)) return -ENODEV; ret = get_snapshot_map(iter->tr); if (ret) return ret; ret = ring_buffer_map(iter->array_buffer->buffer, iter->cpu_file, vma); if (ret) put_snapshot_map(iter->tr); vma->vm_ops = &tracing_buffers_vmops; return ret; } static const struct file_operations tracing_buffers_fops = { .open = tracing_buffers_open, .read = tracing_buffers_read, .poll = tracing_buffers_poll, .release = tracing_buffers_release, .flush = tracing_buffers_flush, .splice_read = tracing_buffers_splice_read, .unlocked_ioctl = tracing_buffers_ioctl, .mmap = tracing_buffers_mmap, }; static ssize_t tracing_stats_read(struct file *filp, char __user *ubuf, size_t count, loff_t *ppos) { struct inode *inode = file_inode(filp); struct trace_array *tr = inode->i_private; struct array_buffer *trace_buf = &tr->array_buffer; int cpu = tracing_get_cpu(inode); struct trace_seq *s; unsigned long cnt; unsigned long long t; unsigned long usec_rem; s = kmalloc(sizeof(*s), GFP_KERNEL); if (!s) return -ENOMEM; trace_seq_init(s); cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu); trace_seq_printf(s, "entries: %ld\n", cnt); cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu); trace_seq_printf(s, "overrun: %ld\n", cnt); cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu); trace_seq_printf(s, "commit overrun: %ld\n", cnt); cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu); trace_seq_printf(s, "bytes: %ld\n", cnt); if (trace_clocks[tr->clock_id].in_ns) { /* local or global for trace_clock */ t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu)); usec_rem = do_div(t, USEC_PER_SEC); trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n", t, usec_rem); t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer)); usec_rem = do_div(t, USEC_PER_SEC); trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem); } else { /* counter or tsc mode for trace_clock */ trace_seq_printf(s, "oldest event ts: %llu\n", ring_buffer_oldest_event_ts(trace_buf->buffer, cpu)); trace_seq_printf(s, "now ts: %llu\n", ring_buffer_time_stamp(trace_buf->buffer)); } cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu); trace_seq_printf(s, "dropped events: %ld\n", cnt); cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu); trace_seq_printf(s, "read events: %ld\n", cnt); count = simple_read_from_buffer(ubuf, count, ppos, s->buffer, trace_seq_used(s)); kfree(s); return count; } static const struct file_operations tracing_stats_fops = { .open = tracing_open_generic_tr, .read = tracing_stats_read, .llseek = generic_file_llseek, .release = tracing_release_generic_tr, }; #ifdef CONFIG_DYNAMIC_FTRACE static ssize_t tracing_read_dyn_info(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos) { ssize_t ret; char *buf; int r; /* 512 should be plenty to hold the amount needed */ #define DYN_INFO_BUF_SIZE 512 buf = kmalloc(DYN_INFO_BUF_SIZE, GFP_KERNEL); if (!buf) return -ENOMEM; r = scnprintf(buf, DYN_INFO_BUF_SIZE, "%ld pages:%ld groups: %ld\n" "ftrace boot update time = %llu (ns)\n" "ftrace module total update time = %llu (ns)\n", ftrace_update_tot_cnt, ftrace_number_of_pages, ftrace_number_of_groups, ftrace_update_time, ftrace_total_mod_time); ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r); kfree(buf); return ret; } static const struct file_operations tracing_dyn_info_fops = { .open = tracing_open_generic, .read = tracing_read_dyn_info, .llseek = generic_file_llseek, }; #endif /* CONFIG_DYNAMIC_FTRACE */ #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) static void ftrace_snapshot(unsigned long ip, unsigned long parent_ip, struct trace_array *tr, struct ftrace_probe_ops *ops, void *data) { tracing_snapshot_instance(tr); } static void ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip, struct trace_array *tr, struct ftrace_probe_ops *ops, void *data) { struct ftrace_func_mapper *mapper = data; long *count = NULL; if (mapper) count = (long *)ftrace_func_mapper_find_ip(mapper, ip); if (count) { if (*count <= 0) return; (*count)--; } tracing_snapshot_instance(tr); } static int ftrace_snapshot_print(struct seq_file *m, unsigned long ip, struct ftrace_probe_ops *ops, void *data) { struct ftrace_func_mapper *mapper = data; long *count = NULL; seq_printf(m, "%ps:", (void *)ip); seq_puts(m, "snapshot"); if (mapper) count = (long *)ftrace_func_mapper_find_ip(mapper, ip); if (count) seq_printf(m, ":count=%ld\n", *count); else seq_puts(m, ":unlimited\n"); return 0; } static int ftrace_snapshot_init(struct ftrace_probe_ops *ops, struct trace_array *tr, unsigned long ip, void *init_data, void **data) { struct ftrace_func_mapper *mapper = *data; if (!mapper) { mapper = allocate_ftrace_func_mapper(); if (!mapper) return -ENOMEM; *data = mapper; } return ftrace_func_mapper_add_ip(mapper, ip, init_data); } static void ftrace_snapshot_free(struct ftrace_probe_ops *ops, struct trace_array *tr, unsigned long ip, void *data) { struct ftrace_func_mapper *mapper = data; if (!ip) { if (!mapper) return; free_ftrace_func_mapper(mapper, NULL); return; } ftrace_func_mapper_remove_ip(mapper, ip); } static struct ftrace_probe_ops snapshot_probe_ops = { .func = ftrace_snapshot, .print = ftrace_snapshot_print, }; static struct ftrace_probe_ops snapshot_count_probe_ops = { .func = ftrace_count_snapshot, .print = ftrace_snapshot_print, .init = ftrace_snapshot_init, .free = ftrace_snapshot_free, }; static int ftrace_trace_snapshot_callback(struct trace_array *tr, struct ftrace_hash *hash, char *glob, char *cmd, char *param, int enable) { struct ftrace_probe_ops *ops; void *count = (void *)-1; char *number; int ret; if (!tr) return -ENODEV; /* hash funcs only work with set_ftrace_filter */ if (!enable) return -EINVAL; ops = param ? &snapshot_count_probe_ops : &snapshot_probe_ops; if (glob[0] == '!') { ret = unregister_ftrace_function_probe_func(glob+1, tr, ops); if (!ret) tracing_disarm_snapshot(tr); return ret; } if (!param) goto out_reg; number = strsep(&param, ":"); if (!strlen(number)) goto out_reg; /* * We use the callback data field (which is a pointer) * as our counter. */ ret = kstrtoul(number, 0, (unsigned long *)&count); if (ret) return ret; out_reg: ret = tracing_arm_snapshot(tr); if (ret < 0) return ret; ret = register_ftrace_function_probe(glob, tr, ops, count); if (ret < 0) tracing_disarm_snapshot(tr); return ret < 0 ? ret : 0; } static struct ftrace_func_command ftrace_snapshot_cmd = { .name = "snapshot", .func = ftrace_trace_snapshot_callback, }; static __init int register_snapshot_cmd(void) { return register_ftrace_command(&ftrace_snapshot_cmd); } #else static inline __init int register_snapshot_cmd(void) { return 0; } #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */ static struct dentry *tracing_get_dentry(struct trace_array *tr) { /* Top directory uses NULL as the parent */ if (tr->flags & TRACE_ARRAY_FL_GLOBAL) return NULL; if (WARN_ON(!tr->dir)) return ERR_PTR(-ENODEV); /* All sub buffers have a descriptor */ return tr->dir; } static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu) { struct dentry *d_tracer; if (tr->percpu_dir) return tr->percpu_dir; d_tracer = tracing_get_dentry(tr); if (IS_ERR(d_tracer)) return NULL; tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer); MEM_FAIL(!tr->percpu_dir, "Could not create tracefs directory 'per_cpu/%d'\n", cpu); return tr->percpu_dir; } static struct dentry * trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent, void *data, long cpu, const struct file_operations *fops) { struct dentry *ret = trace_create_file(name, mode, parent, data, fops); if (ret) /* See tracing_get_cpu() */ d_inode(ret)->i_cdev = (void *)(cpu + 1); return ret; } static void tracing_init_tracefs_percpu(struct trace_array *tr, long cpu) { struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu); struct dentry *d_cpu; char cpu_dir[30]; /* 30 characters should be more than enough */ if (!d_percpu) return; snprintf(cpu_dir, 30, "cpu%ld", cpu); d_cpu = tracefs_create_dir(cpu_dir, d_percpu); if (!d_cpu) { pr_warn("Could not create tracefs '%s' entry\n", cpu_dir); return; } /* per cpu trace_pipe */ trace_create_cpu_file("trace_pipe", TRACE_MODE_READ, d_cpu, tr, cpu, &tracing_pipe_fops); /* per cpu trace */ trace_create_cpu_file("trace", TRACE_MODE_WRITE, d_cpu, tr, cpu, &tracing_fops); trace_create_cpu_file("trace_pipe_raw", TRACE_MODE_READ, d_cpu, tr, cpu, &tracing_buffers_fops); trace_create_cpu_file("stats", TRACE_MODE_READ, d_cpu, tr, cpu, &tracing_stats_fops); trace_create_cpu_file("buffer_size_kb", TRACE_MODE_READ, d_cpu, tr, cpu, &tracing_entries_fops); if (tr->range_addr_start) trace_create_cpu_file("buffer_meta", TRACE_MODE_READ, d_cpu, tr, cpu, &tracing_buffer_meta_fops); #ifdef CONFIG_TRACER_SNAPSHOT if (!tr->range_addr_start) { trace_create_cpu_file("snapshot", TRACE_MODE_WRITE, d_cpu, tr, cpu, &snapshot_fops); trace_create_cpu_file("snapshot_raw", TRACE_MODE_READ, d_cpu, tr, cpu, &snapshot_raw_fops); } #endif } #ifdef CONFIG_FTRACE_SELFTEST /* Let selftest have access to static functions in this file */ #include "trace_selftest.c" #endif static ssize_t trace_options_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos) { struct trace_option_dentry *topt = filp->private_data; char *buf; if (topt->flags->val & topt->opt->bit) buf = "1\n"; else buf = "0\n"; return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2); } static ssize_t trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos) { struct trace_option_dentry *topt = filp->private_data; unsigned long val; int ret; ret = kstrtoul_from_user(ubuf, cnt, 10, &val); if (ret) return ret; if (val != 0 && val != 1) return -EINVAL; if (!!(topt->flags->val & topt->opt->bit) != val) { guard(mutex)(&trace_types_lock); ret = __set_tracer_option(topt->tr, topt->flags, topt->opt, !val); if (ret) return ret; } *ppos += cnt; return cnt; } static int tracing_open_options(struct inode *inode, struct file *filp) { struct trace_option_dentry *topt = inode->i_private; int ret; ret = tracing_check_open_get_tr(topt->tr); if (ret) return ret; filp->private_data = inode->i_private; return 0; } static int tracing_release_options(struct inode *inode, struct file *file) { struct trace_option_dentry *topt = file->private_data; trace_array_put(topt->tr); return 0; } static const struct file_operations trace_options_fops = { .open = tracing_open_options, .read = trace_options_read, .write = trace_options_write, .llseek = generic_file_llseek, .release = tracing_release_options, }; /* * In order to pass in both the trace_array descriptor as well as the index * to the flag that the trace option file represents, the trace_array * has a character array of trace_flags_index[], which holds the index * of the bit for the flag it represents. index[0] == 0, index[1] == 1, etc. * The address of this character array is passed to the flag option file * read/write callbacks. * * In order to extract both the index and the trace_array descriptor, * get_tr_index() uses the following algorithm. * * idx = *ptr; * * As the pointer itself contains the address of the index (remember * index[1] == 1). * * Then to get the trace_array descriptor, by subtracting that index * from the ptr, we get to the start of the index itself. * * ptr - idx == &index[0] * * Then a simple container_of() from that pointer gets us to the * trace_array descriptor. */ static void get_tr_index(void *data, struct trace_array **ptr, unsigned int *pindex) { *pindex = *(unsigned char *)data; *ptr = container_of(data - *pindex, struct trace_array, trace_flags_index); } static ssize_t trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos) { void *tr_index = filp->private_data; struct trace_array *tr; unsigned int index; char *buf; get_tr_index(tr_index, &tr, &index); if (tr->trace_flags & (1ULL << index)) buf = "1\n"; else buf = "0\n"; return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2); } static ssize_t trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos) { void *tr_index = filp->private_data; struct trace_array *tr; unsigned int index; unsigned long val; int ret; get_tr_index(tr_index, &tr, &index); ret = kstrtoul_from_user(ubuf, cnt, 10, &val); if (ret) return ret; if (val != 0 && val != 1) return -EINVAL; mutex_lock(&event_mutex); mutex_lock(&trace_types_lock); ret = set_tracer_flag(tr, 1ULL << index, val); mutex_unlock(&trace_types_lock); mutex_unlock(&event_mutex); if (ret < 0) return ret; *ppos += cnt; return cnt; } static const struct file_operations trace_options_core_fops = { .open = tracing_open_generic, .read = trace_options_core_read, .write = trace_options_core_write, .llseek = generic_file_llseek, }; struct dentry *trace_create_file(const char *name, umode_t mode, struct dentry *parent, void *data, const struct file_operations *fops) { struct dentry *ret; ret = tracefs_create_file(name, mode, parent, data, fops); if (!ret) pr_warn("Could not create tracefs '%s' entry\n", name); return ret; } static struct dentry *trace_options_init_dentry(struct trace_array *tr) { struct dentry *d_tracer; if (tr->options) return tr->options; d_tracer = tracing_get_dentry(tr); if (IS_ERR(d_tracer)) return NULL; tr->options = tracefs_create_dir("options", d_tracer); if (!tr->options) { pr_warn("Could not create tracefs directory 'options'\n"); return NULL; } return tr->options; } static void create_trace_option_file(struct trace_array *tr, struct trace_option_dentry *topt, struct tracer_flags *flags, struct tracer_opt *opt) { struct dentry *t_options; t_options = trace_options_init_dentry(tr); if (!t_options) return; topt->flags = flags; topt->opt = opt; topt->tr = tr; topt->entry = trace_create_file(opt->name, TRACE_MODE_WRITE, t_options, topt, &trace_options_fops); } static int create_trace_option_files(struct trace_array *tr, struct tracer *tracer, struct tracer_flags *flags) { struct trace_option_dentry *topts; struct trace_options *tr_topts; struct tracer_opt *opts; int cnt; if (!flags || !flags->opts) return 0; opts = flags->opts; for (cnt = 0; opts[cnt].name; cnt++) ; topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL); if (!topts) return 0; tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1), GFP_KERNEL); if (!tr_topts) { kfree(topts); return -ENOMEM; } tr->topts = tr_topts; tr->topts[tr->nr_topts].tracer = tracer; tr->topts[tr->nr_topts].topts = topts; tr->nr_topts++; for (cnt = 0; opts[cnt].name; cnt++) { create_trace_option_file(tr, &topts[cnt], flags, &opts[cnt]); MEM_FAIL(topts[cnt].entry == NULL, "Failed to create trace option: %s", opts[cnt].name); } return 0; } static int get_global_flags_val(struct tracer *tracer) { struct tracers *t; list_for_each_entry(t, &global_trace.tracers, list) { if (t->tracer != tracer) continue; if (!t->flags) return -1; return t->flags->val; } return -1; } static int add_tracer_options(struct trace_array *tr, struct tracers *t) { struct tracer *tracer = t->tracer; struct tracer_flags *flags = t->flags ?: tracer->flags; if (!flags) return 0; /* Only add tracer options after update_tracer_options finish */ if (!tracer_options_updated) return 0; return create_trace_option_files(tr, tracer, flags); } static int add_tracer(struct trace_array *tr, struct tracer *tracer) { struct tracer_flags *flags; struct tracers *t; int ret; /* Only enable if the directory has been created already. */ if (!tr->dir && !(tr->flags & TRACE_ARRAY_FL_GLOBAL)) return 0; /* * If this is an instance, only create flags for tracers * the instance may have. */ if (!trace_ok_for_array(tracer, tr)) return 0; t = kmalloc(sizeof(*t), GFP_KERNEL); if (!t) return -ENOMEM; t->tracer = tracer; t->flags = NULL; list_add(&t->list, &tr->tracers); flags = tracer->flags; if (!flags) { if (!tracer->default_flags) return 0; /* * If the tracer defines default flags, it means the flags are * per trace instance. */ flags = kmalloc(sizeof(*flags), GFP_KERNEL); if (!flags) return -ENOMEM; *flags = *tracer->default_flags; flags->trace = tracer; t->flags = flags; /* If this is an instance, inherit the global_trace flags */ if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL)) { int val = get_global_flags_val(tracer); if (!WARN_ON_ONCE(val < 0)) flags->val = val; } } ret = add_tracer_options(tr, t); if (ret < 0) { list_del(&t->list); kfree(t->flags); kfree(t); } return ret; } static struct dentry * create_trace_option_core_file(struct trace_array *tr, const char *option, long index) { struct dentry *t_options; t_options = trace_options_init_dentry(tr); if (!t_options) return NULL; return trace_create_file(option, TRACE_MODE_WRITE, t_options, (void *)&tr->trace_flags_index[index], &trace_options_core_fops); } static void create_trace_options_dir(struct trace_array *tr) { struct dentry *t_options; bool top_level = tr == &global_trace; int i; t_options = trace_options_init_dentry(tr); if (!t_options) return; for (i = 0; trace_options[i]; i++) { if (top_level || !((1ULL << i) & TOP_LEVEL_TRACE_FLAGS)) { create_trace_option_core_file(tr, trace_options[i], i); } } } static ssize_t rb_simple_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos) { struct trace_array *tr = filp->private_data; char buf[64]; int r; r = tracer_tracing_is_on(tr); r = sprintf(buf, "%d\n", r); return simple_read_from_buffer(ubuf, cnt, ppos, buf, r); } static ssize_t rb_simple_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos) { struct trace_array *tr = filp->private_data; struct trace_buffer *buffer = tr->array_buffer.buffer; unsigned long val; int ret; ret = kstrtoul_from_user(ubuf, cnt, 10, &val); if (ret) return ret; if (buffer) { guard(mutex)(&trace_types_lock); if (!!val == tracer_tracing_is_on(tr)) { val = 0; /* do nothing */ } else if (val) { tracer_tracing_on(tr); if (tr->current_trace->start) tr->current_trace->start(tr); } else { tracer_tracing_off(tr); if (tr->current_trace->stop) tr->current_trace->stop(tr); /* Wake up any waiters */ ring_buffer_wake_waiters(buffer, RING_BUFFER_ALL_CPUS); } } (*ppos)++; return cnt; } static const struct file_operations rb_simple_fops = { .open = tracing_open_generic_tr, .read = rb_simple_read, .write = rb_simple_write, .release = tracing_release_generic_tr, .llseek = default_llseek, }; static ssize_t buffer_percent_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos) { struct trace_array *tr = filp->private_data; char buf[64]; int r; r = tr->buffer_percent; r = sprintf(buf, "%d\n", r); return simple_read_from_buffer(ubuf, cnt, ppos, buf, r); } static ssize_t buffer_percent_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos) { struct trace_array *tr = filp->private_data; unsigned long val; int ret; ret = kstrtoul_from_user(ubuf, cnt, 10, &val); if (ret) return ret; if (val > 100) return -EINVAL; tr->buffer_percent = val; (*ppos)++; return cnt; } static const struct file_operations buffer_percent_fops = { .open = tracing_open_generic_tr, .read = buffer_percent_read, .write = buffer_percent_write, .release = tracing_release_generic_tr, .llseek = default_llseek, }; static ssize_t buffer_subbuf_size_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos) { struct trace_array *tr = filp->private_data; size_t size; char buf[64]; int order; int r; order = ring_buffer_subbuf_order_get(tr->array_buffer.buffer); size = (PAGE_SIZE << order) / 1024; r = sprintf(buf, "%zd\n", size); return simple_read_from_buffer(ubuf, cnt, ppos, buf, r); } static ssize_t buffer_subbuf_size_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos) { struct trace_array *tr = filp->private_data; unsigned long val; int old_order; int order; int pages; int ret; ret = kstrtoul_from_user(ubuf, cnt, 10, &val); if (ret) return ret; val *= 1024; /* value passed in is in KB */ pages = DIV_ROUND_UP(val, PAGE_SIZE); order = fls(pages - 1); /* limit between 1 and 128 system pages */ if (order < 0 || order > 7) return -EINVAL; /* Do not allow tracing while changing the order of the ring buffer */ tracing_stop_tr(tr); old_order = ring_buffer_subbuf_order_get(tr->array_buffer.buffer); if (old_order == order) goto out; ret = ring_buffer_subbuf_order_set(tr->array_buffer.buffer, order); if (ret) goto out; #ifdef CONFIG_TRACER_MAX_TRACE if (!tr->allocated_snapshot) goto out_max; ret = ring_buffer_subbuf_order_set(tr->max_buffer.buffer, order); if (ret) { /* Put back the old order */ cnt = ring_buffer_subbuf_order_set(tr->array_buffer.buffer, old_order); if (WARN_ON_ONCE(cnt)) { /* * AARGH! We are left with different orders! * The max buffer is our "snapshot" buffer. * When a tracer needs a snapshot (one of the * latency tracers), it swaps the max buffer * with the saved snap shot. We succeeded to * update the order of the main buffer, but failed to * update the order of the max buffer. But when we tried * to reset the main buffer to the original size, we * failed there too. This is very unlikely to * happen, but if it does, warn and kill all * tracing. */ tracing_disabled = 1; } goto out; } out_max: #endif (*ppos)++; out: if (ret) cnt = ret; tracing_start_tr(tr); return cnt; } static const struct file_operations buffer_subbuf_size_fops = { .open = tracing_open_generic_tr, .read = buffer_subbuf_size_read, .write = buffer_subbuf_size_write, .release = tracing_release_generic_tr, .llseek = default_llseek, }; static struct dentry *trace_instance_dir; static void init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer); #ifdef CONFIG_MODULES static int make_mod_delta(struct module *mod, void *data) { struct trace_module_delta *module_delta; struct trace_scratch *tscratch; struct trace_mod_entry *entry; struct trace_array *tr = data; int i; tscratch = tr->scratch; module_delta = READ_ONCE(tr->module_delta); for (i = 0; i < tscratch->nr_entries; i++) { entry = &tscratch->entries[i]; if (strcmp(mod->name, entry->mod_name)) continue; if (mod->state == MODULE_STATE_GOING) module_delta->delta[i] = 0; else module_delta->delta[i] = (unsigned long)mod->mem[MOD_TEXT].base - entry->mod_addr; break; } return 0; } #else static int make_mod_delta(struct module *mod, void *data) { return 0; } #endif static int mod_addr_comp(const void *a, const void *b, const void *data) { const struct trace_mod_entry *e1 = a; const struct trace_mod_entry *e2 = b; return e1->mod_addr > e2->mod_addr ? 1 : -1; } static void setup_trace_scratch(struct trace_array *tr, struct trace_scratch *tscratch, unsigned int size) { struct trace_module_delta *module_delta; struct trace_mod_entry *entry; int i, nr_entries; if (!tscratch) return; tr->scratch = tscratch; tr->scratch_size = size; if (tscratch->text_addr) tr->text_delta = (unsigned long)_text - tscratch->text_addr; if (struct_size(tscratch, entries, tscratch->nr_entries) > size) goto reset; /* Check if each module name is a valid string */ for (i = 0; i < tscratch->nr_entries; i++) { int n; entry = &tscratch->entries[i]; for (n = 0; n < MODULE_NAME_LEN; n++) { if (entry->mod_name[n] == '\0') break; if (!isprint(entry->mod_name[n])) goto reset; } if (n == MODULE_NAME_LEN) goto reset; } /* Sort the entries so that we can find appropriate module from address. */ nr_entries = tscratch->nr_entries; sort_r(tscratch->entries, nr_entries, sizeof(struct trace_mod_entry), mod_addr_comp, NULL, NULL); if (IS_ENABLED(CONFIG_MODULES)) { module_delta = kzalloc(struct_size(module_delta, delta, nr_entries), GFP_KERNEL); if (!module_delta) { pr_info("module_delta allocation failed. Not able to decode module address."); goto reset; } init_rcu_head(&module_delta->rcu); } else module_delta = NULL; WRITE_ONCE(tr->module_delta, module_delta); /* Scan modules to make text delta for modules. */ module_for_each_mod(make_mod_delta, tr); /* Set trace_clock as the same of the previous boot. */ if (tscratch->clock_id != tr->clock_id) { if (tscratch->clock_id >= ARRAY_SIZE(trace_clocks) || tracing_set_clock(tr, trace_clocks[tscratch->clock_id].name) < 0) { pr_info("the previous trace_clock info is not valid."); goto reset; } } return; reset: /* Invalid trace modules */ memset(tscratch, 0, size); } static int allocate_trace_buffer(struct trace_array *tr, struct array_buffer *buf, int size) { enum ring_buffer_flags rb_flags; struct trace_scratch *tscratch; unsigned int scratch_size = 0; rb_flags = tr->trace_flags & TRACE_ITER(OVERWRITE) ? RB_FL_OVERWRITE : 0; buf->tr = tr; if (tr->range_addr_start && tr->range_addr_size) { /* Add scratch buffer to handle 128 modules */ buf->buffer = ring_buffer_alloc_range(size, rb_flags, 0, tr->range_addr_start, tr->range_addr_size, struct_size(tscratch, entries, 128)); tscratch = ring_buffer_meta_scratch(buf->buffer, &scratch_size); setup_trace_scratch(tr, tscratch, scratch_size); /* * This is basically the same as a mapped buffer, * with the same restrictions. */ tr->mapped++; } else { buf->buffer = ring_buffer_alloc(size, rb_flags); } if (!buf->buffer) return -ENOMEM; buf->data = alloc_percpu(struct trace_array_cpu); if (!buf->data) { ring_buffer_free(buf->buffer); buf->buffer = NULL; return -ENOMEM; } /* Allocate the first page for all buffers */ set_buffer_entries(&tr->array_buffer, ring_buffer_size(tr->array_buffer.buffer, 0)); return 0; } static void free_trace_buffer(struct array_buffer *buf) { if (buf->buffer) { ring_buffer_free(buf->buffer); buf->buffer = NULL; free_percpu(buf->data); buf->data = NULL; } } static int allocate_trace_buffers(struct trace_array *tr, int size) { int ret; ret = allocate_trace_buffer(tr, &tr->array_buffer, size); if (ret) return ret; #ifdef CONFIG_TRACER_MAX_TRACE /* Fix mapped buffer trace arrays do not have snapshot buffers */ if (tr->range_addr_start) return 0; ret = allocate_trace_buffer(tr, &tr->max_buffer, allocate_snapshot ? size : 1); if (MEM_FAIL(ret, "Failed to allocate trace buffer\n")) { free_trace_buffer(&tr->array_buffer); return -ENOMEM; } tr->allocated_snapshot = allocate_snapshot; allocate_snapshot = false; #endif return 0; } static void free_trace_buffers(struct trace_array *tr) { if (!tr) return; free_trace_buffer(&tr->array_buffer); kfree(tr->module_delta); #ifdef CONFIG_TRACER_MAX_TRACE free_trace_buffer(&tr->max_buffer); #endif } static void init_trace_flags_index(struct trace_array *tr) { int i; /* Used by the trace options files */ for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) tr->trace_flags_index[i] = i; } static int __update_tracer(struct trace_array *tr) { struct tracer *t; int ret = 0; for (t = trace_types; t && !ret; t = t->next) ret = add_tracer(tr, t); return ret; } static __init int __update_tracer_options(struct trace_array *tr) { struct tracers *t; int ret = 0; list_for_each_entry(t, &tr->tracers, list) { ret = add_tracer_options(tr, t); if (ret < 0) break; } return ret; } static __init void update_tracer_options(void) { struct trace_array *tr; guard(mutex)(&trace_types_lock); tracer_options_updated = true; list_for_each_entry(tr, &ftrace_trace_arrays, list) __update_tracer_options(tr); } /* Must have trace_types_lock held */ struct trace_array *trace_array_find(const char *instance) { struct trace_array *tr, *found = NULL; list_for_each_entry(tr, &ftrace_trace_arrays, list) { if (tr->name && strcmp(tr->name, instance) == 0) { found = tr; break; } } return found; } struct trace_array *trace_array_find_get(const char *instance) { struct trace_array *tr; guard(mutex)(&trace_types_lock); tr = trace_array_find(instance); if (tr) tr->ref++; return tr; } static int trace_array_create_dir(struct trace_array *tr) { int ret; tr->dir = tracefs_create_dir(tr->name, trace_instance_dir); if (!tr->dir) return -EINVAL; ret = event_trace_add_tracer(tr->dir, tr); if (ret) { tracefs_remove(tr->dir); return ret; } init_tracer_tracefs(tr, tr->dir); ret = __update_tracer(tr); if (ret) { event_trace_del_tracer(tr); tracefs_remove(tr->dir); return ret; } return 0; } static struct trace_array * trace_array_create_systems(const char *name, const char *systems, unsigned long range_addr_start, unsigned long range_addr_size) { struct trace_array *tr; int ret; ret = -ENOMEM; tr = kzalloc(sizeof(*tr), GFP_KERNEL); if (!tr) return ERR_PTR(ret); tr->name = kstrdup(name, GFP_KERNEL); if (!tr->name) goto out_free_tr; if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL)) goto out_free_tr; if (!zalloc_cpumask_var(&tr->pipe_cpumask, GFP_KERNEL)) goto out_free_tr; if (systems) { tr->system_names = kstrdup_const(systems, GFP_KERNEL); if (!tr->system_names) goto out_free_tr; } /* Only for boot up memory mapped ring buffers */ tr->range_addr_start = range_addr_start; tr->range_addr_size = range_addr_size; tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS; cpumask_copy(tr->tracing_cpumask, cpu_all_mask); raw_spin_lock_init(&tr->start_lock); tr->syscall_buf_sz = global_trace.syscall_buf_sz; tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED; #ifdef CONFIG_TRACER_MAX_TRACE spin_lock_init(&tr->snapshot_trigger_lock); #endif tr->current_trace = &nop_trace; tr->current_trace_flags = nop_trace.flags; INIT_LIST_HEAD(&tr->systems); INIT_LIST_HEAD(&tr->events); INIT_LIST_HEAD(&tr->hist_vars); INIT_LIST_HEAD(&tr->err_log); INIT_LIST_HEAD(&tr->tracers); INIT_LIST_HEAD(&tr->marker_list); #ifdef CONFIG_MODULES INIT_LIST_HEAD(&tr->mod_events); #endif if (allocate_trace_buffers(tr, trace_buf_size) < 0) goto out_free_tr; /* The ring buffer is defaultly expanded */ trace_set_ring_buffer_expanded(tr); if (ftrace_allocate_ftrace_ops(tr) < 0) goto out_free_tr; ftrace_init_trace_array(tr); init_trace_flags_index(tr); if (trace_instance_dir) { ret = trace_array_create_dir(tr); if (ret) goto out_free_tr; } else __trace_early_add_events(tr); list_add(&tr->list, &ftrace_trace_arrays); tr->ref++; return tr; out_free_tr: ftrace_free_ftrace_ops(tr); free_trace_buffers(tr); free_cpumask_var(tr->pipe_cpumask); free_cpumask_var(tr->tracing_cpumask); kfree_const(tr->system_names); kfree(tr->range_name); kfree(tr->name); kfree(tr); return ERR_PTR(ret); } static struct trace_array *trace_array_create(const char *name) { return trace_array_create_systems(name, NULL, 0, 0); } static int instance_mkdir(const char *name) { struct trace_array *tr; int ret; guard(mutex)(&event_mutex); guard(mutex)(&trace_types_lock); ret = -EEXIST; if (trace_array_find(name)) return -EEXIST; tr = trace_array_create(name); ret = PTR_ERR_OR_ZERO(tr); return ret; } #ifdef CONFIG_MMU static u64 map_pages(unsigned long start, unsigned long size) { unsigned long vmap_start, vmap_end; struct vm_struct *area; int ret; area = get_vm_area(size, VM_IOREMAP); if (!area) return 0; vmap_start = (unsigned long) area->addr; vmap_end = vmap_start + size; ret = vmap_page_range(vmap_start, vmap_end, start, pgprot_nx(PAGE_KERNEL)); if (ret < 0) { free_vm_area(area); return 0; } return (u64)vmap_start; } #else static inline u64 map_pages(unsigned long start, unsigned long size) { return 0; } #endif /** * trace_array_get_by_name - Create/Lookup a trace array, given its name. * @name: The name of the trace array to be looked up/created. * @systems: A list of systems to create event directories for (NULL for all) * * Returns pointer to trace array with given name. * NULL, if it cannot be created. * * NOTE: This function increments the reference counter associated with the * trace array returned. This makes sure it cannot be freed while in use. * Use trace_array_put() once the trace array is no longer needed. * If the trace_array is to be freed, trace_array_destroy() needs to * be called after the trace_array_put(), or simply let user space delete * it from the tracefs instances directory. But until the * trace_array_put() is called, user space can not delete it. * */ struct trace_array *trace_array_get_by_name(const char *name, const char *systems) { struct trace_array *tr; guard(mutex)(&event_mutex); guard(mutex)(&trace_types_lock); list_for_each_entry(tr, &ftrace_trace_arrays, list) { if (tr->name && strcmp(tr->name, name) == 0) { tr->ref++; return tr; } } tr = trace_array_create_systems(name, systems, 0, 0); if (IS_ERR(tr)) tr = NULL; else tr->ref++; return tr; } EXPORT_SYMBOL_GPL(trace_array_get_by_name); static int __remove_instance(struct trace_array *tr) { int i; /* Reference counter for a newly created trace array = 1. */ if (tr->ref > 1 || (tr->current_trace && tr->trace_ref)) return -EBUSY; list_del(&tr->list); /* Disable all the flags that were enabled coming in */ for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) { if ((1ULL << i) & ZEROED_TRACE_FLAGS) set_tracer_flag(tr, 1ULL << i, 0); } if (printk_trace == tr) update_printk_trace(&global_trace); if (update_marker_trace(tr, 0)) synchronize_rcu(); tracing_set_nop(tr); clear_ftrace_function_probes(tr); event_trace_del_tracer(tr); ftrace_clear_pids(tr); ftrace_destroy_function_files(tr); tracefs_remove(tr->dir); free_percpu(tr->last_func_repeats); free_trace_buffers(tr); clear_tracing_err_log(tr); free_tracers(tr); if (tr->range_name) { reserve_mem_release_by_name(tr->range_name); kfree(tr->range_name); } if (tr->flags & TRACE_ARRAY_FL_VMALLOC) vfree((void *)tr->range_addr_start); for (i = 0; i < tr->nr_topts; i++) { kfree(tr->topts[i].topts); } kfree(tr->topts); free_cpumask_var(tr->pipe_cpumask); free_cpumask_var(tr->tracing_cpumask); kfree_const(tr->system_names); kfree(tr->name); kfree(tr); return 0; } int trace_array_destroy(struct trace_array *this_tr) { struct trace_array *tr; if (!this_tr) return -EINVAL; guard(mutex)(&event_mutex); guard(mutex)(&trace_types_lock); /* Making sure trace array exists before destroying it. */ list_for_each_entry(tr, &ftrace_trace_arrays, list) { if (tr == this_tr) return __remove_instance(tr); } return -ENODEV; } EXPORT_SYMBOL_GPL(trace_array_destroy); static int instance_rmdir(const char *name) { struct trace_array *tr; guard(mutex)(&event_mutex); guard(mutex)(&trace_types_lock); tr = trace_array_find(name); if (!tr) return -ENODEV; return __remove_instance(tr); } static __init void create_trace_instances(struct dentry *d_tracer) { struct trace_array *tr; trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer, instance_mkdir, instance_rmdir); if (MEM_FAIL(!trace_instance_dir, "Failed to create instances directory\n")) return; guard(mutex)(&event_mutex); guard(mutex)(&trace_types_lock); list_for_each_entry(tr, &ftrace_trace_arrays, list) { if (!tr->name) continue; if (MEM_FAIL(trace_array_create_dir(tr) < 0, "Failed to create instance directory\n")) return; } } static void init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer) { int cpu; trace_create_file("available_tracers", TRACE_MODE_READ, d_tracer, tr, &show_traces_fops); trace_create_file("current_tracer", TRACE_MODE_WRITE, d_tracer, tr, &set_tracer_fops); trace_create_file("tracing_cpumask", TRACE_MODE_WRITE, d_tracer, tr, &tracing_cpumask_fops); trace_create_file("trace_options", TRACE_MODE_WRITE, d_tracer, tr, &tracing_iter_fops); trace_create_file("trace", TRACE_MODE_WRITE, d_tracer, tr, &tracing_fops); trace_create_file("trace_pipe", TRACE_MODE_READ, d_tracer, tr, &tracing_pipe_fops); trace_create_file("buffer_size_kb", TRACE_MODE_WRITE, d_tracer, tr, &tracing_entries_fops); trace_create_file("buffer_total_size_kb", TRACE_MODE_READ, d_tracer, tr, &tracing_total_entries_fops); trace_create_file("free_buffer", 0200, d_tracer, tr, &tracing_free_buffer_fops); trace_create_file("trace_marker", 0220, d_tracer, tr, &tracing_mark_fops); tr->trace_marker_file = __find_event_file(tr, "ftrace", "print"); trace_create_file("trace_marker_raw", 0220, d_tracer, tr, &tracing_mark_raw_fops); trace_create_file("trace_clock", TRACE_MODE_WRITE, d_tracer, tr, &trace_clock_fops); trace_create_file("tracing_on", TRACE_MODE_WRITE, d_tracer, tr, &rb_simple_fops); trace_create_file("timestamp_mode", TRACE_MODE_READ, d_tracer, tr, &trace_time_stamp_mode_fops); tr->buffer_percent = 50; trace_create_file("buffer_percent", TRACE_MODE_WRITE, d_tracer, tr, &buffer_percent_fops); trace_create_file("buffer_subbuf_size_kb", TRACE_MODE_WRITE, d_tracer, tr, &buffer_subbuf_size_fops); trace_create_file("syscall_user_buf_size", TRACE_MODE_WRITE, d_tracer, tr, &tracing_syscall_buf_fops); create_trace_options_dir(tr); #ifdef CONFIG_TRACER_MAX_TRACE trace_create_maxlat_file(tr, d_tracer); #endif if (ftrace_create_function_files(tr, d_tracer)) MEM_FAIL(1, "Could not allocate function filter files"); if (tr->range_addr_start) { trace_create_file("last_boot_info", TRACE_MODE_READ, d_tracer, tr, &last_boot_fops); #ifdef CONFIG_TRACER_SNAPSHOT } else { trace_create_file("snapshot", TRACE_MODE_WRITE, d_tracer, tr, &snapshot_fops); #endif } trace_create_file("error_log", TRACE_MODE_WRITE, d_tracer, tr, &tracing_err_log_fops); for_each_tracing_cpu(cpu) tracing_init_tracefs_percpu(tr, cpu); ftrace_init_tracefs(tr, d_tracer); } #ifdef CONFIG_TRACEFS_AUTOMOUNT_DEPRECATED static struct vfsmount *trace_automount(struct dentry *mntpt, void *ingore) { struct vfsmount *mnt; struct file_system_type *type; struct fs_context *fc; int ret; /* * To maintain backward compatibility for tools that mount * debugfs to get to the tracing facility, tracefs is automatically * mounted to the debugfs/tracing directory. */ type = get_fs_type("tracefs"); if (!type) return NULL; fc = fs_context_for_submount(type, mntpt); put_filesystem(type); if (IS_ERR(fc)) return ERR_CAST(fc); pr_warn("NOTICE: Automounting of tracing to debugfs is deprecated and will be removed in 2030\n"); ret = vfs_parse_fs_string(fc, "source", "tracefs"); if (!ret) mnt = fc_mount(fc); else mnt = ERR_PTR(ret); put_fs_context(fc); return mnt; } #endif /** * tracing_init_dentry - initialize top level trace array * * This is called when creating files or directories in the tracing * directory. It is called via fs_initcall() by any of the boot up code * and expects to return the dentry of the top level tracing directory. */ int tracing_init_dentry(void) { struct trace_array *tr = &global_trace; if (security_locked_down(LOCKDOWN_TRACEFS)) { pr_warn("Tracing disabled due to lockdown\n"); return -EPERM; } /* The top level trace array uses NULL as parent */ if (tr->dir) return 0; if (WARN_ON(!tracefs_initialized())) return -ENODEV; #ifdef CONFIG_TRACEFS_AUTOMOUNT_DEPRECATED /* * As there may still be users that expect the tracing * files to exist in debugfs/tracing, we must automount * the tracefs file system there, so older tools still * work with the newer kernel. */ tr->dir = debugfs_create_automount("tracing", NULL, trace_automount, NULL); #endif return 0; } extern struct trace_eval_map *__start_ftrace_eval_maps[]; extern struct trace_eval_map *__stop_ftrace_eval_maps[]; static struct workqueue_struct *eval_map_wq __initdata; static struct work_struct eval_map_work __initdata; static struct work_struct tracerfs_init_work __initdata; static void __init eval_map_work_func(struct work_struct *work) { int len; len = __stop_ftrace_eval_maps - __start_ftrace_eval_maps; trace_event_update_with_eval_map(NULL, __start_ftrace_eval_maps, len); } static int __init trace_eval_init(void) { INIT_WORK(&eval_map_work, eval_map_work_func); eval_map_wq = alloc_workqueue("eval_map_wq", WQ_UNBOUND, 0); if (!eval_map_wq) { pr_err("Unable to allocate eval_map_wq\n"); /* Do work here */ eval_map_work_func(&eval_map_work); return -ENOMEM; } queue_work(eval_map_wq, &eval_map_work); return 0; } subsys_initcall(trace_eval_init); static int __init trace_eval_sync(void) { /* Make sure the eval map updates are finished */ if (eval_map_wq) destroy_workqueue(eval_map_wq); return 0; } late_initcall_sync(trace_eval_sync); #ifdef CONFIG_MODULES bool module_exists(const char *module) { /* All modules have the symbol __this_module */ static const char this_mod[] = "__this_module"; char modname[MODULE_NAME_LEN + sizeof(this_mod) + 2]; unsigned long val; int n; n = snprintf(modname, sizeof(modname), "%s:%s", module, this_mod); if (n > sizeof(modname) - 1) return false; val = module_kallsyms_lookup_name(modname); return val != 0; } static void trace_module_add_evals(struct module *mod) { /* * Modules with bad taint do not have events created, do * not bother with enums either. */ if (trace_module_has_bad_taint(mod)) return; /* Even if no trace_evals, this need to sanitize field types. */ trace_event_update_with_eval_map(mod, mod->trace_evals, mod->num_trace_evals); } #ifdef CONFIG_TRACE_EVAL_MAP_FILE static void trace_module_remove_evals(struct module *mod) { union trace_eval_map_item *map; union trace_eval_map_item **last = &trace_eval_maps; if (!mod->num_trace_evals) return; guard(mutex)(&trace_eval_mutex); map = trace_eval_maps; while (map) { if (map->head.mod == mod) break; map = trace_eval_jmp_to_tail(map); last = &map->tail.next; map = map->tail.next; } if (!map) return; *last = trace_eval_jmp_to_tail(map)->tail.next; kfree(map); } #else static inline void trace_module_remove_evals(struct module *mod) { } #endif /* CONFIG_TRACE_EVAL_MAP_FILE */ static void trace_module_record(struct module *mod, bool add) { struct trace_array *tr; unsigned long flags; list_for_each_entry(tr, &ftrace_trace_arrays, list) { flags = tr->flags & (TRACE_ARRAY_FL_BOOT | TRACE_ARRAY_FL_LAST_BOOT); /* Update any persistent trace array that has already been started */ if (flags == TRACE_ARRAY_FL_BOOT && add) { guard(mutex)(&scratch_mutex); save_mod(mod, tr); } else if (flags & TRACE_ARRAY_FL_LAST_BOOT) { /* Update delta if the module loaded in previous boot */ make_mod_delta(mod, tr); } } } static int trace_module_notify(struct notifier_block *self, unsigned long val, void *data) { struct module *mod = data; switch (val) { case MODULE_STATE_COMING: trace_module_add_evals(mod); trace_module_record(mod, true); break; case MODULE_STATE_GOING: trace_module_remove_evals(mod); trace_module_record(mod, false); break; } return NOTIFY_OK; } static struct notifier_block trace_module_nb = { .notifier_call = trace_module_notify, .priority = 0, }; #endif /* CONFIG_MODULES */ static __init void tracer_init_tracefs_work_func(struct work_struct *work) { event_trace_init(); init_tracer_tracefs(&global_trace, NULL); ftrace_init_tracefs_toplevel(&global_trace, NULL); trace_create_file("tracing_thresh", TRACE_MODE_WRITE, NULL, &global_trace, &tracing_thresh_fops); trace_create_file("README", TRACE_MODE_READ, NULL, NULL, &tracing_readme_fops); trace_create_file("saved_cmdlines", TRACE_MODE_READ, NULL, NULL, &tracing_saved_cmdlines_fops); trace_create_file("saved_cmdlines_size", TRACE_MODE_WRITE, NULL, NULL, &tracing_saved_cmdlines_size_fops); trace_create_file("saved_tgids", TRACE_MODE_READ, NULL, NULL, &tracing_saved_tgids_fops); trace_create_eval_file(NULL); #ifdef CONFIG_MODULES register_module_notifier(&trace_module_nb); #endif #ifdef CONFIG_DYNAMIC_FTRACE trace_create_file("dyn_ftrace_total_info", TRACE_MODE_READ, NULL, NULL, &tracing_dyn_info_fops); #endif create_trace_instances(NULL); update_tracer_options(); } static __init int tracer_init_tracefs(void) { int ret; trace_access_lock_init(); ret = tracing_init_dentry(); if (ret) return 0; if (eval_map_wq) { INIT_WORK(&tracerfs_init_work, tracer_init_tracefs_work_func); queue_work(eval_map_wq, &tracerfs_init_work); } else { tracer_init_tracefs_work_func(NULL); } if (rv_init_interface()) pr_err("RV: Error while creating the RV interface\n"); return 0; } fs_initcall(tracer_init_tracefs); static int trace_die_panic_handler(struct notifier_block *self, unsigned long ev, void *unused); static struct notifier_block trace_panic_notifier = { .notifier_call = trace_die_panic_handler, .priority = INT_MAX - 1, }; static struct notifier_block trace_die_notifier = { .notifier_call = trace_die_panic_handler, .priority = INT_MAX - 1, }; /* * The idea is to execute the following die/panic callback early, in order * to avoid showing irrelevant information in the trace (like other panic * notifier functions); we are the 2nd to run, after hung_task/rcu_stall * warnings get disabled (to prevent potential log flooding). */ static int trace_die_panic_handler(struct notifier_block *self, unsigned long ev, void *unused) { if (!ftrace_dump_on_oops_enabled()) return NOTIFY_DONE; /* The die notifier requires DIE_OOPS to trigger */ if (self == &trace_die_notifier && ev != DIE_OOPS) return NOTIFY_DONE; ftrace_dump(DUMP_PARAM); return NOTIFY_DONE; } /* * printk is set to max of 1024, we really don't need it that big. * Nothing should be printing 1000 characters anyway. */ #define TRACE_MAX_PRINT 1000 /* * Define here KERN_TRACE so that we have one place to modify * it if we decide to change what log level the ftrace dump * should be at. */ #define KERN_TRACE KERN_EMERG void trace_printk_seq(struct trace_seq *s) { /* Probably should print a warning here. */ if (s->seq.len >= TRACE_MAX_PRINT) s->seq.len = TRACE_MAX_PRINT; /* * More paranoid code. Although the buffer size is set to * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just * an extra layer of protection. */ if (WARN_ON_ONCE(s->seq.len >= s->seq.size)) s->seq.len = s->seq.size - 1; /* should be zero ended, but we are paranoid. */ s->buffer[s->seq.len] = 0; printk(KERN_TRACE "%s", s->buffer); trace_seq_init(s); } static void trace_init_iter(struct trace_iterator *iter, struct trace_array *tr) { iter->tr = tr; iter->trace = iter->tr->current_trace; iter->cpu_file = RING_BUFFER_ALL_CPUS; iter->array_buffer = &tr->array_buffer; if (iter->trace && iter->trace->open) iter->trace->open(iter); /* Annotate start of buffers if we had overruns */ if (ring_buffer_overruns(iter->array_buffer->buffer)) iter->iter_flags |= TRACE_FILE_ANNOTATE; /* Output in nanoseconds only if we are using a clock in nanoseconds. */ if (trace_clocks[iter->tr->clock_id].in_ns) iter->iter_flags |= TRACE_FILE_TIME_IN_NS; /* Can not use kmalloc for iter.temp and iter.fmt */ iter->temp = static_temp_buf; iter->temp_size = STATIC_TEMP_BUF_SIZE; iter->fmt = static_fmt_buf; iter->fmt_size = STATIC_FMT_BUF_SIZE; } void trace_init_global_iter(struct trace_iterator *iter) { trace_init_iter(iter, &global_trace); } static void ftrace_dump_one(struct trace_array *tr, enum ftrace_dump_mode dump_mode) { /* use static because iter can be a bit big for the stack */ static struct trace_iterator iter; unsigned int old_userobj; unsigned long flags; int cnt = 0; /* * Always turn off tracing when we dump. * We don't need to show trace output of what happens * between multiple crashes. * * If the user does a sysrq-z, then they can re-enable * tracing with echo 1 > tracing_on. */ tracer_tracing_off(tr); local_irq_save(flags); /* Simulate the iterator */ trace_init_iter(&iter, tr); /* While dumping, do not allow the buffer to be enable */ tracer_tracing_disable(tr); old_userobj = tr->trace_flags & TRACE_ITER(SYM_USEROBJ); /* don't look at user memory in panic mode */ tr->trace_flags &= ~TRACE_ITER(SYM_USEROBJ); if (dump_mode == DUMP_ORIG) iter.cpu_file = raw_smp_processor_id(); else iter.cpu_file = RING_BUFFER_ALL_CPUS; if (tr == &global_trace) printk(KERN_TRACE "Dumping ftrace buffer:\n"); else printk(KERN_TRACE "Dumping ftrace instance %s buffer:\n", tr->name); /* Did function tracer already get disabled? */ if (ftrace_is_dead()) { printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n"); printk("# MAY BE MISSING FUNCTION EVENTS\n"); } /* * We need to stop all tracing on all CPUS to read * the next buffer. This is a bit expensive, but is * not done often. We fill all what we can read, * and then release the locks again. */ while (!trace_empty(&iter)) { if (!cnt) printk(KERN_TRACE "---------------------------------\n"); cnt++; trace_iterator_reset(&iter); iter.iter_flags |= TRACE_FILE_LAT_FMT; if (trace_find_next_entry_inc(&iter) != NULL) { int ret; ret = print_trace_line(&iter); if (ret != TRACE_TYPE_NO_CONSUME) trace_consume(&iter); trace_printk_seq(&iter.seq); } touch_nmi_watchdog(); } if (!cnt) printk(KERN_TRACE " (ftrace buffer empty)\n"); else printk(KERN_TRACE "---------------------------------\n"); tr->trace_flags |= old_userobj; tracer_tracing_enable(tr); local_irq_restore(flags); } static void ftrace_dump_by_param(void) { bool first_param = true; char dump_param[MAX_TRACER_SIZE]; char *buf, *token, *inst_name; struct trace_array *tr; strscpy(dump_param, ftrace_dump_on_oops, MAX_TRACER_SIZE); buf = dump_param; while ((token = strsep(&buf, ",")) != NULL) { if (first_param) { first_param = false; if (!strcmp("0", token)) continue; else if (!strcmp("1", token)) { ftrace_dump_one(&global_trace, DUMP_ALL); continue; } else if (!strcmp("2", token) || !strcmp("orig_cpu", token)) { ftrace_dump_one(&global_trace, DUMP_ORIG); continue; } } inst_name = strsep(&token, "="); tr = trace_array_find(inst_name); if (!tr) { printk(KERN_TRACE "Instance %s not found\n", inst_name); continue; } if (token && (!strcmp("2", token) || !strcmp("orig_cpu", token))) ftrace_dump_one(tr, DUMP_ORIG); else ftrace_dump_one(tr, DUMP_ALL); } } void ftrace_dump(enum ftrace_dump_mode oops_dump_mode) { static atomic_t dump_running; /* Only allow one dump user at a time. */ if (atomic_inc_return(&dump_running) != 1) { atomic_dec(&dump_running); return; } switch (oops_dump_mode) { case DUMP_ALL: ftrace_dump_one(&global_trace, DUMP_ALL); break; case DUMP_ORIG: ftrace_dump_one(&global_trace, DUMP_ORIG); break; case DUMP_PARAM: ftrace_dump_by_param(); break; case DUMP_NONE: break; default: printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n"); ftrace_dump_one(&global_trace, DUMP_ALL); } atomic_dec(&dump_running); } EXPORT_SYMBOL_GPL(ftrace_dump); #define WRITE_BUFSIZE 4096 ssize_t trace_parse_run_command(struct file *file, const char __user *buffer, size_t count, loff_t *ppos, int (*createfn)(const char *)) { char *kbuf __free(kfree) = NULL; char *buf, *tmp; int ret = 0; size_t done = 0; size_t size; kbuf = kmalloc(WRITE_BUFSIZE, GFP_KERNEL); if (!kbuf) return -ENOMEM; while (done < count) { size = count - done; if (size >= WRITE_BUFSIZE) size = WRITE_BUFSIZE - 1; if (copy_from_user(kbuf, buffer + done, size)) return -EFAULT; kbuf[size] = '\0'; buf = kbuf; do { tmp = strchr(buf, '\n'); if (tmp) { *tmp = '\0'; size = tmp - buf + 1; } else { size = strlen(buf); if (done + size < count) { if (buf != kbuf) break; /* This can accept WRITE_BUFSIZE - 2 ('\n' + '\0') */ pr_warn("Line length is too long: Should be less than %d\n", WRITE_BUFSIZE - 2); return -EINVAL; } } done += size; /* Remove comments */ tmp = strchr(buf, '#'); if (tmp) *tmp = '\0'; ret = createfn(buf); if (ret) return ret; buf += size; } while (done < count); } return done; } #ifdef CONFIG_TRACER_MAX_TRACE __init static bool tr_needs_alloc_snapshot(const char *name) { char *test; int len = strlen(name); bool ret; if (!boot_snapshot_index) return false; if (strncmp(name, boot_snapshot_info, len) == 0 && boot_snapshot_info[len] == '\t') return true; test = kmalloc(strlen(name) + 3, GFP_KERNEL); if (!test) return false; sprintf(test, "\t%s\t", name); ret = strstr(boot_snapshot_info, test) == NULL; kfree(test); return ret; } __init static void do_allocate_snapshot(const char *name) { if (!tr_needs_alloc_snapshot(name)) return; /* * When allocate_snapshot is set, the next call to * allocate_trace_buffers() (called by trace_array_get_by_name()) * will allocate the snapshot buffer. That will also clear * this flag. */ allocate_snapshot = true; } #else static inline void do_allocate_snapshot(const char *name) { } #endif __init static int backup_instance_area(const char *backup, unsigned long *addr, phys_addr_t *size) { struct trace_array *backup_tr; void *allocated_vaddr = NULL; backup_tr = trace_array_get_by_name(backup, NULL); if (!backup_tr) { pr_warn("Tracing: Instance %s is not found.\n", backup); return -ENOENT; } if (!(backup_tr->flags & TRACE_ARRAY_FL_BOOT)) { pr_warn("Tracing: Instance %s is not boot mapped.\n", backup); trace_array_put(backup_tr); return -EINVAL; } *size = backup_tr->range_addr_size; allocated_vaddr = vzalloc(*size); if (!allocated_vaddr) { pr_warn("Tracing: Failed to allocate memory for copying instance %s (size 0x%lx)\n", backup, (unsigned long)*size); trace_array_put(backup_tr); return -ENOMEM; } memcpy(allocated_vaddr, (void *)backup_tr->range_addr_start, (size_t)*size); *addr = (unsigned long)allocated_vaddr; trace_array_put(backup_tr); return 0; } __init static void enable_instances(void) { struct trace_array *tr; bool memmap_area = false; char *curr_str; char *name; char *str; char *tok; /* A tab is always appended */ boot_instance_info[boot_instance_index - 1] = '\0'; str = boot_instance_info; while ((curr_str = strsep(&str, "\t"))) { phys_addr_t start = 0; phys_addr_t size = 0; unsigned long addr = 0; bool traceprintk = false; bool traceoff = false; char *flag_delim; char *addr_delim; char *rname __free(kfree) = NULL; char *backup; tok = strsep(&curr_str, ","); name = strsep(&tok, "="); backup = tok; flag_delim = strchr(name, '^'); addr_delim = strchr(name, '@'); if (addr_delim) *addr_delim++ = '\0'; if (flag_delim) *flag_delim++ = '\0'; if (backup) { if (backup_instance_area(backup, &addr, &size) < 0) continue; } if (flag_delim) { char *flag; while ((flag = strsep(&flag_delim, "^"))) { if (strcmp(flag, "traceoff") == 0) { traceoff = true; } else if ((strcmp(flag, "printk") == 0) || (strcmp(flag, "traceprintk") == 0) || (strcmp(flag, "trace_printk") == 0)) { traceprintk = true; } else { pr_info("Tracing: Invalid instance flag '%s' for %s\n", flag, name); } } } tok = addr_delim; if (tok && isdigit(*tok)) { start = memparse(tok, &tok); if (!start) { pr_warn("Tracing: Invalid boot instance address for %s\n", name); continue; } if (*tok != ':') { pr_warn("Tracing: No size specified for instance %s\n", name); continue; } tok++; size = memparse(tok, &tok); if (!size) { pr_warn("Tracing: Invalid boot instance size for %s\n", name); continue; } memmap_area = true; } else if (tok) { if (!reserve_mem_find_by_name(tok, &start, &size)) { start = 0; pr_warn("Failed to map boot instance %s to %s\n", name, tok); continue; } rname = kstrdup(tok, GFP_KERNEL); } if (start) { /* Start and size must be page aligned */ if (start & ~PAGE_MASK) { pr_warn("Tracing: mapping start addr %pa is not page aligned\n", &start); continue; } if (size & ~PAGE_MASK) { pr_warn("Tracing: mapping size %pa is not page aligned\n", &size); continue; } if (memmap_area) addr = map_pages(start, size); else addr = (unsigned long)phys_to_virt(start); if (addr) { pr_info("Tracing: mapped boot instance %s at physical memory %pa of size 0x%lx\n", name, &start, (unsigned long)size); } else { pr_warn("Tracing: Failed to map boot instance %s\n", name); continue; } } else { /* Only non mapped buffers have snapshot buffers */ if (IS_ENABLED(CONFIG_TRACER_MAX_TRACE)) do_allocate_snapshot(name); } tr = trace_array_create_systems(name, NULL, addr, size); if (IS_ERR(tr)) { pr_warn("Tracing: Failed to create instance buffer %s\n", curr_str); continue; } if (traceoff) tracer_tracing_off(tr); if (traceprintk) update_printk_trace(tr); /* * memmap'd buffers can not be freed. */ if (memmap_area) { tr->flags |= TRACE_ARRAY_FL_MEMMAP; tr->ref++; } /* * Backup buffers can be freed but need vfree(). */ if (backup) tr->flags |= TRACE_ARRAY_FL_VMALLOC; if (start || backup) { tr->flags |= TRACE_ARRAY_FL_BOOT | TRACE_ARRAY_FL_LAST_BOOT; tr->range_name = no_free_ptr(rname); } while ((tok = strsep(&curr_str, ","))) { early_enable_events(tr, tok, true); } } } __init static int tracer_alloc_buffers(void) { int ring_buf_size; int ret = -ENOMEM; if (security_locked_down(LOCKDOWN_TRACEFS)) { pr_warn("Tracing disabled due to lockdown\n"); return -EPERM; } /* * Make sure we don't accidentally add more trace options * than we have bits for. */ BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE); if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL)) return -ENOMEM; if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL)) goto out_free_buffer_mask; /* Only allocate trace_printk buffers if a trace_printk exists */ if (&__stop___trace_bprintk_fmt != &__start___trace_bprintk_fmt) /* Must be called before global_trace.buffer is allocated */ trace_printk_init_buffers(); /* To save memory, keep the ring buffer size to its minimum */ if (global_trace.ring_buffer_expanded) ring_buf_size = trace_buf_size; else ring_buf_size = 1; cpumask_copy(tracing_buffer_mask, cpu_possible_mask); cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask); raw_spin_lock_init(&global_trace.start_lock); /* * The prepare callbacks allocates some memory for the ring buffer. We * don't free the buffer if the CPU goes down. If we were to free * the buffer, then the user would lose any trace that was in the * buffer. The memory will be removed once the "instance" is removed. */ ret = cpuhp_setup_state_multi(CPUHP_TRACE_RB_PREPARE, "trace/RB:prepare", trace_rb_cpu_prepare, NULL); if (ret < 0) goto out_free_cpumask; /* Used for event triggers */ ret = -ENOMEM; temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE); if (!temp_buffer) goto out_rm_hp_state; if (trace_create_savedcmd() < 0) goto out_free_temp_buffer; if (!zalloc_cpumask_var(&global_trace.pipe_cpumask, GFP_KERNEL)) goto out_free_savedcmd; /* TODO: make the number of buffers hot pluggable with CPUS */ if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) { MEM_FAIL(1, "tracer: failed to allocate ring buffer!\n"); goto out_free_pipe_cpumask; } if (global_trace.buffer_disabled) tracing_off(); if (trace_boot_clock) { ret = tracing_set_clock(&global_trace, trace_boot_clock); if (ret < 0) pr_warn("Trace clock %s not defined, going back to default\n", trace_boot_clock); } /* * register_tracer() might reference current_trace, so it * needs to be set before we register anything. This is * just a bootstrap of current_trace anyway. */ global_trace.current_trace = &nop_trace; global_trace.current_trace_flags = nop_trace.flags; global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED; #ifdef CONFIG_TRACER_MAX_TRACE spin_lock_init(&global_trace.snapshot_trigger_lock); #endif ftrace_init_global_array_ops(&global_trace); #ifdef CONFIG_MODULES INIT_LIST_HEAD(&global_trace.mod_events); #endif init_trace_flags_index(&global_trace); INIT_LIST_HEAD(&global_trace.tracers); /* All seems OK, enable tracing */ tracing_disabled = 0; atomic_notifier_chain_register(&panic_notifier_list, &trace_panic_notifier); register_die_notifier(&trace_die_notifier); global_trace.flags = TRACE_ARRAY_FL_GLOBAL; global_trace.syscall_buf_sz = syscall_buf_size; INIT_LIST_HEAD(&global_trace.systems); INIT_LIST_HEAD(&global_trace.events); INIT_LIST_HEAD(&global_trace.hist_vars); INIT_LIST_HEAD(&global_trace.err_log); list_add(&global_trace.marker_list, &marker_copies); list_add(&global_trace.list, &ftrace_trace_arrays); register_tracer(&nop_trace); /* Function tracing may start here (via kernel command line) */ init_function_trace(); apply_trace_boot_options(); register_snapshot_cmd(); return 0; out_free_pipe_cpumask: free_cpumask_var(global_trace.pipe_cpumask); out_free_savedcmd: trace_free_saved_cmdlines_buffer(); out_free_temp_buffer: ring_buffer_free(temp_buffer); out_rm_hp_state: cpuhp_remove_multi_state(CPUHP_TRACE_RB_PREPARE); out_free_cpumask: free_cpumask_var(global_trace.tracing_cpumask); out_free_buffer_mask: free_cpumask_var(tracing_buffer_mask); return ret; } #ifdef CONFIG_FUNCTION_TRACER /* Used to set module cached ftrace filtering at boot up */ struct trace_array *trace_get_global_array(void) { return &global_trace; } #endif void __init ftrace_boot_snapshot(void) { #ifdef CONFIG_TRACER_MAX_TRACE struct trace_array *tr; if (!snapshot_at_boot) return; list_for_each_entry(tr, &ftrace_trace_arrays, list) { if (!tr->allocated_snapshot) continue; tracing_snapshot_instance(tr); trace_array_puts(tr, "** Boot snapshot taken **\n"); } #endif } void __init early_trace_init(void) { if (tracepoint_printk) { tracepoint_print_iter = kzalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL); if (MEM_FAIL(!tracepoint_print_iter, "Failed to allocate trace iterator\n")) tracepoint_printk = 0; else static_key_enable(&tracepoint_printk_key.key); } tracer_alloc_buffers(); init_events(); } void __init trace_init(void) { trace_event_init(); if (boot_instance_index) enable_instances(); } __init static void clear_boot_tracer(void) { /* * The default tracer at boot buffer is an init section. * This function is called in lateinit. If we did not * find the boot tracer, then clear it out, to prevent * later registration from accessing the buffer that is * about to be freed. */ if (!default_bootup_tracer) return; printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n", default_bootup_tracer); default_bootup_tracer = NULL; } #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK __init static void tracing_set_default_clock(void) { /* sched_clock_stable() is determined in late_initcall */ if (!trace_boot_clock && !sched_clock_stable()) { if (security_locked_down(LOCKDOWN_TRACEFS)) { pr_warn("Can not set tracing clock due to lockdown\n"); return; } printk(KERN_WARNING "Unstable clock detected, switching default tracing clock to \"global\"\n" "If you want to keep using the local clock, then add:\n" " \"trace_clock=local\"\n" "on the kernel command line\n"); tracing_set_clock(&global_trace, "global"); } } #else static inline void tracing_set_default_clock(void) { } #endif __init static int late_trace_init(void) { if (tracepoint_printk && tracepoint_printk_stop_on_boot) { static_key_disable(&tracepoint_printk_key.key); tracepoint_printk = 0; } if (traceoff_after_boot) tracing_off(); tracing_set_default_clock(); clear_boot_tracer(); return 0; } late_initcall_sync(late_trace_init);
2 18 2 22 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 /* SPDX-License-Identifier: GPL-2.0 */ /* Copyright (C) B.A.T.M.A.N. contributors: * * Marek Lindner, Simon Wunderlich */ #ifndef _NET_BATMAN_ADV_ORIGINATOR_H_ #define _NET_BATMAN_ADV_ORIGINATOR_H_ #include "main.h" #include <linux/compiler.h> #include <linux/if_ether.h> #include <linux/jhash.h> #include <linux/kref.h> #include <linux/netlink.h> #include <linux/skbuff.h> #include <linux/types.h> bool batadv_compare_orig(const struct hlist_node *node, const void *data2); int batadv_originator_init(struct batadv_priv *bat_priv); void batadv_originator_free(struct batadv_priv *bat_priv); void batadv_purge_orig_ref(struct batadv_priv *bat_priv); void batadv_orig_node_release(struct kref *ref); struct batadv_orig_node *batadv_orig_node_new(struct batadv_priv *bat_priv, const u8 *addr); struct batadv_hardif_neigh_node * batadv_hardif_neigh_get(const struct batadv_hard_iface *hard_iface, const u8 *neigh_addr); void batadv_hardif_neigh_release(struct kref *ref); struct batadv_neigh_node * batadv_neigh_node_get_or_create(struct batadv_orig_node *orig_node, struct batadv_hard_iface *hard_iface, const u8 *neigh_addr); void batadv_neigh_node_release(struct kref *ref); struct batadv_neigh_node * batadv_orig_router_get(struct batadv_orig_node *orig_node, const struct batadv_hard_iface *if_outgoing); struct batadv_neigh_node * batadv_orig_to_router(struct batadv_priv *bat_priv, u8 *orig_addr, struct batadv_hard_iface *if_outgoing); struct batadv_neigh_ifinfo * batadv_neigh_ifinfo_new(struct batadv_neigh_node *neigh, struct batadv_hard_iface *if_outgoing); struct batadv_neigh_ifinfo * batadv_neigh_ifinfo_get(struct batadv_neigh_node *neigh, struct batadv_hard_iface *if_outgoing); void batadv_neigh_ifinfo_release(struct kref *ref); int batadv_hardif_neigh_dump(struct sk_buff *msg, struct netlink_callback *cb); struct batadv_orig_ifinfo * batadv_orig_ifinfo_get(struct batadv_orig_node *orig_node, struct batadv_hard_iface *if_outgoing); struct batadv_orig_ifinfo * batadv_orig_ifinfo_new(struct batadv_orig_node *orig_node, struct batadv_hard_iface *if_outgoing); void batadv_orig_ifinfo_release(struct kref *ref); int batadv_orig_dump(struct sk_buff *msg, struct netlink_callback *cb); struct batadv_orig_node_vlan * batadv_orig_node_vlan_new(struct batadv_orig_node *orig_node, unsigned short vid); struct batadv_orig_node_vlan * batadv_orig_node_vlan_get(struct batadv_orig_node *orig_node, unsigned short vid); void batadv_orig_node_vlan_release(struct kref *ref); /** * batadv_choose_orig() - Return the index of the orig entry in the hash table * @data: mac address of the originator node * @size: the size of the hash table * * Return: the hash index where the object represented by @data should be * stored at. */ static inline u32 batadv_choose_orig(const void *data, u32 size) { u32 hash = 0; hash = jhash(data, ETH_ALEN, hash); return hash % size; } struct batadv_orig_node * batadv_orig_hash_find(struct batadv_priv *bat_priv, const void *data); /** * batadv_orig_node_vlan_put() - decrement the refcounter and possibly release * the originator-vlan object * @orig_vlan: the originator-vlan object to release */ static inline void batadv_orig_node_vlan_put(struct batadv_orig_node_vlan *orig_vlan) { if (!orig_vlan) return; kref_put(&orig_vlan->refcount, batadv_orig_node_vlan_release); } /** * batadv_neigh_ifinfo_put() - decrement the refcounter and possibly release * the neigh_ifinfo * @neigh_ifinfo: the neigh_ifinfo object to release */ static inline void batadv_neigh_ifinfo_put(struct batadv_neigh_ifinfo *neigh_ifinfo) { if (!neigh_ifinfo) return; kref_put(&neigh_ifinfo->refcount, batadv_neigh_ifinfo_release); } /** * batadv_hardif_neigh_put() - decrement the hardif neighbors refcounter * and possibly release it * @hardif_neigh: hardif neigh neighbor to free */ static inline void batadv_hardif_neigh_put(struct batadv_hardif_neigh_node *hardif_neigh) { if (!hardif_neigh) return; kref_put(&hardif_neigh->refcount, batadv_hardif_neigh_release); } /** * batadv_neigh_node_put() - decrement the neighbors refcounter and possibly * release it * @neigh_node: neigh neighbor to free */ static inline void batadv_neigh_node_put(struct batadv_neigh_node *neigh_node) { if (!neigh_node) return; kref_put(&neigh_node->refcount, batadv_neigh_node_release); } /** * batadv_orig_ifinfo_put() - decrement the refcounter and possibly release * the orig_ifinfo * @orig_ifinfo: the orig_ifinfo object to release */ static inline void batadv_orig_ifinfo_put(struct batadv_orig_ifinfo *orig_ifinfo) { if (!orig_ifinfo) return; kref_put(&orig_ifinfo->refcount, batadv_orig_ifinfo_release); } /** * batadv_orig_node_put() - decrement the orig node refcounter and possibly * release it * @orig_node: the orig node to free */ static inline void batadv_orig_node_put(struct batadv_orig_node *orig_node) { if (!orig_node) return; kref_put(&orig_node->refcount, batadv_orig_node_release); } #endif /* _NET_BATMAN_ADV_ORIGINATOR_H_ */
78 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _ASM_X86_SHARED_IO_H #define _ASM_X86_SHARED_IO_H #include <linux/types.h> #define BUILDIO(bwl, bw, type) \ static __always_inline void __out##bwl(type value, u16 port) \ { \ asm volatile("out" #bwl " %" #bw "0, %w1" \ : : "a"(value), "Nd"(port)); \ } \ \ static __always_inline type __in##bwl(u16 port) \ { \ type value; \ asm volatile("in" #bwl " %w1, %" #bw "0" \ : "=a"(value) : "Nd"(port)); \ return value; \ } BUILDIO(b, b, u8) BUILDIO(w, w, u16) BUILDIO(l, , u32) #undef BUILDIO #define inb __inb #define inw __inw #define inl __inl #define outb __outb #define outw __outw #define outl __outl #endif
3 3 1793 2 194 1798 1801 1798 1796 1795 1793 1795 1800 1795 1794 1797 254 254 243 233 45 249 246 243 6 7 242 86 110 174 221 174 3 28 103 220 89 151 3 2 1 8 6 13 289 284 281 277 283 7 255 134 134 96 88 90 312 1 293 308 2 287 287 5 6 16 256 153 252 220 220 1 1 4 3 3 1 4 1 1 1 2 1 16 4 1 11 4 320 2 2 1 1 1 279 16 3 2 1 1 1 332 1 325 7 24 1 12 6 6 2 1 9 3 3 3 44 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 // SPDX-License-Identifier: GPL-2.0-only /* * Packet matching code. * * Copyright (C) 1999 Paul `Rusty' Russell & Michael J. Neuling * Copyright (C) 2000-2005 Netfilter Core Team <coreteam@netfilter.org> * Copyright (c) 2006-2010 Patrick McHardy <kaber@trash.net> */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/kernel.h> #include <linux/capability.h> #include <linux/in.h> #include <linux/skbuff.h> #include <linux/kmod.h> #include <linux/vmalloc.h> #include <linux/netdevice.h> #include <linux/module.h> #include <linux/poison.h> #include <net/ipv6.h> #include <net/compat.h> #include <linux/uaccess.h> #include <linux/mutex.h> #include <linux/proc_fs.h> #include <linux/err.h> #include <linux/cpumask.h> #include <linux/netfilter_ipv6/ip6_tables.h> #include <linux/netfilter/x_tables.h> #include <net/netfilter/nf_log.h> #include "../../netfilter/xt_repldata.h" MODULE_LICENSE("GPL"); MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>"); MODULE_DESCRIPTION("IPv6 packet filter"); void *ip6t_alloc_initial_table(const struct xt_table *info) { return xt_alloc_initial_table(ip6t, IP6T); } EXPORT_SYMBOL_GPL(ip6t_alloc_initial_table); /* Returns whether matches rule or not. */ /* Performance critical - called for every packet */ static inline bool ip6_packet_match(const struct sk_buff *skb, const char *indev, const char *outdev, const struct ip6t_ip6 *ip6info, unsigned int *protoff, u16 *fragoff, bool *hotdrop) { unsigned long ret; const struct ipv6hdr *ipv6 = ipv6_hdr(skb); if (NF_INVF(ip6info, IP6T_INV_SRCIP, ipv6_masked_addr_cmp(&ipv6->saddr, &ip6info->smsk, &ip6info->src)) || NF_INVF(ip6info, IP6T_INV_DSTIP, ipv6_masked_addr_cmp(&ipv6->daddr, &ip6info->dmsk, &ip6info->dst))) return false; ret = ifname_compare_aligned(indev, ip6info->iniface, ip6info->iniface_mask); if (NF_INVF(ip6info, IP6T_INV_VIA_IN, ret != 0)) return false; ret = ifname_compare_aligned(outdev, ip6info->outiface, ip6info->outiface_mask); if (NF_INVF(ip6info, IP6T_INV_VIA_OUT, ret != 0)) return false; /* ... might want to do something with class and flowlabel here ... */ /* look for the desired protocol header */ if (ip6info->flags & IP6T_F_PROTO) { int protohdr; unsigned short _frag_off; protohdr = ipv6_find_hdr(skb, protoff, -1, &_frag_off, NULL); if (protohdr < 0) { if (_frag_off == 0) *hotdrop = true; return false; } *fragoff = _frag_off; if (ip6info->proto == protohdr) { if (ip6info->invflags & IP6T_INV_PROTO) return false; return true; } /* We need match for the '-p all', too! */ if ((ip6info->proto != 0) && !(ip6info->invflags & IP6T_INV_PROTO)) return false; } return true; } /* should be ip6 safe */ static bool ip6_checkentry(const struct ip6t_ip6 *ipv6) { if (ipv6->flags & ~IP6T_F_MASK) return false; if (ipv6->invflags & ~IP6T_INV_MASK) return false; return true; } static unsigned int ip6t_error(struct sk_buff *skb, const struct xt_action_param *par) { net_info_ratelimited("error: `%s'\n", (const char *)par->targinfo); return NF_DROP; } static inline struct ip6t_entry * get_entry(const void *base, unsigned int offset) { return (struct ip6t_entry *)(base + offset); } /* All zeroes == unconditional rule. */ /* Mildly perf critical (only if packet tracing is on) */ static inline bool unconditional(const struct ip6t_entry *e) { static const struct ip6t_ip6 uncond; return e->target_offset == sizeof(struct ip6t_entry) && memcmp(&e->ipv6, &uncond, sizeof(uncond)) == 0; } static inline const struct xt_entry_target * ip6t_get_target_c(const struct ip6t_entry *e) { return ip6t_get_target((struct ip6t_entry *)e); } #if IS_ENABLED(CONFIG_NETFILTER_XT_TARGET_TRACE) /* This cries for unification! */ static const char *const hooknames[] = { [NF_INET_PRE_ROUTING] = "PREROUTING", [NF_INET_LOCAL_IN] = "INPUT", [NF_INET_FORWARD] = "FORWARD", [NF_INET_LOCAL_OUT] = "OUTPUT", [NF_INET_POST_ROUTING] = "POSTROUTING", }; enum nf_ip_trace_comments { NF_IP6_TRACE_COMMENT_RULE, NF_IP6_TRACE_COMMENT_RETURN, NF_IP6_TRACE_COMMENT_POLICY, }; static const char *const comments[] = { [NF_IP6_TRACE_COMMENT_RULE] = "rule", [NF_IP6_TRACE_COMMENT_RETURN] = "return", [NF_IP6_TRACE_COMMENT_POLICY] = "policy", }; static const struct nf_loginfo trace_loginfo = { .type = NF_LOG_TYPE_LOG, .u = { .log = { .level = LOGLEVEL_WARNING, .logflags = NF_LOG_DEFAULT_MASK, }, }, }; /* Mildly perf critical (only if packet tracing is on) */ static inline int get_chainname_rulenum(const struct ip6t_entry *s, const struct ip6t_entry *e, const char *hookname, const char **chainname, const char **comment, unsigned int *rulenum) { const struct xt_standard_target *t = (void *)ip6t_get_target_c(s); if (strcmp(t->target.u.kernel.target->name, XT_ERROR_TARGET) == 0) { /* Head of user chain: ERROR target with chainname */ *chainname = t->target.data; (*rulenum) = 0; } else if (s == e) { (*rulenum)++; if (unconditional(s) && strcmp(t->target.u.kernel.target->name, XT_STANDARD_TARGET) == 0 && t->verdict < 0) { /* Tail of chains: STANDARD target (return/policy) */ *comment = *chainname == hookname ? comments[NF_IP6_TRACE_COMMENT_POLICY] : comments[NF_IP6_TRACE_COMMENT_RETURN]; } return 1; } else (*rulenum)++; return 0; } static void trace_packet(struct net *net, const struct sk_buff *skb, unsigned int hook, const struct net_device *in, const struct net_device *out, const char *tablename, const struct xt_table_info *private, const struct ip6t_entry *e) { const struct ip6t_entry *root; const char *hookname, *chainname, *comment; const struct ip6t_entry *iter; unsigned int rulenum = 0; root = get_entry(private->entries, private->hook_entry[hook]); hookname = chainname = hooknames[hook]; comment = comments[NF_IP6_TRACE_COMMENT_RULE]; xt_entry_foreach(iter, root, private->size - private->hook_entry[hook]) if (get_chainname_rulenum(iter, e, hookname, &chainname, &comment, &rulenum) != 0) break; nf_log_trace(net, AF_INET6, hook, skb, in, out, &trace_loginfo, "TRACE: %s:%s:%s:%u ", tablename, chainname, comment, rulenum); } #endif static inline struct ip6t_entry * ip6t_next_entry(const struct ip6t_entry *entry) { return (void *)entry + entry->next_offset; } /* Returns one of the generic firewall policies, like NF_ACCEPT. */ unsigned int ip6t_do_table(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { const struct xt_table *table = priv; unsigned int hook = state->hook; static const char nulldevname[IFNAMSIZ] __attribute__((aligned(sizeof(long)))); /* Initializing verdict to NF_DROP keeps gcc happy. */ unsigned int verdict = NF_DROP; const char *indev, *outdev; const void *table_base; struct ip6t_entry *e, **jumpstack; unsigned int stackidx, cpu; const struct xt_table_info *private; struct xt_action_param acpar; unsigned int addend; /* Initialization */ stackidx = 0; indev = state->in ? state->in->name : nulldevname; outdev = state->out ? state->out->name : nulldevname; /* We handle fragments by dealing with the first fragment as * if it was a normal packet. All other fragments are treated * normally, except that they will NEVER match rules that ask * things we don't know, ie. tcp syn flag or ports). If the * rule is also a fragment-specific rule, non-fragments won't * match it. */ acpar.fragoff = 0; acpar.hotdrop = false; acpar.state = state; WARN_ON(!(table->valid_hooks & (1 << hook))); local_bh_disable(); addend = xt_write_recseq_begin(); private = READ_ONCE(table->private); /* Address dependency. */ cpu = smp_processor_id(); table_base = private->entries; jumpstack = (struct ip6t_entry **)private->jumpstack[cpu]; /* Switch to alternate jumpstack if we're being invoked via TEE. * TEE issues XT_CONTINUE verdict on original skb so we must not * clobber the jumpstack. * * For recursion via REJECT or SYNPROXY the stack will be clobbered * but it is no problem since absolute verdict is issued by these. */ if (static_key_false(&xt_tee_enabled)) jumpstack += private->stacksize * current->in_nf_duplicate; e = get_entry(table_base, private->hook_entry[hook]); do { const struct xt_entry_target *t; const struct xt_entry_match *ematch; struct xt_counters *counter; WARN_ON(!e); acpar.thoff = 0; if (!ip6_packet_match(skb, indev, outdev, &e->ipv6, &acpar.thoff, &acpar.fragoff, &acpar.hotdrop)) { no_match: e = ip6t_next_entry(e); continue; } xt_ematch_foreach(ematch, e) { acpar.match = ematch->u.kernel.match; acpar.matchinfo = ematch->data; if (!acpar.match->match(skb, &acpar)) goto no_match; } counter = xt_get_this_cpu_counter(&e->counters); ADD_COUNTER(*counter, skb->len, 1); t = ip6t_get_target_c(e); WARN_ON(!t->u.kernel.target); #if IS_ENABLED(CONFIG_NETFILTER_XT_TARGET_TRACE) /* The packet is traced: log it */ if (unlikely(skb->nf_trace)) trace_packet(state->net, skb, hook, state->in, state->out, table->name, private, e); #endif /* Standard target? */ if (!t->u.kernel.target->target) { int v; v = ((struct xt_standard_target *)t)->verdict; if (v < 0) { /* Pop from stack? */ if (v != XT_RETURN) { verdict = (unsigned int)(-v) - 1; break; } if (stackidx == 0) e = get_entry(table_base, private->underflow[hook]); else e = ip6t_next_entry(jumpstack[--stackidx]); continue; } if (table_base + v != ip6t_next_entry(e) && !(e->ipv6.flags & IP6T_F_GOTO)) { if (unlikely(stackidx >= private->stacksize)) { verdict = NF_DROP; break; } jumpstack[stackidx++] = e; } e = get_entry(table_base, v); continue; } acpar.target = t->u.kernel.target; acpar.targinfo = t->data; verdict = t->u.kernel.target->target(skb, &acpar); if (verdict == XT_CONTINUE) e = ip6t_next_entry(e); else /* Verdict */ break; } while (!acpar.hotdrop); xt_write_recseq_end(addend); local_bh_enable(); if (acpar.hotdrop) return NF_DROP; else return verdict; } /* Figures out from what hook each rule can be called: returns 0 if there are loops. Puts hook bitmask in comefrom. */ static int mark_source_chains(const struct xt_table_info *newinfo, unsigned int valid_hooks, void *entry0, unsigned int *offsets) { unsigned int hook; /* No recursion; use packet counter to save back ptrs (reset to 0 as we leave), and comefrom to save source hook bitmask */ for (hook = 0; hook < NF_INET_NUMHOOKS; hook++) { unsigned int pos = newinfo->hook_entry[hook]; struct ip6t_entry *e = entry0 + pos; if (!(valid_hooks & (1 << hook))) continue; /* Set initial back pointer. */ e->counters.pcnt = pos; for (;;) { const struct xt_standard_target *t = (void *)ip6t_get_target_c(e); int visited = e->comefrom & (1 << hook); if (e->comefrom & (1 << NF_INET_NUMHOOKS)) return 0; e->comefrom |= ((1 << hook) | (1 << NF_INET_NUMHOOKS)); /* Unconditional return/END. */ if ((unconditional(e) && (strcmp(t->target.u.user.name, XT_STANDARD_TARGET) == 0) && t->verdict < 0) || visited) { unsigned int oldpos, size; /* Return: backtrack through the last big jump. */ do { e->comefrom ^= (1<<NF_INET_NUMHOOKS); oldpos = pos; pos = e->counters.pcnt; e->counters.pcnt = 0; /* We're at the start. */ if (pos == oldpos) goto next; e = entry0 + pos; } while (oldpos == pos + e->next_offset); /* Move along one */ size = e->next_offset; e = entry0 + pos + size; if (pos + size >= newinfo->size) return 0; e->counters.pcnt = pos; pos += size; } else { int newpos = t->verdict; if (strcmp(t->target.u.user.name, XT_STANDARD_TARGET) == 0 && newpos >= 0) { /* This a jump; chase it. */ if (!xt_find_jump_offset(offsets, newpos, newinfo->number)) return 0; } else { /* ... this is a fallthru */ newpos = pos + e->next_offset; if (newpos >= newinfo->size) return 0; } e = entry0 + newpos; e->counters.pcnt = pos; pos = newpos; } } next: ; } return 1; } static void cleanup_match(struct xt_entry_match *m, struct net *net) { struct xt_mtdtor_param par; par.net = net; par.match = m->u.kernel.match; par.matchinfo = m->data; par.family = NFPROTO_IPV6; if (par.match->destroy != NULL) par.match->destroy(&par); module_put(par.match->me); } static int check_match(struct xt_entry_match *m, struct xt_mtchk_param *par) { const struct ip6t_ip6 *ipv6 = par->entryinfo; par->match = m->u.kernel.match; par->matchinfo = m->data; return xt_check_match(par, m->u.match_size - sizeof(*m), ipv6->proto, ipv6->invflags & IP6T_INV_PROTO); } static int find_check_match(struct xt_entry_match *m, struct xt_mtchk_param *par) { struct xt_match *match; int ret; match = xt_request_find_match(NFPROTO_IPV6, m->u.user.name, m->u.user.revision); if (IS_ERR(match)) return PTR_ERR(match); m->u.kernel.match = match; ret = check_match(m, par); if (ret) goto err; return 0; err: module_put(m->u.kernel.match->me); return ret; } static int check_target(struct ip6t_entry *e, struct net *net, const char *name) { struct xt_entry_target *t = ip6t_get_target(e); struct xt_tgchk_param par = { .net = net, .table = name, .entryinfo = e, .target = t->u.kernel.target, .targinfo = t->data, .hook_mask = e->comefrom, .family = NFPROTO_IPV6, }; return xt_check_target(&par, t->u.target_size - sizeof(*t), e->ipv6.proto, e->ipv6.invflags & IP6T_INV_PROTO); } static int find_check_entry(struct ip6t_entry *e, struct net *net, const char *name, unsigned int size, struct xt_percpu_counter_alloc_state *alloc_state) { struct xt_entry_target *t; struct xt_target *target; int ret; unsigned int j; struct xt_mtchk_param mtpar; struct xt_entry_match *ematch; if (!xt_percpu_counter_alloc(alloc_state, &e->counters)) return -ENOMEM; j = 0; memset(&mtpar, 0, sizeof(mtpar)); mtpar.net = net; mtpar.table = name; mtpar.entryinfo = &e->ipv6; mtpar.hook_mask = e->comefrom; mtpar.family = NFPROTO_IPV6; xt_ematch_foreach(ematch, e) { ret = find_check_match(ematch, &mtpar); if (ret != 0) goto cleanup_matches; ++j; } t = ip6t_get_target(e); target = xt_request_find_target(NFPROTO_IPV6, t->u.user.name, t->u.user.revision); if (IS_ERR(target)) { ret = PTR_ERR(target); goto cleanup_matches; } t->u.kernel.target = target; ret = check_target(e, net, name); if (ret) goto err; return 0; err: module_put(t->u.kernel.target->me); cleanup_matches: xt_ematch_foreach(ematch, e) { if (j-- == 0) break; cleanup_match(ematch, net); } xt_percpu_counter_free(&e->counters); return ret; } static bool check_underflow(const struct ip6t_entry *e) { const struct xt_entry_target *t; unsigned int verdict; if (!unconditional(e)) return false; t = ip6t_get_target_c(e); if (strcmp(t->u.user.name, XT_STANDARD_TARGET) != 0) return false; verdict = ((struct xt_standard_target *)t)->verdict; verdict = -verdict - 1; return verdict == NF_DROP || verdict == NF_ACCEPT; } static int check_entry_size_and_hooks(struct ip6t_entry *e, struct xt_table_info *newinfo, const unsigned char *base, const unsigned char *limit, const unsigned int *hook_entries, const unsigned int *underflows, unsigned int valid_hooks) { unsigned int h; int err; if ((unsigned long)e % __alignof__(struct ip6t_entry) != 0 || (unsigned char *)e + sizeof(struct ip6t_entry) >= limit || (unsigned char *)e + e->next_offset > limit) return -EINVAL; if (e->next_offset < sizeof(struct ip6t_entry) + sizeof(struct xt_entry_target)) return -EINVAL; if (!ip6_checkentry(&e->ipv6)) return -EINVAL; err = xt_check_entry_offsets(e, e->elems, e->target_offset, e->next_offset); if (err) return err; /* Check hooks & underflows */ for (h = 0; h < NF_INET_NUMHOOKS; h++) { if (!(valid_hooks & (1 << h))) continue; if ((unsigned char *)e - base == hook_entries[h]) newinfo->hook_entry[h] = hook_entries[h]; if ((unsigned char *)e - base == underflows[h]) { if (!check_underflow(e)) return -EINVAL; newinfo->underflow[h] = underflows[h]; } } /* Clear counters and comefrom */ e->counters = ((struct xt_counters) { 0, 0 }); e->comefrom = 0; return 0; } static void cleanup_entry(struct ip6t_entry *e, struct net *net) { struct xt_tgdtor_param par; struct xt_entry_target *t; struct xt_entry_match *ematch; /* Cleanup all matches */ xt_ematch_foreach(ematch, e) cleanup_match(ematch, net); t = ip6t_get_target(e); par.net = net; par.target = t->u.kernel.target; par.targinfo = t->data; par.family = NFPROTO_IPV6; if (par.target->destroy != NULL) par.target->destroy(&par); module_put(par.target->me); xt_percpu_counter_free(&e->counters); } /* Checks and translates the user-supplied table segment (held in newinfo) */ static int translate_table(struct net *net, struct xt_table_info *newinfo, void *entry0, const struct ip6t_replace *repl) { struct xt_percpu_counter_alloc_state alloc_state = { 0 }; struct ip6t_entry *iter; unsigned int *offsets; unsigned int i; int ret = 0; newinfo->size = repl->size; newinfo->number = repl->num_entries; /* Init all hooks to impossible value. */ for (i = 0; i < NF_INET_NUMHOOKS; i++) { newinfo->hook_entry[i] = 0xFFFFFFFF; newinfo->underflow[i] = 0xFFFFFFFF; } offsets = xt_alloc_entry_offsets(newinfo->number); if (!offsets) return -ENOMEM; i = 0; /* Walk through entries, checking offsets. */ xt_entry_foreach(iter, entry0, newinfo->size) { ret = check_entry_size_and_hooks(iter, newinfo, entry0, entry0 + repl->size, repl->hook_entry, repl->underflow, repl->valid_hooks); if (ret != 0) goto out_free; if (i < repl->num_entries) offsets[i] = (void *)iter - entry0; ++i; if (strcmp(ip6t_get_target(iter)->u.user.name, XT_ERROR_TARGET) == 0) ++newinfo->stacksize; } ret = -EINVAL; if (i != repl->num_entries) goto out_free; ret = xt_check_table_hooks(newinfo, repl->valid_hooks); if (ret) goto out_free; if (!mark_source_chains(newinfo, repl->valid_hooks, entry0, offsets)) { ret = -ELOOP; goto out_free; } kvfree(offsets); /* Finally, each sanity check must pass */ i = 0; xt_entry_foreach(iter, entry0, newinfo->size) { ret = find_check_entry(iter, net, repl->name, repl->size, &alloc_state); if (ret != 0) break; ++i; } if (ret != 0) { xt_entry_foreach(iter, entry0, newinfo->size) { if (i-- == 0) break; cleanup_entry(iter, net); } return ret; } return ret; out_free: kvfree(offsets); return ret; } static void get_counters(const struct xt_table_info *t, struct xt_counters counters[]) { struct ip6t_entry *iter; unsigned int cpu; unsigned int i; for_each_possible_cpu(cpu) { seqcount_t *s = &per_cpu(xt_recseq, cpu); i = 0; xt_entry_foreach(iter, t->entries, t->size) { struct xt_counters *tmp; u64 bcnt, pcnt; unsigned int start; tmp = xt_get_per_cpu_counter(&iter->counters, cpu); do { start = read_seqcount_begin(s); bcnt = tmp->bcnt; pcnt = tmp->pcnt; } while (read_seqcount_retry(s, start)); ADD_COUNTER(counters[i], bcnt, pcnt); ++i; cond_resched(); } } } static void get_old_counters(const struct xt_table_info *t, struct xt_counters counters[]) { struct ip6t_entry *iter; unsigned int cpu, i; for_each_possible_cpu(cpu) { i = 0; xt_entry_foreach(iter, t->entries, t->size) { const struct xt_counters *tmp; tmp = xt_get_per_cpu_counter(&iter->counters, cpu); ADD_COUNTER(counters[i], tmp->bcnt, tmp->pcnt); ++i; } cond_resched(); } } static struct xt_counters *alloc_counters(const struct xt_table *table) { unsigned int countersize; struct xt_counters *counters; const struct xt_table_info *private = table->private; /* We need atomic snapshot of counters: rest doesn't change (other than comefrom, which userspace doesn't care about). */ countersize = sizeof(struct xt_counters) * private->number; counters = vzalloc(countersize); if (counters == NULL) return ERR_PTR(-ENOMEM); get_counters(private, counters); return counters; } static int copy_entries_to_user(unsigned int total_size, const struct xt_table *table, void __user *userptr) { unsigned int off, num; const struct ip6t_entry *e; struct xt_counters *counters; const struct xt_table_info *private = table->private; int ret = 0; const void *loc_cpu_entry; counters = alloc_counters(table); if (IS_ERR(counters)) return PTR_ERR(counters); loc_cpu_entry = private->entries; /* FIXME: use iterator macros --RR */ /* ... then go back and fix counters and names */ for (off = 0, num = 0; off < total_size; off += e->next_offset, num++){ unsigned int i; const struct xt_entry_match *m; const struct xt_entry_target *t; e = loc_cpu_entry + off; if (copy_to_user(userptr + off, e, sizeof(*e))) { ret = -EFAULT; goto free_counters; } if (copy_to_user(userptr + off + offsetof(struct ip6t_entry, counters), &counters[num], sizeof(counters[num])) != 0) { ret = -EFAULT; goto free_counters; } for (i = sizeof(struct ip6t_entry); i < e->target_offset; i += m->u.match_size) { m = (void *)e + i; if (xt_match_to_user(m, userptr + off + i)) { ret = -EFAULT; goto free_counters; } } t = ip6t_get_target_c(e); if (xt_target_to_user(t, userptr + off + e->target_offset)) { ret = -EFAULT; goto free_counters; } } free_counters: vfree(counters); return ret; } #ifdef CONFIG_NETFILTER_XTABLES_COMPAT static void compat_standard_from_user(void *dst, const void *src) { int v = *(compat_int_t *)src; if (v > 0) v += xt_compat_calc_jump(AF_INET6, v); memcpy(dst, &v, sizeof(v)); } static int compat_standard_to_user(void __user *dst, const void *src) { compat_int_t cv = *(int *)src; if (cv > 0) cv -= xt_compat_calc_jump(AF_INET6, cv); return copy_to_user(dst, &cv, sizeof(cv)) ? -EFAULT : 0; } static int compat_calc_entry(const struct ip6t_entry *e, const struct xt_table_info *info, const void *base, struct xt_table_info *newinfo) { const struct xt_entry_match *ematch; const struct xt_entry_target *t; unsigned int entry_offset; int off, i, ret; off = sizeof(struct ip6t_entry) - sizeof(struct compat_ip6t_entry); entry_offset = (void *)e - base; xt_ematch_foreach(ematch, e) off += xt_compat_match_offset(ematch->u.kernel.match); t = ip6t_get_target_c(e); off += xt_compat_target_offset(t->u.kernel.target); newinfo->size -= off; ret = xt_compat_add_offset(AF_INET6, entry_offset, off); if (ret) return ret; for (i = 0; i < NF_INET_NUMHOOKS; i++) { if (info->hook_entry[i] && (e < (struct ip6t_entry *)(base + info->hook_entry[i]))) newinfo->hook_entry[i] -= off; if (info->underflow[i] && (e < (struct ip6t_entry *)(base + info->underflow[i]))) newinfo->underflow[i] -= off; } return 0; } static int compat_table_info(const struct xt_table_info *info, struct xt_table_info *newinfo) { struct ip6t_entry *iter; const void *loc_cpu_entry; int ret; if (!newinfo || !info) return -EINVAL; /* we dont care about newinfo->entries */ memcpy(newinfo, info, offsetof(struct xt_table_info, entries)); newinfo->initial_entries = 0; loc_cpu_entry = info->entries; ret = xt_compat_init_offsets(AF_INET6, info->number); if (ret) return ret; xt_entry_foreach(iter, loc_cpu_entry, info->size) { ret = compat_calc_entry(iter, info, loc_cpu_entry, newinfo); if (ret != 0) return ret; } return 0; } #endif static int get_info(struct net *net, void __user *user, const int *len) { char name[XT_TABLE_MAXNAMELEN]; struct xt_table *t; int ret; if (*len != sizeof(struct ip6t_getinfo)) return -EINVAL; if (copy_from_user(name, user, sizeof(name)) != 0) return -EFAULT; name[XT_TABLE_MAXNAMELEN-1] = '\0'; #ifdef CONFIG_NETFILTER_XTABLES_COMPAT if (in_compat_syscall()) xt_compat_lock(AF_INET6); #endif t = xt_request_find_table_lock(net, AF_INET6, name); if (!IS_ERR(t)) { struct ip6t_getinfo info; const struct xt_table_info *private = t->private; #ifdef CONFIG_NETFILTER_XTABLES_COMPAT struct xt_table_info tmp; if (in_compat_syscall()) { ret = compat_table_info(private, &tmp); xt_compat_flush_offsets(AF_INET6); private = &tmp; } #endif memset(&info, 0, sizeof(info)); info.valid_hooks = t->valid_hooks; memcpy(info.hook_entry, private->hook_entry, sizeof(info.hook_entry)); memcpy(info.underflow, private->underflow, sizeof(info.underflow)); info.num_entries = private->number; info.size = private->size; strcpy(info.name, name); if (copy_to_user(user, &info, *len) != 0) ret = -EFAULT; else ret = 0; xt_table_unlock(t); module_put(t->me); } else ret = PTR_ERR(t); #ifdef CONFIG_NETFILTER_XTABLES_COMPAT if (in_compat_syscall()) xt_compat_unlock(AF_INET6); #endif return ret; } static int get_entries(struct net *net, struct ip6t_get_entries __user *uptr, const int *len) { int ret; struct ip6t_get_entries get; struct xt_table *t; if (*len < sizeof(get)) return -EINVAL; if (copy_from_user(&get, uptr, sizeof(get)) != 0) return -EFAULT; if (*len != sizeof(struct ip6t_get_entries) + get.size) return -EINVAL; get.name[sizeof(get.name) - 1] = '\0'; t = xt_find_table_lock(net, AF_INET6, get.name); if (!IS_ERR(t)) { struct xt_table_info *private = t->private; if (get.size == private->size) ret = copy_entries_to_user(private->size, t, uptr->entrytable); else ret = -EAGAIN; module_put(t->me); xt_table_unlock(t); } else ret = PTR_ERR(t); return ret; } static int __do_replace(struct net *net, const char *name, unsigned int valid_hooks, struct xt_table_info *newinfo, unsigned int num_counters, void __user *counters_ptr) { int ret; struct xt_table *t; struct xt_table_info *oldinfo; struct xt_counters *counters; struct ip6t_entry *iter; counters = xt_counters_alloc(num_counters); if (!counters) { ret = -ENOMEM; goto out; } t = xt_request_find_table_lock(net, AF_INET6, name); if (IS_ERR(t)) { ret = PTR_ERR(t); goto free_newinfo_counters_untrans; } /* You lied! */ if (valid_hooks != t->valid_hooks) { ret = -EINVAL; goto put_module; } oldinfo = xt_replace_table(t, num_counters, newinfo, &ret); if (!oldinfo) goto put_module; /* Update module usage count based on number of rules */ if ((oldinfo->number > oldinfo->initial_entries) || (newinfo->number <= oldinfo->initial_entries)) module_put(t->me); if ((oldinfo->number > oldinfo->initial_entries) && (newinfo->number <= oldinfo->initial_entries)) module_put(t->me); xt_table_unlock(t); get_old_counters(oldinfo, counters); /* Decrease module usage counts and free resource */ xt_entry_foreach(iter, oldinfo->entries, oldinfo->size) cleanup_entry(iter, net); xt_free_table_info(oldinfo); if (copy_to_user(counters_ptr, counters, sizeof(struct xt_counters) * num_counters) != 0) { /* Silent error, can't fail, new table is already in place */ net_warn_ratelimited("ip6tables: counters copy to user failed while replacing table\n"); } vfree(counters); return 0; put_module: module_put(t->me); xt_table_unlock(t); free_newinfo_counters_untrans: vfree(counters); out: return ret; } static int do_replace(struct net *net, sockptr_t arg, unsigned int len) { int ret; struct ip6t_replace tmp; struct xt_table_info *newinfo; void *loc_cpu_entry; struct ip6t_entry *iter; if (len < sizeof(tmp)) return -EINVAL; if (copy_from_sockptr(&tmp, arg, sizeof(tmp)) != 0) return -EFAULT; /* overflow check */ if (tmp.num_counters >= INT_MAX / sizeof(struct xt_counters)) return -ENOMEM; if (tmp.num_counters == 0) return -EINVAL; if ((u64)len < (u64)tmp.size + sizeof(tmp)) return -EINVAL; tmp.name[sizeof(tmp.name)-1] = 0; newinfo = xt_alloc_table_info(tmp.size); if (!newinfo) return -ENOMEM; loc_cpu_entry = newinfo->entries; if (copy_from_sockptr_offset(loc_cpu_entry, arg, sizeof(tmp), tmp.size) != 0) { ret = -EFAULT; goto free_newinfo; } ret = translate_table(net, newinfo, loc_cpu_entry, &tmp); if (ret != 0) goto free_newinfo; ret = __do_replace(net, tmp.name, tmp.valid_hooks, newinfo, tmp.num_counters, tmp.counters); if (ret) goto free_newinfo_untrans; return 0; free_newinfo_untrans: xt_entry_foreach(iter, loc_cpu_entry, newinfo->size) cleanup_entry(iter, net); free_newinfo: xt_free_table_info(newinfo); return ret; } static int do_add_counters(struct net *net, sockptr_t arg, unsigned int len) { unsigned int i; struct xt_counters_info tmp; struct xt_counters *paddc; struct xt_table *t; const struct xt_table_info *private; int ret = 0; struct ip6t_entry *iter; unsigned int addend; paddc = xt_copy_counters(arg, len, &tmp); if (IS_ERR(paddc)) return PTR_ERR(paddc); t = xt_find_table_lock(net, AF_INET6, tmp.name); if (IS_ERR(t)) { ret = PTR_ERR(t); goto free; } local_bh_disable(); private = t->private; if (private->number != tmp.num_counters) { ret = -EINVAL; goto unlock_up_free; } i = 0; addend = xt_write_recseq_begin(); xt_entry_foreach(iter, private->entries, private->size) { struct xt_counters *tmp; tmp = xt_get_this_cpu_counter(&iter->counters); ADD_COUNTER(*tmp, paddc[i].bcnt, paddc[i].pcnt); ++i; } xt_write_recseq_end(addend); unlock_up_free: local_bh_enable(); xt_table_unlock(t); module_put(t->me); free: vfree(paddc); return ret; } #ifdef CONFIG_NETFILTER_XTABLES_COMPAT struct compat_ip6t_replace { char name[XT_TABLE_MAXNAMELEN]; u32 valid_hooks; u32 num_entries; u32 size; u32 hook_entry[NF_INET_NUMHOOKS]; u32 underflow[NF_INET_NUMHOOKS]; u32 num_counters; compat_uptr_t counters; /* struct xt_counters * */ struct compat_ip6t_entry entries[]; }; static int compat_copy_entry_to_user(struct ip6t_entry *e, void __user **dstptr, unsigned int *size, struct xt_counters *counters, unsigned int i) { struct xt_entry_target *t; struct compat_ip6t_entry __user *ce; u_int16_t target_offset, next_offset; compat_uint_t origsize; const struct xt_entry_match *ematch; int ret = 0; origsize = *size; ce = *dstptr; if (copy_to_user(ce, e, sizeof(struct ip6t_entry)) != 0 || copy_to_user(&ce->counters, &counters[i], sizeof(counters[i])) != 0) return -EFAULT; *dstptr += sizeof(struct compat_ip6t_entry); *size -= sizeof(struct ip6t_entry) - sizeof(struct compat_ip6t_entry); xt_ematch_foreach(ematch, e) { ret = xt_compat_match_to_user(ematch, dstptr, size); if (ret != 0) return ret; } target_offset = e->target_offset - (origsize - *size); t = ip6t_get_target(e); ret = xt_compat_target_to_user(t, dstptr, size); if (ret) return ret; next_offset = e->next_offset - (origsize - *size); if (put_user(target_offset, &ce->target_offset) != 0 || put_user(next_offset, &ce->next_offset) != 0) return -EFAULT; return 0; } static int compat_find_calc_match(struct xt_entry_match *m, const struct ip6t_ip6 *ipv6, int *size) { struct xt_match *match; match = xt_request_find_match(NFPROTO_IPV6, m->u.user.name, m->u.user.revision); if (IS_ERR(match)) return PTR_ERR(match); m->u.kernel.match = match; *size += xt_compat_match_offset(match); return 0; } static void compat_release_entry(struct compat_ip6t_entry *e) { struct xt_entry_target *t; struct xt_entry_match *ematch; /* Cleanup all matches */ xt_ematch_foreach(ematch, e) module_put(ematch->u.kernel.match->me); t = compat_ip6t_get_target(e); module_put(t->u.kernel.target->me); } static int check_compat_entry_size_and_hooks(struct compat_ip6t_entry *e, struct xt_table_info *newinfo, unsigned int *size, const unsigned char *base, const unsigned char *limit) { struct xt_entry_match *ematch; struct xt_entry_target *t; struct xt_target *target; unsigned int entry_offset; unsigned int j; int ret, off; if ((unsigned long)e % __alignof__(struct compat_ip6t_entry) != 0 || (unsigned char *)e + sizeof(struct compat_ip6t_entry) >= limit || (unsigned char *)e + e->next_offset > limit) return -EINVAL; if (e->next_offset < sizeof(struct compat_ip6t_entry) + sizeof(struct compat_xt_entry_target)) return -EINVAL; if (!ip6_checkentry(&e->ipv6)) return -EINVAL; ret = xt_compat_check_entry_offsets(e, e->elems, e->target_offset, e->next_offset); if (ret) return ret; off = sizeof(struct ip6t_entry) - sizeof(struct compat_ip6t_entry); entry_offset = (void *)e - (void *)base; j = 0; xt_ematch_foreach(ematch, e) { ret = compat_find_calc_match(ematch, &e->ipv6, &off); if (ret != 0) goto release_matches; ++j; } t = compat_ip6t_get_target(e); target = xt_request_find_target(NFPROTO_IPV6, t->u.user.name, t->u.user.revision); if (IS_ERR(target)) { ret = PTR_ERR(target); goto release_matches; } t->u.kernel.target = target; off += xt_compat_target_offset(target); *size += off; ret = xt_compat_add_offset(AF_INET6, entry_offset, off); if (ret) goto out; return 0; out: module_put(t->u.kernel.target->me); release_matches: xt_ematch_foreach(ematch, e) { if (j-- == 0) break; module_put(ematch->u.kernel.match->me); } return ret; } static void compat_copy_entry_from_user(struct compat_ip6t_entry *e, void **dstptr, unsigned int *size, struct xt_table_info *newinfo, unsigned char *base) { struct xt_entry_target *t; struct ip6t_entry *de; unsigned int origsize; int h; struct xt_entry_match *ematch; origsize = *size; de = *dstptr; memcpy(de, e, sizeof(struct ip6t_entry)); memcpy(&de->counters, &e->counters, sizeof(e->counters)); *dstptr += sizeof(struct ip6t_entry); *size += sizeof(struct ip6t_entry) - sizeof(struct compat_ip6t_entry); xt_ematch_foreach(ematch, e) xt_compat_match_from_user(ematch, dstptr, size); de->target_offset = e->target_offset - (origsize - *size); t = compat_ip6t_get_target(e); xt_compat_target_from_user(t, dstptr, size); de->next_offset = e->next_offset - (origsize - *size); for (h = 0; h < NF_INET_NUMHOOKS; h++) { if ((unsigned char *)de - base < newinfo->hook_entry[h]) newinfo->hook_entry[h] -= origsize - *size; if ((unsigned char *)de - base < newinfo->underflow[h]) newinfo->underflow[h] -= origsize - *size; } } static int translate_compat_table(struct net *net, struct xt_table_info **pinfo, void **pentry0, const struct compat_ip6t_replace *compatr) { unsigned int i, j; struct xt_table_info *newinfo, *info; void *pos, *entry0, *entry1; struct compat_ip6t_entry *iter0; struct ip6t_replace repl; unsigned int size; int ret; info = *pinfo; entry0 = *pentry0; size = compatr->size; info->number = compatr->num_entries; j = 0; xt_compat_lock(AF_INET6); ret = xt_compat_init_offsets(AF_INET6, compatr->num_entries); if (ret) goto out_unlock; /* Walk through entries, checking offsets. */ xt_entry_foreach(iter0, entry0, compatr->size) { ret = check_compat_entry_size_and_hooks(iter0, info, &size, entry0, entry0 + compatr->size); if (ret != 0) goto out_unlock; ++j; } ret = -EINVAL; if (j != compatr->num_entries) goto out_unlock; ret = -ENOMEM; newinfo = xt_alloc_table_info(size); if (!newinfo) goto out_unlock; memset(newinfo->entries, 0, size); newinfo->number = compatr->num_entries; for (i = 0; i < NF_INET_NUMHOOKS; i++) { newinfo->hook_entry[i] = compatr->hook_entry[i]; newinfo->underflow[i] = compatr->underflow[i]; } entry1 = newinfo->entries; pos = entry1; size = compatr->size; xt_entry_foreach(iter0, entry0, compatr->size) compat_copy_entry_from_user(iter0, &pos, &size, newinfo, entry1); /* all module references in entry0 are now gone. */ xt_compat_flush_offsets(AF_INET6); xt_compat_unlock(AF_INET6); memcpy(&repl, compatr, sizeof(*compatr)); for (i = 0; i < NF_INET_NUMHOOKS; i++) { repl.hook_entry[i] = newinfo->hook_entry[i]; repl.underflow[i] = newinfo->underflow[i]; } repl.num_counters = 0; repl.counters = NULL; repl.size = newinfo->size; ret = translate_table(net, newinfo, entry1, &repl); if (ret) goto free_newinfo; *pinfo = newinfo; *pentry0 = entry1; xt_free_table_info(info); return 0; free_newinfo: xt_free_table_info(newinfo); return ret; out_unlock: xt_compat_flush_offsets(AF_INET6); xt_compat_unlock(AF_INET6); xt_entry_foreach(iter0, entry0, compatr->size) { if (j-- == 0) break; compat_release_entry(iter0); } return ret; } static int compat_do_replace(struct net *net, sockptr_t arg, unsigned int len) { int ret; struct compat_ip6t_replace tmp; struct xt_table_info *newinfo; void *loc_cpu_entry; struct ip6t_entry *iter; if (len < sizeof(tmp)) return -EINVAL; if (copy_from_sockptr(&tmp, arg, sizeof(tmp)) != 0) return -EFAULT; /* overflow check */ if (tmp.num_counters >= INT_MAX / sizeof(struct xt_counters)) return -ENOMEM; if (tmp.num_counters == 0) return -EINVAL; if ((u64)len < (u64)tmp.size + sizeof(tmp)) return -EINVAL; tmp.name[sizeof(tmp.name)-1] = 0; newinfo = xt_alloc_table_info(tmp.size); if (!newinfo) return -ENOMEM; loc_cpu_entry = newinfo->entries; if (copy_from_sockptr_offset(loc_cpu_entry, arg, sizeof(tmp), tmp.size) != 0) { ret = -EFAULT; goto free_newinfo; } ret = translate_compat_table(net, &newinfo, &loc_cpu_entry, &tmp); if (ret != 0) goto free_newinfo; ret = __do_replace(net, tmp.name, tmp.valid_hooks, newinfo, tmp.num_counters, compat_ptr(tmp.counters)); if (ret) goto free_newinfo_untrans; return 0; free_newinfo_untrans: xt_entry_foreach(iter, loc_cpu_entry, newinfo->size) cleanup_entry(iter, net); free_newinfo: xt_free_table_info(newinfo); return ret; } struct compat_ip6t_get_entries { char name[XT_TABLE_MAXNAMELEN]; compat_uint_t size; struct compat_ip6t_entry entrytable[]; }; static int compat_copy_entries_to_user(unsigned int total_size, struct xt_table *table, void __user *userptr) { struct xt_counters *counters; const struct xt_table_info *private = table->private; void __user *pos; unsigned int size; int ret = 0; unsigned int i = 0; struct ip6t_entry *iter; counters = alloc_counters(table); if (IS_ERR(counters)) return PTR_ERR(counters); pos = userptr; size = total_size; xt_entry_foreach(iter, private->entries, total_size) { ret = compat_copy_entry_to_user(iter, &pos, &size, counters, i++); if (ret != 0) break; } vfree(counters); return ret; } static int compat_get_entries(struct net *net, struct compat_ip6t_get_entries __user *uptr, int *len) { int ret; struct compat_ip6t_get_entries get; struct xt_table *t; if (*len < sizeof(get)) return -EINVAL; if (copy_from_user(&get, uptr, sizeof(get)) != 0) return -EFAULT; if (*len != sizeof(struct compat_ip6t_get_entries) + get.size) return -EINVAL; get.name[sizeof(get.name) - 1] = '\0'; xt_compat_lock(AF_INET6); t = xt_find_table_lock(net, AF_INET6, get.name); if (!IS_ERR(t)) { const struct xt_table_info *private = t->private; struct xt_table_info info; ret = compat_table_info(private, &info); if (!ret && get.size == info.size) ret = compat_copy_entries_to_user(private->size, t, uptr->entrytable); else if (!ret) ret = -EAGAIN; xt_compat_flush_offsets(AF_INET6); module_put(t->me); xt_table_unlock(t); } else ret = PTR_ERR(t); xt_compat_unlock(AF_INET6); return ret; } #endif static int do_ip6t_set_ctl(struct sock *sk, int cmd, sockptr_t arg, unsigned int len) { int ret; if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) return -EPERM; switch (cmd) { case IP6T_SO_SET_REPLACE: #ifdef CONFIG_NETFILTER_XTABLES_COMPAT if (in_compat_syscall()) ret = compat_do_replace(sock_net(sk), arg, len); else #endif ret = do_replace(sock_net(sk), arg, len); break; case IP6T_SO_SET_ADD_COUNTERS: ret = do_add_counters(sock_net(sk), arg, len); break; default: ret = -EINVAL; } return ret; } static int do_ip6t_get_ctl(struct sock *sk, int cmd, void __user *user, int *len) { int ret; if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) return -EPERM; switch (cmd) { case IP6T_SO_GET_INFO: ret = get_info(sock_net(sk), user, len); break; case IP6T_SO_GET_ENTRIES: #ifdef CONFIG_NETFILTER_XTABLES_COMPAT if (in_compat_syscall()) ret = compat_get_entries(sock_net(sk), user, len); else #endif ret = get_entries(sock_net(sk), user, len); break; case IP6T_SO_GET_REVISION_MATCH: case IP6T_SO_GET_REVISION_TARGET: { struct xt_get_revision rev; int target; if (*len != sizeof(rev)) { ret = -EINVAL; break; } if (copy_from_user(&rev, user, sizeof(rev)) != 0) { ret = -EFAULT; break; } rev.name[sizeof(rev.name)-1] = 0; if (cmd == IP6T_SO_GET_REVISION_TARGET) target = 1; else target = 0; try_then_request_module(xt_find_revision(AF_INET6, rev.name, rev.revision, target, &ret), "ip6t_%s", rev.name); break; } default: ret = -EINVAL; } return ret; } static void __ip6t_unregister_table(struct net *net, struct xt_table *table) { struct xt_table_info *private; void *loc_cpu_entry; struct module *table_owner = table->me; struct ip6t_entry *iter; private = xt_unregister_table(table); /* Decrease module usage counts and free resources */ loc_cpu_entry = private->entries; xt_entry_foreach(iter, loc_cpu_entry, private->size) cleanup_entry(iter, net); if (private->number > private->initial_entries) module_put(table_owner); xt_free_table_info(private); } int ip6t_register_table(struct net *net, const struct xt_table *table, const struct ip6t_replace *repl, const struct nf_hook_ops *template_ops) { struct nf_hook_ops *ops; unsigned int num_ops; int ret, i; struct xt_table_info *newinfo; struct xt_table_info bootstrap = {0}; void *loc_cpu_entry; struct xt_table *new_table; newinfo = xt_alloc_table_info(repl->size); if (!newinfo) return -ENOMEM; loc_cpu_entry = newinfo->entries; memcpy(loc_cpu_entry, repl->entries, repl->size); ret = translate_table(net, newinfo, loc_cpu_entry, repl); if (ret != 0) { xt_free_table_info(newinfo); return ret; } new_table = xt_register_table(net, table, &bootstrap, newinfo); if (IS_ERR(new_table)) { struct ip6t_entry *iter; xt_entry_foreach(iter, loc_cpu_entry, newinfo->size) cleanup_entry(iter, net); xt_free_table_info(newinfo); return PTR_ERR(new_table); } if (!template_ops) return 0; num_ops = hweight32(table->valid_hooks); if (num_ops == 0) { ret = -EINVAL; goto out_free; } ops = kmemdup_array(template_ops, num_ops, sizeof(*ops), GFP_KERNEL); if (!ops) { ret = -ENOMEM; goto out_free; } for (i = 0; i < num_ops; i++) ops[i].priv = new_table; new_table->ops = ops; ret = nf_register_net_hooks(net, ops, num_ops); if (ret != 0) goto out_free; return ret; out_free: __ip6t_unregister_table(net, new_table); return ret; } void ip6t_unregister_table_pre_exit(struct net *net, const char *name) { struct xt_table *table = xt_find_table(net, NFPROTO_IPV6, name); if (table) nf_unregister_net_hooks(net, table->ops, hweight32(table->valid_hooks)); } void ip6t_unregister_table_exit(struct net *net, const char *name) { struct xt_table *table = xt_find_table(net, NFPROTO_IPV6, name); if (table) __ip6t_unregister_table(net, table); } /* The built-in targets: standard (NULL) and error. */ static struct xt_target ip6t_builtin_tg[] __read_mostly = { { .name = XT_STANDARD_TARGET, .targetsize = sizeof(int), .family = NFPROTO_IPV6, #ifdef CONFIG_NETFILTER_XTABLES_COMPAT .compatsize = sizeof(compat_int_t), .compat_from_user = compat_standard_from_user, .compat_to_user = compat_standard_to_user, #endif }, { .name = XT_ERROR_TARGET, .target = ip6t_error, .targetsize = XT_FUNCTION_MAXNAMELEN, .family = NFPROTO_IPV6, }, }; static struct nf_sockopt_ops ip6t_sockopts = { .pf = PF_INET6, .set_optmin = IP6T_BASE_CTL, .set_optmax = IP6T_SO_SET_MAX+1, .set = do_ip6t_set_ctl, .get_optmin = IP6T_BASE_CTL, .get_optmax = IP6T_SO_GET_MAX+1, .get = do_ip6t_get_ctl, .owner = THIS_MODULE, }; static int __net_init ip6_tables_net_init(struct net *net) { return xt_proto_init(net, NFPROTO_IPV6); } static void __net_exit ip6_tables_net_exit(struct net *net) { xt_proto_fini(net, NFPROTO_IPV6); } static struct pernet_operations ip6_tables_net_ops = { .init = ip6_tables_net_init, .exit = ip6_tables_net_exit, }; static int __init ip6_tables_init(void) { int ret; ret = register_pernet_subsys(&ip6_tables_net_ops); if (ret < 0) goto err1; /* No one else will be downing sem now, so we won't sleep */ ret = xt_register_targets(ip6t_builtin_tg, ARRAY_SIZE(ip6t_builtin_tg)); if (ret < 0) goto err2; /* Register setsockopt */ ret = nf_register_sockopt(&ip6t_sockopts); if (ret < 0) goto err4; return 0; err4: xt_unregister_targets(ip6t_builtin_tg, ARRAY_SIZE(ip6t_builtin_tg)); err2: unregister_pernet_subsys(&ip6_tables_net_ops); err1: return ret; } static void __exit ip6_tables_fini(void) { nf_unregister_sockopt(&ip6t_sockopts); xt_unregister_targets(ip6t_builtin_tg, ARRAY_SIZE(ip6t_builtin_tg)); unregister_pernet_subsys(&ip6_tables_net_ops); } EXPORT_SYMBOL(ip6t_register_table); EXPORT_SYMBOL(ip6t_unregister_table_pre_exit); EXPORT_SYMBOL(ip6t_unregister_table_exit); EXPORT_SYMBOL(ip6t_do_table); module_init(ip6_tables_init); module_exit(ip6_tables_fini);
236 238 238 237 237 228 227 1 173 226 171 228 171 226 1 190 173 227 246 246 246 246 246 190 229 190 5 244 247 200 3 216 215 21 199 207 5 1 128 237 243 140 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 // SPDX-License-Identifier: GPL-2.0-or-later /* * Squashfs - a compressed read only filesystem for Linux * * Copyright (c) 2002, 2003, 2004, 2005, 2006, 2007, 2008 * Phillip Lougher <phillip@squashfs.org.uk> * * block.c */ /* * This file implements the low-level routines to read and decompress * datablocks and metadata blocks. */ #include <linux/blkdev.h> #include <linux/fs.h> #include <linux/vfs.h> #include <linux/slab.h> #include <linux/pagemap.h> #include <linux/string.h> #include <linux/bio.h> #include "squashfs_fs.h" #include "squashfs_fs_sb.h" #include "squashfs.h" #include "decompressor.h" #include "page_actor.h" /* * Returns the amount of bytes copied to the page actor. */ static int copy_bio_to_actor(struct bio *bio, struct squashfs_page_actor *actor, int offset, int req_length) { void *actor_addr; struct bvec_iter_all iter_all = {}; struct bio_vec *bvec = bvec_init_iter_all(&iter_all); int copied_bytes = 0; int actor_offset = 0; squashfs_actor_nobuff(actor); actor_addr = squashfs_first_page(actor); if (WARN_ON_ONCE(!bio_next_segment(bio, &iter_all))) return 0; while (copied_bytes < req_length) { int bytes_to_copy = min_t(int, bvec->bv_len - offset, PAGE_SIZE - actor_offset); bytes_to_copy = min_t(int, bytes_to_copy, req_length - copied_bytes); if (!IS_ERR(actor_addr)) memcpy(actor_addr + actor_offset, bvec_virt(bvec) + offset, bytes_to_copy); actor_offset += bytes_to_copy; copied_bytes += bytes_to_copy; offset += bytes_to_copy; if (actor_offset >= PAGE_SIZE) { actor_addr = squashfs_next_page(actor); if (!actor_addr) break; actor_offset = 0; } if (offset >= bvec->bv_len) { if (!bio_next_segment(bio, &iter_all)) break; offset = 0; } } squashfs_finish_page(actor); return copied_bytes; } static int squashfs_bio_read_cached(struct bio *fullbio, struct address_space *cache_mapping, u64 index, int length, u64 read_start, u64 read_end, int page_count) { struct folio *head_to_cache = NULL, *tail_to_cache = NULL; struct block_device *bdev = fullbio->bi_bdev; int start_idx = 0, end_idx = 0; struct folio_iter fi; struct bio *bio = NULL; int idx = 0; int err = 0; #ifdef CONFIG_SQUASHFS_COMP_CACHE_FULL struct folio **cache_folios = kmalloc_array(page_count, sizeof(*cache_folios), GFP_KERNEL | __GFP_ZERO); #endif bio_for_each_folio_all(fi, fullbio) { struct folio *folio = fi.folio; if (folio->mapping == cache_mapping) { idx++; continue; } /* * We only use this when the device block size is the same as * the page size, so read_start and read_end cover full pages. * * Compare these to the original required index and length to * only cache pages which were requested partially, since these * are the ones which are likely to be needed when reading * adjacent blocks. */ if (idx == 0 && index != read_start) head_to_cache = folio; else if (idx == page_count - 1 && index + length != read_end) tail_to_cache = folio; #ifdef CONFIG_SQUASHFS_COMP_CACHE_FULL /* Cache all pages in the BIO for repeated reads */ else if (cache_folios) cache_folios[idx] = folio; #endif if (!bio || idx != end_idx) { struct bio *new = bio_alloc_clone(bdev, fullbio, GFP_NOIO, &fs_bio_set); if (bio) { bio_trim(bio, start_idx * PAGE_SECTORS, (end_idx - start_idx) * PAGE_SECTORS); bio_chain(bio, new); submit_bio(bio); } bio = new; start_idx = idx; } idx++; end_idx = idx; } if (bio) { bio_trim(bio, start_idx * PAGE_SECTORS, (end_idx - start_idx) * PAGE_SECTORS); err = submit_bio_wait(bio); bio_put(bio); } if (err) return err; if (head_to_cache) { int ret = filemap_add_folio(cache_mapping, head_to_cache, read_start >> PAGE_SHIFT, GFP_NOIO); if (!ret) { folio_mark_uptodate(head_to_cache); folio_unlock(head_to_cache); } } if (tail_to_cache) { int ret = filemap_add_folio(cache_mapping, tail_to_cache, (read_end >> PAGE_SHIFT) - 1, GFP_NOIO); if (!ret) { folio_mark_uptodate(tail_to_cache); folio_unlock(tail_to_cache); } } #ifdef CONFIG_SQUASHFS_COMP_CACHE_FULL if (!cache_folios) goto out; for (idx = 0; idx < page_count; idx++) { if (!cache_folios[idx]) continue; int ret = filemap_add_folio(cache_mapping, cache_folios[idx], (read_start >> PAGE_SHIFT) + idx, GFP_NOIO); if (!ret) { folio_mark_uptodate(cache_folios[idx]); folio_unlock(cache_folios[idx]); } } kfree(cache_folios); out: #endif return 0; } static struct page *squashfs_get_cache_page(struct address_space *mapping, pgoff_t index) { struct page *page; if (!mapping) return NULL; page = find_get_page(mapping, index); if (!page) return NULL; if (!PageUptodate(page)) { put_page(page); return NULL; } return page; } static int squashfs_bio_read(struct super_block *sb, u64 index, int length, struct bio **biop, int *block_offset) { struct squashfs_sb_info *msblk = sb->s_fs_info; struct address_space *cache_mapping = msblk->cache_mapping; const u64 read_start = round_down(index, msblk->devblksize); const sector_t block = read_start >> msblk->devblksize_log2; const u64 read_end = round_up(index + length, msblk->devblksize); const sector_t block_end = read_end >> msblk->devblksize_log2; int offset = read_start - round_down(index, PAGE_SIZE); int total_len = (block_end - block) << msblk->devblksize_log2; const int page_count = DIV_ROUND_UP(total_len + offset, PAGE_SIZE); int error, i; struct bio *bio; bio = bio_kmalloc(page_count, GFP_NOIO); if (!bio) return -ENOMEM; bio_init_inline(bio, sb->s_bdev, page_count, REQ_OP_READ); bio->bi_iter.bi_sector = block * (msblk->devblksize >> SECTOR_SHIFT); for (i = 0; i < page_count; ++i) { unsigned int len = min_t(unsigned int, PAGE_SIZE - offset, total_len); pgoff_t index = (read_start >> PAGE_SHIFT) + i; struct page *page; page = squashfs_get_cache_page(cache_mapping, index); if (!page) page = alloc_page(GFP_NOIO); if (!page) { error = -ENOMEM; goto out_free_bio; } /* * Use the __ version to avoid merging since we need each page * to be separate when we check for and avoid cached pages. */ __bio_add_page(bio, page, len, offset); offset = 0; total_len -= len; } if (cache_mapping) error = squashfs_bio_read_cached(bio, cache_mapping, index, length, read_start, read_end, page_count); else error = submit_bio_wait(bio); if (error) goto out_free_bio; *biop = bio; *block_offset = index & ((1 << msblk->devblksize_log2) - 1); return 0; out_free_bio: bio_free_pages(bio); bio_uninit(bio); kfree(bio); return error; } /* * Read and decompress a metadata block or datablock. Length is non-zero * if a datablock is being read (the size is stored elsewhere in the * filesystem), otherwise the length is obtained from the first two bytes of * the metadata block. A bit in the length field indicates if the block * is stored uncompressed in the filesystem (usually because compression * generated a larger block - this does occasionally happen with compression * algorithms). */ int squashfs_read_data(struct super_block *sb, u64 index, int length, u64 *next_index, struct squashfs_page_actor *output) { struct squashfs_sb_info *msblk = sb->s_fs_info; struct bio *bio = NULL; int compressed; int res; int offset; if (length) { /* * Datablock. */ compressed = SQUASHFS_COMPRESSED_BLOCK(length); length = SQUASHFS_COMPRESSED_SIZE_BLOCK(length); TRACE("Block @ 0x%llx, %scompressed size %d, src size %d\n", index, compressed ? "" : "un", length, output->length); } else { /* * Metadata block. */ const u8 *data; struct bvec_iter_all iter_all = {}; struct bio_vec *bvec = bvec_init_iter_all(&iter_all); if (index + 2 > msblk->bytes_used) { res = -EIO; goto out; } res = squashfs_bio_read(sb, index, 2, &bio, &offset); if (res) goto out; if (WARN_ON_ONCE(!bio_next_segment(bio, &iter_all))) { res = -EIO; goto out_free_bio; } /* Extract the length of the metadata block */ data = bvec_virt(bvec); length = data[offset]; if (offset < bvec->bv_len - 1) { length |= data[offset + 1] << 8; } else { if (WARN_ON_ONCE(!bio_next_segment(bio, &iter_all))) { res = -EIO; goto out_free_bio; } data = bvec_virt(bvec); length |= data[0] << 8; } bio_free_pages(bio); bio_uninit(bio); kfree(bio); compressed = SQUASHFS_COMPRESSED(length); length = SQUASHFS_COMPRESSED_SIZE(length); index += 2; TRACE("Block @ 0x%llx, %scompressed size %d\n", index - 2, compressed ? "" : "un", length); } if (length <= 0 || length > output->length || (index + length) > msblk->bytes_used) { res = -EIO; goto out; } if (next_index) *next_index = index + length; res = squashfs_bio_read(sb, index, length, &bio, &offset); if (res) goto out; if (compressed) { if (!msblk->stream) { res = -EIO; goto out_free_bio; } res = msblk->thread_ops->decompress(msblk, bio, offset, length, output); } else { res = copy_bio_to_actor(bio, output, offset, length); } out_free_bio: bio_free_pages(bio); bio_uninit(bio); kfree(bio); out: if (res < 0) { ERROR("Failed to read block 0x%llx: %d\n", index, res); if (msblk->panic_on_errors) panic("squashfs read failed"); } return res; }
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _ASM_X86_IO_H #define _ASM_X86_IO_H /* * This file contains the definitions for the x86 IO instructions * inb/inw/inl/outb/outw/outl and the "string versions" of the same * (insb/insw/insl/outsb/outsw/outsl). You can also use "pausing" * versions of the single-IO instructions (inb_p/inw_p/..). * * This file is not meant to be obfuscating: it's just complicated * to (a) handle it all in a way that makes gcc able to optimize it * as well as possible and (b) trying to avoid writing the same thing * over and over again with slight variations and possibly making a * mistake somewhere. */ /* * Thanks to James van Artsdalen for a better timing-fix than * the two short jumps: using outb's to a nonexistent port seems * to guarantee better timings even on fast machines. * * On the other hand, I'd like to be sure of a non-existent port: * I feel a bit unsafe about using 0x80 (should be safe, though) * * Linus */ /* * Bit simplified and optimized by Jan Hubicka * Support of BIGMEM added by Gerhard Wichert, Siemens AG, July 1999. * * isa_memset_io, isa_memcpy_fromio, isa_memcpy_toio added, * isa_read[wl] and isa_write[wl] fixed * - Arnaldo Carvalho de Melo <acme@conectiva.com.br> */ #include <linux/string.h> #include <linux/compiler.h> #include <linux/cc_platform.h> #include <asm/page.h> #include <asm/early_ioremap.h> #include <asm/pgtable_types.h> #include <asm/shared/io.h> #include <asm/special_insns.h> #define build_mmio_read(name, size, type, reg, barrier) \ static inline type name(const volatile void __iomem *addr) \ { type ret; asm volatile("mov" size " %1,%0":reg (ret) \ :"m" (*(volatile type __force *)addr) barrier); return ret; } #define build_mmio_write(name, size, type, reg, barrier) \ static inline void name(type val, volatile void __iomem *addr) \ { asm volatile("mov" size " %0,%1": :reg (val), \ "m" (*(volatile type __force *)addr) barrier); } build_mmio_read(readb, "b", unsigned char, "=q", :"memory") build_mmio_read(readw, "w", unsigned short, "=r", :"memory") build_mmio_read(readl, "l", unsigned int, "=r", :"memory") build_mmio_read(__readb, "b", unsigned char, "=q", ) build_mmio_read(__readw, "w", unsigned short, "=r", ) build_mmio_read(__readl, "l", unsigned int, "=r", ) build_mmio_write(writeb, "b", unsigned char, "q", :"memory") build_mmio_write(writew, "w", unsigned short, "r", :"memory") build_mmio_write(writel, "l", unsigned int, "r", :"memory") build_mmio_write(__writeb, "b", unsigned char, "q", ) build_mmio_write(__writew, "w", unsigned short, "r", ) build_mmio_write(__writel, "l", unsigned int, "r", ) #define readb readb #define readw readw #define readl readl #define readb_relaxed(a) __readb(a) #define readw_relaxed(a) __readw(a) #define readl_relaxed(a) __readl(a) #define __raw_readb __readb #define __raw_readw __readw #define __raw_readl __readl #define writeb writeb #define writew writew #define writel writel #define writeb_relaxed(v, a) __writeb(v, a) #define writew_relaxed(v, a) __writew(v, a) #define writel_relaxed(v, a) __writel(v, a) #define __raw_writeb __writeb #define __raw_writew __writew #define __raw_writel __writel #ifdef CONFIG_X86_64 build_mmio_read(readq, "q", u64, "=r", :"memory") build_mmio_read(__readq, "q", u64, "=r", ) build_mmio_write(writeq, "q", u64, "r", :"memory") build_mmio_write(__writeq, "q", u64, "r", ) #define readq_relaxed(a) __readq(a) #define writeq_relaxed(v, a) __writeq(v, a) #define __raw_readq __readq #define __raw_writeq __writeq /* Let people know that we have them */ #define readq readq #define writeq writeq #endif #define ARCH_HAS_VALID_PHYS_ADDR_RANGE extern int valid_phys_addr_range(phys_addr_t addr, size_t size); extern int valid_mmap_phys_addr_range(unsigned long pfn, size_t size); /** * virt_to_phys - map virtual addresses to physical * @address: address to remap * * The returned physical address is the physical (CPU) mapping for * the memory address given. It is only valid to use this function on * addresses directly mapped or allocated via kmalloc. * * This function does not give bus mappings for DMA transfers. In * almost all conceivable cases a device driver should not be using * this function */ static inline phys_addr_t virt_to_phys(volatile void *address) { return __pa(address); } #define virt_to_phys virt_to_phys /** * phys_to_virt - map physical address to virtual * @address: address to remap * * The returned virtual address is a current CPU mapping for * the memory address given. It is only valid to use this function on * addresses that have a kernel mapping * * This function does not handle bus mappings for DMA transfers. In * almost all conceivable cases a device driver should not be using * this function */ static inline void *phys_to_virt(phys_addr_t address) { return __va(address); } #define phys_to_virt phys_to_virt /* * ISA I/O bus memory addresses are 1:1 with the physical address. * However, we truncate the address to unsigned int to avoid undesirable * promotions in legacy drivers. */ static inline unsigned int isa_virt_to_bus(volatile void *address) { return (unsigned int)virt_to_phys(address); } #define isa_bus_to_virt phys_to_virt /* * The default ioremap() behavior is non-cached; if you need something * else, you probably want one of the following. */ extern void __iomem *ioremap_uc(resource_size_t offset, unsigned long size); #define ioremap_uc ioremap_uc extern void __iomem *ioremap_cache(resource_size_t offset, unsigned long size); #define ioremap_cache ioremap_cache extern void __iomem *ioremap_prot(resource_size_t offset, unsigned long size, pgprot_t prot); #define ioremap_prot ioremap_prot extern void __iomem *ioremap_encrypted(resource_size_t phys_addr, unsigned long size); #define ioremap_encrypted ioremap_encrypted void *arch_memremap_wb(phys_addr_t phys_addr, size_t size, unsigned long flags); #define arch_memremap_wb arch_memremap_wb /** * ioremap - map bus memory into CPU space * @offset: bus address of the memory * @size: size of the resource to map * * ioremap performs a platform specific sequence of operations to * make bus memory CPU accessible via the readb/readw/readl/writeb/ * writew/writel functions and the other mmio helpers. The returned * address is not guaranteed to be usable directly as a virtual * address. * * If the area you are trying to map is a PCI BAR you should have a * look at pci_iomap(). */ void __iomem *ioremap(resource_size_t offset, unsigned long size); #define ioremap ioremap extern void iounmap(volatile void __iomem *addr); #define iounmap iounmap #ifdef __KERNEL__ void memcpy_fromio(void *, const volatile void __iomem *, size_t); void memcpy_toio(volatile void __iomem *, const void *, size_t); void memset_io(volatile void __iomem *, int, size_t); #define memcpy_fromio memcpy_fromio #define memcpy_toio memcpy_toio #define memset_io memset_io #ifdef CONFIG_X86_64 /* * Commit 0f07496144c2 ("[PATCH] Add faster __iowrite32_copy routine for * x86_64") says that circa 2006 rep movsl is noticeably faster than a copy * loop. */ static inline void __iowrite32_copy(void __iomem *to, const void *from, size_t count) { asm volatile("rep movsl" : "=&c"(count), "=&D"(to), "=&S"(from) : "0"(count), "1"(to), "2"(from) : "memory"); } #define __iowrite32_copy __iowrite32_copy #endif /* * ISA space is 'always mapped' on a typical x86 system, no need to * explicitly ioremap() it. The fact that the ISA IO space is mapped * to PAGE_OFFSET is pure coincidence - it does not mean ISA values * are physical addresses. The following constant pointer can be * used as the IO-area pointer (it can be iounmapped as well, so the * analogy with PCI is quite large): */ #define __ISA_IO_base ((char __iomem *)(PAGE_OFFSET)) #endif /* __KERNEL__ */ extern void native_io_delay(void); extern int io_delay_type; extern void io_delay_init(void); #if defined(CONFIG_PARAVIRT) #include <asm/paravirt.h> #else static inline void slow_down_io(void) { native_io_delay(); #ifdef REALLY_SLOW_IO native_io_delay(); native_io_delay(); native_io_delay(); #endif } #endif #define BUILDIO(bwl, type) \ static inline void out##bwl##_p(type value, u16 port) \ { \ out##bwl(value, port); \ slow_down_io(); \ } \ \ static inline type in##bwl##_p(u16 port) \ { \ type value = in##bwl(port); \ slow_down_io(); \ return value; \ } \ \ static inline void outs##bwl(u16 port, const void *addr, unsigned long count) \ { \ if (cc_platform_has(CC_ATTR_GUEST_UNROLL_STRING_IO)) { \ type *value = (type *)addr; \ while (count) { \ out##bwl(*value, port); \ value++; \ count--; \ } \ } else { \ asm volatile("rep outs" #bwl \ : "+S"(addr), "+c"(count) \ : "d"(port) : "memory"); \ } \ } \ \ static inline void ins##bwl(u16 port, void *addr, unsigned long count) \ { \ if (cc_platform_has(CC_ATTR_GUEST_UNROLL_STRING_IO)) { \ type *value = (type *)addr; \ while (count) { \ *value = in##bwl(port); \ value++; \ count--; \ } \ } else { \ asm volatile("rep ins" #bwl \ : "+D"(addr), "+c"(count) \ : "d"(port) : "memory"); \ } \ } BUILDIO(b, u8) BUILDIO(w, u16) BUILDIO(l, u32) #undef BUILDIO #define inb_p inb_p #define inw_p inw_p #define inl_p inl_p #define insb insb #define insw insw #define insl insl #define outb_p outb_p #define outw_p outw_p #define outl_p outl_p #define outsb outsb #define outsw outsw #define outsl outsl extern void *xlate_dev_mem_ptr(phys_addr_t phys); extern void unxlate_dev_mem_ptr(phys_addr_t phys, void *addr); #define xlate_dev_mem_ptr xlate_dev_mem_ptr #define unxlate_dev_mem_ptr unxlate_dev_mem_ptr extern int ioremap_change_attr(unsigned long vaddr, unsigned long size, enum page_cache_mode pcm); extern void __iomem *ioremap_wc(resource_size_t offset, unsigned long size); #define ioremap_wc ioremap_wc extern void __iomem *ioremap_wt(resource_size_t offset, unsigned long size); #define ioremap_wt ioremap_wt extern bool is_early_ioremap_ptep(pte_t *ptep); #define IO_SPACE_LIMIT 0xffff #include <asm-generic/io.h> #undef PCI_IOBASE #ifdef CONFIG_MTRR extern int __must_check arch_phys_wc_index(int handle); #define arch_phys_wc_index arch_phys_wc_index extern int __must_check arch_phys_wc_add(unsigned long base, unsigned long size); extern void arch_phys_wc_del(int handle); #define arch_phys_wc_add arch_phys_wc_add #endif #ifdef CONFIG_X86_PAT extern int arch_io_reserve_memtype_wc(resource_size_t start, resource_size_t size); extern void arch_io_free_memtype_wc(resource_size_t start, resource_size_t size); #define arch_io_reserve_memtype_wc arch_io_reserve_memtype_wc #endif #ifdef CONFIG_AMD_MEM_ENCRYPT extern bool arch_memremap_can_ram_remap(resource_size_t offset, unsigned long size, unsigned long flags); #define arch_memremap_can_ram_remap arch_memremap_can_ram_remap extern bool phys_mem_access_encrypted(unsigned long phys_addr, unsigned long size); #else static inline bool phys_mem_access_encrypted(unsigned long phys_addr, unsigned long size) { return true; } #endif /** * iosubmit_cmds512 - copy data to single MMIO location, in 512-bit units * @dst: destination, in MMIO space (must be 512-bit aligned) * @src: source * @count: number of 512 bits quantities to submit * * Submit data from kernel space to MMIO space, in units of 512 bits at a * time. Order of access is not guaranteed, nor is a memory barrier * performed afterwards. * * Warning: Do not use this helper unless your driver has checked that the CPU * instruction is supported on the platform. */ static inline void iosubmit_cmds512(void __iomem *dst, const void *src, size_t count) { const u8 *from = src; const u8 *end = from + count * 64; while (from < end) { movdir64b_io(dst, from); from += 64; } } #endif /* _ASM_X86_IO_H */
1 1 1 1 2 2 2 2 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 // SPDX-License-Identifier: GPL-2.0-or-later /* * * Copyright Jonathan Naylor G4KLX (g4klx@g4klx.demon.co.uk) */ #include <linux/module.h> #include <linux/proc_fs.h> #include <linux/kernel.h> #include <linux/interrupt.h> #include <linux/fs.h> #include <linux/types.h> #include <linux/sysctl.h> #include <linux/string.h> #include <linux/socket.h> #include <linux/errno.h> #include <linux/fcntl.h> #include <linux/in.h> #include <linux/if_ether.h> /* For the statistics structure. */ #include <linux/slab.h> #include <linux/uaccess.h> #include <asm/io.h> #include <linux/inet.h> #include <linux/netdevice.h> #include <linux/etherdevice.h> #include <linux/if_arp.h> #include <linux/skbuff.h> #include <net/ip.h> #include <net/arp.h> #include <net/ax25.h> #include <net/netrom.h> /* * Only allow IP over NET/ROM frames through if the netrom device is up. */ int nr_rx_ip(struct sk_buff *skb, struct net_device *dev) { struct net_device_stats *stats = &dev->stats; if (!netif_running(dev)) { stats->rx_dropped++; return 0; } stats->rx_packets++; stats->rx_bytes += skb->len; skb->protocol = htons(ETH_P_IP); /* Spoof incoming device */ skb->dev = dev; skb->mac_header = skb->network_header; skb_reset_network_header(skb); skb->pkt_type = PACKET_HOST; netif_rx(skb); return 1; } static int nr_header(struct sk_buff *skb, struct net_device *dev, unsigned short type, const void *daddr, const void *saddr, unsigned int len) { unsigned char *buff = skb_push(skb, NR_NETWORK_LEN + NR_TRANSPORT_LEN); memcpy(buff, (saddr != NULL) ? saddr : dev->dev_addr, dev->addr_len); buff[6] &= ~AX25_CBIT; buff[6] &= ~AX25_EBIT; buff[6] |= AX25_SSSID_SPARE; buff += AX25_ADDR_LEN; if (daddr != NULL) memcpy(buff, daddr, dev->addr_len); buff[6] &= ~AX25_CBIT; buff[6] |= AX25_EBIT; buff[6] |= AX25_SSSID_SPARE; buff += AX25_ADDR_LEN; *buff++ = READ_ONCE(sysctl_netrom_network_ttl_initialiser); *buff++ = NR_PROTO_IP; *buff++ = NR_PROTO_IP; *buff++ = 0; *buff++ = 0; *buff++ = NR_PROTOEXT; if (daddr != NULL) return 37; return -37; } static int __must_check nr_set_mac_address(struct net_device *dev, void *addr) { struct sockaddr *sa = addr; int err; if (!memcmp(dev->dev_addr, sa->sa_data, dev->addr_len)) return 0; if (dev->flags & IFF_UP) { err = ax25_listen_register((ax25_address *)sa->sa_data, NULL); if (err) return err; ax25_listen_release((const ax25_address *)dev->dev_addr, NULL); } dev_addr_set(dev, sa->sa_data); return 0; } static int nr_open(struct net_device *dev) { int err; err = ax25_listen_register((const ax25_address *)dev->dev_addr, NULL); if (err) return err; netif_start_queue(dev); return 0; } static int nr_close(struct net_device *dev) { ax25_listen_release((const ax25_address *)dev->dev_addr, NULL); netif_stop_queue(dev); return 0; } static netdev_tx_t nr_xmit(struct sk_buff *skb, struct net_device *dev) { struct net_device_stats *stats = &dev->stats; unsigned int len = skb->len; if (!nr_route_frame(skb, NULL)) { kfree_skb(skb); stats->tx_errors++; return NETDEV_TX_OK; } stats->tx_packets++; stats->tx_bytes += len; return NETDEV_TX_OK; } static const struct header_ops nr_header_ops = { .create = nr_header, }; static const struct net_device_ops nr_netdev_ops = { .ndo_open = nr_open, .ndo_stop = nr_close, .ndo_start_xmit = nr_xmit, .ndo_set_mac_address = nr_set_mac_address, }; void nr_setup(struct net_device *dev) { dev->mtu = NR_MAX_PACKET_SIZE; dev->netdev_ops = &nr_netdev_ops; dev->header_ops = &nr_header_ops; dev->hard_header_len = NR_NETWORK_LEN + NR_TRANSPORT_LEN; dev->addr_len = AX25_ADDR_LEN; dev->type = ARPHRD_NETROM; /* New-style flags. */ dev->flags = IFF_NOARP; }
29 29 29 22 34 5 29 24 3 22 28 20 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 // SPDX-License-Identifier: GPL-2.0 /* * The Virtual DTV test driver serves as a reference DVB driver and helps * validate the existing APIs in the media subsystem. It can also aid * developers working on userspace applications. * * When this module is loaded, it will attempt to modprobe 'dvb_vidtv_tuner' * and 'dvb_vidtv_demod'. * * Copyright (C) 2020 Daniel W. S. Almeida */ #include <linux/dev_printk.h> #include <linux/moduleparam.h> #include <linux/mutex.h> #include <linux/platform_device.h> #include <linux/time.h> #include <linux/types.h> #include <linux/workqueue.h> #include <media/dvbdev.h> #include <media/media-device.h> #include "vidtv_bridge.h" #include "vidtv_common.h" #include "vidtv_demod.h" #include "vidtv_mux.h" #include "vidtv_ts.h" #include "vidtv_tuner.h" #define MUX_BUF_MIN_SZ 90164 #define MUX_BUF_MAX_SZ (MUX_BUF_MIN_SZ * 10) #define TUNER_DEFAULT_ADDR 0x68 #define DEMOD_DEFAULT_ADDR 0x60 #define VIDTV_DEFAULT_NETWORK_ID 0xff44 #define VIDTV_DEFAULT_NETWORK_NAME "LinuxTV.org" #define VIDTV_DEFAULT_TS_ID 0x4081 /* * The LNBf fake parameters here are the ranges used by an * Universal (extended) European LNBf, which is likely the most common LNBf * found on Satellite digital TV system nowadays. */ #define LNB_CUT_FREQUENCY 11700000 /* high IF frequency */ #define LNB_LOW_FREQ 9750000 /* low IF frequency */ #define LNB_HIGH_FREQ 10600000 /* transition frequency */ static unsigned int drop_tslock_prob_on_low_snr; module_param(drop_tslock_prob_on_low_snr, uint, 0444); MODULE_PARM_DESC(drop_tslock_prob_on_low_snr, "Probability of losing the TS lock if the signal quality is bad"); static unsigned int recover_tslock_prob_on_good_snr; module_param(recover_tslock_prob_on_good_snr, uint, 0444); MODULE_PARM_DESC(recover_tslock_prob_on_good_snr, "Probability recovering the TS lock when the signal improves"); static unsigned int mock_power_up_delay_msec; module_param(mock_power_up_delay_msec, uint, 0444); MODULE_PARM_DESC(mock_power_up_delay_msec, "Simulate a power up delay"); static unsigned int mock_tune_delay_msec; module_param(mock_tune_delay_msec, uint, 0444); MODULE_PARM_DESC(mock_tune_delay_msec, "Simulate a tune delay"); static unsigned int vidtv_valid_dvb_t_freqs[NUM_VALID_TUNER_FREQS] = { 474000000 }; module_param_array(vidtv_valid_dvb_t_freqs, uint, NULL, 0444); MODULE_PARM_DESC(vidtv_valid_dvb_t_freqs, "Valid DVB-T frequencies to simulate, in Hz"); static unsigned int vidtv_valid_dvb_c_freqs[NUM_VALID_TUNER_FREQS] = { 474000000 }; module_param_array(vidtv_valid_dvb_c_freqs, uint, NULL, 0444); MODULE_PARM_DESC(vidtv_valid_dvb_c_freqs, "Valid DVB-C frequencies to simulate, in Hz"); static unsigned int vidtv_valid_dvb_s_freqs[NUM_VALID_TUNER_FREQS] = { 11362000 }; module_param_array(vidtv_valid_dvb_s_freqs, uint, NULL, 0444); MODULE_PARM_DESC(vidtv_valid_dvb_s_freqs, "Valid DVB-S/S2 frequencies to simulate at Ku-Band, in kHz"); static unsigned int max_frequency_shift_hz; module_param(max_frequency_shift_hz, uint, 0444); MODULE_PARM_DESC(max_frequency_shift_hz, "Maximum shift in HZ allowed when tuning in a channel"); DVB_DEFINE_MOD_OPT_ADAPTER_NR(adapter_nums); /* * Influences the signal acquisition time. See ISO/IEC 13818-1 : 2000. p. 113. */ static unsigned int si_period_msec = 40; module_param(si_period_msec, uint, 0444); MODULE_PARM_DESC(si_period_msec, "How often to send SI packets. Default: 40ms"); static unsigned int pcr_period_msec = 40; module_param(pcr_period_msec, uint, 0444); MODULE_PARM_DESC(pcr_period_msec, "How often to send PCR packets. Default: 40ms"); static unsigned int mux_rate_kbytes_sec = 4096; module_param(mux_rate_kbytes_sec, uint, 0444); MODULE_PARM_DESC(mux_rate_kbytes_sec, "Mux rate: will pad stream if below"); static unsigned int pcr_pid = 0x200; module_param(pcr_pid, uint, 0444); MODULE_PARM_DESC(pcr_pid, "PCR PID for all channels: defaults to 0x200"); static unsigned int mux_buf_sz_pkts; module_param(mux_buf_sz_pkts, uint, 0444); MODULE_PARM_DESC(mux_buf_sz_pkts, "Size for the internal mux buffer in multiples of 188 bytes"); static u32 vidtv_bridge_mux_buf_sz_for_mux_rate(void) { u32 max_elapsed_time_msecs = VIDTV_MAX_SLEEP_USECS / USEC_PER_MSEC; u32 mux_buf_sz = mux_buf_sz_pkts * TS_PACKET_LEN; u32 nbytes_expected; nbytes_expected = mux_rate_kbytes_sec; nbytes_expected *= max_elapsed_time_msecs; mux_buf_sz = roundup(nbytes_expected, TS_PACKET_LEN); mux_buf_sz += mux_buf_sz / 10; if (mux_buf_sz < MUX_BUF_MIN_SZ) mux_buf_sz = MUX_BUF_MIN_SZ; if (mux_buf_sz > MUX_BUF_MAX_SZ) mux_buf_sz = MUX_BUF_MAX_SZ; return mux_buf_sz; } static bool vidtv_bridge_check_demod_lock(struct vidtv_dvb *dvb, u32 n) { enum fe_status status; dvb->fe[n]->ops.read_status(dvb->fe[n], &status); return status == (FE_HAS_SIGNAL | FE_HAS_CARRIER | FE_HAS_VITERBI | FE_HAS_SYNC | FE_HAS_LOCK); } /* * called on a separate thread by the mux when new packets become available */ static void vidtv_bridge_on_new_pkts_avail(void *priv, u8 *buf, u32 npkts) { struct vidtv_dvb *dvb = priv; /* drop packets if we lose the lock */ if (vidtv_bridge_check_demod_lock(dvb, 0)) dvb_dmx_swfilter_packets(&dvb->demux, buf, npkts); } static int vidtv_start_streaming(struct vidtv_dvb *dvb) { struct vidtv_mux_init_args mux_args = { .mux_rate_kbytes_sec = mux_rate_kbytes_sec, .on_new_packets_available_cb = vidtv_bridge_on_new_pkts_avail, .pcr_period_usecs = pcr_period_msec * USEC_PER_MSEC, .si_period_usecs = si_period_msec * USEC_PER_MSEC, .pcr_pid = pcr_pid, .transport_stream_id = VIDTV_DEFAULT_TS_ID, .network_id = VIDTV_DEFAULT_NETWORK_ID, .network_name = VIDTV_DEFAULT_NETWORK_NAME, .priv = dvb, }; struct device *dev = &dvb->pdev->dev; u32 mux_buf_sz; if (dvb->streaming) { dev_warn_ratelimited(dev, "Already streaming. Skipping.\n"); return 0; } if (mux_buf_sz_pkts) mux_buf_sz = mux_buf_sz_pkts; else mux_buf_sz = vidtv_bridge_mux_buf_sz_for_mux_rate(); mux_args.mux_buf_sz = mux_buf_sz; dvb->mux = vidtv_mux_init(dvb->fe[0], dev, &mux_args); if (!dvb->mux) return -ENOMEM; dvb->streaming = true; vidtv_mux_start_thread(dvb->mux); dev_dbg_ratelimited(dev, "Started streaming\n"); return 0; } static int vidtv_stop_streaming(struct vidtv_dvb *dvb) { struct device *dev = &dvb->pdev->dev; if (!dvb->streaming) { dev_warn_ratelimited(dev, "No streaming. Skipping.\n"); return 0; } dvb->streaming = false; vidtv_mux_stop_thread(dvb->mux); vidtv_mux_destroy(dvb->mux); dvb->mux = NULL; dev_dbg_ratelimited(dev, "Stopped streaming\n"); return 0; } static int vidtv_start_feed(struct dvb_demux_feed *feed) { struct dvb_demux *demux = feed->demux; struct vidtv_dvb *dvb = demux->priv; int ret; int rc; if (!demux->dmx.frontend) return -EINVAL; mutex_lock(&dvb->feed_lock); dvb->nfeeds++; rc = dvb->nfeeds; if (dvb->nfeeds == 1) { ret = vidtv_start_streaming(dvb); if (ret < 0) rc = ret; } mutex_unlock(&dvb->feed_lock); return rc; } static int vidtv_stop_feed(struct dvb_demux_feed *feed) { struct dvb_demux *demux = feed->demux; struct vidtv_dvb *dvb = demux->priv; int err = 0; mutex_lock(&dvb->feed_lock); dvb->nfeeds--; if (!dvb->nfeeds) err = vidtv_stop_streaming(dvb); mutex_unlock(&dvb->feed_lock); return err; } static struct dvb_frontend *vidtv_get_frontend_ptr(struct i2c_client *c) { struct vidtv_demod_state *state = i2c_get_clientdata(c); /* the demod will set this when its probe function runs */ return &state->frontend; } static int vidtv_master_xfer(struct i2c_adapter *i2c_adap, struct i2c_msg msgs[], int num) { /* * Right now, this virtual driver doesn't really send or receive * messages from I2C. A real driver will require an implementation * here. */ return 0; } static u32 vidtv_i2c_func(struct i2c_adapter *adapter) { return I2C_FUNC_I2C; } static const struct i2c_algorithm vidtv_i2c_algorithm = { .master_xfer = vidtv_master_xfer, .functionality = vidtv_i2c_func, }; static int vidtv_bridge_i2c_register_adap(struct vidtv_dvb *dvb) { struct i2c_adapter *i2c_adapter = &dvb->i2c_adapter; strscpy(i2c_adapter->name, "vidtv_i2c", sizeof(i2c_adapter->name)); i2c_adapter->owner = THIS_MODULE; i2c_adapter->algo = &vidtv_i2c_algorithm; i2c_adapter->algo_data = NULL; i2c_adapter->timeout = 500; i2c_adapter->retries = 3; i2c_adapter->dev.parent = &dvb->pdev->dev; i2c_set_adapdata(i2c_adapter, dvb); return i2c_add_adapter(&dvb->i2c_adapter); } static int vidtv_bridge_register_adap(struct vidtv_dvb *dvb) { int ret = 0; ret = dvb_register_adapter(&dvb->adapter, KBUILD_MODNAME, THIS_MODULE, &dvb->i2c_adapter.dev, adapter_nums); return ret; } static int vidtv_bridge_dmx_init(struct vidtv_dvb *dvb) { dvb->demux.dmx.capabilities = DMX_TS_FILTERING | DMX_SECTION_FILTERING; dvb->demux.priv = dvb; dvb->demux.filternum = 256; dvb->demux.feednum = 256; dvb->demux.start_feed = vidtv_start_feed; dvb->demux.stop_feed = vidtv_stop_feed; return dvb_dmx_init(&dvb->demux); } static int vidtv_bridge_dmxdev_init(struct vidtv_dvb *dvb) { dvb->dmx_dev.filternum = 256; dvb->dmx_dev.demux = &dvb->demux.dmx; dvb->dmx_dev.capabilities = 0; return dvb_dmxdev_init(&dvb->dmx_dev, &dvb->adapter); } static int vidtv_bridge_probe_demod(struct vidtv_dvb *dvb, u32 n) { struct vidtv_demod_config cfg = { .drop_tslock_prob_on_low_snr = drop_tslock_prob_on_low_snr, .recover_tslock_prob_on_good_snr = recover_tslock_prob_on_good_snr, }; dvb->i2c_client_demod[n] = dvb_module_probe("dvb_vidtv_demod", NULL, &dvb->i2c_adapter, DEMOD_DEFAULT_ADDR, &cfg); /* driver will not work anyways so bail out */ if (!dvb->i2c_client_demod[n]) return -ENODEV; /* retrieve a ptr to the frontend state */ dvb->fe[n] = vidtv_get_frontend_ptr(dvb->i2c_client_demod[n]); return 0; } static int vidtv_bridge_probe_tuner(struct vidtv_dvb *dvb, u32 n) { struct vidtv_tuner_config cfg = { .fe = dvb->fe[n], .mock_power_up_delay_msec = mock_power_up_delay_msec, .mock_tune_delay_msec = mock_tune_delay_msec, }; u32 freq; int i; /* TODO: check if the frequencies are at a valid range */ memcpy(cfg.vidtv_valid_dvb_t_freqs, vidtv_valid_dvb_t_freqs, sizeof(vidtv_valid_dvb_t_freqs)); memcpy(cfg.vidtv_valid_dvb_c_freqs, vidtv_valid_dvb_c_freqs, sizeof(vidtv_valid_dvb_c_freqs)); /* * Convert Satellite frequencies from Ku-band in kHZ into S-band * frequencies in Hz. */ for (i = 0; i < ARRAY_SIZE(vidtv_valid_dvb_s_freqs); i++) { freq = vidtv_valid_dvb_s_freqs[i]; if (freq) { if (freq < LNB_CUT_FREQUENCY) freq = abs(freq - LNB_LOW_FREQ); else freq = abs(freq - LNB_HIGH_FREQ); } cfg.vidtv_valid_dvb_s_freqs[i] = freq; } cfg.max_frequency_shift_hz = max_frequency_shift_hz; dvb->i2c_client_tuner[n] = dvb_module_probe("dvb_vidtv_tuner", NULL, &dvb->i2c_adapter, TUNER_DEFAULT_ADDR, &cfg); return (dvb->i2c_client_tuner[n]) ? 0 : -ENODEV; } static int vidtv_bridge_dvb_init(struct vidtv_dvb *dvb) { int ret, i, j; ret = vidtv_bridge_i2c_register_adap(dvb); if (ret < 0) goto fail_i2c; ret = vidtv_bridge_register_adap(dvb); if (ret < 0) goto fail_adapter; dvb_register_media_controller(&dvb->adapter, &dvb->mdev); for (i = 0; i < NUM_FE; ++i) { ret = vidtv_bridge_probe_demod(dvb, i); if (ret < 0) goto fail_demod_probe; ret = vidtv_bridge_probe_tuner(dvb, i); if (ret < 0) goto fail_tuner_probe; ret = dvb_register_frontend(&dvb->adapter, dvb->fe[i]); if (ret < 0) goto fail_fe; } ret = vidtv_bridge_dmx_init(dvb); if (ret < 0) goto fail_dmx; ret = vidtv_bridge_dmxdev_init(dvb); if (ret < 0) goto fail_dmx_dev; for (j = 0; j < NUM_FE; ++j) { ret = dvb->demux.dmx.connect_frontend(&dvb->demux.dmx, &dvb->dmx_fe[j]); if (ret < 0) goto fail_dmx_conn; /* * The source of the demux is a frontend connected * to the demux. */ dvb->dmx_fe[j].source = DMX_FRONTEND_0; } return ret; fail_dmx_conn: for (j = j - 1; j >= 0; --j) dvb->demux.dmx.remove_frontend(&dvb->demux.dmx, &dvb->dmx_fe[j]); dvb_dmxdev_release(&dvb->dmx_dev); fail_dmx_dev: dvb_dmx_release(&dvb->demux); fail_dmx: fail_demod_probe: for (i = i - 1; i >= 0; --i) { dvb_unregister_frontend(dvb->fe[i]); fail_fe: dvb_module_release(dvb->i2c_client_tuner[i]); fail_tuner_probe: dvb_module_release(dvb->i2c_client_demod[i]); } fail_adapter: dvb_unregister_adapter(&dvb->adapter); fail_i2c: i2c_del_adapter(&dvb->i2c_adapter); return ret; } static int vidtv_bridge_probe(struct platform_device *pdev) { struct vidtv_dvb *dvb; int ret; dvb = kzalloc(sizeof(*dvb), GFP_KERNEL); if (!dvb) return -ENOMEM; dvb->pdev = pdev; #ifdef CONFIG_MEDIA_CONTROLLER_DVB dvb->mdev.dev = &pdev->dev; strscpy(dvb->mdev.model, "vidtv", sizeof(dvb->mdev.model)); strscpy(dvb->mdev.bus_info, "platform:vidtv", sizeof(dvb->mdev.bus_info)); media_device_init(&dvb->mdev); #endif ret = vidtv_bridge_dvb_init(dvb); if (ret < 0) goto err_dvb; mutex_init(&dvb->feed_lock); platform_set_drvdata(pdev, dvb); #ifdef CONFIG_MEDIA_CONTROLLER_DVB ret = media_device_register(&dvb->mdev); if (ret) { dev_err(dvb->mdev.dev, "media device register failed (err=%d)\n", ret); goto err_media_device_register; } #endif /* CONFIG_MEDIA_CONTROLLER_DVB */ dev_info(&pdev->dev, "Successfully initialized vidtv!\n"); return ret; #ifdef CONFIG_MEDIA_CONTROLLER_DVB err_media_device_register: media_device_cleanup(&dvb->mdev); #endif /* CONFIG_MEDIA_CONTROLLER_DVB */ err_dvb: kfree(dvb); return ret; } static void vidtv_bridge_remove(struct platform_device *pdev) { struct vidtv_dvb *dvb; u32 i; dvb = platform_get_drvdata(pdev); #ifdef CONFIG_MEDIA_CONTROLLER_DVB media_device_unregister(&dvb->mdev); media_device_cleanup(&dvb->mdev); #endif /* CONFIG_MEDIA_CONTROLLER_DVB */ mutex_destroy(&dvb->feed_lock); for (i = 0; i < NUM_FE; ++i) { dvb_unregister_frontend(dvb->fe[i]); dvb_module_release(dvb->i2c_client_tuner[i]); dvb_module_release(dvb->i2c_client_demod[i]); } dvb_dmxdev_release(&dvb->dmx_dev); dvb_dmx_release(&dvb->demux); dvb_unregister_adapter(&dvb->adapter); dev_info(&pdev->dev, "Successfully removed vidtv\n"); } static void vidtv_bridge_dev_release(struct device *dev) { struct vidtv_dvb *dvb; dvb = dev_get_drvdata(dev); kfree(dvb); } static struct platform_device vidtv_bridge_dev = { .name = VIDTV_PDEV_NAME, .dev.release = vidtv_bridge_dev_release, }; static struct platform_driver vidtv_bridge_driver = { .driver = { .name = VIDTV_PDEV_NAME, }, .probe = vidtv_bridge_probe, .remove = vidtv_bridge_remove, }; static void __exit vidtv_bridge_exit(void) { platform_driver_unregister(&vidtv_bridge_driver); platform_device_unregister(&vidtv_bridge_dev); } static int __init vidtv_bridge_init(void) { int ret; ret = platform_device_register(&vidtv_bridge_dev); if (ret) return ret; ret = platform_driver_register(&vidtv_bridge_driver); if (ret) platform_device_unregister(&vidtv_bridge_dev); return ret; } module_init(vidtv_bridge_init); module_exit(vidtv_bridge_exit); MODULE_DESCRIPTION("Virtual Digital TV Test Driver"); MODULE_AUTHOR("Daniel W. S. Almeida"); MODULE_LICENSE("GPL"); MODULE_ALIAS("vidtv"); MODULE_ALIAS("dvb_vidtv");
16 8 8 16 2 2 2 1 2 2 2 18 2 16 18 18 9 1 14 13 13 13 18 2 2 2 2 19 17 1 18 5 18 18 9 17 16 14 15 17 1 17 18 17 17 18 3 12 3 13 2 17 10 1 2 10 7 3 10 7 3 10 10 10 5 3 10 10 26 27 28 1 9 10 8 18 18 6 3 28 1 4 28 28 29 22 29 28 19 10 19 29 29 1 1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (C) 2018-2019 HUAWEI, Inc. * https://www.huawei.com/ */ #include "internal.h" #include <linux/unaligned.h> #include <trace/events/erofs.h> struct z_erofs_maprecorder { struct inode *inode; struct erofs_map_blocks *map; unsigned long lcn; /* compression extent information gathered */ u8 type, headtype; u16 clusterofs; u16 delta[2]; erofs_blk_t pblk, compressedblks; erofs_off_t nextpackoff; bool partialref, in_mbox; }; static int z_erofs_load_full_lcluster(struct z_erofs_maprecorder *m, unsigned long lcn) { struct inode *const inode = m->inode; struct erofs_inode *const vi = EROFS_I(inode); const erofs_off_t pos = Z_EROFS_FULL_INDEX_START(erofs_iloc(inode) + vi->inode_isize + vi->xattr_isize) + lcn * sizeof(struct z_erofs_lcluster_index); struct z_erofs_lcluster_index *di; unsigned int advise; di = erofs_read_metabuf(&m->map->buf, inode->i_sb, pos, m->in_mbox); if (IS_ERR(di)) return PTR_ERR(di); m->lcn = lcn; m->nextpackoff = pos + sizeof(struct z_erofs_lcluster_index); advise = le16_to_cpu(di->di_advise); m->type = advise & Z_EROFS_LI_LCLUSTER_TYPE_MASK; if (m->type == Z_EROFS_LCLUSTER_TYPE_NONHEAD) { m->clusterofs = 1 << vi->z_lclusterbits; m->delta[0] = le16_to_cpu(di->di_u.delta[0]); if (m->delta[0] & Z_EROFS_LI_D0_CBLKCNT) { if (!(vi->z_advise & (Z_EROFS_ADVISE_BIG_PCLUSTER_1 | Z_EROFS_ADVISE_BIG_PCLUSTER_2))) { DBG_BUGON(1); return -EFSCORRUPTED; } m->compressedblks = m->delta[0] & ~Z_EROFS_LI_D0_CBLKCNT; m->delta[0] = 1; } m->delta[1] = le16_to_cpu(di->di_u.delta[1]); } else { m->partialref = !!(advise & Z_EROFS_LI_PARTIAL_REF); m->clusterofs = le16_to_cpu(di->di_clusterofs); m->pblk = le32_to_cpu(di->di_u.blkaddr); } return 0; } static unsigned int decode_compactedbits(unsigned int lobits, u8 *in, unsigned int pos, u8 *type) { const unsigned int v = get_unaligned_le32(in + pos / 8) >> (pos & 7); const unsigned int lo = v & ((1 << lobits) - 1); *type = (v >> lobits) & 3; return lo; } static int get_compacted_la_distance(unsigned int lobits, unsigned int encodebits, unsigned int vcnt, u8 *in, int i) { unsigned int lo, d1 = 0; u8 type; DBG_BUGON(i >= vcnt); do { lo = decode_compactedbits(lobits, in, encodebits * i, &type); if (type != Z_EROFS_LCLUSTER_TYPE_NONHEAD) return d1; ++d1; } while (++i < vcnt); /* vcnt - 1 (Z_EROFS_LCLUSTER_TYPE_NONHEAD) item */ if (!(lo & Z_EROFS_LI_D0_CBLKCNT)) d1 += lo - 1; return d1; } static int z_erofs_load_compact_lcluster(struct z_erofs_maprecorder *m, unsigned long lcn, bool lookahead) { struct inode *const inode = m->inode; struct erofs_inode *const vi = EROFS_I(inode); const erofs_off_t ebase = Z_EROFS_MAP_HEADER_END(erofs_iloc(inode) + vi->inode_isize + vi->xattr_isize); const unsigned int lclusterbits = vi->z_lclusterbits; const unsigned int totalidx = erofs_iblks(inode); unsigned int compacted_4b_initial, compacted_2b, amortizedshift; unsigned int vcnt, lo, lobits, encodebits, nblk, bytes; bool big_pcluster = vi->z_advise & Z_EROFS_ADVISE_BIG_PCLUSTER_1; erofs_off_t pos; u8 *in, type; int i; if (lcn >= totalidx || lclusterbits > 14) return -EINVAL; m->lcn = lcn; /* used to align to 32-byte (compacted_2b) alignment */ compacted_4b_initial = ((32 - ebase % 32) / 4) & 7; compacted_2b = 0; if ((vi->z_advise & Z_EROFS_ADVISE_COMPACTED_2B) && compacted_4b_initial < totalidx) compacted_2b = rounddown(totalidx - compacted_4b_initial, 16); pos = ebase; amortizedshift = 2; /* compact_4b */ if (lcn >= compacted_4b_initial) { pos += compacted_4b_initial * 4; lcn -= compacted_4b_initial; if (lcn < compacted_2b) { amortizedshift = 1; } else { pos += compacted_2b * 2; lcn -= compacted_2b; } } pos += lcn * (1 << amortizedshift); /* figure out the lcluster count in this pack */ if (1 << amortizedshift == 4 && lclusterbits <= 14) vcnt = 2; else if (1 << amortizedshift == 2 && lclusterbits <= 12) vcnt = 16; else return -EOPNOTSUPP; in = erofs_read_metabuf(&m->map->buf, inode->i_sb, pos, m->in_mbox); if (IS_ERR(in)) return PTR_ERR(in); /* it doesn't equal to round_up(..) */ m->nextpackoff = round_down(pos, vcnt << amortizedshift) + (vcnt << amortizedshift); lobits = max(lclusterbits, ilog2(Z_EROFS_LI_D0_CBLKCNT) + 1U); encodebits = ((vcnt << amortizedshift) - sizeof(__le32)) * 8 / vcnt; bytes = pos & ((vcnt << amortizedshift) - 1); in -= bytes; i = bytes >> amortizedshift; lo = decode_compactedbits(lobits, in, encodebits * i, &type); m->type = type; if (type == Z_EROFS_LCLUSTER_TYPE_NONHEAD) { m->clusterofs = 1 << lclusterbits; /* figure out lookahead_distance: delta[1] if needed */ if (lookahead) m->delta[1] = get_compacted_la_distance(lobits, encodebits, vcnt, in, i); if (lo & Z_EROFS_LI_D0_CBLKCNT) { if (!big_pcluster) { DBG_BUGON(1); return -EFSCORRUPTED; } m->compressedblks = lo & ~Z_EROFS_LI_D0_CBLKCNT; m->delta[0] = 1; return 0; } else if (i + 1 != (int)vcnt) { m->delta[0] = lo; return 0; } /* * since the last lcluster in the pack is special, * of which lo saves delta[1] rather than delta[0]. * Hence, get delta[0] by the previous lcluster indirectly. */ lo = decode_compactedbits(lobits, in, encodebits * (i - 1), &type); if (type != Z_EROFS_LCLUSTER_TYPE_NONHEAD) lo = 0; else if (lo & Z_EROFS_LI_D0_CBLKCNT) lo = 1; m->delta[0] = lo + 1; return 0; } m->clusterofs = lo; m->delta[0] = 0; /* figout out blkaddr (pblk) for HEAD lclusters */ if (!big_pcluster) { nblk = 1; while (i > 0) { --i; lo = decode_compactedbits(lobits, in, encodebits * i, &type); if (type == Z_EROFS_LCLUSTER_TYPE_NONHEAD) i -= lo; if (i >= 0) ++nblk; } } else { nblk = 0; while (i > 0) { --i; lo = decode_compactedbits(lobits, in, encodebits * i, &type); if (type == Z_EROFS_LCLUSTER_TYPE_NONHEAD) { if (lo & Z_EROFS_LI_D0_CBLKCNT) { --i; nblk += lo & ~Z_EROFS_LI_D0_CBLKCNT; continue; } /* bigpcluster shouldn't have plain d0 == 1 */ if (lo <= 1) { DBG_BUGON(1); return -EFSCORRUPTED; } i -= lo - 2; continue; } ++nblk; } } in += (vcnt << amortizedshift) - sizeof(__le32); m->pblk = le32_to_cpu(*(__le32 *)in) + nblk; return 0; } static int z_erofs_load_lcluster_from_disk(struct z_erofs_maprecorder *m, unsigned int lcn, bool lookahead) { struct erofs_inode *vi = EROFS_I(m->inode); int err; if (vi->datalayout == EROFS_INODE_COMPRESSED_COMPACT) { err = z_erofs_load_compact_lcluster(m, lcn, lookahead); } else { DBG_BUGON(vi->datalayout != EROFS_INODE_COMPRESSED_FULL); err = z_erofs_load_full_lcluster(m, lcn); } if (err) return err; if (m->type >= Z_EROFS_LCLUSTER_TYPE_MAX) { erofs_err(m->inode->i_sb, "unknown type %u @ lcn %u of nid %llu", m->type, lcn, EROFS_I(m->inode)->nid); DBG_BUGON(1); return -EOPNOTSUPP; } else if (m->type != Z_EROFS_LCLUSTER_TYPE_NONHEAD && m->clusterofs >= (1 << vi->z_lclusterbits)) { DBG_BUGON(1); return -EFSCORRUPTED; } return 0; } static int z_erofs_extent_lookback(struct z_erofs_maprecorder *m, unsigned int lookback_distance) { struct super_block *sb = m->inode->i_sb; struct erofs_inode *const vi = EROFS_I(m->inode); const unsigned int lclusterbits = vi->z_lclusterbits; while (m->lcn >= lookback_distance) { unsigned long lcn = m->lcn - lookback_distance; int err; if (!lookback_distance) break; err = z_erofs_load_lcluster_from_disk(m, lcn, false); if (err) return err; if (m->type == Z_EROFS_LCLUSTER_TYPE_NONHEAD) { lookback_distance = m->delta[0]; continue; } m->headtype = m->type; m->map->m_la = (lcn << lclusterbits) | m->clusterofs; return 0; } erofs_err(sb, "bogus lookback distance %u @ lcn %lu of nid %llu", lookback_distance, m->lcn, vi->nid); DBG_BUGON(1); return -EFSCORRUPTED; } static int z_erofs_get_extent_compressedlen(struct z_erofs_maprecorder *m, unsigned int initial_lcn) { struct inode *inode = m->inode; struct super_block *sb = inode->i_sb; struct erofs_inode *vi = EROFS_I(inode); bool bigpcl1 = vi->z_advise & Z_EROFS_ADVISE_BIG_PCLUSTER_1; bool bigpcl2 = vi->z_advise & Z_EROFS_ADVISE_BIG_PCLUSTER_2; unsigned long lcn = m->lcn + 1; int err; DBG_BUGON(m->type == Z_EROFS_LCLUSTER_TYPE_NONHEAD); DBG_BUGON(m->type != m->headtype); if ((m->headtype == Z_EROFS_LCLUSTER_TYPE_HEAD1 && !bigpcl1) || ((m->headtype == Z_EROFS_LCLUSTER_TYPE_PLAIN || m->headtype == Z_EROFS_LCLUSTER_TYPE_HEAD2) && !bigpcl2) || (lcn << vi->z_lclusterbits) >= inode->i_size) m->compressedblks = 1; if (m->compressedblks) goto out; err = z_erofs_load_lcluster_from_disk(m, lcn, false); if (err) return err; /* * If the 1st NONHEAD lcluster has already been handled initially w/o * valid compressedblks, which means at least it mustn't be CBLKCNT, or * an internal implemenatation error is detected. * * The following code can also handle it properly anyway, but let's * BUG_ON in the debugging mode only for developers to notice that. */ DBG_BUGON(lcn == initial_lcn && m->type == Z_EROFS_LCLUSTER_TYPE_NONHEAD); if (m->type == Z_EROFS_LCLUSTER_TYPE_NONHEAD && m->delta[0] != 1) { erofs_err(sb, "bogus CBLKCNT @ lcn %lu of nid %llu", lcn, vi->nid); DBG_BUGON(1); return -EFSCORRUPTED; } /* * if the 1st NONHEAD lcluster is actually PLAIN or HEAD type rather * than CBLKCNT, it's a 1 block-sized pcluster. */ if (m->type != Z_EROFS_LCLUSTER_TYPE_NONHEAD || !m->compressedblks) m->compressedblks = 1; out: m->map->m_plen = erofs_pos(sb, m->compressedblks); return 0; } static int z_erofs_get_extent_decompressedlen(struct z_erofs_maprecorder *m) { struct inode *inode = m->inode; struct erofs_inode *vi = EROFS_I(inode); struct erofs_map_blocks *map = m->map; unsigned int lclusterbits = vi->z_lclusterbits; u64 lcn = m->lcn, headlcn = map->m_la >> lclusterbits; int err; while (1) { /* handle the last EOF pcluster (no next HEAD lcluster) */ if ((lcn << lclusterbits) >= inode->i_size) { map->m_llen = inode->i_size - map->m_la; return 0; } err = z_erofs_load_lcluster_from_disk(m, lcn, true); if (err) return err; if (m->type == Z_EROFS_LCLUSTER_TYPE_NONHEAD) { /* work around invalid d1 generated by pre-1.0 mkfs */ if (unlikely(!m->delta[1])) { m->delta[1] = 1; DBG_BUGON(1); } } else if (m->type < Z_EROFS_LCLUSTER_TYPE_MAX) { if (lcn != headlcn) break; /* ends at the next HEAD lcluster */ m->delta[1] = 1; } lcn += m->delta[1]; } map->m_llen = (lcn << lclusterbits) + m->clusterofs - map->m_la; return 0; } static int z_erofs_map_blocks_fo(struct inode *inode, struct erofs_map_blocks *map, int flags) { struct erofs_inode *vi = EROFS_I(inode); struct super_block *sb = inode->i_sb; bool fragment = vi->z_advise & Z_EROFS_ADVISE_FRAGMENT_PCLUSTER; bool ztailpacking = vi->z_idata_size; unsigned int lclusterbits = vi->z_lclusterbits; struct z_erofs_maprecorder m = { .inode = inode, .map = map, .in_mbox = erofs_inode_in_metabox(inode), }; unsigned int endoff; unsigned long initial_lcn; unsigned long long ofs, end; int err; ofs = flags & EROFS_GET_BLOCKS_FINDTAIL ? inode->i_size - 1 : map->m_la; if (fragment && !(flags & EROFS_GET_BLOCKS_FINDTAIL) && !vi->z_tailextent_headlcn) { map->m_la = 0; map->m_llen = inode->i_size; map->m_flags = EROFS_MAP_FRAGMENT; return 0; } initial_lcn = ofs >> lclusterbits; endoff = ofs & ((1 << lclusterbits) - 1); err = z_erofs_load_lcluster_from_disk(&m, initial_lcn, false); if (err) goto unmap_out; if ((flags & EROFS_GET_BLOCKS_FINDTAIL) && ztailpacking) vi->z_fragmentoff = m.nextpackoff; map->m_flags = EROFS_MAP_MAPPED | EROFS_MAP_ENCODED; end = (m.lcn + 1ULL) << lclusterbits; if (m.type != Z_EROFS_LCLUSTER_TYPE_NONHEAD && endoff >= m.clusterofs) { m.headtype = m.type; map->m_la = (m.lcn << lclusterbits) | m.clusterofs; /* * For ztailpacking files, in order to inline data more * effectively, special EOF lclusters are now supported * which can have three parts at most. */ if (ztailpacking && end > inode->i_size) end = inode->i_size; } else { if (m.type != Z_EROFS_LCLUSTER_TYPE_NONHEAD) { end = (m.lcn << lclusterbits) | m.clusterofs; map->m_flags |= EROFS_MAP_FULL_MAPPED; m.delta[0] = 1; } /* get the corresponding first chunk */ err = z_erofs_extent_lookback(&m, m.delta[0]); if (err) goto unmap_out; } if (m.partialref) map->m_flags |= EROFS_MAP_PARTIAL_REF; map->m_llen = end - map->m_la; if (flags & EROFS_GET_BLOCKS_FINDTAIL) { vi->z_tailextent_headlcn = m.lcn; /* for non-compact indexes, fragmentoff is 64 bits */ if (fragment && vi->datalayout == EROFS_INODE_COMPRESSED_FULL) vi->z_fragmentoff |= (u64)m.pblk << 32; } if (ztailpacking && m.lcn == vi->z_tailextent_headlcn) { map->m_flags |= EROFS_MAP_META; map->m_pa = vi->z_fragmentoff; map->m_plen = vi->z_idata_size; if (erofs_blkoff(sb, map->m_pa) + map->m_plen > sb->s_blocksize) { erofs_err(sb, "ztailpacking inline data across blocks @ nid %llu", vi->nid); err = -EFSCORRUPTED; goto unmap_out; } } else if (fragment && m.lcn == vi->z_tailextent_headlcn) { map->m_flags = EROFS_MAP_FRAGMENT; } else { map->m_pa = erofs_pos(sb, m.pblk); err = z_erofs_get_extent_compressedlen(&m, initial_lcn); if (err) goto unmap_out; } if (m.headtype == Z_EROFS_LCLUSTER_TYPE_PLAIN) { if (map->m_llen > map->m_plen) { DBG_BUGON(1); err = -EFSCORRUPTED; goto unmap_out; } if (vi->z_advise & Z_EROFS_ADVISE_INTERLACED_PCLUSTER) map->m_algorithmformat = Z_EROFS_COMPRESSION_INTERLACED; else map->m_algorithmformat = Z_EROFS_COMPRESSION_SHIFTED; } else if (m.headtype == Z_EROFS_LCLUSTER_TYPE_HEAD2) { map->m_algorithmformat = vi->z_algorithmtype[1]; } else { map->m_algorithmformat = vi->z_algorithmtype[0]; } if ((flags & EROFS_GET_BLOCKS_FIEMAP) || ((flags & EROFS_GET_BLOCKS_READMORE) && (map->m_algorithmformat == Z_EROFS_COMPRESSION_LZMA || map->m_algorithmformat == Z_EROFS_COMPRESSION_DEFLATE || map->m_algorithmformat == Z_EROFS_COMPRESSION_ZSTD) && map->m_llen >= i_blocksize(inode))) { err = z_erofs_get_extent_decompressedlen(&m); if (!err) map->m_flags |= EROFS_MAP_FULL_MAPPED; } unmap_out: erofs_unmap_metabuf(&m.map->buf); return err; } static int z_erofs_map_blocks_ext(struct inode *inode, struct erofs_map_blocks *map, int flags) { struct erofs_inode *vi = EROFS_I(inode); struct super_block *sb = inode->i_sb; bool interlaced = vi->z_advise & Z_EROFS_ADVISE_INTERLACED_PCLUSTER; unsigned int recsz = z_erofs_extent_recsize(vi->z_advise); erofs_off_t pos = round_up(Z_EROFS_MAP_HEADER_END(erofs_iloc(inode) + vi->inode_isize + vi->xattr_isize), recsz); bool in_mbox = erofs_inode_in_metabox(inode); erofs_off_t lend = inode->i_size; erofs_off_t l, r, mid, pa, la, lstart; struct z_erofs_extent *ext; unsigned int fmt; bool last; map->m_flags = 0; if (recsz <= offsetof(struct z_erofs_extent, pstart_hi)) { if (recsz <= offsetof(struct z_erofs_extent, pstart_lo)) { ext = erofs_read_metabuf(&map->buf, sb, pos, in_mbox); if (IS_ERR(ext)) return PTR_ERR(ext); pa = le64_to_cpu(*(__le64 *)ext); pos += sizeof(__le64); lstart = 0; } else { lstart = round_down(map->m_la, 1 << vi->z_lclusterbits); pos += (lstart >> vi->z_lclusterbits) * recsz; pa = EROFS_NULL_ADDR; } for (; lstart <= map->m_la; lstart += 1 << vi->z_lclusterbits) { ext = erofs_read_metabuf(&map->buf, sb, pos, in_mbox); if (IS_ERR(ext)) return PTR_ERR(ext); map->m_plen = le32_to_cpu(ext->plen); if (pa != EROFS_NULL_ADDR) { map->m_pa = pa; pa += map->m_plen & Z_EROFS_EXTENT_PLEN_MASK; } else { map->m_pa = le32_to_cpu(ext->pstart_lo); } pos += recsz; } last = (lstart >= round_up(lend, 1 << vi->z_lclusterbits)); lend = min(lstart, lend); lstart -= 1 << vi->z_lclusterbits; } else { lstart = lend; for (l = 0, r = vi->z_extents; l < r; ) { mid = l + (r - l) / 2; ext = erofs_read_metabuf(&map->buf, sb, pos + mid * recsz, in_mbox); if (IS_ERR(ext)) return PTR_ERR(ext); la = le32_to_cpu(ext->lstart_lo); pa = le32_to_cpu(ext->pstart_lo) | (u64)le32_to_cpu(ext->pstart_hi) << 32; if (recsz > offsetof(struct z_erofs_extent, lstart_hi)) la |= (u64)le32_to_cpu(ext->lstart_hi) << 32; if (la > map->m_la) { r = mid; if (la > lend) { DBG_BUGON(1); return -EFSCORRUPTED; } lend = la; } else { l = mid + 1; if (map->m_la == la) r = min(l + 1, r); lstart = la; map->m_plen = le32_to_cpu(ext->plen); map->m_pa = pa; } } last = (l >= vi->z_extents); } if (lstart < lend) { map->m_la = lstart; if (last && (vi->z_advise & Z_EROFS_ADVISE_FRAGMENT_PCLUSTER)) { map->m_flags = EROFS_MAP_FRAGMENT; vi->z_fragmentoff = map->m_plen; if (recsz > offsetof(struct z_erofs_extent, pstart_lo)) vi->z_fragmentoff |= map->m_pa << 32; } else if (map->m_plen & Z_EROFS_EXTENT_PLEN_MASK) { map->m_flags |= EROFS_MAP_MAPPED | EROFS_MAP_FULL_MAPPED | EROFS_MAP_ENCODED; fmt = map->m_plen >> Z_EROFS_EXTENT_PLEN_FMT_BIT; if (fmt) map->m_algorithmformat = fmt - 1; else if (interlaced && !erofs_blkoff(sb, map->m_pa)) map->m_algorithmformat = Z_EROFS_COMPRESSION_INTERLACED; else map->m_algorithmformat = Z_EROFS_COMPRESSION_SHIFTED; if (map->m_plen & Z_EROFS_EXTENT_PLEN_PARTIAL) map->m_flags |= EROFS_MAP_PARTIAL_REF; map->m_plen &= Z_EROFS_EXTENT_PLEN_MASK; } } map->m_llen = lend - map->m_la; return 0; } static int z_erofs_fill_inode(struct inode *inode, struct erofs_map_blocks *map) { struct erofs_inode *const vi = EROFS_I(inode); struct super_block *const sb = inode->i_sb; struct z_erofs_map_header *h; erofs_off_t pos; int err = 0; if (test_bit(EROFS_I_Z_INITED_BIT, &vi->flags)) { /* * paired with smp_mb() at the end of the function to ensure * fields will only be observed after the bit is set. */ smp_mb(); return 0; } if (wait_on_bit_lock(&vi->flags, EROFS_I_BL_Z_BIT, TASK_KILLABLE)) return -ERESTARTSYS; if (test_bit(EROFS_I_Z_INITED_BIT, &vi->flags)) goto out_unlock; pos = ALIGN(erofs_iloc(inode) + vi->inode_isize + vi->xattr_isize, 8); h = erofs_read_metabuf(&map->buf, sb, pos, erofs_inode_in_metabox(inode)); if (IS_ERR(h)) { err = PTR_ERR(h); goto out_unlock; } /* * if the highest bit of the 8-byte map header is set, the whole file * is stored in the packed inode. The rest bits keeps z_fragmentoff. */ if (h->h_clusterbits >> Z_EROFS_FRAGMENT_INODE_BIT) { vi->z_advise = Z_EROFS_ADVISE_FRAGMENT_PCLUSTER; vi->z_fragmentoff = le64_to_cpu(*(__le64 *)h) ^ (1ULL << 63); vi->z_tailextent_headlcn = 0; goto done; } vi->z_advise = le16_to_cpu(h->h_advise); vi->z_lclusterbits = sb->s_blocksize_bits + (h->h_clusterbits & 15); if (vi->datalayout == EROFS_INODE_COMPRESSED_FULL && (vi->z_advise & Z_EROFS_ADVISE_EXTENTS)) { vi->z_extents = le32_to_cpu(h->h_extents_lo) | ((u64)le16_to_cpu(h->h_extents_hi) << 32); goto done; } vi->z_algorithmtype[0] = h->h_algorithmtype & 15; vi->z_algorithmtype[1] = h->h_algorithmtype >> 4; if (vi->z_advise & Z_EROFS_ADVISE_FRAGMENT_PCLUSTER) vi->z_fragmentoff = le32_to_cpu(h->h_fragmentoff); else if (vi->z_advise & Z_EROFS_ADVISE_INLINE_PCLUSTER) vi->z_idata_size = le16_to_cpu(h->h_idata_size); if (!erofs_sb_has_big_pcluster(EROFS_SB(sb)) && vi->z_advise & (Z_EROFS_ADVISE_BIG_PCLUSTER_1 | Z_EROFS_ADVISE_BIG_PCLUSTER_2)) { erofs_err(sb, "per-inode big pcluster without sb feature for nid %llu", vi->nid); err = -EFSCORRUPTED; goto out_unlock; } if (vi->datalayout == EROFS_INODE_COMPRESSED_COMPACT && !(vi->z_advise & Z_EROFS_ADVISE_BIG_PCLUSTER_1) ^ !(vi->z_advise & Z_EROFS_ADVISE_BIG_PCLUSTER_2)) { erofs_err(sb, "big pcluster head1/2 of compact indexes should be consistent for nid %llu", vi->nid); err = -EFSCORRUPTED; goto out_unlock; } if (vi->z_idata_size || (vi->z_advise & Z_EROFS_ADVISE_FRAGMENT_PCLUSTER)) { struct erofs_map_blocks tm = { .buf = __EROFS_BUF_INITIALIZER }; err = z_erofs_map_blocks_fo(inode, &tm, EROFS_GET_BLOCKS_FINDTAIL); erofs_put_metabuf(&tm.buf); if (err < 0) goto out_unlock; } done: /* paired with smp_mb() at the beginning of the function */ smp_mb(); set_bit(EROFS_I_Z_INITED_BIT, &vi->flags); out_unlock: clear_and_wake_up_bit(EROFS_I_BL_Z_BIT, &vi->flags); return err; } static int z_erofs_map_sanity_check(struct inode *inode, struct erofs_map_blocks *map) { struct erofs_sb_info *sbi = EROFS_I_SB(inode); u64 pend; if (!(map->m_flags & EROFS_MAP_ENCODED)) return 0; if (unlikely(map->m_algorithmformat >= Z_EROFS_COMPRESSION_RUNTIME_MAX)) { erofs_err(inode->i_sb, "unknown algorithm %d @ pos %llu for nid %llu, please upgrade kernel", map->m_algorithmformat, map->m_la, EROFS_I(inode)->nid); return -EOPNOTSUPP; } if (unlikely(map->m_algorithmformat < Z_EROFS_COMPRESSION_MAX && !(sbi->available_compr_algs & (1 << map->m_algorithmformat)))) { erofs_err(inode->i_sb, "inconsistent algorithmtype %u for nid %llu", map->m_algorithmformat, EROFS_I(inode)->nid); return -EFSCORRUPTED; } if (unlikely(map->m_plen > Z_EROFS_PCLUSTER_MAX_SIZE || map->m_llen > Z_EROFS_PCLUSTER_MAX_DSIZE)) return -EOPNOTSUPP; /* Filesystems beyond 48-bit physical block addresses are invalid */ if (unlikely(check_add_overflow(map->m_pa, map->m_plen, &pend) || (pend >> sbi->blkszbits) >= BIT_ULL(48))) return -EFSCORRUPTED; return 0; } int z_erofs_map_blocks_iter(struct inode *inode, struct erofs_map_blocks *map, int flags) { struct erofs_inode *const vi = EROFS_I(inode); int err = 0; trace_erofs_map_blocks_enter(inode, map, flags); if (map->m_la >= inode->i_size) { /* post-EOF unmapped extent */ map->m_llen = map->m_la + 1 - inode->i_size; map->m_la = inode->i_size; map->m_flags = 0; } else { err = z_erofs_fill_inode(inode, map); if (!err) { if (vi->datalayout == EROFS_INODE_COMPRESSED_FULL && (vi->z_advise & Z_EROFS_ADVISE_EXTENTS)) err = z_erofs_map_blocks_ext(inode, map, flags); else err = z_erofs_map_blocks_fo(inode, map, flags); } if (!err) err = z_erofs_map_sanity_check(inode, map); if (err) map->m_llen = 0; } trace_erofs_map_blocks_exit(inode, map, flags, err); return err; } static int z_erofs_iomap_begin_report(struct inode *inode, loff_t offset, loff_t length, unsigned int flags, struct iomap *iomap, struct iomap *srcmap) { int ret; struct erofs_map_blocks map = { .m_la = offset }; ret = z_erofs_map_blocks_iter(inode, &map, EROFS_GET_BLOCKS_FIEMAP); erofs_put_metabuf(&map.buf); if (ret < 0) return ret; iomap->bdev = inode->i_sb->s_bdev; iomap->offset = map.m_la; iomap->length = map.m_llen; if (map.m_flags & EROFS_MAP_MAPPED) { iomap->type = IOMAP_MAPPED; iomap->addr = map.m_flags & __EROFS_MAP_FRAGMENT ? IOMAP_NULL_ADDR : map.m_pa; } else { iomap->type = IOMAP_HOLE; iomap->addr = IOMAP_NULL_ADDR; /* * No strict rule on how to describe extents for post EOF, yet * we need to do like below. Otherwise, iomap itself will get * into an endless loop on post EOF. * * Calculate the effective offset by subtracting extent start * (map.m_la) from the requested offset, and add it to length. * (NB: offset >= map.m_la always) */ if (iomap->offset >= inode->i_size) iomap->length = length + offset - map.m_la; } iomap->flags = 0; return 0; } const struct iomap_ops z_erofs_iomap_report_ops = { .iomap_begin = z_erofs_iomap_begin_report, };
211 211 208 3 211 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 // SPDX-License-Identifier: GPL-2.0-only /* * Functions explicitly implemented for exec functionality which however are * explicitly VMA-only logic. */ #include "vma_internal.h" #include "vma.h" /* * Relocate a VMA downwards by shift bytes. There cannot be any VMAs between * this VMA and its relocated range, which will now reside at [vma->vm_start - * shift, vma->vm_end - shift). * * This function is almost certainly NOT what you want for anything other than * early executable temporary stack relocation. */ int relocate_vma_down(struct vm_area_struct *vma, unsigned long shift) { /* * The process proceeds as follows: * * 1) Use shift to calculate the new vma endpoints. * 2) Extend vma to cover both the old and new ranges. This ensures the * arguments passed to subsequent functions are consistent. * 3) Move vma's page tables to the new range. * 4) Free up any cleared pgd range. * 5) Shrink the vma to cover only the new range. */ struct mm_struct *mm = vma->vm_mm; unsigned long old_start = vma->vm_start; unsigned long old_end = vma->vm_end; unsigned long length = old_end - old_start; unsigned long new_start = old_start - shift; unsigned long new_end = old_end - shift; VMA_ITERATOR(vmi, mm, new_start); VMG_STATE(vmg, mm, &vmi, new_start, old_end, 0, vma->vm_pgoff); struct vm_area_struct *next; struct mmu_gather tlb; PAGETABLE_MOVE(pmc, vma, vma, old_start, new_start, length); BUG_ON(new_start > new_end); /* * ensure there are no vmas between where we want to go * and where we are */ if (vma != vma_next(&vmi)) return -EFAULT; vma_iter_prev_range(&vmi); /* * cover the whole range: [new_start, old_end) */ vmg.target = vma; if (vma_expand(&vmg)) return -ENOMEM; /* * move the page tables downwards, on failure we rely on * process cleanup to remove whatever mess we made. */ pmc.for_stack = true; if (length != move_page_tables(&pmc)) return -ENOMEM; tlb_gather_mmu(&tlb, mm); next = vma_next(&vmi); if (new_end > old_start) { /* * when the old and new regions overlap clear from new_end. */ free_pgd_range(&tlb, new_end, old_end, new_end, next ? next->vm_start : USER_PGTABLES_CEILING); } else { /* * otherwise, clean from old_start; this is done to not touch * the address space in [new_end, old_start) some architectures * have constraints on va-space that make this illegal (IA64) - * for the others its just a little faster. */ free_pgd_range(&tlb, old_start, old_end, new_end, next ? next->vm_start : USER_PGTABLES_CEILING); } tlb_finish_mmu(&tlb); vma_prev(&vmi); /* Shrink the vma to just the new range */ return vma_shrink(&vmi, vma, new_start, new_end, vma->vm_pgoff); } /* * Establish the stack VMA in an execve'd process, located temporarily at the * maximum stack address provided by the architecture. * * We later relocate this downwards in relocate_vma_down(). * * This function is almost certainly NOT what you want for anything other than * early executable initialisation. * * On success, returns 0 and sets *vmap to the stack VMA and *top_mem_p to the * maximum addressable location in the stack (that is capable of storing a * system word of data). */ int create_init_stack_vma(struct mm_struct *mm, struct vm_area_struct **vmap, unsigned long *top_mem_p) { unsigned long flags = VM_STACK_FLAGS | VM_STACK_INCOMPLETE_SETUP; int err; struct vm_area_struct *vma = vm_area_alloc(mm); if (!vma) return -ENOMEM; vma_set_anonymous(vma); if (mmap_write_lock_killable(mm)) { err = -EINTR; goto err_free; } /* * Need to be called with mmap write lock * held, to avoid race with ksmd. */ err = ksm_execve(mm); if (err) goto err_ksm; /* * Place the stack at the largest stack address the architecture * supports. Later, we'll move this to an appropriate place. We don't * use STACK_TOP because that can depend on attributes which aren't * configured yet. */ BUILD_BUG_ON(VM_STACK_FLAGS & VM_STACK_INCOMPLETE_SETUP); vma->vm_end = STACK_TOP_MAX; vma->vm_start = vma->vm_end - PAGE_SIZE; if (pgtable_supports_soft_dirty()) flags |= VM_SOFTDIRTY; vm_flags_init(vma, flags); vma->vm_page_prot = vm_get_page_prot(vma->vm_flags); err = insert_vm_struct(mm, vma); if (err) goto err; mm->stack_vm = mm->total_vm = 1; mmap_write_unlock(mm); *vmap = vma; *top_mem_p = vma->vm_end - sizeof(void *); return 0; err: ksm_exit(mm); err_ksm: mmap_write_unlock(mm); err_free: *vmap = NULL; vm_area_free(vma); return err; }
24 24 24 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282 2283 2284 2285 2286 2287 2288 2289 2290 2291 2292 2293 2294 2295 2296 2297 2298 2299 2300 2301 2302 2303 2304 2305 2306 2307 2308 2309 2310 2311 2312 2313 2314 2315 2316 2317 2318 2319 2320 2321 2322 2323 2324 2325 2326 2327 2328 2329 2330 2331 2332 2333 2334 2335 2336 2337 2338 2339 2340 2341 2342 2343 2344 2345 2346 2347 2348 2349 2350 2351 2352 2353 2354 2355 2356 2357 2358 2359 2360 2361 2362 2363 2364 2365 2366 2367 2368 2369 2370 2371 2372 2373 2374 2375 2376 2377 2378 2379 2380 2381 2382 2383 2384 2385 2386 2387 2388 2389 2390 2391 2392 2393 2394 2395 2396 2397 2398 2399 2400 2401 2402 2403 2404 2405 2406 2407 2408 2409 2410 2411 2412 2413 2414 2415 2416 2417 2418 2419 2420 2421 2422 2423 2424 2425 2426 2427 2428 2429 2430 2431 2432 2433 2434 2435 2436 2437 2438 2439 2440 2441 2442 2443 2444 2445 2446 2447 2448 2449 2450 2451 2452 2453 2454 2455 2456 2457 2458 2459 2460 2461 2462 2463 2464 2465 2466 2467 2468 2469 2470 2471 2472 2473 2474 2475 2476 2477 2478 2479 2480 2481 2482 2483 2484 2485 2486 2487 2488 2489 2490 2491 2492 2493 2494 2495 2496 2497 2498 2499 2500 2501 2502 2503 2504 2505 2506 2507 2508 2509 2510 2511 2512 2513 2514 2515 2516 2517 2518 2519 2520 2521 2522 2523 2524 2525 2526 2527 2528 2529 2530 2531 2532 2533 2534 2535 2536 2537 2538 2539 2540 2541 2542 2543 2544 2545 2546 2547 2548 2549 2550 2551 2552 2553 2554 2555 2556 2557 2558 2559 2560 2561 2562 2563 2564 2565 2566 2567 2568 2569 2570 2571 2572 2573 2574 2575 2576 2577 2578 2579 2580 2581 2582 2583 2584 2585 2586 2587 2588 2589 2590 2591 2592 2593 2594 2595 2596 2597 2598 2599 2600 2601 2602 2603 2604 2605 2606 2607 2608 2609 2610 2611 2612 2613 2614 2615 2616 2617 2618 2619 2620 2621 2622 2623 2624 2625 2626 2627 2628 2629 2630 2631 2632 2633 2634 2635 2636 2637 2638 2639 2640 2641 2642 2643 2644 2645 2646 2647 2648 2649 2650 2651 2652 2653 2654 2655 2656 2657 2658 2659 2660 2661 2662 2663 2664 2665 2666 2667 2668 2669 2670 2671 2672 2673 2674 2675 2676 2677 2678 2679 2680 2681 2682 2683 2684 2685 2686 2687 2688 2689 2690 2691 2692 2693 2694 2695 2696 2697 2698 2699 2700 2701 2702 2703 2704 2705 2706 2707 2708 2709 2710 2711 2712 2713 2714 2715 2716 2717 2718 2719 2720 2721 2722 2723 2724 2725 2726 2727 2728 2729 2730 2731 2732 2733 2734 2735 2736 2737 2738 2739 2740 2741 2742 2743 2744 2745 2746 2747 2748 2749 2750 2751 2752 2753 2754 2755 2756 2757 2758 2759 2760 2761 2762 2763 2764 2765 2766 2767 2768 2769 2770 2771 2772 2773 2774 2775 2776 2777 2778 2779 2780 2781 2782 2783 2784 2785 2786 2787 2788 2789 2790 2791 2792 2793 2794 2795 2796 2797 2798 2799 2800 2801 2802 2803 2804 2805 2806 2807 2808 2809 2810 2811 2812 2813 2814 2815 2816 2817 2818 2819 2820 2821 2822 2823 2824 2825 2826 2827 2828 2829 2830 2831 2832 2833 2834 2835 2836 2837 2838 2839 2840 2841 2842 2843 2844 2845 2846 2847 2848 2849 2850 2851 2852 2853 2854 2855 2856 2857 2858 2859 2860 2861 2862 2863 2864 2865 2866 2867 2868 2869 2870 2871 2872 2873 2874 2875 2876 2877 2878 2879 2880 2881 2882 2883 2884 2885 2886 2887 2888 2889 2890 2891 2892 2893 2894 2895 2896 2897 2898 2899 2900 2901 2902 2903 2904 2905 2906 2907 2908 2909 2910 2911 2912 2913 2914 2915 2916 2917 2918 2919 2920 2921 2922 2923 2924 2925 2926 2927 2928 2929 2930 2931 2932 2933 2934 2935 2936 2937 2938 2939 2940 2941 2942 2943 2944 2945 2946 2947 2948 2949 2950 2951 2952 2953 2954 2955 2956 2957 2958 2959 2960 2961 2962 2963 2964 2965 2966 2967 2968 2969 2970 2971 2972 2973 2974 2975 2976 2977 2978 2979 2980 2981 2982 2983 2984 2985 2986 2987 2988 2989 2990 2991 2992 2993 2994 2995 2996 2997 2998 2999 3000 3001 3002 3003 3004 3005 3006 3007 3008 3009 3010 3011 3012 3013 3014 3015 3016 3017 3018 3019 3020 3021 3022 3023 3024 3025 3026 3027 3028 3029 3030 3031 3032 3033 3034 3035 3036 3037 3038 3039 3040 3041 3042 3043 3044 3045 3046 3047 3048 3049 3050 3051 3052 3053 3054 3055 3056 3057 3058 3059 3060 3061 3062 3063 3064 3065 3066 3067 3068 3069 3070 3071 3072 3073 3074 3075 3076 3077 3078 3079 3080 3081 3082 3083 3084 3085 3086 3087 3088 3089 3090 3091 3092 3093 3094 3095 3096 3097 3098 3099 3100 3101 3102 3103 3104 3105 3106 3107 3108 3109 3110 3111 3112 3113 3114 3115 3116 3117 3118 3119 3120 3121 3122 3123 3124 3125 3126 3127 3128 3129 3130 3131 3132 3133 3134 3135 3136 3137 3138 3139 3140 3141 3142 3143 3144 3145 3146 3147 3148 3149 3150 3151 3152 3153 3154 3155 3156 3157 3158 3159 3160 3161 3162 3163 3164 3165 3166 3167 3168 3169 3170 3171 3172 3173 3174 3175 3176 3177 3178 3179 3180 3181 3182 3183 3184 3185 3186 3187 3188 3189 3190 3191 3192 3193 3194 3195 3196 3197 3198 3199 3200 3201 3202 3203 3204 3205 3206 3207 3208 3209 3210 3211 3212 3213 3214 3215 3216 3217 3218 3219 3220 3221 3222 3223 3224 3225 3226 3227 3228 3229 3230 3231 3232 3233 3234 3235 3236 3237 3238 3239 3240 3241 3242 3243 3244 3245 3246 3247 3248 3249 3250 3251 3252 3253 3254 3255 3256 3257 3258 3259 3260 3261 3262 3263 3264 3265 3266 3267 3268 3269 3270 3271 3272 3273 3274 3275 3276 3277 3278 3279 3280 3281 3282 3283 3284 3285 3286 3287 3288 3289 3290 3291 3292 3293 3294 3295 3296 3297 3298 3299 3300 3301 3302 3303 3304 3305 3306 3307 3308 3309 3310 3311 3312 3313 3314 3315 3316 3317 3318 3319 3320 3321 3322 3323 3324 3325 3326 3327 3328 3329 3330 3331 3332 3333 3334 3335 3336 3337 3338 3339 3340 3341 3342 3343 3344 3345 3346 3347 3348 3349 3350 3351 3352 3353 3354 3355 3356 3357 3358 3359 3360 3361 3362 3363 3364 3365 3366 3367 3368 3369 3370 3371 3372 3373 3374 3375 3376 3377 3378 3379 3380 3381 3382 3383 3384 3385 3386 3387 3388 3389 3390 3391 3392 3393 3394 3395 3396 3397 3398 3399 3400 3401 3402 3403 3404 3405 3406 3407 3408 3409 3410 3411 3412 3413 3414 3415 3416 3417 3418 3419 3420 3421 3422 3423 3424 3425 3426 3427 3428 3429 3430 3431 3432 3433 3434 3435 3436 3437 3438 3439 3440 3441 3442 3443 3444 3445 3446 3447 3448 3449 3450 3451 3452 3453 3454 3455 3456 3457 3458 3459 3460 3461 3462 3463 3464 3465 3466 3467 3468 3469 3470 3471 3472 3473 3474 3475 3476 3477 3478 3479 3480 3481 3482 3483 3484 3485 3486 3487 3488 3489 3490 3491 3492 3493 3494 3495 3496 3497 3498 3499 3500 3501 3502 3503 3504 3505 3506 3507 3508 3509 3510 3511 3512 3513 3514 3515 3516 3517 3518 3519 3520 3521 3522 3523 3524 3525 3526 3527 3528 3529 3530 3531 3532 3533 3534 3535 3536 3537 3538 3539 3540 3541 3542 3543 3544 3545 3546 3547 3548 3549 3550 3551 3552 3553 3554 3555 3556 3557 3558 3559 3560 3561 3562 3563 3564 3565 3566 3567 3568 3569 3570 3571 3572 3573 3574 3575 3576 3577 3578 3579 3580 3581 3582 3583 3584 3585 3586 3587 3588 3589 3590 3591 3592 3593 3594 3595 3596 3597 3598 3599 3600 3601 3602 3603 3604 3605 3606 3607 3608 3609 3610 3611 3612 3613 3614 3615 3616 3617 3618 3619 3620 3621 3622 3623 3624 3625 3626 3627 3628 3629 3630 3631 3632 3633 3634 3635 3636 3637 3638 3639 3640 3641 3642 3643 3644 3645 3646 3647 3648 3649 3650 3651 3652 3653 3654 3655 3656 3657 3658 3659 3660 3661 3662 3663 3664 3665 3666 3667 3668 3669 3670 3671 3672 3673 3674 3675 3676 3677 3678 3679 3680 3681 3682 3683 3684 3685 3686 3687 3688 3689 3690 3691 3692 3693 3694 3695 3696 3697 3698 3699 3700 3701 3702 3703 3704 3705 3706 3707 3708 3709 3710 3711 3712 3713 3714 3715 3716 3717 3718 3719 3720 3721 3722 3723 3724 3725 3726 3727 3728 3729 3730 3731 3732 3733 3734 3735 3736 3737 3738 3739 3740 3741 3742 3743 3744 3745 3746 3747 3748 3749 3750 3751 3752 3753 3754 3755 3756 3757 3758 3759 3760 3761 3762 3763 3764 3765 3766 3767 3768 3769 3770 3771 3772 3773 3774 3775 3776 3777 3778 3779 3780 3781 3782 3783 3784 3785 3786 3787 3788 3789 3790 3791 3792 3793 3794 3795 3796 3797 3798 3799 3800 3801 3802 3803 3804 3805 3806 3807 3808 3809 3810 3811 3812 3813 3814 3815 3816 3817 3818 3819 3820 3821 3822 3823 3824 3825 3826 3827 3828 3829 3830 3831 3832 3833 3834 3835 3836 3837 3838 3839 3840 3841 3842 3843 3844 3845 3846 3847 3848 3849 3850 3851 3852 3853 3854 3855 3856 3857 3858 3859 3860 3861 3862 3863 3864 3865 3866 3867 3868 3869 3870 3871 3872 3873 3874 3875 3876 3877 3878 3879 3880 3881 3882 3883 3884 3885 3886 3887 3888 3889 3890 3891 3892 3893 3894 3895 3896 3897 3898 3899 3900 3901 3902 3903 3904 3905 3906 3907 3908 3909 3910 3911 3912 3913 3914 3915 3916 3917 3918 3919 3920 3921 3922 3923 3924 3925 3926 3927 3928 3929 3930 3931 3932 3933 3934 3935 3936 3937 3938 3939 3940 3941 3942 3943 3944 3945 3946 3947 3948 3949 3950 3951 3952 3953 3954 3955 3956 3957 3958 3959 3960 3961 3962 3963 3964 3965 3966 3967 3968 3969 3970 3971 3972 3973 3974 3975 3976 3977 3978 3979 3980 3981 3982 3983 3984 3985 3986 3987 3988 3989 3990 3991 3992 3993 3994 3995 3996 3997 3998 3999 4000 4001 4002 4003 4004 4005 4006 4007 4008 4009 4010 4011 4012 4013 4014 4015 4016 4017 4018 4019 4020 4021 4022 4023 4024 4025 4026 4027 4028 4029 4030 4031 4032 4033 4034 4035 4036 4037 4038 4039 4040 4041 4042 4043 4044 /* * Copyright (c) 2004-2011 Atheros Communications Inc. * Copyright (c) 2011-2012 Qualcomm Atheros, Inc. * * Permission to use, copy, modify, and/or distribute this software for any * purpose with or without fee is hereby granted, provided that the above * copyright notice and this permission notice appear in all copies. * * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/moduleparam.h> #include <linux/inetdevice.h> #include <linux/export.h> #include <linux/sched/signal.h> #include "core.h" #include "cfg80211.h" #include "debug.h" #include "hif-ops.h" #include "testmode.h" #define RATETAB_ENT(_rate, _rateid, _flags) { \ .bitrate = (_rate), \ .flags = (_flags), \ .hw_value = (_rateid), \ } #define CHAN2G(_channel, _freq, _flags) { \ .band = NL80211_BAND_2GHZ, \ .hw_value = (_channel), \ .center_freq = (_freq), \ .flags = (_flags), \ .max_antenna_gain = 0, \ .max_power = 30, \ } #define CHAN5G(_channel, _flags) { \ .band = NL80211_BAND_5GHZ, \ .hw_value = (_channel), \ .center_freq = 5000 + (5 * (_channel)), \ .flags = (_flags), \ .max_antenna_gain = 0, \ .max_power = 30, \ } #define DEFAULT_BG_SCAN_PERIOD 60 struct ath6kl_cfg80211_match_probe_ssid { struct cfg80211_ssid ssid; u8 flag; }; static struct ieee80211_rate ath6kl_rates[] = { RATETAB_ENT(10, 0x1, 0), RATETAB_ENT(20, 0x2, 0), RATETAB_ENT(55, 0x4, 0), RATETAB_ENT(110, 0x8, 0), RATETAB_ENT(60, 0x10, 0), RATETAB_ENT(90, 0x20, 0), RATETAB_ENT(120, 0x40, 0), RATETAB_ENT(180, 0x80, 0), RATETAB_ENT(240, 0x100, 0), RATETAB_ENT(360, 0x200, 0), RATETAB_ENT(480, 0x400, 0), RATETAB_ENT(540, 0x800, 0), }; #define ath6kl_a_rates (ath6kl_rates + 4) #define ath6kl_a_rates_size 8 #define ath6kl_g_rates (ath6kl_rates + 0) #define ath6kl_g_rates_size 12 #define ath6kl_g_htcap IEEE80211_HT_CAP_SGI_20 #define ath6kl_a_htcap (IEEE80211_HT_CAP_SUP_WIDTH_20_40 | \ IEEE80211_HT_CAP_SGI_20 | \ IEEE80211_HT_CAP_SGI_40) static struct ieee80211_channel ath6kl_2ghz_channels[] = { CHAN2G(1, 2412, 0), CHAN2G(2, 2417, 0), CHAN2G(3, 2422, 0), CHAN2G(4, 2427, 0), CHAN2G(5, 2432, 0), CHAN2G(6, 2437, 0), CHAN2G(7, 2442, 0), CHAN2G(8, 2447, 0), CHAN2G(9, 2452, 0), CHAN2G(10, 2457, 0), CHAN2G(11, 2462, 0), CHAN2G(12, 2467, 0), CHAN2G(13, 2472, 0), CHAN2G(14, 2484, 0), }; static struct ieee80211_channel ath6kl_5ghz_a_channels[] = { CHAN5G(36, 0), CHAN5G(40, 0), CHAN5G(44, 0), CHAN5G(48, 0), CHAN5G(52, 0), CHAN5G(56, 0), CHAN5G(60, 0), CHAN5G(64, 0), CHAN5G(100, 0), CHAN5G(104, 0), CHAN5G(108, 0), CHAN5G(112, 0), CHAN5G(116, 0), CHAN5G(120, 0), CHAN5G(124, 0), CHAN5G(128, 0), CHAN5G(132, 0), CHAN5G(136, 0), CHAN5G(140, 0), CHAN5G(149, 0), CHAN5G(153, 0), CHAN5G(157, 0), CHAN5G(161, 0), CHAN5G(165, 0), CHAN5G(184, 0), CHAN5G(188, 0), CHAN5G(192, 0), CHAN5G(196, 0), CHAN5G(200, 0), CHAN5G(204, 0), CHAN5G(208, 0), CHAN5G(212, 0), CHAN5G(216, 0), }; static struct ieee80211_supported_band ath6kl_band_2ghz = { .n_channels = ARRAY_SIZE(ath6kl_2ghz_channels), .channels = ath6kl_2ghz_channels, .n_bitrates = ath6kl_g_rates_size, .bitrates = ath6kl_g_rates, .ht_cap.cap = ath6kl_g_htcap, .ht_cap.ht_supported = true, }; static struct ieee80211_supported_band ath6kl_band_5ghz = { .n_channels = ARRAY_SIZE(ath6kl_5ghz_a_channels), .channels = ath6kl_5ghz_a_channels, .n_bitrates = ath6kl_a_rates_size, .bitrates = ath6kl_a_rates, .ht_cap.cap = ath6kl_a_htcap, .ht_cap.ht_supported = true, }; #define CCKM_KRK_CIPHER_SUITE 0x004096ff /* use for KRK */ /* returns true if scheduled scan was stopped */ static bool __ath6kl_cfg80211_sscan_stop(struct ath6kl_vif *vif) { struct ath6kl *ar = vif->ar; if (!test_and_clear_bit(SCHED_SCANNING, &vif->flags)) return false; timer_delete_sync(&vif->sched_scan_timer); if (ar->state == ATH6KL_STATE_RECOVERY) return true; ath6kl_wmi_enable_sched_scan_cmd(ar->wmi, vif->fw_vif_idx, false); return true; } static void ath6kl_cfg80211_sscan_disable(struct ath6kl_vif *vif) { struct ath6kl *ar = vif->ar; bool stopped; stopped = __ath6kl_cfg80211_sscan_stop(vif); if (!stopped) return; cfg80211_sched_scan_stopped(ar->wiphy, 0); } static int ath6kl_set_wpa_version(struct ath6kl_vif *vif, enum nl80211_wpa_versions wpa_version) { ath6kl_dbg(ATH6KL_DBG_WLAN_CFG, "%s: %u\n", __func__, wpa_version); if (!wpa_version) { vif->auth_mode = NONE_AUTH; } else if (wpa_version & NL80211_WPA_VERSION_2) { vif->auth_mode = WPA2_AUTH; } else if (wpa_version & NL80211_WPA_VERSION_1) { vif->auth_mode = WPA_AUTH; } else { ath6kl_err("%s: %u not supported\n", __func__, wpa_version); return -ENOTSUPP; } return 0; } static int ath6kl_set_auth_type(struct ath6kl_vif *vif, enum nl80211_auth_type auth_type) { ath6kl_dbg(ATH6KL_DBG_WLAN_CFG, "%s: 0x%x\n", __func__, auth_type); switch (auth_type) { case NL80211_AUTHTYPE_OPEN_SYSTEM: vif->dot11_auth_mode = OPEN_AUTH; break; case NL80211_AUTHTYPE_SHARED_KEY: vif->dot11_auth_mode = SHARED_AUTH; break; case NL80211_AUTHTYPE_NETWORK_EAP: vif->dot11_auth_mode = LEAP_AUTH; break; case NL80211_AUTHTYPE_AUTOMATIC: vif->dot11_auth_mode = OPEN_AUTH | SHARED_AUTH; break; default: ath6kl_err("%s: 0x%x not supported\n", __func__, auth_type); return -ENOTSUPP; } return 0; } static int ath6kl_set_cipher(struct ath6kl_vif *vif, u32 cipher, bool ucast) { u8 *ar_cipher = ucast ? &vif->prwise_crypto : &vif->grp_crypto; u8 *ar_cipher_len = ucast ? &vif->prwise_crypto_len : &vif->grp_crypto_len; ath6kl_dbg(ATH6KL_DBG_WLAN_CFG, "%s: cipher 0x%x, ucast %u\n", __func__, cipher, ucast); switch (cipher) { case 0: /* our own hack to use value 0 as no crypto used */ *ar_cipher = NONE_CRYPT; *ar_cipher_len = 0; break; case WLAN_CIPHER_SUITE_WEP40: *ar_cipher = WEP_CRYPT; *ar_cipher_len = 5; break; case WLAN_CIPHER_SUITE_WEP104: *ar_cipher = WEP_CRYPT; *ar_cipher_len = 13; break; case WLAN_CIPHER_SUITE_TKIP: *ar_cipher = TKIP_CRYPT; *ar_cipher_len = 0; break; case WLAN_CIPHER_SUITE_CCMP: *ar_cipher = AES_CRYPT; *ar_cipher_len = 0; break; case WLAN_CIPHER_SUITE_SMS4: *ar_cipher = WAPI_CRYPT; *ar_cipher_len = 0; break; default: ath6kl_err("cipher 0x%x not supported\n", cipher); return -ENOTSUPP; } return 0; } static void ath6kl_set_key_mgmt(struct ath6kl_vif *vif, u32 key_mgmt) { ath6kl_dbg(ATH6KL_DBG_WLAN_CFG, "%s: 0x%x\n", __func__, key_mgmt); if (key_mgmt == WLAN_AKM_SUITE_PSK) { if (vif->auth_mode == WPA_AUTH) vif->auth_mode = WPA_PSK_AUTH; else if (vif->auth_mode == WPA2_AUTH) vif->auth_mode = WPA2_PSK_AUTH; } else if (key_mgmt == 0x00409600) { if (vif->auth_mode == WPA_AUTH) vif->auth_mode = WPA_AUTH_CCKM; else if (vif->auth_mode == WPA2_AUTH) vif->auth_mode = WPA2_AUTH_CCKM; } else if (key_mgmt != WLAN_AKM_SUITE_8021X) { vif->auth_mode = NONE_AUTH; } } static bool ath6kl_cfg80211_ready(struct ath6kl_vif *vif) { struct ath6kl *ar = vif->ar; if (!test_bit(WMI_READY, &ar->flag)) { ath6kl_err("wmi is not ready\n"); return false; } if (!test_bit(WLAN_ENABLED, &vif->flags)) { ath6kl_dbg(ATH6KL_DBG_WLAN_CFG, "wlan disabled\n"); return false; } return true; } static bool ath6kl_is_wpa_ie(const u8 *pos) { return pos[0] == WLAN_EID_VENDOR_SPECIFIC && pos[1] >= 4 && pos[2] == 0x00 && pos[3] == 0x50 && pos[4] == 0xf2 && pos[5] == 0x01; } static bool ath6kl_is_rsn_ie(const u8 *pos) { return pos[0] == WLAN_EID_RSN; } static bool ath6kl_is_wps_ie(const u8 *pos) { return (pos[0] == WLAN_EID_VENDOR_SPECIFIC && pos[1] >= 4 && pos[2] == 0x00 && pos[3] == 0x50 && pos[4] == 0xf2 && pos[5] == 0x04); } static int ath6kl_set_assoc_req_ies(struct ath6kl_vif *vif, const u8 *ies, size_t ies_len) { struct ath6kl *ar = vif->ar; const u8 *pos; u8 *buf = NULL; size_t len = 0; int ret; /* * Clear previously set flag */ ar->connect_ctrl_flags &= ~CONNECT_WPS_FLAG; /* * Filter out RSN/WPA IE(s) */ if (ies && ies_len) { buf = kmalloc(ies_len, GFP_KERNEL); if (buf == NULL) return -ENOMEM; pos = ies; while (pos + 1 < ies + ies_len) { if (pos + 2 + pos[1] > ies + ies_len) break; if (!(ath6kl_is_wpa_ie(pos) || ath6kl_is_rsn_ie(pos))) { memcpy(buf + len, pos, 2 + pos[1]); len += 2 + pos[1]; } if (ath6kl_is_wps_ie(pos)) ar->connect_ctrl_flags |= CONNECT_WPS_FLAG; pos += 2 + pos[1]; } } ret = ath6kl_wmi_set_appie_cmd(ar->wmi, vif->fw_vif_idx, WMI_FRAME_ASSOC_REQ, buf, len); kfree(buf); return ret; } static int ath6kl_nliftype_to_drv_iftype(enum nl80211_iftype type, u8 *nw_type) { switch (type) { case NL80211_IFTYPE_STATION: case NL80211_IFTYPE_P2P_CLIENT: *nw_type = INFRA_NETWORK; break; case NL80211_IFTYPE_ADHOC: *nw_type = ADHOC_NETWORK; break; case NL80211_IFTYPE_AP: case NL80211_IFTYPE_P2P_GO: *nw_type = AP_NETWORK; break; default: ath6kl_err("invalid interface type %u\n", type); return -ENOTSUPP; } return 0; } static bool ath6kl_is_valid_iftype(struct ath6kl *ar, enum nl80211_iftype type, u8 *if_idx, u8 *nw_type) { int i; if (ath6kl_nliftype_to_drv_iftype(type, nw_type)) return false; if (ar->ibss_if_active || ((type == NL80211_IFTYPE_ADHOC) && ar->num_vif)) return false; if (type == NL80211_IFTYPE_STATION || type == NL80211_IFTYPE_AP || type == NL80211_IFTYPE_ADHOC) { for (i = 0; i < ar->vif_max; i++) { if ((ar->avail_idx_map) & BIT(i)) { *if_idx = i; return true; } } } if (type == NL80211_IFTYPE_P2P_CLIENT || type == NL80211_IFTYPE_P2P_GO) { for (i = ar->max_norm_iface; i < ar->vif_max; i++) { if ((ar->avail_idx_map) & BIT(i)) { *if_idx = i; return true; } } } return false; } static bool ath6kl_is_tx_pending(struct ath6kl *ar) { return ar->tx_pending[ath6kl_wmi_get_control_ep(ar->wmi)] == 0; } static void ath6kl_cfg80211_sta_bmiss_enhance(struct ath6kl_vif *vif, bool enable) { int err; if (WARN_ON(!test_bit(WMI_READY, &vif->ar->flag))) return; if (vif->nw_type != INFRA_NETWORK) return; if (!test_bit(ATH6KL_FW_CAPABILITY_BMISS_ENHANCE, vif->ar->fw_capabilities)) return; ath6kl_dbg(ATH6KL_DBG_WLAN_CFG, "%s fw bmiss enhance\n", enable ? "enable" : "disable"); err = ath6kl_wmi_sta_bmiss_enhance_cmd(vif->ar->wmi, vif->fw_vif_idx, enable); if (err) ath6kl_err("failed to %s enhanced bmiss detection: %d\n", enable ? "enable" : "disable", err); } static int ath6kl_cfg80211_connect(struct wiphy *wiphy, struct net_device *dev, struct cfg80211_connect_params *sme) { struct ath6kl *ar = ath6kl_priv(dev); struct ath6kl_vif *vif = netdev_priv(dev); int status; u8 nw_subtype = (ar->p2p) ? SUBTYPE_P2PDEV : SUBTYPE_NONE; u16 interval; ath6kl_cfg80211_sscan_disable(vif); vif->sme_state = SME_CONNECTING; if (!ath6kl_cfg80211_ready(vif)) return -EIO; if (test_bit(DESTROY_IN_PROGRESS, &ar->flag)) { ath6kl_err("destroy in progress\n"); return -EBUSY; } if (test_bit(SKIP_SCAN, &ar->flag) && ((sme->channel && sme->channel->center_freq == 0) || (sme->bssid && is_zero_ether_addr(sme->bssid)))) { ath6kl_err("SkipScan: channel or bssid invalid\n"); return -EINVAL; } if (down_interruptible(&ar->sem)) { ath6kl_err("busy, couldn't get access\n"); return -ERESTARTSYS; } if (test_bit(DESTROY_IN_PROGRESS, &ar->flag)) { ath6kl_err("busy, destroy in progress\n"); up(&ar->sem); return -EBUSY; } if (ar->tx_pending[ath6kl_wmi_get_control_ep(ar->wmi)]) { /* * sleep until the command queue drains */ wait_event_interruptible_timeout(ar->event_wq, ath6kl_is_tx_pending(ar), WMI_TIMEOUT); if (signal_pending(current)) { ath6kl_err("cmd queue drain timeout\n"); up(&ar->sem); return -EINTR; } } status = ath6kl_set_assoc_req_ies(vif, sme->ie, sme->ie_len); if (status) { up(&ar->sem); return status; } if (sme->ie == NULL || sme->ie_len == 0) ar->connect_ctrl_flags &= ~CONNECT_WPS_FLAG; if (test_bit(CONNECTED, &vif->flags) && vif->ssid_len == sme->ssid_len && !memcmp(vif->ssid, sme->ssid, vif->ssid_len)) { vif->reconnect_flag = true; status = ath6kl_wmi_reconnect_cmd(ar->wmi, vif->fw_vif_idx, vif->req_bssid, vif->ch_hint); up(&ar->sem); if (status) { ath6kl_err("wmi_reconnect_cmd failed\n"); return -EIO; } return 0; } else if (vif->ssid_len == sme->ssid_len && !memcmp(vif->ssid, sme->ssid, vif->ssid_len)) { ath6kl_disconnect(vif); } memset(vif->ssid, 0, sizeof(vif->ssid)); vif->ssid_len = sme->ssid_len; memcpy(vif->ssid, sme->ssid, sme->ssid_len); if (sme->channel) vif->ch_hint = sme->channel->center_freq; memset(vif->req_bssid, 0, sizeof(vif->req_bssid)); if (sme->bssid && !is_broadcast_ether_addr(sme->bssid)) memcpy(vif->req_bssid, sme->bssid, sizeof(vif->req_bssid)); ath6kl_set_wpa_version(vif, sme->crypto.wpa_versions); status = ath6kl_set_auth_type(vif, sme->auth_type); if (status) { up(&ar->sem); return status; } if (sme->crypto.n_ciphers_pairwise) ath6kl_set_cipher(vif, sme->crypto.ciphers_pairwise[0], true); else ath6kl_set_cipher(vif, 0, true); ath6kl_set_cipher(vif, sme->crypto.cipher_group, false); if (sme->crypto.n_akm_suites) ath6kl_set_key_mgmt(vif, sme->crypto.akm_suites[0]); if ((sme->key_len) && (vif->auth_mode == NONE_AUTH) && (vif->prwise_crypto == WEP_CRYPT)) { struct ath6kl_key *key = NULL; if (sme->key_idx > WMI_MAX_KEY_INDEX) { ath6kl_err("key index %d out of bounds\n", sme->key_idx); up(&ar->sem); return -ENOENT; } key = &vif->keys[sme->key_idx]; key->key_len = sme->key_len; memcpy(key->key, sme->key, key->key_len); key->cipher = vif->prwise_crypto; vif->def_txkey_index = sme->key_idx; ath6kl_wmi_addkey_cmd(ar->wmi, vif->fw_vif_idx, sme->key_idx, vif->prwise_crypto, GROUP_USAGE | TX_USAGE, key->key_len, NULL, 0, key->key, KEY_OP_INIT_VAL, NULL, NO_SYNC_WMIFLAG); } if (!ar->usr_bss_filter) { clear_bit(CLEAR_BSSFILTER_ON_BEACON, &vif->flags); if (ath6kl_wmi_bssfilter_cmd(ar->wmi, vif->fw_vif_idx, ALL_BSS_FILTER, 0) != 0) { ath6kl_err("couldn't set bss filtering\n"); up(&ar->sem); return -EIO; } } vif->nw_type = vif->next_mode; /* enable enhanced bmiss detection if applicable */ ath6kl_cfg80211_sta_bmiss_enhance(vif, true); if (vif->wdev.iftype == NL80211_IFTYPE_P2P_CLIENT) nw_subtype = SUBTYPE_P2PCLIENT; ath6kl_dbg(ATH6KL_DBG_WLAN_CFG, "%s: connect called with authmode %d dot11 auth %d" " PW crypto %d PW crypto len %d GRP crypto %d" " GRP crypto len %d channel hint %u\n", __func__, vif->auth_mode, vif->dot11_auth_mode, vif->prwise_crypto, vif->prwise_crypto_len, vif->grp_crypto, vif->grp_crypto_len, vif->ch_hint); vif->reconnect_flag = 0; if (vif->nw_type == INFRA_NETWORK) { interval = max_t(u16, vif->listen_intvl_t, ATH6KL_MAX_WOW_LISTEN_INTL); status = ath6kl_wmi_listeninterval_cmd(ar->wmi, vif->fw_vif_idx, interval, 0); if (status) { ath6kl_err("couldn't set listen intervel\n"); up(&ar->sem); return status; } } status = ath6kl_wmi_connect_cmd(ar->wmi, vif->fw_vif_idx, vif->nw_type, vif->dot11_auth_mode, vif->auth_mode, vif->prwise_crypto, vif->prwise_crypto_len, vif->grp_crypto, vif->grp_crypto_len, vif->ssid_len, vif->ssid, vif->req_bssid, vif->ch_hint, ar->connect_ctrl_flags, nw_subtype); if (sme->bg_scan_period == 0) { /* disable background scan if period is 0 */ sme->bg_scan_period = 0xffff; } else if (sme->bg_scan_period == -1) { /* configure default value if not specified */ sme->bg_scan_period = DEFAULT_BG_SCAN_PERIOD; } ath6kl_wmi_scanparams_cmd(ar->wmi, vif->fw_vif_idx, 0, 0, sme->bg_scan_period, 0, 0, 0, 3, 0, 0, 0); up(&ar->sem); if (status == -EINVAL) { memset(vif->ssid, 0, sizeof(vif->ssid)); vif->ssid_len = 0; ath6kl_err("invalid request\n"); return -ENOENT; } else if (status) { ath6kl_err("ath6kl_wmi_connect_cmd failed\n"); return -EIO; } if ((!(ar->connect_ctrl_flags & CONNECT_DO_WPA_OFFLOAD)) && ((vif->auth_mode == WPA_PSK_AUTH) || (vif->auth_mode == WPA2_PSK_AUTH))) { mod_timer(&vif->disconnect_timer, jiffies + msecs_to_jiffies(DISCON_TIMER_INTVAL)); } ar->connect_ctrl_flags &= ~CONNECT_DO_WPA_OFFLOAD; set_bit(CONNECT_PEND, &vif->flags); return 0; } static struct cfg80211_bss * ath6kl_add_bss_if_needed(struct ath6kl_vif *vif, enum network_type nw_type, const u8 *bssid, struct ieee80211_channel *chan, const u8 *beacon_ie, size_t beacon_ie_len) { struct ath6kl *ar = vif->ar; struct cfg80211_bss *bss; u16 cap_val; enum ieee80211_bss_type bss_type; u8 *ie; if (nw_type & ADHOC_NETWORK) { cap_val = WLAN_CAPABILITY_IBSS; bss_type = IEEE80211_BSS_TYPE_IBSS; } else { cap_val = WLAN_CAPABILITY_ESS; bss_type = IEEE80211_BSS_TYPE_ESS; } bss = cfg80211_get_bss(ar->wiphy, chan, bssid, vif->ssid, vif->ssid_len, bss_type, IEEE80211_PRIVACY_ANY); if (bss == NULL) { /* * Since cfg80211 may not yet know about the BSS, * generate a partial entry until the first BSS info * event becomes available. * * Prepend SSID element since it is not included in the Beacon * IEs from the target. */ ie = kmalloc(2 + vif->ssid_len + beacon_ie_len, GFP_KERNEL); if (ie == NULL) return NULL; ie[0] = WLAN_EID_SSID; ie[1] = vif->ssid_len; memcpy(ie + 2, vif->ssid, vif->ssid_len); memcpy(ie + 2 + vif->ssid_len, beacon_ie, beacon_ie_len); bss = cfg80211_inform_bss(ar->wiphy, chan, CFG80211_BSS_FTYPE_UNKNOWN, bssid, 0, cap_val, 100, ie, 2 + vif->ssid_len + beacon_ie_len, 0, GFP_KERNEL); if (bss) ath6kl_dbg(ATH6KL_DBG_WLAN_CFG, "added bss %pM to cfg80211\n", bssid); kfree(ie); } else { ath6kl_dbg(ATH6KL_DBG_WLAN_CFG, "cfg80211 already has a bss\n"); } return bss; } void ath6kl_cfg80211_connect_event(struct ath6kl_vif *vif, u16 channel, u8 *bssid, u16 listen_intvl, u16 beacon_intvl, enum network_type nw_type, u8 beacon_ie_len, u8 assoc_req_len, u8 assoc_resp_len, u8 *assoc_info) { struct ieee80211_channel *chan; struct ath6kl *ar = vif->ar; struct cfg80211_bss *bss; /* capinfo + listen interval */ u8 assoc_req_ie_offset = sizeof(u16) + sizeof(u16); /* capinfo + status code + associd */ u8 assoc_resp_ie_offset = sizeof(u16) + sizeof(u16) + sizeof(u16); u8 *assoc_req_ie = assoc_info + beacon_ie_len + assoc_req_ie_offset; u8 *assoc_resp_ie = assoc_info + beacon_ie_len + assoc_req_len + assoc_resp_ie_offset; assoc_req_len -= assoc_req_ie_offset; assoc_resp_len -= assoc_resp_ie_offset; /* * Store Beacon interval here; DTIM period will be available only once * a Beacon frame from the AP is seen. */ vif->assoc_bss_beacon_int = beacon_intvl; clear_bit(DTIM_PERIOD_AVAIL, &vif->flags); if (nw_type & ADHOC_NETWORK) { if (vif->wdev.iftype != NL80211_IFTYPE_ADHOC) { ath6kl_dbg(ATH6KL_DBG_WLAN_CFG, "%s: ath6k not in ibss mode\n", __func__); return; } } if (nw_type & INFRA_NETWORK) { if (vif->wdev.iftype != NL80211_IFTYPE_STATION && vif->wdev.iftype != NL80211_IFTYPE_P2P_CLIENT) { ath6kl_dbg(ATH6KL_DBG_WLAN_CFG, "%s: ath6k not in station mode\n", __func__); return; } } chan = ieee80211_get_channel(ar->wiphy, (int) channel); bss = ath6kl_add_bss_if_needed(vif, nw_type, bssid, chan, assoc_info, beacon_ie_len); if (!bss) { ath6kl_err("could not add cfg80211 bss entry\n"); return; } if (nw_type & ADHOC_NETWORK) { ath6kl_dbg(ATH6KL_DBG_WLAN_CFG, "ad-hoc %s selected\n", nw_type & ADHOC_CREATOR ? "creator" : "joiner"); cfg80211_ibss_joined(vif->ndev, bssid, chan, GFP_KERNEL); cfg80211_put_bss(ar->wiphy, bss); return; } if (vif->sme_state == SME_CONNECTING) { /* inform connect result to cfg80211 */ vif->sme_state = SME_CONNECTED; cfg80211_connect_result(vif->ndev, bssid, assoc_req_ie, assoc_req_len, assoc_resp_ie, assoc_resp_len, WLAN_STATUS_SUCCESS, GFP_KERNEL); cfg80211_put_bss(ar->wiphy, bss); } else if (vif->sme_state == SME_CONNECTED) { struct cfg80211_roam_info roam_info = { .links[0].bss = bss, .req_ie = assoc_req_ie, .req_ie_len = assoc_req_len, .resp_ie = assoc_resp_ie, .resp_ie_len = assoc_resp_len, }; /* inform roam event to cfg80211 */ cfg80211_roamed(vif->ndev, &roam_info, GFP_KERNEL); } } static int ath6kl_cfg80211_disconnect(struct wiphy *wiphy, struct net_device *dev, u16 reason_code) { struct ath6kl *ar = ath6kl_priv(dev); struct ath6kl_vif *vif = netdev_priv(dev); ath6kl_dbg(ATH6KL_DBG_WLAN_CFG, "%s: reason=%u\n", __func__, reason_code); ath6kl_cfg80211_sscan_disable(vif); if (!ath6kl_cfg80211_ready(vif)) return -EIO; if (test_bit(DESTROY_IN_PROGRESS, &ar->flag)) { ath6kl_err("busy, destroy in progress\n"); return -EBUSY; } if (down_interruptible(&ar->sem)) { ath6kl_err("busy, couldn't get access\n"); return -ERESTARTSYS; } vif->reconnect_flag = 0; ath6kl_disconnect(vif); memset(vif->ssid, 0, sizeof(vif->ssid)); vif->ssid_len = 0; if (!test_bit(SKIP_SCAN, &ar->flag)) memset(vif->req_bssid, 0, sizeof(vif->req_bssid)); up(&ar->sem); return 0; } void ath6kl_cfg80211_disconnect_event(struct ath6kl_vif *vif, u8 reason, u8 *bssid, u8 assoc_resp_len, u8 *assoc_info, u16 proto_reason) { struct ath6kl *ar = vif->ar; if (vif->scan_req) { struct cfg80211_scan_info info = { .aborted = true, }; cfg80211_scan_done(vif->scan_req, &info); vif->scan_req = NULL; } if (vif->nw_type & ADHOC_NETWORK) { if (vif->wdev.iftype != NL80211_IFTYPE_ADHOC) ath6kl_dbg(ATH6KL_DBG_WLAN_CFG, "%s: ath6k not in ibss mode\n", __func__); return; } if (vif->nw_type & INFRA_NETWORK) { if (vif->wdev.iftype != NL80211_IFTYPE_STATION && vif->wdev.iftype != NL80211_IFTYPE_P2P_CLIENT) { ath6kl_dbg(ATH6KL_DBG_WLAN_CFG, "%s: ath6k not in station mode\n", __func__); return; } } clear_bit(CONNECT_PEND, &vif->flags); if (vif->sme_state == SME_CONNECTING) { cfg80211_connect_result(vif->ndev, bssid, NULL, 0, NULL, 0, WLAN_STATUS_UNSPECIFIED_FAILURE, GFP_KERNEL); } else if (vif->sme_state == SME_CONNECTED) { cfg80211_disconnected(vif->ndev, proto_reason, NULL, 0, false, GFP_KERNEL); } vif->sme_state = SME_DISCONNECTED; /* * Send a disconnect command to target when a disconnect event is * received with reason code other than 3 (DISCONNECT_CMD - disconnect * request from host) to make the firmware stop trying to connect even * after giving disconnect event. There will be one more disconnect * event for this disconnect command with reason code DISCONNECT_CMD * which won't be notified to cfg80211. */ if (reason != DISCONNECT_CMD) ath6kl_wmi_disconnect_cmd(ar->wmi, vif->fw_vif_idx); } static int ath6kl_set_probed_ssids(struct ath6kl *ar, struct ath6kl_vif *vif, struct cfg80211_ssid *ssids, int n_ssids, struct cfg80211_match_set *match_set, int n_match_ssid) { u8 i, j, index_to_add, ssid_found = false; struct ath6kl_cfg80211_match_probe_ssid ssid_list[MAX_PROBED_SSIDS]; memset(ssid_list, 0, sizeof(ssid_list)); if (n_ssids > MAX_PROBED_SSIDS || n_match_ssid > MAX_PROBED_SSIDS) return -EINVAL; for (i = 0; i < n_ssids; i++) { memcpy(ssid_list[i].ssid.ssid, ssids[i].ssid, ssids[i].ssid_len); ssid_list[i].ssid.ssid_len = ssids[i].ssid_len; if (ssids[i].ssid_len) ssid_list[i].flag = SPECIFIC_SSID_FLAG; else ssid_list[i].flag = ANY_SSID_FLAG; if (ar->wiphy->max_match_sets != 0 && n_match_ssid == 0) ssid_list[i].flag |= MATCH_SSID_FLAG; } index_to_add = i; for (i = 0; i < n_match_ssid; i++) { ssid_found = false; for (j = 0; j < n_ssids; j++) { if ((match_set[i].ssid.ssid_len == ssid_list[j].ssid.ssid_len) && (!memcmp(ssid_list[j].ssid.ssid, match_set[i].ssid.ssid, match_set[i].ssid.ssid_len))) { ssid_list[j].flag |= MATCH_SSID_FLAG; ssid_found = true; break; } } if (ssid_found) continue; if (index_to_add >= MAX_PROBED_SSIDS) continue; ssid_list[index_to_add].ssid.ssid_len = match_set[i].ssid.ssid_len; memcpy(ssid_list[index_to_add].ssid.ssid, match_set[i].ssid.ssid, match_set[i].ssid.ssid_len); ssid_list[index_to_add].flag |= MATCH_SSID_FLAG; index_to_add++; } for (i = 0; i < index_to_add; i++) { ath6kl_wmi_probedssid_cmd(ar->wmi, vif->fw_vif_idx, i, ssid_list[i].flag, ssid_list[i].ssid.ssid_len, ssid_list[i].ssid.ssid); } /* Make sure no old entries are left behind */ for (i = index_to_add; i < MAX_PROBED_SSIDS; i++) { ath6kl_wmi_probedssid_cmd(ar->wmi, vif->fw_vif_idx, i, DISABLE_SSID_FLAG, 0, NULL); } return 0; } static int ath6kl_cfg80211_scan(struct wiphy *wiphy, struct cfg80211_scan_request *request) { struct ath6kl_vif *vif = ath6kl_vif_from_wdev(request->wdev); struct ath6kl *ar = ath6kl_priv(vif->ndev); s8 n_channels = 0; u16 *channels = NULL; int ret = 0; u32 force_fg_scan = 0; if (!ath6kl_cfg80211_ready(vif)) return -EIO; ath6kl_cfg80211_sscan_disable(vif); if (!ar->usr_bss_filter) { clear_bit(CLEAR_BSSFILTER_ON_BEACON, &vif->flags); ret = ath6kl_wmi_bssfilter_cmd(ar->wmi, vif->fw_vif_idx, ALL_BSS_FILTER, 0); if (ret) { ath6kl_err("couldn't set bss filtering\n"); return ret; } } ret = ath6kl_set_probed_ssids(ar, vif, request->ssids, request->n_ssids, NULL, 0); if (ret < 0) return ret; /* this also clears IE in fw if it's not set */ ret = ath6kl_wmi_set_appie_cmd(ar->wmi, vif->fw_vif_idx, WMI_FRAME_PROBE_REQ, request->ie, request->ie_len); if (ret) { ath6kl_err("failed to set Probe Request appie for scan\n"); return ret; } /* * Scan only the requested channels if the request specifies a set of * channels. If the list is longer than the target supports, do not * configure the list and instead, scan all available channels. */ if (request->n_channels > 0 && request->n_channels <= WMI_MAX_CHANNELS) { u8 i; n_channels = request->n_channels; channels = kcalloc(n_channels, sizeof(u16), GFP_KERNEL); if (channels == NULL) { ath6kl_warn("failed to set scan channels, scan all channels"); n_channels = 0; } for (i = 0; i < n_channels; i++) channels[i] = request->channels[i]->center_freq; } if (test_bit(CONNECTED, &vif->flags)) force_fg_scan = 1; vif->scan_req = request; ret = ath6kl_wmi_beginscan_cmd(ar->wmi, vif->fw_vif_idx, WMI_LONG_SCAN, force_fg_scan, false, 0, ATH6KL_FG_SCAN_INTERVAL, n_channels, channels, request->no_cck, request->rates); if (ret) { ath6kl_err("failed to start scan: %d\n", ret); vif->scan_req = NULL; } kfree(channels); return ret; } void ath6kl_cfg80211_scan_complete_event(struct ath6kl_vif *vif, bool aborted) { struct ath6kl *ar = vif->ar; struct cfg80211_scan_info info = { .aborted = aborted, }; int i; ath6kl_dbg(ATH6KL_DBG_WLAN_CFG, "%s: status%s\n", __func__, aborted ? " aborted" : ""); if (!vif->scan_req) return; if (aborted) goto out; if (vif->scan_req->n_ssids && vif->scan_req->ssids[0].ssid_len) { for (i = 0; i < vif->scan_req->n_ssids; i++) { ath6kl_wmi_probedssid_cmd(ar->wmi, vif->fw_vif_idx, i, DISABLE_SSID_FLAG, 0, NULL); } } out: cfg80211_scan_done(vif->scan_req, &info); vif->scan_req = NULL; } void ath6kl_cfg80211_ch_switch_notify(struct ath6kl_vif *vif, int freq, enum wmi_phy_mode mode) { struct cfg80211_chan_def chandef; ath6kl_dbg(ATH6KL_DBG_WLAN_CFG, "channel switch notify nw_type %d freq %d mode %d\n", vif->nw_type, freq, mode); cfg80211_chandef_create(&chandef, ieee80211_get_channel(vif->ar->wiphy, freq), (mode == WMI_11G_HT20 && ath6kl_band_2ghz.ht_cap.ht_supported) ? NL80211_CHAN_HT20 : NL80211_CHAN_NO_HT); wiphy_lock(vif->ar->wiphy); cfg80211_ch_switch_notify(vif->ndev, &chandef, 0); wiphy_unlock(vif->ar->wiphy); } static int ath6kl_cfg80211_add_key(struct wiphy *wiphy, struct net_device *ndev, int link_id, u8 key_index, bool pairwise, const u8 *mac_addr, struct key_params *params) { struct ath6kl *ar = ath6kl_priv(ndev); struct ath6kl_vif *vif = netdev_priv(ndev); struct ath6kl_key *key = NULL; int seq_len; u8 key_usage; u8 key_type; if (!ath6kl_cfg80211_ready(vif)) return -EIO; if (params->cipher == CCKM_KRK_CIPHER_SUITE) { if (params->key_len != WMI_KRK_LEN) return -EINVAL; return ath6kl_wmi_add_krk_cmd(ar->wmi, vif->fw_vif_idx, params->key); } if (key_index > WMI_MAX_KEY_INDEX) { ath6kl_dbg(ATH6KL_DBG_WLAN_CFG, "%s: key index %d out of bounds\n", __func__, key_index); return -ENOENT; } key = &vif->keys[key_index]; memset(key, 0, sizeof(struct ath6kl_key)); if (pairwise) key_usage = PAIRWISE_USAGE; else key_usage = GROUP_USAGE; seq_len = params->seq_len; if (params->cipher == WLAN_CIPHER_SUITE_SMS4 && seq_len > ATH6KL_KEY_SEQ_LEN) { /* Only first half of the WPI PN is configured */ seq_len = ATH6KL_KEY_SEQ_LEN; } if (params->key_len > WLAN_MAX_KEY_LEN || seq_len > sizeof(key->seq)) return -EINVAL; key->key_len = params->key_len; memcpy(key->key, params->key, key->key_len); key->seq_len = seq_len; memcpy(key->seq, params->seq, key->seq_len); key->cipher = params->cipher; switch (key->cipher) { case WLAN_CIPHER_SUITE_WEP40: case WLAN_CIPHER_SUITE_WEP104: key_type = WEP_CRYPT; break; case WLAN_CIPHER_SUITE_TKIP: key_type = TKIP_CRYPT; break; case WLAN_CIPHER_SUITE_CCMP: key_type = AES_CRYPT; break; case WLAN_CIPHER_SUITE_SMS4: key_type = WAPI_CRYPT; break; default: return -ENOTSUPP; } if (((vif->auth_mode == WPA_PSK_AUTH) || (vif->auth_mode == WPA2_PSK_AUTH)) && (key_usage & GROUP_USAGE)) timer_delete(&vif->disconnect_timer); ath6kl_dbg(ATH6KL_DBG_WLAN_CFG, "%s: index %d, key_len %d, key_type 0x%x, key_usage 0x%x, seq_len %d\n", __func__, key_index, key->key_len, key_type, key_usage, key->seq_len); if (vif->nw_type == AP_NETWORK && !pairwise && (key_type == TKIP_CRYPT || key_type == AES_CRYPT || key_type == WAPI_CRYPT)) { ar->ap_mode_bkey.valid = true; ar->ap_mode_bkey.key_index = key_index; ar->ap_mode_bkey.key_type = key_type; ar->ap_mode_bkey.key_len = key->key_len; memcpy(ar->ap_mode_bkey.key, key->key, key->key_len); if (!test_bit(CONNECTED, &vif->flags)) { ath6kl_dbg(ATH6KL_DBG_WLAN_CFG, "Delay initial group key configuration until AP mode has been started\n"); /* * The key will be set in ath6kl_connect_ap_mode() once * the connected event is received from the target. */ return 0; } } if (vif->next_mode == AP_NETWORK && key_type == WEP_CRYPT && !test_bit(CONNECTED, &vif->flags)) { /* * Store the key locally so that it can be re-configured after * the AP mode has properly started * (ath6kl_install_statioc_wep_keys). */ ath6kl_dbg(ATH6KL_DBG_WLAN_CFG, "Delay WEP key configuration until AP mode has been started\n"); vif->wep_key_list[key_index].key_len = key->key_len; memcpy(vif->wep_key_list[key_index].key, key->key, key->key_len); return 0; } return ath6kl_wmi_addkey_cmd(ar->wmi, vif->fw_vif_idx, key_index, key_type, key_usage, key->key_len, key->seq, key->seq_len, key->key, KEY_OP_INIT_VAL, (u8 *) mac_addr, SYNC_BOTH_WMIFLAG); } static int ath6kl_cfg80211_del_key(struct wiphy *wiphy, struct net_device *ndev, int link_id, u8 key_index, bool pairwise, const u8 *mac_addr) { struct ath6kl *ar = ath6kl_priv(ndev); struct ath6kl_vif *vif = netdev_priv(ndev); ath6kl_dbg(ATH6KL_DBG_WLAN_CFG, "%s: index %d\n", __func__, key_index); if (!ath6kl_cfg80211_ready(vif)) return -EIO; if (key_index > WMI_MAX_KEY_INDEX) { ath6kl_dbg(ATH6KL_DBG_WLAN_CFG, "%s: key index %d out of bounds\n", __func__, key_index); return -ENOENT; } if (!vif->keys[key_index].key_len) { ath6kl_dbg(ATH6KL_DBG_WLAN_CFG, "%s: index %d is empty\n", __func__, key_index); return 0; } vif->keys[key_index].key_len = 0; return ath6kl_wmi_deletekey_cmd(ar->wmi, vif->fw_vif_idx, key_index); } static int ath6kl_cfg80211_get_key(struct wiphy *wiphy, struct net_device *ndev, int link_id, u8 key_index, bool pairwise, const u8 *mac_addr, void *cookie, void (*callback) (void *cookie, struct key_params *)) { struct ath6kl_vif *vif = netdev_priv(ndev); struct ath6kl_key *key = NULL; struct key_params params; ath6kl_dbg(ATH6KL_DBG_WLAN_CFG, "%s: index %d\n", __func__, key_index); if (!ath6kl_cfg80211_ready(vif)) return -EIO; if (key_index > WMI_MAX_KEY_INDEX) { ath6kl_dbg(ATH6KL_DBG_WLAN_CFG, "%s: key index %d out of bounds\n", __func__, key_index); return -ENOENT; } key = &vif->keys[key_index]; memset(&params, 0, sizeof(params)); params.cipher = key->cipher; params.key_len = key->key_len; params.seq_len = key->seq_len; params.seq = key->seq; params.key = key->key; callback(cookie, &params); return key->key_len ? 0 : -ENOENT; } static int ath6kl_cfg80211_set_default_key(struct wiphy *wiphy, struct net_device *ndev, int link_id, u8 key_index, bool unicast, bool multicast) { struct ath6kl *ar = ath6kl_priv(ndev); struct ath6kl_vif *vif = netdev_priv(ndev); struct ath6kl_key *key = NULL; u8 key_usage; enum ath6kl_crypto_type key_type = NONE_CRYPT; ath6kl_dbg(ATH6KL_DBG_WLAN_CFG, "%s: index %d\n", __func__, key_index); if (!ath6kl_cfg80211_ready(vif)) return -EIO; if (key_index > WMI_MAX_KEY_INDEX) { ath6kl_dbg(ATH6KL_DBG_WLAN_CFG, "%s: key index %d out of bounds\n", __func__, key_index); return -ENOENT; } if (!vif->keys[key_index].key_len) { ath6kl_dbg(ATH6KL_DBG_WLAN_CFG, "%s: invalid key index %d\n", __func__, key_index); return -EINVAL; } vif->def_txkey_index = key_index; key = &vif->keys[vif->def_txkey_index]; key_usage = GROUP_USAGE; if (vif->prwise_crypto == WEP_CRYPT) key_usage |= TX_USAGE; if (unicast) key_type = vif->prwise_crypto; if (multicast) key_type = vif->grp_crypto; if (vif->next_mode == AP_NETWORK && !test_bit(CONNECTED, &vif->flags)) return 0; /* Delay until AP mode has been started */ return ath6kl_wmi_addkey_cmd(ar->wmi, vif->fw_vif_idx, vif->def_txkey_index, key_type, key_usage, key->key_len, key->seq, key->seq_len, key->key, KEY_OP_INIT_VAL, NULL, SYNC_BOTH_WMIFLAG); } void ath6kl_cfg80211_tkip_micerr_event(struct ath6kl_vif *vif, u8 keyid, bool ismcast) { ath6kl_dbg(ATH6KL_DBG_WLAN_CFG, "%s: keyid %d, ismcast %d\n", __func__, keyid, ismcast); cfg80211_michael_mic_failure(vif->ndev, vif->bssid, (ismcast ? NL80211_KEYTYPE_GROUP : NL80211_KEYTYPE_PAIRWISE), keyid, NULL, GFP_KERNEL); } static int ath6kl_cfg80211_set_wiphy_params(struct wiphy *wiphy, int radio_idx, u32 changed) { struct ath6kl *ar = (struct ath6kl *)wiphy_priv(wiphy); struct ath6kl_vif *vif; int ret; ath6kl_dbg(ATH6KL_DBG_WLAN_CFG, "%s: changed 0x%x\n", __func__, changed); vif = ath6kl_vif_first(ar); if (!vif) return -EIO; if (!ath6kl_cfg80211_ready(vif)) return -EIO; if (changed & WIPHY_PARAM_RTS_THRESHOLD) { ret = ath6kl_wmi_set_rts_cmd(ar->wmi, wiphy->rts_threshold); if (ret != 0) { ath6kl_err("ath6kl_wmi_set_rts_cmd failed\n"); return -EIO; } } return 0; } static int ath6kl_cfg80211_set_txpower(struct wiphy *wiphy, struct wireless_dev *wdev, int radio_idx, enum nl80211_tx_power_setting type, int mbm) { struct ath6kl *ar = (struct ath6kl *)wiphy_priv(wiphy); struct ath6kl_vif *vif; int dbm = MBM_TO_DBM(mbm); ath6kl_dbg(ATH6KL_DBG_WLAN_CFG, "%s: type 0x%x, dbm %d\n", __func__, type, dbm); vif = ath6kl_vif_first(ar); if (!vif) return -EIO; if (!ath6kl_cfg80211_ready(vif)) return -EIO; switch (type) { case NL80211_TX_POWER_AUTOMATIC: return 0; case NL80211_TX_POWER_LIMITED: ar->tx_pwr = dbm; break; default: ath6kl_dbg(ATH6KL_DBG_WLAN_CFG, "%s: type 0x%x not supported\n", __func__, type); return -EOPNOTSUPP; } ath6kl_wmi_set_tx_pwr_cmd(ar->wmi, vif->fw_vif_idx, dbm); return 0; } static int ath6kl_cfg80211_get_txpower(struct wiphy *wiphy, struct wireless_dev *wdev, int radio_idx, unsigned int link_id, int *dbm) { struct ath6kl *ar = (struct ath6kl *)wiphy_priv(wiphy); struct ath6kl_vif *vif; vif = ath6kl_vif_first(ar); if (!vif) return -EIO; if (!ath6kl_cfg80211_ready(vif)) return -EIO; if (test_bit(CONNECTED, &vif->flags)) { ar->tx_pwr = 255; if (ath6kl_wmi_get_tx_pwr_cmd(ar->wmi, vif->fw_vif_idx) != 0) { ath6kl_err("ath6kl_wmi_get_tx_pwr_cmd failed\n"); return -EIO; } wait_event_interruptible_timeout(ar->event_wq, ar->tx_pwr != 255, 5 * HZ); if (signal_pending(current)) { ath6kl_err("target did not respond\n"); return -EINTR; } } *dbm = ar->tx_pwr; return 0; } static int ath6kl_cfg80211_set_power_mgmt(struct wiphy *wiphy, struct net_device *dev, bool pmgmt, int timeout) { struct ath6kl *ar = ath6kl_priv(dev); struct wmi_power_mode_cmd mode; struct ath6kl_vif *vif = netdev_priv(dev); ath6kl_dbg(ATH6KL_DBG_WLAN_CFG, "%s: pmgmt %d, timeout %d\n", __func__, pmgmt, timeout); if (!ath6kl_cfg80211_ready(vif)) return -EIO; if (pmgmt) { ath6kl_dbg(ATH6KL_DBG_WLAN_CFG, "%s: rec power\n", __func__); mode.pwr_mode = REC_POWER; } else { ath6kl_dbg(ATH6KL_DBG_WLAN_CFG, "%s: max perf\n", __func__); mode.pwr_mode = MAX_PERF_POWER; } if (ath6kl_wmi_powermode_cmd(ar->wmi, vif->fw_vif_idx, mode.pwr_mode) != 0) { ath6kl_err("wmi_powermode_cmd failed\n"); return -EIO; } return 0; } static struct wireless_dev *ath6kl_cfg80211_add_iface(struct wiphy *wiphy, const char *name, unsigned char name_assign_type, enum nl80211_iftype type, struct vif_params *params) { struct ath6kl *ar = wiphy_priv(wiphy); struct wireless_dev *wdev; u8 if_idx, nw_type; if (ar->num_vif == ar->vif_max) { ath6kl_err("Reached maximum number of supported vif\n"); return ERR_PTR(-EINVAL); } if (!ath6kl_is_valid_iftype(ar, type, &if_idx, &nw_type)) { ath6kl_err("Not a supported interface type\n"); return ERR_PTR(-EINVAL); } wdev = ath6kl_interface_add(ar, name, name_assign_type, type, if_idx, nw_type); if (!wdev) return ERR_PTR(-ENOMEM); ar->num_vif++; return wdev; } static int ath6kl_cfg80211_del_iface(struct wiphy *wiphy, struct wireless_dev *wdev) { struct ath6kl *ar = wiphy_priv(wiphy); struct ath6kl_vif *vif = netdev_priv(wdev->netdev); spin_lock_bh(&ar->list_lock); list_del(&vif->list); spin_unlock_bh(&ar->list_lock); ath6kl_cfg80211_vif_stop(vif, test_bit(WMI_READY, &ar->flag)); rtnl_lock(); ath6kl_cfg80211_vif_cleanup(vif); rtnl_unlock(); return 0; } static int ath6kl_cfg80211_change_iface(struct wiphy *wiphy, struct net_device *ndev, enum nl80211_iftype type, struct vif_params *params) { struct ath6kl_vif *vif = netdev_priv(ndev); int i; ath6kl_dbg(ATH6KL_DBG_WLAN_CFG, "%s: type %u\n", __func__, type); /* * Don't bring up p2p on an interface which is not initialized * for p2p operation where fw does not have capability to switch * dynamically between non-p2p and p2p type interface. */ if (!test_bit(ATH6KL_FW_CAPABILITY_STA_P2PDEV_DUPLEX, vif->ar->fw_capabilities) && (type == NL80211_IFTYPE_P2P_CLIENT || type == NL80211_IFTYPE_P2P_GO)) { if (vif->ar->vif_max == 1) { if (vif->fw_vif_idx != 0) return -EINVAL; else goto set_iface_type; } for (i = vif->ar->max_norm_iface; i < vif->ar->vif_max; i++) { if (i == vif->fw_vif_idx) break; } if (i == vif->ar->vif_max) { ath6kl_err("Invalid interface to bring up P2P\n"); return -EINVAL; } } /* need to clean up enhanced bmiss detection fw state */ ath6kl_cfg80211_sta_bmiss_enhance(vif, false); set_iface_type: switch (type) { case NL80211_IFTYPE_STATION: case NL80211_IFTYPE_P2P_CLIENT: vif->next_mode = INFRA_NETWORK; break; case NL80211_IFTYPE_ADHOC: vif->next_mode = ADHOC_NETWORK; break; case NL80211_IFTYPE_AP: case NL80211_IFTYPE_P2P_GO: vif->next_mode = AP_NETWORK; break; default: ath6kl_err("invalid interface type %u\n", type); return -EOPNOTSUPP; } vif->wdev.iftype = type; return 0; } static int ath6kl_cfg80211_join_ibss(struct wiphy *wiphy, struct net_device *dev, struct cfg80211_ibss_params *ibss_param) { struct ath6kl *ar = ath6kl_priv(dev); struct ath6kl_vif *vif = netdev_priv(dev); int status; if (!ath6kl_cfg80211_ready(vif)) return -EIO; vif->ssid_len = ibss_param->ssid_len; memcpy(vif->ssid, ibss_param->ssid, vif->ssid_len); if (ibss_param->chandef.chan) vif->ch_hint = ibss_param->chandef.chan->center_freq; if (ibss_param->channel_fixed) { /* * TODO: channel_fixed: The channel should be fixed, do not * search for IBSSs to join on other channels. Target * firmware does not support this feature, needs to be * updated. */ return -EOPNOTSUPP; } memset(vif->req_bssid, 0, sizeof(vif->req_bssid)); if (ibss_param->bssid && !is_broadcast_ether_addr(ibss_param->bssid)) memcpy(vif->req_bssid, ibss_param->bssid, sizeof(vif->req_bssid)); ath6kl_set_wpa_version(vif, 0); status = ath6kl_set_auth_type(vif, NL80211_AUTHTYPE_OPEN_SYSTEM); if (status) return status; if (ibss_param->privacy) { ath6kl_set_cipher(vif, WLAN_CIPHER_SUITE_WEP40, true); ath6kl_set_cipher(vif, WLAN_CIPHER_SUITE_WEP40, false); } else { ath6kl_set_cipher(vif, 0, true); ath6kl_set_cipher(vif, 0, false); } vif->nw_type = vif->next_mode; ath6kl_dbg(ATH6KL_DBG_WLAN_CFG, "%s: connect called with authmode %d dot11 auth %d" " PW crypto %d PW crypto len %d GRP crypto %d" " GRP crypto len %d channel hint %u\n", __func__, vif->auth_mode, vif->dot11_auth_mode, vif->prwise_crypto, vif->prwise_crypto_len, vif->grp_crypto, vif->grp_crypto_len, vif->ch_hint); status = ath6kl_wmi_connect_cmd(ar->wmi, vif->fw_vif_idx, vif->nw_type, vif->dot11_auth_mode, vif->auth_mode, vif->prwise_crypto, vif->prwise_crypto_len, vif->grp_crypto, vif->grp_crypto_len, vif->ssid_len, vif->ssid, vif->req_bssid, vif->ch_hint, ar->connect_ctrl_flags, SUBTYPE_NONE); set_bit(CONNECT_PEND, &vif->flags); return 0; } static int ath6kl_cfg80211_leave_ibss(struct wiphy *wiphy, struct net_device *dev) { struct ath6kl_vif *vif = netdev_priv(dev); if (!ath6kl_cfg80211_ready(vif)) return -EIO; ath6kl_disconnect(vif); memset(vif->ssid, 0, sizeof(vif->ssid)); vif->ssid_len = 0; return 0; } static const u32 cipher_suites[] = { WLAN_CIPHER_SUITE_WEP40, WLAN_CIPHER_SUITE_WEP104, WLAN_CIPHER_SUITE_TKIP, WLAN_CIPHER_SUITE_CCMP, CCKM_KRK_CIPHER_SUITE, WLAN_CIPHER_SUITE_SMS4, }; static bool is_rate_legacy(s32 rate) { static const s32 legacy[] = { 1000, 2000, 5500, 11000, 6000, 9000, 12000, 18000, 24000, 36000, 48000, 54000 }; u8 i; for (i = 0; i < ARRAY_SIZE(legacy); i++) if (rate == legacy[i]) return true; return false; } static bool is_rate_ht20(s32 rate, u8 *mcs, bool *sgi) { static const s32 ht20[] = { 6500, 13000, 19500, 26000, 39000, 52000, 58500, 65000, 72200 }; u8 i; for (i = 0; i < ARRAY_SIZE(ht20); i++) { if (rate == ht20[i]) { if (i == ARRAY_SIZE(ht20) - 1) /* last rate uses sgi */ *sgi = true; else *sgi = false; *mcs = i; return true; } } return false; } static bool is_rate_ht40(s32 rate, u8 *mcs, bool *sgi) { static const s32 ht40[] = { 13500, 27000, 40500, 54000, 81000, 108000, 121500, 135000, 150000 }; u8 i; for (i = 0; i < ARRAY_SIZE(ht40); i++) { if (rate == ht40[i]) { if (i == ARRAY_SIZE(ht40) - 1) /* last rate uses sgi */ *sgi = true; else *sgi = false; *mcs = i; return true; } } return false; } static int ath6kl_get_station(struct wiphy *wiphy, struct net_device *dev, const u8 *mac, struct station_info *sinfo) { struct ath6kl *ar = ath6kl_priv(dev); struct ath6kl_vif *vif = netdev_priv(dev); long left; bool sgi; s32 rate; int ret; u8 mcs; if (memcmp(mac, vif->bssid, ETH_ALEN) != 0) return -ENOENT; if (down_interruptible(&ar->sem)) return -EBUSY; set_bit(STATS_UPDATE_PEND, &vif->flags); ret = ath6kl_wmi_get_stats_cmd(ar->wmi, vif->fw_vif_idx); if (ret != 0) { up(&ar->sem); return -EIO; } left = wait_event_interruptible_timeout(ar->event_wq, !test_bit(STATS_UPDATE_PEND, &vif->flags), WMI_TIMEOUT); up(&ar->sem); if (left == 0) return -ETIMEDOUT; else if (left < 0) return left; if (vif->target_stats.rx_byte) { sinfo->rx_bytes = vif->target_stats.rx_byte; sinfo->filled |= BIT_ULL(NL80211_STA_INFO_RX_BYTES64); sinfo->rx_packets = vif->target_stats.rx_pkt; sinfo->filled |= BIT_ULL(NL80211_STA_INFO_RX_PACKETS); } if (vif->target_stats.tx_byte) { sinfo->tx_bytes = vif->target_stats.tx_byte; sinfo->filled |= BIT_ULL(NL80211_STA_INFO_TX_BYTES64); sinfo->tx_packets = vif->target_stats.tx_pkt; sinfo->filled |= BIT_ULL(NL80211_STA_INFO_TX_PACKETS); } sinfo->signal = vif->target_stats.cs_rssi; sinfo->filled |= BIT_ULL(NL80211_STA_INFO_SIGNAL); rate = vif->target_stats.tx_ucast_rate; if (is_rate_legacy(rate)) { sinfo->txrate.legacy = rate / 100; } else if (is_rate_ht20(rate, &mcs, &sgi)) { if (sgi) { sinfo->txrate.flags |= RATE_INFO_FLAGS_SHORT_GI; sinfo->txrate.mcs = mcs - 1; } else { sinfo->txrate.mcs = mcs; } sinfo->txrate.flags |= RATE_INFO_FLAGS_MCS; sinfo->txrate.bw = RATE_INFO_BW_20; } else if (is_rate_ht40(rate, &mcs, &sgi)) { if (sgi) { sinfo->txrate.flags |= RATE_INFO_FLAGS_SHORT_GI; sinfo->txrate.mcs = mcs - 1; } else { sinfo->txrate.mcs = mcs; } sinfo->txrate.bw = RATE_INFO_BW_40; sinfo->txrate.flags |= RATE_INFO_FLAGS_MCS; } else { ath6kl_dbg(ATH6KL_DBG_WLAN_CFG, "invalid rate from stats: %d\n", rate); ath6kl_debug_war(ar, ATH6KL_WAR_INVALID_RATE); return 0; } sinfo->filled |= BIT_ULL(NL80211_STA_INFO_TX_BITRATE); if (test_bit(CONNECTED, &vif->flags) && test_bit(DTIM_PERIOD_AVAIL, &vif->flags) && vif->nw_type == INFRA_NETWORK) { sinfo->filled |= BIT_ULL(NL80211_STA_INFO_BSS_PARAM); sinfo->bss_param.flags = 0; sinfo->bss_param.dtim_period = vif->assoc_bss_dtim_period; sinfo->bss_param.beacon_interval = vif->assoc_bss_beacon_int; } return 0; } static int ath6kl_set_pmksa(struct wiphy *wiphy, struct net_device *netdev, struct cfg80211_pmksa *pmksa) { struct ath6kl *ar = ath6kl_priv(netdev); struct ath6kl_vif *vif = netdev_priv(netdev); return ath6kl_wmi_setpmkid_cmd(ar->wmi, vif->fw_vif_idx, pmksa->bssid, pmksa->pmkid, true); } static int ath6kl_del_pmksa(struct wiphy *wiphy, struct net_device *netdev, struct cfg80211_pmksa *pmksa) { struct ath6kl *ar = ath6kl_priv(netdev); struct ath6kl_vif *vif = netdev_priv(netdev); return ath6kl_wmi_setpmkid_cmd(ar->wmi, vif->fw_vif_idx, pmksa->bssid, pmksa->pmkid, false); } static int ath6kl_flush_pmksa(struct wiphy *wiphy, struct net_device *netdev) { struct ath6kl *ar = ath6kl_priv(netdev); struct ath6kl_vif *vif = netdev_priv(netdev); if (test_bit(CONNECTED, &vif->flags)) return ath6kl_wmi_setpmkid_cmd(ar->wmi, vif->fw_vif_idx, vif->bssid, NULL, false); return 0; } static int ath6kl_wow_usr(struct ath6kl *ar, struct ath6kl_vif *vif, struct cfg80211_wowlan *wow, u32 *filter) { int ret, pos; u8 mask[WOW_PATTERN_SIZE]; u16 i; /* Configure the patterns that we received from the user. */ for (i = 0; i < wow->n_patterns; i++) { /* * Convert given nl80211 specific mask value to equivalent * driver specific mask value and send it to the chip along * with patterns. For example, If the mask value defined in * struct cfg80211_wowlan is 0xA (equivalent binary is 1010), * then equivalent driver specific mask value is * "0xFF 0x00 0xFF 0x00". */ memset(&mask, 0, sizeof(mask)); for (pos = 0; pos < wow->patterns[i].pattern_len; pos++) { if (wow->patterns[i].mask[pos / 8] & (0x1 << (pos % 8))) mask[pos] = 0xFF; } /* * Note: Pattern's offset is not passed as part of wowlan * parameter from CFG layer. So it's always passed as ZERO * to the firmware. It means, given WOW patterns are always * matched from the first byte of received pkt in the firmware. */ ret = ath6kl_wmi_add_wow_pattern_cmd(ar->wmi, vif->fw_vif_idx, WOW_LIST_ID, wow->patterns[i].pattern_len, 0 /* pattern offset */, wow->patterns[i].pattern, mask); if (ret) return ret; } if (wow->disconnect) *filter |= WOW_FILTER_OPTION_NWK_DISASSOC; if (wow->magic_pkt) *filter |= WOW_FILTER_OPTION_MAGIC_PACKET; if (wow->gtk_rekey_failure) *filter |= WOW_FILTER_OPTION_GTK_ERROR; if (wow->eap_identity_req) *filter |= WOW_FILTER_OPTION_EAP_REQ; if (wow->four_way_handshake) *filter |= WOW_FILTER_OPTION_8021X_4WAYHS; return 0; } static int ath6kl_wow_ap(struct ath6kl *ar, struct ath6kl_vif *vif) { static const u8 unicst_pattern[] = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x08 }; static const u8 unicst_mask[] = { 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7f }; u8 unicst_offset = 0; static const u8 arp_pattern[] = { 0x08, 0x06 }; static const u8 arp_mask[] = { 0xff, 0xff }; u8 arp_offset = 20; static const u8 discvr_pattern[] = { 0xe0, 0x00, 0x00, 0xf8 }; static const u8 discvr_mask[] = { 0xf0, 0x00, 0x00, 0xf8 }; u8 discvr_offset = 38; static const u8 dhcp_pattern[] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x43 /* port 67 */ }; static const u8 dhcp_mask[] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff /* port 67 */ }; u8 dhcp_offset = 0; int ret; /* Setup unicast IP, EAPOL-like and ARP pkt pattern */ ret = ath6kl_wmi_add_wow_pattern_cmd(ar->wmi, vif->fw_vif_idx, WOW_LIST_ID, sizeof(unicst_pattern), unicst_offset, unicst_pattern, unicst_mask); if (ret) { ath6kl_err("failed to add WOW unicast IP pattern\n"); return ret; } /* Setup all ARP pkt pattern */ ret = ath6kl_wmi_add_wow_pattern_cmd(ar->wmi, vif->fw_vif_idx, WOW_LIST_ID, sizeof(arp_pattern), arp_offset, arp_pattern, arp_mask); if (ret) { ath6kl_err("failed to add WOW ARP pattern\n"); return ret; } /* * Setup multicast pattern for mDNS 224.0.0.251, * SSDP 239.255.255.250 and LLMNR 224.0.0.252 */ ret = ath6kl_wmi_add_wow_pattern_cmd(ar->wmi, vif->fw_vif_idx, WOW_LIST_ID, sizeof(discvr_pattern), discvr_offset, discvr_pattern, discvr_mask); if (ret) { ath6kl_err("failed to add WOW mDNS/SSDP/LLMNR pattern\n"); return ret; } /* Setup all DHCP broadcast pkt pattern */ ret = ath6kl_wmi_add_wow_pattern_cmd(ar->wmi, vif->fw_vif_idx, WOW_LIST_ID, sizeof(dhcp_pattern), dhcp_offset, dhcp_pattern, dhcp_mask); if (ret) { ath6kl_err("failed to add WOW DHCP broadcast pattern\n"); return ret; } return 0; } static int ath6kl_wow_sta(struct ath6kl *ar, struct ath6kl_vif *vif) { struct net_device *ndev = vif->ndev; static const u8 discvr_pattern[] = { 0xe0, 0x00, 0x00, 0xf8 }; static const u8 discvr_mask[] = { 0xf0, 0x00, 0x00, 0xf8 }; u8 discvr_offset = 38; u8 mac_mask[ETH_ALEN]; int ret; /* Setup unicast pkt pattern */ eth_broadcast_addr(mac_mask); ret = ath6kl_wmi_add_wow_pattern_cmd(ar->wmi, vif->fw_vif_idx, WOW_LIST_ID, ETH_ALEN, 0, ndev->dev_addr, mac_mask); if (ret) { ath6kl_err("failed to add WOW unicast pattern\n"); return ret; } /* * Setup multicast pattern for mDNS 224.0.0.251, * SSDP 239.255.255.250 and LLMNR 224.0.0.252 */ if ((ndev->flags & IFF_ALLMULTI) || (ndev->flags & IFF_MULTICAST && netdev_mc_count(ndev) > 0)) { ret = ath6kl_wmi_add_wow_pattern_cmd(ar->wmi, vif->fw_vif_idx, WOW_LIST_ID, sizeof(discvr_pattern), discvr_offset, discvr_pattern, discvr_mask); if (ret) { ath6kl_err("failed to add WOW mDNS/SSDP/LLMNR pattern\n"); return ret; } } return 0; } static int is_hsleep_mode_procsed(struct ath6kl_vif *vif) { return test_bit(HOST_SLEEP_MODE_CMD_PROCESSED, &vif->flags); } static bool is_ctrl_ep_empty(struct ath6kl *ar) { return !ar->tx_pending[ar->ctrl_ep]; } static int ath6kl_cfg80211_host_sleep(struct ath6kl *ar, struct ath6kl_vif *vif) { int ret, left; clear_bit(HOST_SLEEP_MODE_CMD_PROCESSED, &vif->flags); ret = ath6kl_wmi_set_host_sleep_mode_cmd(ar->wmi, vif->fw_vif_idx, ATH6KL_HOST_MODE_ASLEEP); if (ret) return ret; left = wait_event_interruptible_timeout(ar->event_wq, is_hsleep_mode_procsed(vif), WMI_TIMEOUT); if (left == 0) { ath6kl_warn("timeout, didn't get host sleep cmd processed event\n"); ret = -ETIMEDOUT; } else if (left < 0) { ath6kl_warn("error while waiting for host sleep cmd processed event %d\n", left); ret = left; } if (ar->tx_pending[ar->ctrl_ep]) { left = wait_event_interruptible_timeout(ar->event_wq, is_ctrl_ep_empty(ar), WMI_TIMEOUT); if (left == 0) { ath6kl_warn("clear wmi ctrl data timeout\n"); ret = -ETIMEDOUT; } else if (left < 0) { ath6kl_warn("clear wmi ctrl data failed: %d\n", left); ret = left; } } return ret; } static int ath6kl_wow_suspend_vif(struct ath6kl_vif *vif, struct cfg80211_wowlan *wow, u32 *filter) { struct ath6kl *ar = vif->ar; struct in_device *in_dev; struct in_ifaddr *ifa; int ret; u16 i, bmiss_time; __be32 ips[MAX_IP_ADDRS]; u8 index = 0; if (!test_bit(NETDEV_MCAST_ALL_ON, &vif->flags) && test_bit(ATH6KL_FW_CAPABILITY_WOW_MULTICAST_FILTER, ar->fw_capabilities)) { ret = ath6kl_wmi_mcast_filter_cmd(vif->ar->wmi, vif->fw_vif_idx, false); if (ret) return ret; } /* Clear existing WOW patterns */ for (i = 0; i < WOW_MAX_FILTERS_PER_LIST; i++) ath6kl_wmi_del_wow_pattern_cmd(ar->wmi, vif->fw_vif_idx, WOW_LIST_ID, i); /* * Skip the default WOW pattern configuration * if the driver receives any WOW patterns from * the user. */ if (wow) ret = ath6kl_wow_usr(ar, vif, wow, filter); else if (vif->nw_type == AP_NETWORK) ret = ath6kl_wow_ap(ar, vif); else ret = ath6kl_wow_sta(ar, vif); if (ret) return ret; netif_stop_queue(vif->ndev); if (vif->nw_type != AP_NETWORK) { ret = ath6kl_wmi_listeninterval_cmd(ar->wmi, vif->fw_vif_idx, ATH6KL_MAX_WOW_LISTEN_INTL, 0); if (ret) return ret; /* Set listen interval x 15 times as bmiss time */ bmiss_time = ATH6KL_MAX_WOW_LISTEN_INTL * 15; if (bmiss_time > ATH6KL_MAX_BMISS_TIME) bmiss_time = ATH6KL_MAX_BMISS_TIME; ret = ath6kl_wmi_bmisstime_cmd(ar->wmi, vif->fw_vif_idx, bmiss_time, 0); if (ret) return ret; ret = ath6kl_wmi_scanparams_cmd(ar->wmi, vif->fw_vif_idx, 0xFFFF, 0, 0xFFFF, 0, 0, 0, 0, 0, 0, 0); if (ret) return ret; } /* Setup own IP addr for ARP agent. */ in_dev = __in_dev_get_rtnl(vif->ndev); if (!in_dev) return 0; ifa = rtnl_dereference(in_dev->ifa_list); memset(&ips, 0, sizeof(ips)); /* Configure IP addr only if IP address count < MAX_IP_ADDRS */ while (index < MAX_IP_ADDRS && ifa) { ips[index] = ifa->ifa_local; ifa = rtnl_dereference(ifa->ifa_next); index++; } if (ifa) { ath6kl_err("total IP addr count is exceeding fw limit\n"); return -EINVAL; } ret = ath6kl_wmi_set_ip_cmd(ar->wmi, vif->fw_vif_idx, ips[0], ips[1]); if (ret) { ath6kl_err("fail to setup ip for arp agent\n"); return ret; } return ret; } static int ath6kl_wow_suspend(struct ath6kl *ar, struct cfg80211_wowlan *wow) { struct ath6kl_vif *first_vif, *vif; int ret = 0; u32 filter = 0; bool connected = false; /* enter / leave wow suspend on first vif always */ first_vif = ath6kl_vif_first(ar); if (WARN_ON(!first_vif) || !ath6kl_cfg80211_ready(first_vif)) return -EIO; if (wow && (wow->n_patterns > WOW_MAX_FILTERS_PER_LIST)) return -EINVAL; /* install filters for each connected vif */ spin_lock_bh(&ar->list_lock); list_for_each_entry(vif, &ar->vif_list, list) { if (!test_bit(CONNECTED, &vif->flags) || !ath6kl_cfg80211_ready(vif)) continue; connected = true; ret = ath6kl_wow_suspend_vif(vif, wow, &filter); if (ret) break; } spin_unlock_bh(&ar->list_lock); if (!connected) return -ENOTCONN; else if (ret) return ret; ar->state = ATH6KL_STATE_SUSPENDING; ret = ath6kl_wmi_set_wow_mode_cmd(ar->wmi, first_vif->fw_vif_idx, ATH6KL_WOW_MODE_ENABLE, filter, WOW_HOST_REQ_DELAY); if (ret) return ret; return ath6kl_cfg80211_host_sleep(ar, first_vif); } static int ath6kl_wow_resume_vif(struct ath6kl_vif *vif) { struct ath6kl *ar = vif->ar; int ret; if (vif->nw_type != AP_NETWORK) { ret = ath6kl_wmi_scanparams_cmd(ar->wmi, vif->fw_vif_idx, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0); if (ret) return ret; ret = ath6kl_wmi_listeninterval_cmd(ar->wmi, vif->fw_vif_idx, vif->listen_intvl_t, 0); if (ret) return ret; ret = ath6kl_wmi_bmisstime_cmd(ar->wmi, vif->fw_vif_idx, vif->bmiss_time_t, 0); if (ret) return ret; } if (!test_bit(NETDEV_MCAST_ALL_OFF, &vif->flags) && test_bit(ATH6KL_FW_CAPABILITY_WOW_MULTICAST_FILTER, ar->fw_capabilities)) { ret = ath6kl_wmi_mcast_filter_cmd(vif->ar->wmi, vif->fw_vif_idx, true); if (ret) return ret; } netif_wake_queue(vif->ndev); return 0; } static int ath6kl_wow_resume(struct ath6kl *ar) { struct ath6kl_vif *vif; int ret; vif = ath6kl_vif_first(ar); if (WARN_ON(!vif) || !ath6kl_cfg80211_ready(vif)) return -EIO; ar->state = ATH6KL_STATE_RESUMING; ret = ath6kl_wmi_set_host_sleep_mode_cmd(ar->wmi, vif->fw_vif_idx, ATH6KL_HOST_MODE_AWAKE); if (ret) { ath6kl_warn("Failed to configure host sleep mode for wow resume: %d\n", ret); goto cleanup; } spin_lock_bh(&ar->list_lock); list_for_each_entry(vif, &ar->vif_list, list) { if (!test_bit(CONNECTED, &vif->flags) || !ath6kl_cfg80211_ready(vif)) continue; ret = ath6kl_wow_resume_vif(vif); if (ret) break; } spin_unlock_bh(&ar->list_lock); if (ret) goto cleanup; ar->state = ATH6KL_STATE_ON; return 0; cleanup: ar->state = ATH6KL_STATE_WOW; return ret; } static int ath6kl_cfg80211_deepsleep_suspend(struct ath6kl *ar) { struct ath6kl_vif *vif; int ret; vif = ath6kl_vif_first(ar); if (!vif) return -EIO; if (!test_bit(WMI_READY, &ar->flag)) { ath6kl_err("deepsleep failed as wmi is not ready\n"); return -EIO; } ath6kl_cfg80211_stop_all(ar); /* Save the current power mode before enabling power save */ ar->wmi->saved_pwr_mode = ar->wmi->pwr_mode; ret = ath6kl_wmi_powermode_cmd(ar->wmi, 0, REC_POWER); if (ret) return ret; /* Disable WOW mode */ ret = ath6kl_wmi_set_wow_mode_cmd(ar->wmi, vif->fw_vif_idx, ATH6KL_WOW_MODE_DISABLE, 0, 0); if (ret) return ret; /* Flush all non control pkts in TX path */ ath6kl_tx_data_cleanup(ar); ret = ath6kl_cfg80211_host_sleep(ar, vif); if (ret) return ret; return 0; } static int ath6kl_cfg80211_deepsleep_resume(struct ath6kl *ar) { struct ath6kl_vif *vif; int ret; vif = ath6kl_vif_first(ar); if (!vif) return -EIO; if (ar->wmi->pwr_mode != ar->wmi->saved_pwr_mode) { ret = ath6kl_wmi_powermode_cmd(ar->wmi, 0, ar->wmi->saved_pwr_mode); if (ret) return ret; } ret = ath6kl_wmi_set_host_sleep_mode_cmd(ar->wmi, vif->fw_vif_idx, ATH6KL_HOST_MODE_AWAKE); if (ret) return ret; ar->state = ATH6KL_STATE_ON; /* Reset scan parameter to default values */ ret = ath6kl_wmi_scanparams_cmd(ar->wmi, vif->fw_vif_idx, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0); if (ret) return ret; return 0; } int ath6kl_cfg80211_suspend(struct ath6kl *ar, enum ath6kl_cfg_suspend_mode mode, struct cfg80211_wowlan *wow) { struct ath6kl_vif *vif; enum ath6kl_state prev_state; int ret; switch (mode) { case ATH6KL_CFG_SUSPEND_WOW: ath6kl_dbg(ATH6KL_DBG_SUSPEND, "wow mode suspend\n"); /* Flush all non control pkts in TX path */ ath6kl_tx_data_cleanup(ar); prev_state = ar->state; ret = ath6kl_wow_suspend(ar, wow); if (ret) { ar->state = prev_state; return ret; } ar->state = ATH6KL_STATE_WOW; break; case ATH6KL_CFG_SUSPEND_DEEPSLEEP: ath6kl_dbg(ATH6KL_DBG_SUSPEND, "deep sleep suspend\n"); ret = ath6kl_cfg80211_deepsleep_suspend(ar); if (ret) { ath6kl_err("deepsleep suspend failed: %d\n", ret); return ret; } ar->state = ATH6KL_STATE_DEEPSLEEP; break; case ATH6KL_CFG_SUSPEND_CUTPOWER: ath6kl_cfg80211_stop_all(ar); if (ar->state == ATH6KL_STATE_OFF) { ath6kl_dbg(ATH6KL_DBG_SUSPEND, "suspend hw off, no action for cutpower\n"); break; } ath6kl_dbg(ATH6KL_DBG_SUSPEND, "suspend cutting power\n"); ret = ath6kl_init_hw_stop(ar); if (ret) { ath6kl_warn("failed to stop hw during suspend: %d\n", ret); } ar->state = ATH6KL_STATE_CUTPOWER; break; default: break; } list_for_each_entry(vif, &ar->vif_list, list) ath6kl_cfg80211_scan_complete_event(vif, true); return 0; } EXPORT_SYMBOL(ath6kl_cfg80211_suspend); int ath6kl_cfg80211_resume(struct ath6kl *ar) { int ret; switch (ar->state) { case ATH6KL_STATE_WOW: ath6kl_dbg(ATH6KL_DBG_SUSPEND, "wow mode resume\n"); ret = ath6kl_wow_resume(ar); if (ret) { ath6kl_warn("wow mode resume failed: %d\n", ret); return ret; } break; case ATH6KL_STATE_DEEPSLEEP: ath6kl_dbg(ATH6KL_DBG_SUSPEND, "deep sleep resume\n"); ret = ath6kl_cfg80211_deepsleep_resume(ar); if (ret) { ath6kl_warn("deep sleep resume failed: %d\n", ret); return ret; } break; case ATH6KL_STATE_CUTPOWER: ath6kl_dbg(ATH6KL_DBG_SUSPEND, "resume restoring power\n"); ret = ath6kl_init_hw_start(ar); if (ret) { ath6kl_warn("Failed to boot hw in resume: %d\n", ret); return ret; } break; default: break; } return 0; } EXPORT_SYMBOL(ath6kl_cfg80211_resume); #ifdef CONFIG_PM /* hif layer decides what suspend mode to use */ static int __ath6kl_cfg80211_suspend(struct wiphy *wiphy, struct cfg80211_wowlan *wow) { struct ath6kl *ar = wiphy_priv(wiphy); ath6kl_recovery_suspend(ar); return ath6kl_hif_suspend(ar, wow); } static int __ath6kl_cfg80211_resume(struct wiphy *wiphy) { struct ath6kl *ar = wiphy_priv(wiphy); int err; err = ath6kl_hif_resume(ar); if (err) return err; ath6kl_recovery_resume(ar); return 0; } /* * FIXME: WOW suspend mode is selected if the host sdio controller supports * both sdio irq wake up and keep power. The target pulls sdio data line to * wake up the host when WOW pattern matches. This causes sdio irq handler * is being called in the host side which internally hits ath6kl's RX path. * * Since sdio interrupt is not disabled, RX path executes even before * the host executes the actual resume operation from PM module. * * In the current scenario, WOW resume should happen before start processing * any data from the target. So It's required to perform WOW resume in RX path. * Ideally we should perform WOW resume only in the actual platform * resume path. This area needs bit rework to avoid WOW resume in RX path. * * ath6kl_check_wow_status() is called from ath6kl_rx(). */ void ath6kl_check_wow_status(struct ath6kl *ar) { if (ar->state == ATH6KL_STATE_SUSPENDING) return; if (ar->state == ATH6KL_STATE_WOW) ath6kl_cfg80211_resume(ar); } #else void ath6kl_check_wow_status(struct ath6kl *ar) { } #endif static int ath6kl_set_htcap(struct ath6kl_vif *vif, enum nl80211_band band, bool ht_enable) { struct ath6kl_htcap *htcap = &vif->htcap[band]; if (htcap->ht_enable == ht_enable) return 0; if (ht_enable) { /* Set default ht capabilities */ htcap->ht_enable = true; htcap->cap_info = (band == NL80211_BAND_2GHZ) ? ath6kl_g_htcap : ath6kl_a_htcap; htcap->ampdu_factor = IEEE80211_HT_MAX_AMPDU_16K; } else /* Disable ht */ memset(htcap, 0, sizeof(*htcap)); return ath6kl_wmi_set_htcap_cmd(vif->ar->wmi, vif->fw_vif_idx, band, htcap); } static int ath6kl_restore_htcap(struct ath6kl_vif *vif) { struct wiphy *wiphy = vif->ar->wiphy; int band, ret = 0; for (band = 0; band < NUM_NL80211_BANDS; band++) { if (!wiphy->bands[band]) continue; ret = ath6kl_set_htcap(vif, band, wiphy->bands[band]->ht_cap.ht_supported); if (ret) return ret; } return ret; } static bool ath6kl_is_p2p_ie(const u8 *pos) { return pos[0] == WLAN_EID_VENDOR_SPECIFIC && pos[1] >= 4 && pos[2] == 0x50 && pos[3] == 0x6f && pos[4] == 0x9a && pos[5] == 0x09; } static int ath6kl_set_ap_probe_resp_ies(struct ath6kl_vif *vif, const u8 *ies, size_t ies_len) { struct ath6kl *ar = vif->ar; const u8 *pos; u8 *buf = NULL; size_t len = 0; int ret; /* * Filter out P2P IE(s) since they will be included depending on * the Probe Request frame in ath6kl_send_go_probe_resp(). */ if (ies && ies_len) { buf = kmalloc(ies_len, GFP_KERNEL); if (buf == NULL) return -ENOMEM; pos = ies; while (pos + 1 < ies + ies_len) { if (pos + 2 + pos[1] > ies + ies_len) break; if (!ath6kl_is_p2p_ie(pos)) { memcpy(buf + len, pos, 2 + pos[1]); len += 2 + pos[1]; } pos += 2 + pos[1]; } } ret = ath6kl_wmi_set_appie_cmd(ar->wmi, vif->fw_vif_idx, WMI_FRAME_PROBE_RESP, buf, len); kfree(buf); return ret; } static int ath6kl_set_ies(struct ath6kl_vif *vif, struct cfg80211_beacon_data *info) { struct ath6kl *ar = vif->ar; int res; /* this also clears IE in fw if it's not set */ res = ath6kl_wmi_set_appie_cmd(ar->wmi, vif->fw_vif_idx, WMI_FRAME_BEACON, info->beacon_ies, info->beacon_ies_len); if (res) return res; /* this also clears IE in fw if it's not set */ res = ath6kl_set_ap_probe_resp_ies(vif, info->proberesp_ies, info->proberesp_ies_len); if (res) return res; /* this also clears IE in fw if it's not set */ res = ath6kl_wmi_set_appie_cmd(ar->wmi, vif->fw_vif_idx, WMI_FRAME_ASSOC_RESP, info->assocresp_ies, info->assocresp_ies_len); if (res) return res; return 0; } static int ath6kl_get_rsn_capab(struct cfg80211_beacon_data *beacon, u8 *rsn_capab) { const u8 *rsn_ie; size_t rsn_ie_len; u16 cnt; if (!beacon->tail) return -EINVAL; rsn_ie = cfg80211_find_ie(WLAN_EID_RSN, beacon->tail, beacon->tail_len); if (!rsn_ie) return -EINVAL; rsn_ie_len = *(rsn_ie + 1); /* skip element id and length */ rsn_ie += 2; /* skip version */ if (rsn_ie_len < 2) return -EINVAL; rsn_ie += 2; rsn_ie_len -= 2; /* skip group cipher suite */ if (rsn_ie_len < 4) return 0; rsn_ie += 4; rsn_ie_len -= 4; /* skip pairwise cipher suite */ if (rsn_ie_len < 2) return 0; cnt = get_unaligned_le16(rsn_ie); rsn_ie += (2 + cnt * 4); rsn_ie_len -= (2 + cnt * 4); /* skip akm suite */ if (rsn_ie_len < 2) return 0; cnt = get_unaligned_le16(rsn_ie); rsn_ie += (2 + cnt * 4); rsn_ie_len -= (2 + cnt * 4); if (rsn_ie_len < 2) return 0; memcpy(rsn_capab, rsn_ie, 2); return 0; } static int ath6kl_start_ap(struct wiphy *wiphy, struct net_device *dev, struct cfg80211_ap_settings *info) { struct ath6kl *ar = ath6kl_priv(dev); struct ath6kl_vif *vif = netdev_priv(dev); struct ieee80211_mgmt *mgmt; bool hidden = false; u8 *ies; struct wmi_connect_cmd p; int res; int i, ret; u16 rsn_capab = 0; int inactivity_timeout = 0; ath6kl_dbg(ATH6KL_DBG_WLAN_CFG, "%s:\n", __func__); if (!ath6kl_cfg80211_ready(vif)) return -EIO; if (vif->next_mode != AP_NETWORK) return -EOPNOTSUPP; res = ath6kl_set_ies(vif, &info->beacon); ar->ap_mode_bkey.valid = false; ret = ath6kl_wmi_ap_set_beacon_intvl_cmd(ar->wmi, vif->fw_vif_idx, info->beacon_interval); if (ret) ath6kl_warn("Failed to set beacon interval: %d\n", ret); ret = ath6kl_wmi_ap_set_dtim_cmd(ar->wmi, vif->fw_vif_idx, info->dtim_period); /* ignore error, just print a warning and continue normally */ if (ret) ath6kl_warn("Failed to set dtim_period in beacon: %d\n", ret); if (info->beacon.head == NULL) return -EINVAL; mgmt = (struct ieee80211_mgmt *) info->beacon.head; ies = mgmt->u.beacon.variable; if (ies > info->beacon.head + info->beacon.head_len) return -EINVAL; if (info->ssid == NULL) return -EINVAL; memcpy(vif->ssid, info->ssid, info->ssid_len); vif->ssid_len = info->ssid_len; if (info->hidden_ssid != NL80211_HIDDEN_SSID_NOT_IN_USE) hidden = true; res = ath6kl_wmi_ap_hidden_ssid(ar->wmi, vif->fw_vif_idx, hidden); if (res) return res; ret = ath6kl_set_auth_type(vif, info->auth_type); if (ret) return ret; memset(&p, 0, sizeof(p)); for (i = 0; i < info->crypto.n_akm_suites; i++) { switch (info->crypto.akm_suites[i]) { case WLAN_AKM_SUITE_8021X: if (info->crypto.wpa_versions & NL80211_WPA_VERSION_1) p.auth_mode |= WPA_AUTH; if (info->crypto.wpa_versions & NL80211_WPA_VERSION_2) p.auth_mode |= WPA2_AUTH; break; case WLAN_AKM_SUITE_PSK: if (info->crypto.wpa_versions & NL80211_WPA_VERSION_1) p.auth_mode |= WPA_PSK_AUTH; if (info->crypto.wpa_versions & NL80211_WPA_VERSION_2) p.auth_mode |= WPA2_PSK_AUTH; break; } } if (p.auth_mode == 0) p.auth_mode = NONE_AUTH; vif->auth_mode = p.auth_mode; for (i = 0; i < info->crypto.n_ciphers_pairwise; i++) { switch (info->crypto.ciphers_pairwise[i]) { case WLAN_CIPHER_SUITE_WEP40: case WLAN_CIPHER_SUITE_WEP104: p.prwise_crypto_type |= WEP_CRYPT; break; case WLAN_CIPHER_SUITE_TKIP: p.prwise_crypto_type |= TKIP_CRYPT; break; case WLAN_CIPHER_SUITE_CCMP: p.prwise_crypto_type |= AES_CRYPT; break; case WLAN_CIPHER_SUITE_SMS4: p.prwise_crypto_type |= WAPI_CRYPT; break; } } if (p.prwise_crypto_type == 0) { p.prwise_crypto_type = NONE_CRYPT; ath6kl_set_cipher(vif, 0, true); } else if (info->crypto.n_ciphers_pairwise == 1) { ath6kl_set_cipher(vif, info->crypto.ciphers_pairwise[0], true); } switch (info->crypto.cipher_group) { case WLAN_CIPHER_SUITE_WEP40: case WLAN_CIPHER_SUITE_WEP104: p.grp_crypto_type = WEP_CRYPT; break; case WLAN_CIPHER_SUITE_TKIP: p.grp_crypto_type = TKIP_CRYPT; break; case WLAN_CIPHER_SUITE_CCMP: p.grp_crypto_type = AES_CRYPT; break; case WLAN_CIPHER_SUITE_SMS4: p.grp_crypto_type = WAPI_CRYPT; break; default: p.grp_crypto_type = NONE_CRYPT; break; } ath6kl_set_cipher(vif, info->crypto.cipher_group, false); p.nw_type = AP_NETWORK; vif->nw_type = vif->next_mode; p.ssid_len = vif->ssid_len; memcpy(p.ssid, vif->ssid, vif->ssid_len); p.dot11_auth_mode = vif->dot11_auth_mode; p.ch = cpu_to_le16(info->chandef.chan->center_freq); /* Enable uAPSD support by default */ res = ath6kl_wmi_ap_set_apsd(ar->wmi, vif->fw_vif_idx, true); if (res < 0) return res; if (vif->wdev.iftype == NL80211_IFTYPE_P2P_GO) { p.nw_subtype = SUBTYPE_P2PGO; } else { /* * Due to firmware limitation, it is not possible to * do P2P mgmt operations in AP mode */ p.nw_subtype = SUBTYPE_NONE; } if (info->inactivity_timeout) { inactivity_timeout = info->inactivity_timeout; if (test_bit(ATH6KL_FW_CAPABILITY_AP_INACTIVITY_MINS, ar->fw_capabilities)) inactivity_timeout = DIV_ROUND_UP(inactivity_timeout, 60); res = ath6kl_wmi_set_inact_period(ar->wmi, vif->fw_vif_idx, inactivity_timeout); if (res < 0) return res; } if (ath6kl_set_htcap(vif, info->chandef.chan->band, cfg80211_get_chandef_type(&info->chandef) != NL80211_CHAN_NO_HT)) return -EIO; /* * Get the PTKSA replay counter in the RSN IE. Supplicant * will use the RSN IE in M3 message and firmware has to * advertise the same in beacon/probe response. Send * the complete RSN IE capability field to firmware */ if (!ath6kl_get_rsn_capab(&info->beacon, (u8 *) &rsn_capab) && test_bit(ATH6KL_FW_CAPABILITY_RSN_CAP_OVERRIDE, ar->fw_capabilities)) { res = ath6kl_wmi_set_ie_cmd(ar->wmi, vif->fw_vif_idx, WLAN_EID_RSN, WMI_RSN_IE_CAPB, (const u8 *) &rsn_capab, sizeof(rsn_capab)); vif->rsn_capab = rsn_capab; if (res < 0) return res; } memcpy(&vif->profile, &p, sizeof(p)); res = ath6kl_wmi_ap_profile_commit(ar->wmi, vif->fw_vif_idx, &p); if (res < 0) return res; return 0; } static int ath6kl_change_beacon(struct wiphy *wiphy, struct net_device *dev, struct cfg80211_ap_update *params) { struct ath6kl_vif *vif = netdev_priv(dev); if (!ath6kl_cfg80211_ready(vif)) return -EIO; if (vif->next_mode != AP_NETWORK) return -EOPNOTSUPP; return ath6kl_set_ies(vif, &params->beacon); } static int ath6kl_stop_ap(struct wiphy *wiphy, struct net_device *dev, unsigned int link_id) { struct ath6kl *ar = ath6kl_priv(dev); struct ath6kl_vif *vif = netdev_priv(dev); if (vif->nw_type != AP_NETWORK) return -EOPNOTSUPP; if (!test_bit(CONNECTED, &vif->flags)) return -ENOTCONN; ath6kl_wmi_disconnect_cmd(ar->wmi, vif->fw_vif_idx); clear_bit(CONNECTED, &vif->flags); netif_carrier_off(vif->ndev); /* Restore ht setting in firmware */ return ath6kl_restore_htcap(vif); } static const u8 bcast_addr[ETH_ALEN] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; static int ath6kl_del_station(struct wiphy *wiphy, struct net_device *dev, struct station_del_parameters *params) { struct ath6kl *ar = ath6kl_priv(dev); struct ath6kl_vif *vif = netdev_priv(dev); const u8 *addr = params->mac ? params->mac : bcast_addr; return ath6kl_wmi_ap_set_mlme(ar->wmi, vif->fw_vif_idx, WMI_AP_DEAUTH, addr, WLAN_REASON_PREV_AUTH_NOT_VALID); } static int ath6kl_change_station(struct wiphy *wiphy, struct net_device *dev, const u8 *mac, struct station_parameters *params) { struct ath6kl *ar = ath6kl_priv(dev); struct ath6kl_vif *vif = netdev_priv(dev); int err; if (vif->nw_type != AP_NETWORK) return -EOPNOTSUPP; err = cfg80211_check_station_change(wiphy, params, CFG80211_STA_AP_MLME_CLIENT); if (err) return err; if (params->sta_flags_set & BIT(NL80211_STA_FLAG_AUTHORIZED)) return ath6kl_wmi_ap_set_mlme(ar->wmi, vif->fw_vif_idx, WMI_AP_MLME_AUTHORIZE, mac, 0); return ath6kl_wmi_ap_set_mlme(ar->wmi, vif->fw_vif_idx, WMI_AP_MLME_UNAUTHORIZE, mac, 0); } static int ath6kl_remain_on_channel(struct wiphy *wiphy, struct wireless_dev *wdev, struct ieee80211_channel *chan, unsigned int duration, u64 *cookie) { struct ath6kl_vif *vif = ath6kl_vif_from_wdev(wdev); struct ath6kl *ar = ath6kl_priv(vif->ndev); u32 id; /* TODO: if already pending or ongoing remain-on-channel, * return -EBUSY */ id = ++vif->last_roc_id; if (id == 0) { /* Do not use 0 as the cookie value */ id = ++vif->last_roc_id; } *cookie = id; return ath6kl_wmi_remain_on_chnl_cmd(ar->wmi, vif->fw_vif_idx, chan->center_freq, duration); } static int ath6kl_cancel_remain_on_channel(struct wiphy *wiphy, struct wireless_dev *wdev, u64 cookie) { struct ath6kl_vif *vif = ath6kl_vif_from_wdev(wdev); struct ath6kl *ar = ath6kl_priv(vif->ndev); if (cookie != vif->last_roc_id) return -ENOENT; vif->last_cancel_roc_id = cookie; return ath6kl_wmi_cancel_remain_on_chnl_cmd(ar->wmi, vif->fw_vif_idx); } static int ath6kl_send_go_probe_resp(struct ath6kl_vif *vif, const u8 *buf, size_t len, unsigned int freq) { struct ath6kl *ar = vif->ar; const u8 *pos; u8 *p2p; int p2p_len; int ret; const struct ieee80211_mgmt *mgmt; mgmt = (const struct ieee80211_mgmt *) buf; /* Include P2P IE(s) from the frame generated in user space. */ p2p = kmalloc(len, GFP_KERNEL); if (p2p == NULL) return -ENOMEM; p2p_len = 0; pos = mgmt->u.probe_resp.variable; while (pos + 1 < buf + len) { if (pos + 2 + pos[1] > buf + len) break; if (ath6kl_is_p2p_ie(pos)) { memcpy(p2p + p2p_len, pos, 2 + pos[1]); p2p_len += 2 + pos[1]; } pos += 2 + pos[1]; } ret = ath6kl_wmi_send_probe_response_cmd(ar->wmi, vif->fw_vif_idx, freq, mgmt->da, p2p, p2p_len); kfree(p2p); return ret; } static bool ath6kl_mgmt_powersave_ap(struct ath6kl_vif *vif, u32 id, u32 freq, u32 wait, const u8 *buf, size_t len, bool *more_data, bool no_cck) { struct ieee80211_mgmt *mgmt; struct ath6kl_sta *conn; bool is_psq_empty = false; struct ath6kl_mgmt_buff *mgmt_buf; size_t mgmt_buf_size; struct ath6kl *ar = vif->ar; mgmt = (struct ieee80211_mgmt *) buf; if (is_multicast_ether_addr(mgmt->da)) return false; conn = ath6kl_find_sta(vif, mgmt->da); if (!conn) return false; if (conn->sta_flags & STA_PS_SLEEP) { if (!(conn->sta_flags & STA_PS_POLLED)) { /* Queue the frames if the STA is sleeping */ mgmt_buf_size = len + sizeof(struct ath6kl_mgmt_buff); mgmt_buf = kmalloc(mgmt_buf_size, GFP_KERNEL); if (!mgmt_buf) return false; INIT_LIST_HEAD(&mgmt_buf->list); mgmt_buf->id = id; mgmt_buf->freq = freq; mgmt_buf->wait = wait; mgmt_buf->len = len; mgmt_buf->no_cck = no_cck; memcpy(mgmt_buf->buf, buf, len); spin_lock_bh(&conn->psq_lock); is_psq_empty = skb_queue_empty(&conn->psq) && (conn->mgmt_psq_len == 0); list_add_tail(&mgmt_buf->list, &conn->mgmt_psq); conn->mgmt_psq_len++; spin_unlock_bh(&conn->psq_lock); /* * If this is the first pkt getting queued * for this STA, update the PVB for this * STA. */ if (is_psq_empty) ath6kl_wmi_set_pvb_cmd(ar->wmi, vif->fw_vif_idx, conn->aid, 1); return true; } /* * This tx is because of a PsPoll. * Determine if MoreData bit has to be set. */ spin_lock_bh(&conn->psq_lock); if (!skb_queue_empty(&conn->psq) || (conn->mgmt_psq_len != 0)) *more_data = true; spin_unlock_bh(&conn->psq_lock); } return false; } /* Check if SSID length is greater than DIRECT- */ static bool ath6kl_is_p2p_go_ssid(const u8 *buf, size_t len) { const struct ieee80211_mgmt *mgmt; mgmt = (const struct ieee80211_mgmt *) buf; /* variable[1] contains the SSID tag length */ if (buf + len >= &mgmt->u.probe_resp.variable[1] && (mgmt->u.probe_resp.variable[1] > P2P_WILDCARD_SSID_LEN)) { return true; } return false; } static int ath6kl_mgmt_tx(struct wiphy *wiphy, struct wireless_dev *wdev, struct cfg80211_mgmt_tx_params *params, u64 *cookie) { struct ath6kl_vif *vif = ath6kl_vif_from_wdev(wdev); struct ath6kl *ar = ath6kl_priv(vif->ndev); struct ieee80211_channel *chan = params->chan; const u8 *buf = params->buf; size_t len = params->len; unsigned int wait = params->wait; bool no_cck = params->no_cck; u32 id, freq; const struct ieee80211_mgmt *mgmt; bool more_data, queued; /* default to the current channel, but use the one specified as argument * if any */ freq = vif->ch_hint; if (chan) freq = chan->center_freq; /* never send freq zero to the firmware */ if (WARN_ON(freq == 0)) return -EINVAL; mgmt = (const struct ieee80211_mgmt *) buf; if (vif->nw_type == AP_NETWORK && test_bit(CONNECTED, &vif->flags) && ieee80211_is_probe_resp(mgmt->frame_control) && ath6kl_is_p2p_go_ssid(buf, len)) { /* * Send Probe Response frame in GO mode using a separate WMI * command to allow the target to fill in the generic IEs. */ *cookie = 0; /* TX status not supported */ return ath6kl_send_go_probe_resp(vif, buf, len, freq); } id = vif->send_action_id++; if (id == 0) { /* * 0 is a reserved value in the WMI command and shall not be * used for the command. */ id = vif->send_action_id++; } *cookie = id; /* AP mode Power saving processing */ if (vif->nw_type == AP_NETWORK) { queued = ath6kl_mgmt_powersave_ap(vif, id, freq, wait, buf, len, &more_data, no_cck); if (queued) return 0; } return ath6kl_wmi_send_mgmt_cmd(ar->wmi, vif->fw_vif_idx, id, freq, wait, buf, len, no_cck); } static int ath6kl_get_antenna(struct wiphy *wiphy, int radio_idx, u32 *tx_ant, u32 *rx_ant) { struct ath6kl *ar = wiphy_priv(wiphy); *tx_ant = ar->hw.tx_ant; *rx_ant = ar->hw.rx_ant; return 0; } static void ath6kl_update_mgmt_frame_registrations(struct wiphy *wiphy, struct wireless_dev *wdev, struct mgmt_frame_regs *upd) { struct ath6kl_vif *vif = ath6kl_vif_from_wdev(wdev); /* * FIXME: send WMI_PROBE_REQ_REPORT_CMD here instead of hardcoding * the reporting in the target all the time, this callback * *is* allowed to sleep after all. */ vif->probe_req_report = upd->interface_stypes & BIT(IEEE80211_STYPE_PROBE_REQ >> 4); } static int ath6kl_cfg80211_sscan_start(struct wiphy *wiphy, struct net_device *dev, struct cfg80211_sched_scan_request *request) { struct ath6kl *ar = ath6kl_priv(dev); struct ath6kl_vif *vif = netdev_priv(dev); u16 interval; int ret, rssi_thold; int n_match_sets = request->n_match_sets; /* * If there's a matchset w/o an SSID, then assume it's just for * the RSSI (nothing else is currently supported) and ignore it. * The device only supports a global RSSI filter that we set below. */ if (n_match_sets == 1 && !request->match_sets[0].ssid.ssid_len) n_match_sets = 0; if (ar->state != ATH6KL_STATE_ON) return -EIO; if (vif->sme_state != SME_DISCONNECTED) return -EBUSY; ath6kl_cfg80211_scan_complete_event(vif, true); ret = ath6kl_set_probed_ssids(ar, vif, request->ssids, request->n_ssids, request->match_sets, n_match_sets); if (ret < 0) return ret; if (!n_match_sets) { ret = ath6kl_wmi_bssfilter_cmd(ar->wmi, vif->fw_vif_idx, ALL_BSS_FILTER, 0); if (ret < 0) return ret; } else { ret = ath6kl_wmi_bssfilter_cmd(ar->wmi, vif->fw_vif_idx, MATCHED_SSID_FILTER, 0); if (ret < 0) return ret; } if (test_bit(ATH6KL_FW_CAPABILITY_RSSI_SCAN_THOLD, ar->fw_capabilities)) { if (request->min_rssi_thold <= NL80211_SCAN_RSSI_THOLD_OFF) rssi_thold = 0; else if (request->min_rssi_thold < -127) rssi_thold = -127; else rssi_thold = request->min_rssi_thold; ret = ath6kl_wmi_set_rssi_filter_cmd(ar->wmi, vif->fw_vif_idx, rssi_thold); if (ret) { ath6kl_err("failed to set RSSI threshold for scan\n"); return ret; } } /* fw uses seconds, also make sure that it's >0 */ interval = max_t(u16, 1, request->scan_plans[0].interval); ath6kl_wmi_scanparams_cmd(ar->wmi, vif->fw_vif_idx, interval, interval, vif->bg_scan_period, 0, 0, 0, 3, 0, 0, 0); /* this also clears IE in fw if it's not set */ ret = ath6kl_wmi_set_appie_cmd(ar->wmi, vif->fw_vif_idx, WMI_FRAME_PROBE_REQ, request->ie, request->ie_len); if (ret) { ath6kl_warn("Failed to set probe request IE for scheduled scan: %d\n", ret); return ret; } ret = ath6kl_wmi_enable_sched_scan_cmd(ar->wmi, vif->fw_vif_idx, true); if (ret) return ret; set_bit(SCHED_SCANNING, &vif->flags); return 0; } static int ath6kl_cfg80211_sscan_stop(struct wiphy *wiphy, struct net_device *dev, u64 reqid) { struct ath6kl_vif *vif = netdev_priv(dev); bool stopped; stopped = __ath6kl_cfg80211_sscan_stop(vif); if (!stopped) return -EIO; return 0; } static int ath6kl_cfg80211_set_bitrate(struct wiphy *wiphy, struct net_device *dev, unsigned int link_id, const u8 *addr, const struct cfg80211_bitrate_mask *mask) { struct ath6kl *ar = ath6kl_priv(dev); struct ath6kl_vif *vif = netdev_priv(dev); return ath6kl_wmi_set_bitrate_mask(ar->wmi, vif->fw_vif_idx, mask); } static int ath6kl_cfg80211_set_txe_config(struct wiphy *wiphy, struct net_device *dev, u32 rate, u32 pkts, u32 intvl) { struct ath6kl *ar = ath6kl_priv(dev); struct ath6kl_vif *vif = netdev_priv(dev); if (vif->nw_type != INFRA_NETWORK || !test_bit(ATH6KL_FW_CAPABILITY_TX_ERR_NOTIFY, ar->fw_capabilities)) return -EOPNOTSUPP; if (vif->sme_state != SME_CONNECTED) return -ENOTCONN; /* save this since the firmware won't report the interval */ vif->txe_intvl = intvl; return ath6kl_wmi_set_txe_notify(ar->wmi, vif->fw_vif_idx, rate, pkts, intvl); } static const struct ieee80211_txrx_stypes ath6kl_mgmt_stypes[NUM_NL80211_IFTYPES] = { [NL80211_IFTYPE_STATION] = { .tx = BIT(IEEE80211_STYPE_ACTION >> 4) | BIT(IEEE80211_STYPE_PROBE_RESP >> 4), .rx = BIT(IEEE80211_STYPE_ACTION >> 4) | BIT(IEEE80211_STYPE_PROBE_REQ >> 4) }, [NL80211_IFTYPE_AP] = { .tx = BIT(IEEE80211_STYPE_ACTION >> 4) | BIT(IEEE80211_STYPE_PROBE_RESP >> 4), .rx = BIT(IEEE80211_STYPE_ACTION >> 4) | BIT(IEEE80211_STYPE_PROBE_REQ >> 4) }, [NL80211_IFTYPE_P2P_CLIENT] = { .tx = BIT(IEEE80211_STYPE_ACTION >> 4) | BIT(IEEE80211_STYPE_PROBE_RESP >> 4), .rx = BIT(IEEE80211_STYPE_ACTION >> 4) | BIT(IEEE80211_STYPE_PROBE_REQ >> 4) }, [NL80211_IFTYPE_P2P_GO] = { .tx = BIT(IEEE80211_STYPE_ACTION >> 4) | BIT(IEEE80211_STYPE_PROBE_RESP >> 4), .rx = BIT(IEEE80211_STYPE_ACTION >> 4) | BIT(IEEE80211_STYPE_PROBE_REQ >> 4) }, }; static struct cfg80211_ops ath6kl_cfg80211_ops = { .add_virtual_intf = ath6kl_cfg80211_add_iface, .del_virtual_intf = ath6kl_cfg80211_del_iface, .change_virtual_intf = ath6kl_cfg80211_change_iface, .scan = ath6kl_cfg80211_scan, .connect = ath6kl_cfg80211_connect, .disconnect = ath6kl_cfg80211_disconnect, .add_key = ath6kl_cfg80211_add_key, .get_key = ath6kl_cfg80211_get_key, .del_key = ath6kl_cfg80211_del_key, .set_default_key = ath6kl_cfg80211_set_default_key, .set_wiphy_params = ath6kl_cfg80211_set_wiphy_params, .set_tx_power = ath6kl_cfg80211_set_txpower, .get_tx_power = ath6kl_cfg80211_get_txpower, .set_power_mgmt = ath6kl_cfg80211_set_power_mgmt, .join_ibss = ath6kl_cfg80211_join_ibss, .leave_ibss = ath6kl_cfg80211_leave_ibss, .get_station = ath6kl_get_station, .set_pmksa = ath6kl_set_pmksa, .del_pmksa = ath6kl_del_pmksa, .flush_pmksa = ath6kl_flush_pmksa, CFG80211_TESTMODE_CMD(ath6kl_tm_cmd) #ifdef CONFIG_PM .suspend = __ath6kl_cfg80211_suspend, .resume = __ath6kl_cfg80211_resume, #endif .start_ap = ath6kl_start_ap, .change_beacon = ath6kl_change_beacon, .stop_ap = ath6kl_stop_ap, .del_station = ath6kl_del_station, .change_station = ath6kl_change_station, .remain_on_channel = ath6kl_remain_on_channel, .cancel_remain_on_channel = ath6kl_cancel_remain_on_channel, .mgmt_tx = ath6kl_mgmt_tx, .update_mgmt_frame_registrations = ath6kl_update_mgmt_frame_registrations, .get_antenna = ath6kl_get_antenna, .sched_scan_start = ath6kl_cfg80211_sscan_start, .sched_scan_stop = ath6kl_cfg80211_sscan_stop, .set_bitrate_mask = ath6kl_cfg80211_set_bitrate, .set_cqm_txe_config = ath6kl_cfg80211_set_txe_config, }; void ath6kl_cfg80211_stop(struct ath6kl_vif *vif) { ath6kl_cfg80211_sscan_disable(vif); switch (vif->sme_state) { case SME_DISCONNECTED: break; case SME_CONNECTING: cfg80211_connect_result(vif->ndev, vif->bssid, NULL, 0, NULL, 0, WLAN_STATUS_UNSPECIFIED_FAILURE, GFP_KERNEL); break; case SME_CONNECTED: cfg80211_disconnected(vif->ndev, 0, NULL, 0, true, GFP_KERNEL); break; } if (vif->ar->state != ATH6KL_STATE_RECOVERY && (test_bit(CONNECTED, &vif->flags) || test_bit(CONNECT_PEND, &vif->flags))) ath6kl_wmi_disconnect_cmd(vif->ar->wmi, vif->fw_vif_idx); vif->sme_state = SME_DISCONNECTED; clear_bit(CONNECTED, &vif->flags); clear_bit(CONNECT_PEND, &vif->flags); /* Stop netdev queues, needed during recovery */ netif_stop_queue(vif->ndev); netif_carrier_off(vif->ndev); /* disable scanning */ if (vif->ar->state != ATH6KL_STATE_RECOVERY && ath6kl_wmi_scanparams_cmd(vif->ar->wmi, vif->fw_vif_idx, 0xFFFF, 0, 0, 0, 0, 0, 0, 0, 0, 0) != 0) ath6kl_warn("failed to disable scan during stop\n"); ath6kl_cfg80211_scan_complete_event(vif, true); } void ath6kl_cfg80211_stop_all(struct ath6kl *ar) { struct ath6kl_vif *vif; vif = ath6kl_vif_first(ar); if (!vif && ar->state != ATH6KL_STATE_RECOVERY) { /* save the current power mode before enabling power save */ ar->wmi->saved_pwr_mode = ar->wmi->pwr_mode; if (ath6kl_wmi_powermode_cmd(ar->wmi, 0, REC_POWER) != 0) ath6kl_warn("ath6kl_deep_sleep_enable: wmi_powermode_cmd failed\n"); return; } /* * FIXME: we should take ar->list_lock to protect changes in the * vif_list, but that's not trivial to do as ath6kl_cfg80211_stop() * sleeps. */ list_for_each_entry(vif, &ar->vif_list, list) ath6kl_cfg80211_stop(vif); } static void ath6kl_cfg80211_reg_notify(struct wiphy *wiphy, struct regulatory_request *request) { struct ath6kl *ar = wiphy_priv(wiphy); u32 rates[NUM_NL80211_BANDS]; int ret, i; ath6kl_dbg(ATH6KL_DBG_WLAN_CFG, "cfg reg_notify %c%c%s%s initiator %d hint_type %d\n", request->alpha2[0], request->alpha2[1], request->intersect ? " intersect" : "", request->processed ? " processed" : "", request->initiator, request->user_reg_hint_type); if (request->user_reg_hint_type != NL80211_USER_REG_HINT_CELL_BASE) return; ret = ath6kl_wmi_set_regdomain_cmd(ar->wmi, request->alpha2); if (ret) { ath6kl_err("failed to set regdomain: %d\n", ret); return; } /* * Firmware will apply the regdomain change only after a scan is * issued and it will send a WMI_REGDOMAIN_EVENTID when it has been * changed. */ for (i = 0; i < NUM_NL80211_BANDS; i++) if (wiphy->bands[i]) rates[i] = (1 << wiphy->bands[i]->n_bitrates) - 1; ret = ath6kl_wmi_beginscan_cmd(ar->wmi, 0, WMI_LONG_SCAN, false, false, 0, ATH6KL_FG_SCAN_INTERVAL, 0, NULL, false, rates); if (ret) { ath6kl_err("failed to start scan for a regdomain change: %d\n", ret); return; } } static int ath6kl_cfg80211_vif_init(struct ath6kl_vif *vif) { vif->aggr_cntxt = aggr_init(vif); if (!vif->aggr_cntxt) { ath6kl_err("failed to initialize aggr\n"); return -ENOMEM; } timer_setup(&vif->disconnect_timer, disconnect_timer_handler, 0); timer_setup(&vif->sched_scan_timer, ath6kl_wmi_sscan_timer, 0); set_bit(WMM_ENABLED, &vif->flags); spin_lock_init(&vif->if_lock); INIT_LIST_HEAD(&vif->mc_filter); return 0; } void ath6kl_cfg80211_vif_stop(struct ath6kl_vif *vif, bool wmi_ready) { static u8 bcast_mac[] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff}; bool discon_issued; netif_stop_queue(vif->ndev); clear_bit(WLAN_ENABLED, &vif->flags); if (wmi_ready) { discon_issued = test_bit(CONNECTED, &vif->flags) || test_bit(CONNECT_PEND, &vif->flags); ath6kl_disconnect(vif); timer_delete(&vif->disconnect_timer); if (discon_issued) ath6kl_disconnect_event(vif, DISCONNECT_CMD, (vif->nw_type & AP_NETWORK) ? bcast_mac : vif->bssid, 0, NULL, 0); } if (vif->scan_req) { struct cfg80211_scan_info info = { .aborted = true, }; cfg80211_scan_done(vif->scan_req, &info); vif->scan_req = NULL; } /* need to clean up enhanced bmiss detection fw state */ ath6kl_cfg80211_sta_bmiss_enhance(vif, false); } void ath6kl_cfg80211_vif_cleanup(struct ath6kl_vif *vif) { struct ath6kl *ar = vif->ar; struct ath6kl_mc_filter *mc_filter, *tmp; aggr_module_destroy(vif->aggr_cntxt); ar->avail_idx_map |= BIT(vif->fw_vif_idx); if (vif->nw_type == ADHOC_NETWORK) ar->ibss_if_active = false; list_for_each_entry_safe(mc_filter, tmp, &vif->mc_filter, list) { list_del(&mc_filter->list); kfree(mc_filter); } cfg80211_unregister_netdevice(vif->ndev); ar->num_vif--; } static const char ath6kl_gstrings_sta_stats[][ETH_GSTRING_LEN] = { /* Common stats names used by many drivers. */ "tx_pkts_nic", "tx_bytes_nic", "rx_pkts_nic", "rx_bytes_nic", /* TX stats. */ "d_tx_ucast_pkts", "d_tx_bcast_pkts", "d_tx_ucast_bytes", "d_tx_bcast_bytes", "d_tx_rts_ok", "d_tx_error", "d_tx_fail", "d_tx_retry", "d_tx_multi_retry", "d_tx_rts_fail", "d_tx_tkip_counter_measures", /* RX Stats. */ "d_rx_ucast_pkts", "d_rx_ucast_rate", "d_rx_bcast_pkts", "d_rx_ucast_bytes", "d_rx_bcast_bytes", "d_rx_frag_pkt", "d_rx_error", "d_rx_crc_err", "d_rx_keycache_miss", "d_rx_decrypt_crc_err", "d_rx_duplicate_frames", "d_rx_mic_err", "d_rx_tkip_format_err", "d_rx_ccmp_format_err", "d_rx_ccmp_replay_err", /* Misc stats. */ "d_beacon_miss", "d_num_connects", "d_num_disconnects", "d_beacon_avg_rssi", "d_arp_received", "d_arp_matched", "d_arp_replied" }; #define ATH6KL_STATS_LEN ARRAY_SIZE(ath6kl_gstrings_sta_stats) static int ath6kl_get_sset_count(struct net_device *dev, int sset) { int rv = 0; if (sset == ETH_SS_STATS) rv += ATH6KL_STATS_LEN; if (rv == 0) return -EOPNOTSUPP; return rv; } static void ath6kl_get_stats(struct net_device *dev, struct ethtool_stats *stats, u64 *data) { struct ath6kl_vif *vif = netdev_priv(dev); struct ath6kl *ar = vif->ar; int i = 0; struct target_stats *tgt_stats; memset(data, 0, sizeof(u64) * ATH6KL_STATS_LEN); ath6kl_read_tgt_stats(ar, vif); tgt_stats = &vif->target_stats; data[i++] = tgt_stats->tx_ucast_pkt + tgt_stats->tx_bcast_pkt; data[i++] = tgt_stats->tx_ucast_byte + tgt_stats->tx_bcast_byte; data[i++] = tgt_stats->rx_ucast_pkt + tgt_stats->rx_bcast_pkt; data[i++] = tgt_stats->rx_ucast_byte + tgt_stats->rx_bcast_byte; data[i++] = tgt_stats->tx_ucast_pkt; data[i++] = tgt_stats->tx_bcast_pkt; data[i++] = tgt_stats->tx_ucast_byte; data[i++] = tgt_stats->tx_bcast_byte; data[i++] = tgt_stats->tx_rts_success_cnt; data[i++] = tgt_stats->tx_err; data[i++] = tgt_stats->tx_fail_cnt; data[i++] = tgt_stats->tx_retry_cnt; data[i++] = tgt_stats->tx_mult_retry_cnt; data[i++] = tgt_stats->tx_rts_fail_cnt; data[i++] = tgt_stats->tkip_cnter_measures_invoked; data[i++] = tgt_stats->rx_ucast_pkt; data[i++] = tgt_stats->rx_ucast_rate; data[i++] = tgt_stats->rx_bcast_pkt; data[i++] = tgt_stats->rx_ucast_byte; data[i++] = tgt_stats->rx_bcast_byte; data[i++] = tgt_stats->rx_frgment_pkt; data[i++] = tgt_stats->rx_err; data[i++] = tgt_stats->rx_crc_err; data[i++] = tgt_stats->rx_key_cache_miss; data[i++] = tgt_stats->rx_decrypt_err; data[i++] = tgt_stats->rx_dupl_frame; data[i++] = tgt_stats->tkip_local_mic_fail; data[i++] = tgt_stats->tkip_fmt_err; data[i++] = tgt_stats->ccmp_fmt_err; data[i++] = tgt_stats->ccmp_replays; data[i++] = tgt_stats->cs_bmiss_cnt; data[i++] = tgt_stats->cs_connect_cnt; data[i++] = tgt_stats->cs_discon_cnt; data[i++] = tgt_stats->cs_ave_beacon_rssi; data[i++] = tgt_stats->arp_received; data[i++] = tgt_stats->arp_matched; data[i++] = tgt_stats->arp_replied; if (i != ATH6KL_STATS_LEN) { WARN_ON_ONCE(1); ath6kl_err("ethtool stats error, i: %d STATS_LEN: %d\n", i, (int)ATH6KL_STATS_LEN); } } /* These stats are per NIC, not really per vdev, so we just ignore dev. */ static void ath6kl_get_strings(struct net_device *dev, u32 sset, u8 *data) { int sz_sta_stats = 0; if (sset == ETH_SS_STATS) { sz_sta_stats = sizeof(ath6kl_gstrings_sta_stats); memcpy(data, ath6kl_gstrings_sta_stats, sz_sta_stats); } } static const struct ethtool_ops ath6kl_ethtool_ops = { .get_drvinfo = cfg80211_get_drvinfo, .get_link = ethtool_op_get_link, .get_strings = ath6kl_get_strings, .get_ethtool_stats = ath6kl_get_stats, .get_sset_count = ath6kl_get_sset_count, }; struct wireless_dev *ath6kl_interface_add(struct ath6kl *ar, const char *name, unsigned char name_assign_type, enum nl80211_iftype type, u8 fw_vif_idx, u8 nw_type) { struct net_device *ndev; struct ath6kl_vif *vif; u8 addr[ETH_ALEN]; ndev = alloc_netdev(sizeof(*vif), name, name_assign_type, ether_setup); if (!ndev) return NULL; vif = netdev_priv(ndev); ndev->ieee80211_ptr = &vif->wdev; vif->wdev.wiphy = ar->wiphy; vif->ar = ar; vif->ndev = ndev; SET_NETDEV_DEV(ndev, wiphy_dev(vif->wdev.wiphy)); vif->wdev.netdev = ndev; vif->wdev.iftype = type; vif->fw_vif_idx = fw_vif_idx; vif->nw_type = nw_type; vif->next_mode = nw_type; vif->listen_intvl_t = ATH6KL_DEFAULT_LISTEN_INTVAL; vif->bmiss_time_t = ATH6KL_DEFAULT_BMISS_TIME; vif->bg_scan_period = 0; vif->htcap[NL80211_BAND_2GHZ].ht_enable = true; vif->htcap[NL80211_BAND_5GHZ].ht_enable = true; ether_addr_copy(addr, ar->mac_addr); if (fw_vif_idx != 0) { addr[0] = (addr[0] ^ (1 << fw_vif_idx)) | 0x2; if (test_bit(ATH6KL_FW_CAPABILITY_CUSTOM_MAC_ADDR, ar->fw_capabilities)) addr[4] ^= 0x80; } eth_hw_addr_set(ndev, addr); init_netdev(ndev); ath6kl_init_control_info(vif); if (ath6kl_cfg80211_vif_init(vif)) goto err; netdev_set_default_ethtool_ops(ndev, &ath6kl_ethtool_ops); if (cfg80211_register_netdevice(ndev)) goto err; ar->avail_idx_map &= ~BIT(fw_vif_idx); vif->sme_state = SME_DISCONNECTED; set_bit(WLAN_ENABLED, &vif->flags); ar->wlan_pwr_state = WLAN_POWER_STATE_ON; if (type == NL80211_IFTYPE_ADHOC) ar->ibss_if_active = true; spin_lock_bh(&ar->list_lock); list_add_tail(&vif->list, &ar->vif_list); spin_unlock_bh(&ar->list_lock); return &vif->wdev; err: aggr_module_destroy(vif->aggr_cntxt); free_netdev(ndev); return NULL; } #ifdef CONFIG_PM static const struct wiphy_wowlan_support ath6kl_wowlan_support = { .flags = WIPHY_WOWLAN_MAGIC_PKT | WIPHY_WOWLAN_DISCONNECT | WIPHY_WOWLAN_GTK_REKEY_FAILURE | WIPHY_WOWLAN_SUPPORTS_GTK_REKEY | WIPHY_WOWLAN_EAP_IDENTITY_REQ | WIPHY_WOWLAN_4WAY_HANDSHAKE, .n_patterns = WOW_MAX_FILTERS_PER_LIST, .pattern_min_len = 1, .pattern_max_len = WOW_PATTERN_SIZE, }; #endif int ath6kl_cfg80211_init(struct ath6kl *ar) { struct wiphy *wiphy = ar->wiphy; bool band_2gig = false, band_5gig = false, ht = false; int ret; wiphy->mgmt_stypes = ath6kl_mgmt_stypes; wiphy->max_remain_on_channel_duration = 5000; /* set device pointer for wiphy */ set_wiphy_dev(wiphy, ar->dev); wiphy->interface_modes = BIT(NL80211_IFTYPE_STATION) | BIT(NL80211_IFTYPE_ADHOC) | BIT(NL80211_IFTYPE_AP); if (ar->p2p) { wiphy->interface_modes |= BIT(NL80211_IFTYPE_P2P_GO) | BIT(NL80211_IFTYPE_P2P_CLIENT); } if (IS_ENABLED(CONFIG_ATH6KL_REGDOMAIN) && test_bit(ATH6KL_FW_CAPABILITY_REGDOMAIN, ar->fw_capabilities)) { wiphy->reg_notifier = ath6kl_cfg80211_reg_notify; ar->wiphy->features |= NL80211_FEATURE_CELL_BASE_REG_HINTS; } /* max num of ssids that can be probed during scanning */ wiphy->max_scan_ssids = MAX_PROBED_SSIDS; /* max num of ssids that can be matched after scan */ if (test_bit(ATH6KL_FW_CAPABILITY_SCHED_SCAN_MATCH_LIST, ar->fw_capabilities)) wiphy->max_match_sets = MAX_PROBED_SSIDS; wiphy->max_scan_ie_len = 1000; /* FIX: what is correct limit? */ switch (ar->hw.cap) { case WMI_11AN_CAP: ht = true; fallthrough; case WMI_11A_CAP: band_5gig = true; break; case WMI_11GN_CAP: ht = true; fallthrough; case WMI_11G_CAP: band_2gig = true; break; case WMI_11AGN_CAP: ht = true; fallthrough; case WMI_11AG_CAP: band_2gig = true; band_5gig = true; break; default: ath6kl_err("invalid phy capability!\n"); return -EINVAL; } /* * Even if the fw has HT support, advertise HT cap only when * the firmware has support to override RSN capability, otherwise * 4-way handshake would fail. */ if (!(ht && test_bit(ATH6KL_FW_CAPABILITY_RSN_CAP_OVERRIDE, ar->fw_capabilities))) { ath6kl_band_2ghz.ht_cap.cap = 0; ath6kl_band_2ghz.ht_cap.ht_supported = false; ath6kl_band_5ghz.ht_cap.cap = 0; ath6kl_band_5ghz.ht_cap.ht_supported = false; if (ht) ath6kl_err("Firmware lacks RSN-CAP-OVERRIDE, so HT (802.11n) is disabled."); } if (test_bit(ATH6KL_FW_CAPABILITY_64BIT_RATES, ar->fw_capabilities)) { ath6kl_band_2ghz.ht_cap.mcs.rx_mask[0] = 0xff; ath6kl_band_5ghz.ht_cap.mcs.rx_mask[0] = 0xff; ath6kl_band_2ghz.ht_cap.mcs.rx_mask[1] = 0xff; ath6kl_band_5ghz.ht_cap.mcs.rx_mask[1] = 0xff; ar->hw.tx_ant = 0x3; /* mask, 2 antenna */ ar->hw.rx_ant = 0x3; } else { ath6kl_band_2ghz.ht_cap.mcs.rx_mask[0] = 0xff; ath6kl_band_5ghz.ht_cap.mcs.rx_mask[0] = 0xff; ar->hw.tx_ant = 1; ar->hw.rx_ant = 1; } wiphy->available_antennas_tx = ar->hw.tx_ant; wiphy->available_antennas_rx = ar->hw.rx_ant; if (band_2gig) wiphy->bands[NL80211_BAND_2GHZ] = &ath6kl_band_2ghz; if (band_5gig) wiphy->bands[NL80211_BAND_5GHZ] = &ath6kl_band_5ghz; wiphy->signal_type = CFG80211_SIGNAL_TYPE_MBM; wiphy->cipher_suites = cipher_suites; wiphy->n_cipher_suites = ARRAY_SIZE(cipher_suites); #ifdef CONFIG_PM wiphy->wowlan = &ath6kl_wowlan_support; #endif wiphy->max_sched_scan_ssids = MAX_PROBED_SSIDS; ar->wiphy->flags |= WIPHY_FLAG_SUPPORTS_FW_ROAM | WIPHY_FLAG_HAVE_AP_SME | WIPHY_FLAG_HAS_REMAIN_ON_CHANNEL | WIPHY_FLAG_AP_PROBE_RESP_OFFLOAD; if (test_bit(ATH6KL_FW_CAPABILITY_SCHED_SCAN_V2, ar->fw_capabilities)) ar->wiphy->max_sched_scan_reqs = 1; if (test_bit(ATH6KL_FW_CAPABILITY_INACTIVITY_TIMEOUT, ar->fw_capabilities)) ar->wiphy->features |= NL80211_FEATURE_INACTIVITY_TIMER; ar->wiphy->probe_resp_offload = NL80211_PROBE_RESP_OFFLOAD_SUPPORT_WPS | NL80211_PROBE_RESP_OFFLOAD_SUPPORT_WPS2 | NL80211_PROBE_RESP_OFFLOAD_SUPPORT_P2P; ret = wiphy_register(wiphy); if (ret < 0) { ath6kl_err("couldn't register wiphy device\n"); return ret; } ar->wiphy_registered = true; return 0; } void ath6kl_cfg80211_cleanup(struct ath6kl *ar) { wiphy_unregister(ar->wiphy); ar->wiphy_registered = false; } struct ath6kl *ath6kl_cfg80211_create(void) { struct ath6kl *ar; struct wiphy *wiphy; /* create a new wiphy for use with cfg80211 */ wiphy = wiphy_new(&ath6kl_cfg80211_ops, sizeof(struct ath6kl)); if (!wiphy) { ath6kl_err("couldn't allocate wiphy device\n"); return NULL; } ar = wiphy_priv(wiphy); ar->wiphy = wiphy; return ar; } /* Note: ar variable must not be accessed after calling this! */ void ath6kl_cfg80211_destroy(struct ath6kl *ar) { int i; for (i = 0; i < AP_MAX_NUM_STA; i++) kfree(ar->sta_list[i].aggr_conn); wiphy_free(ar->wiphy); }
1 1 1 1 1 1 19 2 4 1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 // SPDX-License-Identifier: GPL-2.0-or-later /* * The Virtual DVB test driver serves as a reference DVB driver and helps * validate the existing APIs in the media subsystem. It can also aid * developers working on userspace applications. * * Copyright (C) 2020 Daniel W. S. Almeida * Based on the example driver written by Emard <emard@softhome.net> */ #include <linux/errno.h> #include <linux/i2c.h> #include <linux/init.h> #include <linux/kernel.h> #include <linux/module.h> #include <linux/printk.h> #include <linux/random.h> #include <linux/ratelimit.h> #include <linux/slab.h> #include <linux/string.h> #include <linux/workqueue.h> #include <media/dvb_frontend.h> #include "vidtv_demod.h" #define POLL_THRD_TIME 2000 /* ms */ static const struct vidtv_demod_cnr_to_qual_s vidtv_demod_c_cnr_2_qual[] = { /* from libdvbv5 source code, in milli db */ { QAM_256, FEC_NONE, 34000, 38000}, { QAM_64, FEC_NONE, 30000, 34000}, }; static const struct vidtv_demod_cnr_to_qual_s vidtv_demod_s_cnr_2_qual[] = { /* from libdvbv5 source code, in milli db */ { QPSK, FEC_1_2, 7000, 10000}, { QPSK, FEC_2_3, 9000, 12000}, { QPSK, FEC_3_4, 10000, 13000}, { QPSK, FEC_5_6, 11000, 14000}, { QPSK, FEC_7_8, 12000, 15000}, }; static const struct vidtv_demod_cnr_to_qual_s vidtv_demod_s2_cnr_2_qual[] = { /* from libdvbv5 source code, in milli db */ { QPSK, FEC_1_2, 9000, 12000}, { QPSK, FEC_2_3, 11000, 14000}, { QPSK, FEC_3_4, 12000, 15000}, { QPSK, FEC_5_6, 12000, 15000}, { QPSK, FEC_8_9, 13000, 16000}, { QPSK, FEC_9_10, 13500, 16500}, { PSK_8, FEC_2_3, 14500, 17500}, { PSK_8, FEC_3_4, 16000, 19000}, { PSK_8, FEC_5_6, 17500, 20500}, { PSK_8, FEC_8_9, 19000, 22000}, }; static const struct vidtv_demod_cnr_to_qual_s vidtv_demod_t_cnr_2_qual[] = { /* from libdvbv5 source code, in milli db*/ { QPSK, FEC_1_2, 4100, 5900}, { QPSK, FEC_2_3, 6100, 9600}, { QPSK, FEC_3_4, 7200, 12400}, { QPSK, FEC_5_6, 8500, 15600}, { QPSK, FEC_7_8, 9200, 17500}, { QAM_16, FEC_1_2, 9800, 11800}, { QAM_16, FEC_2_3, 12100, 15300}, { QAM_16, FEC_3_4, 13400, 18100}, { QAM_16, FEC_5_6, 14800, 21300}, { QAM_16, FEC_7_8, 15700, 23600}, { QAM_64, FEC_1_2, 14000, 16000}, { QAM_64, FEC_2_3, 19900, 25400}, { QAM_64, FEC_3_4, 24900, 27900}, { QAM_64, FEC_5_6, 21300, 23300}, { QAM_64, FEC_7_8, 22000, 24000}, }; static const struct vidtv_demod_cnr_to_qual_s *vidtv_match_cnr_s(struct dvb_frontend *fe) { const struct vidtv_demod_cnr_to_qual_s *cnr2qual = NULL; struct device *dev = fe->dvb->device; struct dtv_frontend_properties *c; u32 array_size = 0; u32 i; c = &fe->dtv_property_cache; switch (c->delivery_system) { case SYS_DVBT: case SYS_DVBT2: cnr2qual = vidtv_demod_t_cnr_2_qual; array_size = ARRAY_SIZE(vidtv_demod_t_cnr_2_qual); break; case SYS_DVBS: cnr2qual = vidtv_demod_s_cnr_2_qual; array_size = ARRAY_SIZE(vidtv_demod_s_cnr_2_qual); break; case SYS_DVBS2: cnr2qual = vidtv_demod_s2_cnr_2_qual; array_size = ARRAY_SIZE(vidtv_demod_s2_cnr_2_qual); break; case SYS_DVBC_ANNEX_A: cnr2qual = vidtv_demod_c_cnr_2_qual; array_size = ARRAY_SIZE(vidtv_demod_c_cnr_2_qual); break; default: dev_warn_ratelimited(dev, "%s: unsupported delivery system: %u\n", __func__, c->delivery_system); break; } for (i = 0; i < array_size; i++) if (cnr2qual[i].modulation == c->modulation && cnr2qual[i].fec == c->fec_inner) return &cnr2qual[i]; return NULL; /* not found */ } static void vidtv_clean_stats(struct dvb_frontend *fe) { struct dtv_frontend_properties *c = &fe->dtv_property_cache; /* Fill the length of each status counter */ /* Signal is always available */ c->strength.len = 1; c->strength.stat[0].scale = FE_SCALE_DECIBEL; c->strength.stat[0].svalue = 0; /* Usually available only after Viterbi lock */ c->cnr.stat[0].scale = FE_SCALE_NOT_AVAILABLE; c->cnr.stat[0].svalue = 0; c->cnr.len = 1; /* Those depends on full lock */ c->pre_bit_error.stat[0].scale = FE_SCALE_NOT_AVAILABLE; c->pre_bit_error.stat[0].uvalue = 0; c->pre_bit_error.len = 1; c->pre_bit_count.stat[0].scale = FE_SCALE_NOT_AVAILABLE; c->pre_bit_count.stat[0].uvalue = 0; c->pre_bit_count.len = 1; c->post_bit_error.stat[0].scale = FE_SCALE_NOT_AVAILABLE; c->post_bit_error.stat[0].uvalue = 0; c->post_bit_error.len = 1; c->post_bit_count.stat[0].scale = FE_SCALE_NOT_AVAILABLE; c->post_bit_count.stat[0].uvalue = 0; c->post_bit_count.len = 1; c->block_error.stat[0].scale = FE_SCALE_NOT_AVAILABLE; c->block_error.stat[0].uvalue = 0; c->block_error.len = 1; c->block_count.stat[0].scale = FE_SCALE_NOT_AVAILABLE; c->block_count.stat[0].uvalue = 0; c->block_count.len = 1; } static void vidtv_demod_update_stats(struct dvb_frontend *fe) { struct dtv_frontend_properties *c = &fe->dtv_property_cache; struct vidtv_demod_state *state = fe->demodulator_priv; u32 scale; if (state->status & FE_HAS_LOCK) { scale = FE_SCALE_COUNTER; c->cnr.stat[0].scale = FE_SCALE_DECIBEL; } else { scale = FE_SCALE_NOT_AVAILABLE; c->cnr.stat[0].scale = scale; } c->pre_bit_error.stat[0].scale = scale; c->pre_bit_count.stat[0].scale = scale; c->post_bit_error.stat[0].scale = scale; c->post_bit_count.stat[0].scale = scale; c->block_error.stat[0].scale = scale; c->block_count.stat[0].scale = scale; /* * Add a 0.5% of randomness at the signal strength and CNR, * and make them different, as we want to have something closer * to a real case scenario. * * Also, usually, signal strength is a negative number in dBm. */ c->strength.stat[0].svalue = state->tuner_cnr; c->strength.stat[0].svalue -= get_random_u32_below(state->tuner_cnr / 50); c->strength.stat[0].svalue -= 68000; /* Adjust to a better range */ c->cnr.stat[0].svalue = state->tuner_cnr; c->cnr.stat[0].svalue -= get_random_u32_below(state->tuner_cnr / 50); } static int vidtv_demod_read_status(struct dvb_frontend *fe, enum fe_status *status) { struct vidtv_demod_state *state = fe->demodulator_priv; const struct vidtv_demod_cnr_to_qual_s *cnr2qual = NULL; struct vidtv_demod_config *config = &state->config; u16 snr = 0; /* Simulate random lost of signal due to a bad-tuned channel */ cnr2qual = vidtv_match_cnr_s(&state->frontend); if (cnr2qual && state->tuner_cnr < cnr2qual->cnr_good && state->frontend.ops.tuner_ops.get_rf_strength) { state->frontend.ops.tuner_ops.get_rf_strength(&state->frontend, &snr); if (snr < cnr2qual->cnr_ok) { /* eventually lose the TS lock */ if (get_random_u32_below(100) < config->drop_tslock_prob_on_low_snr) state->status = 0; } else { /* recover if the signal improves */ if (get_random_u32_below(100) < config->recover_tslock_prob_on_good_snr) state->status = FE_HAS_SIGNAL | FE_HAS_CARRIER | FE_HAS_VITERBI | FE_HAS_SYNC | FE_HAS_LOCK; } } vidtv_demod_update_stats(&state->frontend); *status = state->status; return 0; } static int vidtv_demod_read_signal_strength(struct dvb_frontend *fe, u16 *strength) { struct dtv_frontend_properties *c = &fe->dtv_property_cache; *strength = c->strength.stat[0].uvalue; return 0; } /* * NOTE: * This is implemented here just to be used as an example for real * demod drivers. * * Should only be implemented if it actually reads something from the hardware. * Also, it should check for the locks, in order to avoid report wrong data * to userspace. */ static int vidtv_demod_get_frontend(struct dvb_frontend *fe, struct dtv_frontend_properties *p) { return 0; } static int vidtv_demod_set_frontend(struct dvb_frontend *fe) { struct vidtv_demod_state *state = fe->demodulator_priv; u32 tuner_status = 0; int ret; if (!fe->ops.tuner_ops.set_params) return 0; fe->ops.tuner_ops.set_params(fe); /* store the CNR returned by the tuner */ ret = fe->ops.tuner_ops.get_rf_strength(fe, &state->tuner_cnr); if (ret < 0) return ret; fe->ops.tuner_ops.get_status(fe, &tuner_status); state->status = (state->tuner_cnr > 0) ? FE_HAS_SIGNAL | FE_HAS_CARRIER | FE_HAS_VITERBI | FE_HAS_SYNC | FE_HAS_LOCK : 0; vidtv_demod_update_stats(fe); if (fe->ops.i2c_gate_ctrl) fe->ops.i2c_gate_ctrl(fe, 0); return 0; } /* * NOTE: * This is implemented here just to be used as an example for real * demod drivers. * * Should only be implemented if the demod has support for DVB-S or DVB-S2 */ static int vidtv_demod_set_tone(struct dvb_frontend *fe, enum fe_sec_tone_mode tone) { return 0; } /* * NOTE: * This is implemented here just to be used as an example for real * demod drivers. * * Should only be implemented if the demod has support for DVB-S or DVB-S2 */ static int vidtv_demod_set_voltage(struct dvb_frontend *fe, enum fe_sec_voltage voltage) { return 0; } /* * NOTE: * This is implemented here just to be used as an example for real * demod drivers. * * Should only be implemented if the demod has support for DVB-S or DVB-S2 */ static int vidtv_send_diseqc_msg(struct dvb_frontend *fe, struct dvb_diseqc_master_cmd *cmd) { return 0; } /* * NOTE: * This is implemented here just to be used as an example for real * demod drivers. * * Should only be implemented if the demod has support for DVB-S or DVB-S2 */ static int vidtv_diseqc_send_burst(struct dvb_frontend *fe, enum fe_sec_mini_cmd burst) { return 0; } static void vidtv_demod_release(struct dvb_frontend *fe) { struct vidtv_demod_state *state = fe->demodulator_priv; kfree(state); } static const struct dvb_frontend_ops vidtv_demod_ops = { .delsys = { SYS_DVBT, SYS_DVBT2, SYS_DVBC_ANNEX_A, SYS_DVBS, SYS_DVBS2, }, .info = { .name = "Dummy demod for DVB-T/T2/C/S/S2", .frequency_min_hz = 51 * MHz, .frequency_max_hz = 2150 * MHz, .frequency_stepsize_hz = 62500, .frequency_tolerance_hz = 29500 * kHz, .symbol_rate_min = 1000000, .symbol_rate_max = 45000000, .caps = FE_CAN_FEC_1_2 | FE_CAN_FEC_2_3 | FE_CAN_FEC_3_4 | FE_CAN_FEC_4_5 | FE_CAN_FEC_5_6 | FE_CAN_FEC_6_7 | FE_CAN_FEC_7_8 | FE_CAN_FEC_8_9 | FE_CAN_QAM_16 | FE_CAN_QAM_64 | FE_CAN_QAM_32 | FE_CAN_QAM_128 | FE_CAN_QAM_256 | FE_CAN_QAM_AUTO | FE_CAN_QPSK | FE_CAN_FEC_AUTO | FE_CAN_INVERSION_AUTO | FE_CAN_TRANSMISSION_MODE_AUTO | FE_CAN_GUARD_INTERVAL_AUTO | FE_CAN_HIERARCHY_AUTO, }, .release = vidtv_demod_release, .set_frontend = vidtv_demod_set_frontend, .get_frontend = vidtv_demod_get_frontend, .read_status = vidtv_demod_read_status, .read_signal_strength = vidtv_demod_read_signal_strength, /* For DVB-S/S2 */ .set_voltage = vidtv_demod_set_voltage, .set_tone = vidtv_demod_set_tone, .diseqc_send_master_cmd = vidtv_send_diseqc_msg, .diseqc_send_burst = vidtv_diseqc_send_burst, }; static const struct i2c_device_id vidtv_demod_i2c_id_table[] = { { "dvb_vidtv_demod" }, {} }; MODULE_DEVICE_TABLE(i2c, vidtv_demod_i2c_id_table); static int vidtv_demod_i2c_probe(struct i2c_client *client) { struct vidtv_tuner_config *config = client->dev.platform_data; struct vidtv_demod_state *state; /* allocate memory for the internal state */ state = kzalloc(sizeof(*state), GFP_KERNEL); if (!state) return -ENOMEM; /* create dvb_frontend */ memcpy(&state->frontend.ops, &vidtv_demod_ops, sizeof(struct dvb_frontend_ops)); memcpy(&state->config, config, sizeof(state->config)); state->frontend.demodulator_priv = state; i2c_set_clientdata(client, state); vidtv_clean_stats(&state->frontend); return 0; } static void vidtv_demod_i2c_remove(struct i2c_client *client) { struct vidtv_demod_state *state = i2c_get_clientdata(client); kfree(state); } static struct i2c_driver vidtv_demod_i2c_driver = { .driver = { .name = "dvb_vidtv_demod", .suppress_bind_attrs = true, }, .probe = vidtv_demod_i2c_probe, .remove = vidtv_demod_i2c_remove, .id_table = vidtv_demod_i2c_id_table, }; module_i2c_driver(vidtv_demod_i2c_driver); MODULE_DESCRIPTION("Virtual DVB Demodulator Driver"); MODULE_AUTHOR("Daniel W. S. Almeida"); MODULE_LICENSE("GPL");
38729 38928 39093 40917 41176 684 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef __X86_KERNEL_FPU_CONTEXT_H #define __X86_KERNEL_FPU_CONTEXT_H #include <asm/fpu/xstate.h> #include <asm/trace/fpu.h> /* Functions related to FPU context tracking */ /* * The in-register FPU state for an FPU context on a CPU is assumed to be * valid if the fpu->last_cpu matches the CPU, and the fpu_fpregs_owner_ctx * matches the FPU. * * If the FPU register state is valid, the kernel can skip restoring the * FPU state from memory. * * Any code that clobbers the FPU registers or updates the in-memory * FPU state for a task MUST let the rest of the kernel know that the * FPU registers are no longer valid for this task. * * Invalidate a resource you control: CPU if using the CPU for something else * (with preemption disabled), FPU for the current task, or a task that * is prevented from running by the current task. */ static inline void __cpu_invalidate_fpregs_state(void) { __this_cpu_write(fpu_fpregs_owner_ctx, NULL); } static inline void __fpu_invalidate_fpregs_state(struct fpu *fpu) { fpu->last_cpu = -1; } static inline int fpregs_state_valid(struct fpu *fpu, unsigned int cpu) { return fpu == this_cpu_read(fpu_fpregs_owner_ctx) && cpu == fpu->last_cpu; } static inline void fpregs_deactivate(struct fpu *fpu) { __this_cpu_write(fpu_fpregs_owner_ctx, NULL); trace_x86_fpu_regs_deactivated(fpu); } static inline void fpregs_activate(struct fpu *fpu) { __this_cpu_write(fpu_fpregs_owner_ctx, fpu); trace_x86_fpu_regs_activated(fpu); } /* Internal helper for switch_fpu_return() and signal frame setup */ static inline void fpregs_restore_userregs(void) { struct fpu *fpu = x86_task_fpu(current); int cpu = smp_processor_id(); if (WARN_ON_ONCE(current->flags & (PF_KTHREAD | PF_USER_WORKER))) return; if (!fpregs_state_valid(fpu, cpu)) { /* * This restores _all_ xstate which has not been * established yet. * * If PKRU is enabled, then the PKRU value is already * correct because it was either set in switch_to() or in * flush_thread(). So it is excluded because it might be * not up to date in current->thread.fpu->xsave state. * * XFD state is handled in restore_fpregs_from_fpstate(). */ restore_fpregs_from_fpstate(fpu->fpstate, XFEATURE_MASK_FPSTATE); fpregs_activate(fpu); fpu->last_cpu = cpu; } clear_thread_flag(TIF_NEED_FPU_LOAD); } #endif
3086 2956 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 /* SPDX-License-Identifier: GPL-2.0 */ /* * Copyright (C) 2025 Google LLC */ #undef TRACE_SYSTEM #define TRACE_SYSTEM usbcore #if !defined(_USB_CORE_TRACE_H) || defined(TRACE_HEADER_MULTI_READ) #define _USB_CORE_TRACE_H #include <linux/types.h> #include <linux/tracepoint.h> #include <linux/usb.h> DECLARE_EVENT_CLASS(usb_core_log_usb_device, TP_PROTO(struct usb_device *udev), TP_ARGS(udev), TP_STRUCT__entry( __string(name, dev_name(&udev->dev)) __field(enum usb_device_speed, speed) __field(enum usb_device_state, state) __field(unsigned short, bus_mA) __field(unsigned, authorized) ), TP_fast_assign( __assign_str(name); __entry->speed = udev->speed; __entry->state = udev->state; __entry->bus_mA = udev->bus_mA; __entry->authorized = udev->authorized; ), TP_printk("usb %s speed %s state %s %dmA [%s]", __get_str(name), usb_speed_string(__entry->speed), usb_state_string(__entry->state), __entry->bus_mA, __entry->authorized ? "authorized" : "unauthorized") ); DEFINE_EVENT(usb_core_log_usb_device, usb_set_device_state, TP_PROTO(struct usb_device *udev), TP_ARGS(udev) ); DEFINE_EVENT(usb_core_log_usb_device, usb_alloc_dev, TP_PROTO(struct usb_device *udev), TP_ARGS(udev) ); #endif /* _USB_CORE_TRACE_H */ /* this part has to be here */ #undef TRACE_INCLUDE_PATH #define TRACE_INCLUDE_PATH . #undef TRACE_INCLUDE_FILE #define TRACE_INCLUDE_FILE trace #include <trace/define_trace.h>
43 43 43 1 10 43 15 4 5 30 6 23 7 5 7 14 14 19 11 12 6 12 9 6 3 18 5 5 3 2 5 12 12 19 2 8 10 18 7 5 4 4 7 13 2 2 10 2 1 1 6 17 1 16 4 3 10 13 1 34 1 2 6 2 23 23 11 12 22 1 1 23 23 26 18 5 2 6 3 4 12 14 39 5 14 21 19 7 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 // SPDX-License-Identifier: GPL-2.0 /* * fs/timerfd.c * * Copyright (C) 2007 Davide Libenzi <davidel@xmailserver.org> * * * Thanks to Thomas Gleixner for code reviews and useful comments. * */ #include <linux/alarmtimer.h> #include <linux/file.h> #include <linux/poll.h> #include <linux/init.h> #include <linux/fs.h> #include <linux/sched.h> #include <linux/kernel.h> #include <linux/slab.h> #include <linux/list.h> #include <linux/spinlock.h> #include <linux/time.h> #include <linux/hrtimer.h> #include <linux/anon_inodes.h> #include <linux/timerfd.h> #include <linux/syscalls.h> #include <linux/compat.h> #include <linux/rcupdate.h> #include <linux/time_namespace.h> struct timerfd_ctx { union { struct hrtimer tmr; struct alarm alarm; } t; ktime_t tintv; ktime_t moffs; wait_queue_head_t wqh; u64 ticks; int clockid; short unsigned expired; short unsigned settime_flags; /* to show in fdinfo */ struct rcu_head rcu; struct list_head clist; spinlock_t cancel_lock; bool might_cancel; }; static LIST_HEAD(cancel_list); static DEFINE_SPINLOCK(cancel_lock); static inline bool isalarm(struct timerfd_ctx *ctx) { return ctx->clockid == CLOCK_REALTIME_ALARM || ctx->clockid == CLOCK_BOOTTIME_ALARM; } /* * This gets called when the timer event triggers. We set the "expired" * flag, but we do not re-arm the timer (in case it's necessary, * tintv != 0) until the timer is accessed. */ static void timerfd_triggered(struct timerfd_ctx *ctx) { unsigned long flags; spin_lock_irqsave(&ctx->wqh.lock, flags); ctx->expired = 1; ctx->ticks++; wake_up_locked_poll(&ctx->wqh, EPOLLIN); spin_unlock_irqrestore(&ctx->wqh.lock, flags); } static enum hrtimer_restart timerfd_tmrproc(struct hrtimer *htmr) { struct timerfd_ctx *ctx = container_of(htmr, struct timerfd_ctx, t.tmr); timerfd_triggered(ctx); return HRTIMER_NORESTART; } static void timerfd_alarmproc(struct alarm *alarm, ktime_t now) { struct timerfd_ctx *ctx = container_of(alarm, struct timerfd_ctx, t.alarm); timerfd_triggered(ctx); } /* * Called when the clock was set to cancel the timers in the cancel * list. This will wake up processes waiting on these timers. The * wake-up requires ctx->ticks to be non zero, therefore we increment * it before calling wake_up_locked(). */ void timerfd_clock_was_set(void) { ktime_t moffs = ktime_mono_to_real(0); struct timerfd_ctx *ctx; unsigned long flags; rcu_read_lock(); list_for_each_entry_rcu(ctx, &cancel_list, clist) { if (!ctx->might_cancel) continue; spin_lock_irqsave(&ctx->wqh.lock, flags); if (ctx->moffs != moffs) { ctx->moffs = KTIME_MAX; ctx->ticks++; wake_up_locked_poll(&ctx->wqh, EPOLLIN); } spin_unlock_irqrestore(&ctx->wqh.lock, flags); } rcu_read_unlock(); } static void timerfd_resume_work(struct work_struct *work) { timerfd_clock_was_set(); } static DECLARE_WORK(timerfd_work, timerfd_resume_work); /* * Invoked from timekeeping_resume(). Defer the actual update to work so * timerfd_clock_was_set() runs in task context. */ void timerfd_resume(void) { schedule_work(&timerfd_work); } static void __timerfd_remove_cancel(struct timerfd_ctx *ctx) { if (ctx->might_cancel) { ctx->might_cancel = false; spin_lock(&cancel_lock); list_del_rcu(&ctx->clist); spin_unlock(&cancel_lock); } } static void timerfd_remove_cancel(struct timerfd_ctx *ctx) { spin_lock(&ctx->cancel_lock); __timerfd_remove_cancel(ctx); spin_unlock(&ctx->cancel_lock); } static bool timerfd_canceled(struct timerfd_ctx *ctx) { if (!ctx->might_cancel || ctx->moffs != KTIME_MAX) return false; ctx->moffs = ktime_mono_to_real(0); return true; } static void timerfd_setup_cancel(struct timerfd_ctx *ctx, int flags) { spin_lock(&ctx->cancel_lock); if ((ctx->clockid == CLOCK_REALTIME || ctx->clockid == CLOCK_REALTIME_ALARM) && (flags & TFD_TIMER_ABSTIME) && (flags & TFD_TIMER_CANCEL_ON_SET)) { if (!ctx->might_cancel) { ctx->might_cancel = true; spin_lock(&cancel_lock); list_add_rcu(&ctx->clist, &cancel_list); spin_unlock(&cancel_lock); } } else { __timerfd_remove_cancel(ctx); } spin_unlock(&ctx->cancel_lock); } static ktime_t timerfd_get_remaining(struct timerfd_ctx *ctx) { ktime_t remaining; if (isalarm(ctx)) remaining = alarm_expires_remaining(&ctx->t.alarm); else remaining = hrtimer_expires_remaining_adjusted(&ctx->t.tmr); return remaining < 0 ? 0: remaining; } static int timerfd_setup(struct timerfd_ctx *ctx, int flags, const struct itimerspec64 *ktmr) { enum hrtimer_mode htmode; ktime_t texp; int clockid = ctx->clockid; htmode = (flags & TFD_TIMER_ABSTIME) ? HRTIMER_MODE_ABS: HRTIMER_MODE_REL; texp = timespec64_to_ktime(ktmr->it_value); ctx->expired = 0; ctx->ticks = 0; ctx->tintv = timespec64_to_ktime(ktmr->it_interval); if (isalarm(ctx)) { alarm_init(&ctx->t.alarm, ctx->clockid == CLOCK_REALTIME_ALARM ? ALARM_REALTIME : ALARM_BOOTTIME, timerfd_alarmproc); } else { hrtimer_setup(&ctx->t.tmr, timerfd_tmrproc, clockid, htmode); hrtimer_set_expires(&ctx->t.tmr, texp); } if (texp != 0) { if (flags & TFD_TIMER_ABSTIME) texp = timens_ktime_to_host(clockid, texp); if (isalarm(ctx)) { if (flags & TFD_TIMER_ABSTIME) alarm_start(&ctx->t.alarm, texp); else alarm_start_relative(&ctx->t.alarm, texp); } else { hrtimer_start(&ctx->t.tmr, texp, htmode); } if (timerfd_canceled(ctx)) return -ECANCELED; } ctx->settime_flags = flags & TFD_SETTIME_FLAGS; return 0; } static int timerfd_release(struct inode *inode, struct file *file) { struct timerfd_ctx *ctx = file->private_data; timerfd_remove_cancel(ctx); if (isalarm(ctx)) alarm_cancel(&ctx->t.alarm); else hrtimer_cancel(&ctx->t.tmr); kfree_rcu(ctx, rcu); return 0; } static __poll_t timerfd_poll(struct file *file, poll_table *wait) { struct timerfd_ctx *ctx = file->private_data; __poll_t events = 0; unsigned long flags; poll_wait(file, &ctx->wqh, wait); spin_lock_irqsave(&ctx->wqh.lock, flags); if (ctx->ticks) events |= EPOLLIN; spin_unlock_irqrestore(&ctx->wqh.lock, flags); return events; } static ssize_t timerfd_read_iter(struct kiocb *iocb, struct iov_iter *to) { struct file *file = iocb->ki_filp; struct timerfd_ctx *ctx = file->private_data; ssize_t res; u64 ticks = 0; if (iov_iter_count(to) < sizeof(ticks)) return -EINVAL; spin_lock_irq(&ctx->wqh.lock); if (file->f_flags & O_NONBLOCK || iocb->ki_flags & IOCB_NOWAIT) res = -EAGAIN; else res = wait_event_interruptible_locked_irq(ctx->wqh, ctx->ticks); /* * If clock has changed, we do not care about the * ticks and we do not rearm the timer. Userspace must * reevaluate anyway. */ if (timerfd_canceled(ctx)) { ctx->ticks = 0; ctx->expired = 0; res = -ECANCELED; } if (ctx->ticks) { ticks = ctx->ticks; if (ctx->expired && ctx->tintv) { /* * If tintv != 0, this is a periodic timer that * needs to be re-armed. We avoid doing it in the timer * callback to avoid DoS attacks specifying a very * short timer period. */ if (isalarm(ctx)) { ticks += alarm_forward_now( &ctx->t.alarm, ctx->tintv) - 1; alarm_restart(&ctx->t.alarm); } else { ticks += hrtimer_forward_now(&ctx->t.tmr, ctx->tintv) - 1; hrtimer_restart(&ctx->t.tmr); } } ctx->expired = 0; ctx->ticks = 0; } spin_unlock_irq(&ctx->wqh.lock); if (ticks) { res = copy_to_iter(&ticks, sizeof(ticks), to); if (!res) res = -EFAULT; } return res; } #ifdef CONFIG_PROC_FS static void timerfd_show(struct seq_file *m, struct file *file) { struct timerfd_ctx *ctx = file->private_data; struct timespec64 value, interval; spin_lock_irq(&ctx->wqh.lock); value = ktime_to_timespec64(timerfd_get_remaining(ctx)); interval = ktime_to_timespec64(ctx->tintv); spin_unlock_irq(&ctx->wqh.lock); seq_printf(m, "clockid: %d\n" "ticks: %llu\n" "settime flags: 0%o\n" "it_value: (%llu, %llu)\n" "it_interval: (%llu, %llu)\n", ctx->clockid, (unsigned long long)ctx->ticks, ctx->settime_flags, (unsigned long long)value.tv_sec, (unsigned long long)value.tv_nsec, (unsigned long long)interval.tv_sec, (unsigned long long)interval.tv_nsec); } #else #define timerfd_show NULL #endif #ifdef CONFIG_CHECKPOINT_RESTORE static long timerfd_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { struct timerfd_ctx *ctx = file->private_data; int ret = 0; switch (cmd) { case TFD_IOC_SET_TICKS: { u64 ticks; if (copy_from_user(&ticks, (u64 __user *)arg, sizeof(ticks))) return -EFAULT; if (!ticks) return -EINVAL; spin_lock_irq(&ctx->wqh.lock); if (!timerfd_canceled(ctx)) { ctx->ticks = ticks; wake_up_locked_poll(&ctx->wqh, EPOLLIN); } else ret = -ECANCELED; spin_unlock_irq(&ctx->wqh.lock); break; } default: ret = -ENOTTY; break; } return ret; } #else #define timerfd_ioctl NULL #endif static const struct file_operations timerfd_fops = { .release = timerfd_release, .poll = timerfd_poll, .read_iter = timerfd_read_iter, .llseek = noop_llseek, .show_fdinfo = timerfd_show, .unlocked_ioctl = timerfd_ioctl, }; SYSCALL_DEFINE2(timerfd_create, int, clockid, int, flags) { struct timerfd_ctx *ctx __free(kfree) = NULL; int ret; /* Check the TFD_* constants for consistency. */ BUILD_BUG_ON(TFD_CLOEXEC != O_CLOEXEC); BUILD_BUG_ON(TFD_NONBLOCK != O_NONBLOCK); if ((flags & ~TFD_CREATE_FLAGS) || (clockid != CLOCK_MONOTONIC && clockid != CLOCK_REALTIME && clockid != CLOCK_REALTIME_ALARM && clockid != CLOCK_BOOTTIME && clockid != CLOCK_BOOTTIME_ALARM)) return -EINVAL; if ((clockid == CLOCK_REALTIME_ALARM || clockid == CLOCK_BOOTTIME_ALARM) && !capable(CAP_WAKE_ALARM)) return -EPERM; ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); if (!ctx) return -ENOMEM; init_waitqueue_head(&ctx->wqh); spin_lock_init(&ctx->cancel_lock); ctx->clockid = clockid; if (isalarm(ctx)) alarm_init(&ctx->t.alarm, ctx->clockid == CLOCK_REALTIME_ALARM ? ALARM_REALTIME : ALARM_BOOTTIME, timerfd_alarmproc); else hrtimer_setup(&ctx->t.tmr, timerfd_tmrproc, clockid, HRTIMER_MODE_ABS); ctx->moffs = ktime_mono_to_real(0); ret = FD_ADD(flags & TFD_SHARED_FCNTL_FLAGS, anon_inode_getfile_fmode("[timerfd]", &timerfd_fops, ctx, O_RDWR | (flags & TFD_SHARED_FCNTL_FLAGS), FMODE_NOWAIT)); if (ret >= 0) retain_and_null_ptr(ctx); return ret; } static int do_timerfd_settime(int ufd, int flags, const struct itimerspec64 *new, struct itimerspec64 *old) { struct timerfd_ctx *ctx; int ret; if ((flags & ~TFD_SETTIME_FLAGS) || !itimerspec64_valid(new)) return -EINVAL; CLASS(fd, f)(ufd); if (fd_empty(f)) return -EBADF; if (fd_file(f)->f_op != &timerfd_fops) return -EINVAL; ctx = fd_file(f)->private_data; if (isalarm(ctx) && !capable(CAP_WAKE_ALARM)) return -EPERM; timerfd_setup_cancel(ctx, flags); /* * We need to stop the existing timer before reprogramming * it to the new values. */ for (;;) { spin_lock_irq(&ctx->wqh.lock); if (isalarm(ctx)) { if (alarm_try_to_cancel(&ctx->t.alarm) >= 0) break; } else { if (hrtimer_try_to_cancel(&ctx->t.tmr) >= 0) break; } spin_unlock_irq(&ctx->wqh.lock); if (isalarm(ctx)) hrtimer_cancel_wait_running(&ctx->t.alarm.timer); else hrtimer_cancel_wait_running(&ctx->t.tmr); } /* * If the timer is expired and it's periodic, we need to advance it * because the caller may want to know the previous expiration time. * We do not update "ticks" and "expired" since the timer will be * re-programmed again in the following timerfd_setup() call. */ if (ctx->expired && ctx->tintv) { if (isalarm(ctx)) alarm_forward_now(&ctx->t.alarm, ctx->tintv); else hrtimer_forward_now(&ctx->t.tmr, ctx->tintv); } old->it_value = ktime_to_timespec64(timerfd_get_remaining(ctx)); old->it_interval = ktime_to_timespec64(ctx->tintv); /* * Re-program the timer to the new value ... */ ret = timerfd_setup(ctx, flags, new); spin_unlock_irq(&ctx->wqh.lock); return ret; } static int do_timerfd_gettime(int ufd, struct itimerspec64 *t) { struct timerfd_ctx *ctx; CLASS(fd, f)(ufd); if (fd_empty(f)) return -EBADF; if (fd_file(f)->f_op != &timerfd_fops) return -EINVAL; ctx = fd_file(f)->private_data; spin_lock_irq(&ctx->wqh.lock); if (ctx->expired && ctx->tintv) { ctx->expired = 0; if (isalarm(ctx)) { ctx->ticks += alarm_forward_now( &ctx->t.alarm, ctx->tintv) - 1; alarm_restart(&ctx->t.alarm); } else { ctx->ticks += hrtimer_forward_now(&ctx->t.tmr, ctx->tintv) - 1; hrtimer_restart(&ctx->t.tmr); } } t->it_value = ktime_to_timespec64(timerfd_get_remaining(ctx)); t->it_interval = ktime_to_timespec64(ctx->tintv); spin_unlock_irq(&ctx->wqh.lock); return 0; } SYSCALL_DEFINE4(timerfd_settime, int, ufd, int, flags, const struct __kernel_itimerspec __user *, utmr, struct __kernel_itimerspec __user *, otmr) { struct itimerspec64 new, old; int ret; if (get_itimerspec64(&new, utmr)) return -EFAULT; ret = do_timerfd_settime(ufd, flags, &new, &old); if (ret) return ret; if (otmr && put_itimerspec64(&old, otmr)) return -EFAULT; return ret; } SYSCALL_DEFINE2(timerfd_gettime, int, ufd, struct __kernel_itimerspec __user *, otmr) { struct itimerspec64 kotmr; int ret = do_timerfd_gettime(ufd, &kotmr); if (ret) return ret; return put_itimerspec64(&kotmr, otmr) ? -EFAULT : 0; } #ifdef CONFIG_COMPAT_32BIT_TIME SYSCALL_DEFINE4(timerfd_settime32, int, ufd, int, flags, const struct old_itimerspec32 __user *, utmr, struct old_itimerspec32 __user *, otmr) { struct itimerspec64 new, old; int ret; if (get_old_itimerspec32(&new, utmr)) return -EFAULT; ret = do_timerfd_settime(ufd, flags, &new, &old); if (ret) return ret; if (otmr && put_old_itimerspec32(&old, otmr)) return -EFAULT; return ret; } SYSCALL_DEFINE2(timerfd_gettime32, int, ufd, struct old_itimerspec32 __user *, otmr) { struct itimerspec64 kotmr; int ret = do_timerfd_gettime(ufd, &kotmr); if (ret) return ret; return put_old_itimerspec32(&kotmr, otmr) ? -EFAULT : 0; } #endif
1 1 102 12 86 13 101 97 7 102 102 6 2 6 6 6 1 5 2 6 110 12 97 102 102 102 5 91 13 13 65 2 2 2 2 2 3 3 97 112 10 297 287 205 111 4 10 4 1 14 112 112 278 14 288 34 287 287 288 34 287 303 302 15 288 302 303 302 303 301 303 303 302 304 8 303 290 6 288 179 19 162 102 18 2 287 286 286 276 18 285 6 298 1 1 1 297 296 1 4 1 292 25 1 291 292 1 2 290 3 285 297 297 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 // SPDX-License-Identifier: GPL-2.0 /* * mm/mprotect.c * * (C) Copyright 1994 Linus Torvalds * (C) Copyright 2002 Christoph Hellwig * * Address space accounting code <alan@lxorguk.ukuu.org.uk> * (C) Copyright 2002 Red Hat Inc, All Rights Reserved */ #include <linux/pagewalk.h> #include <linux/hugetlb.h> #include <linux/shm.h> #include <linux/mman.h> #include <linux/fs.h> #include <linux/highmem.h> #include <linux/security.h> #include <linux/mempolicy.h> #include <linux/personality.h> #include <linux/syscalls.h> #include <linux/swap.h> #include <linux/swapops.h> #include <linux/mmu_notifier.h> #include <linux/migrate.h> #include <linux/perf_event.h> #include <linux/pkeys.h> #include <linux/ksm.h> #include <linux/uaccess.h> #include <linux/mm_inline.h> #include <linux/pgtable.h> #include <linux/userfaultfd_k.h> #include <uapi/linux/mman.h> #include <asm/cacheflush.h> #include <asm/mmu_context.h> #include <asm/tlbflush.h> #include <asm/tlb.h> #include "internal.h" static bool maybe_change_pte_writable(struct vm_area_struct *vma, pte_t pte) { if (WARN_ON_ONCE(!(vma->vm_flags & VM_WRITE))) return false; /* Don't touch entries that are not even readable. */ if (pte_protnone(pte)) return false; /* Do we need write faults for softdirty tracking? */ if (pte_needs_soft_dirty_wp(vma, pte)) return false; /* Do we need write faults for uffd-wp tracking? */ if (userfaultfd_pte_wp(vma, pte)) return false; return true; } static bool can_change_private_pte_writable(struct vm_area_struct *vma, unsigned long addr, pte_t pte) { struct page *page; if (!maybe_change_pte_writable(vma, pte)) return false; /* * Writable MAP_PRIVATE mapping: We can only special-case on * exclusive anonymous pages, because we know that our * write-fault handler similarly would map them writable without * any additional checks while holding the PT lock. */ page = vm_normal_page(vma, addr, pte); return page && PageAnon(page) && PageAnonExclusive(page); } static bool can_change_shared_pte_writable(struct vm_area_struct *vma, pte_t pte) { if (!maybe_change_pte_writable(vma, pte)) return false; VM_WARN_ON_ONCE(is_zero_pfn(pte_pfn(pte)) && pte_dirty(pte)); /* * Writable MAP_SHARED mapping: "clean" might indicate that the FS still * needs a real write-fault for writenotify * (see vma_wants_writenotify()). If "dirty", the assumption is that the * FS was already notified and we can simply mark the PTE writable * just like the write-fault handler would do. */ return pte_dirty(pte); } bool can_change_pte_writable(struct vm_area_struct *vma, unsigned long addr, pte_t pte) { if (!(vma->vm_flags & VM_SHARED)) return can_change_private_pte_writable(vma, addr, pte); return can_change_shared_pte_writable(vma, pte); } static int mprotect_folio_pte_batch(struct folio *folio, pte_t *ptep, pte_t pte, int max_nr_ptes, fpb_t flags) { /* No underlying folio, so cannot batch */ if (!folio) return 1; if (!folio_test_large(folio)) return 1; return folio_pte_batch_flags(folio, NULL, ptep, &pte, max_nr_ptes, flags); } /* Set nr_ptes number of ptes, starting from idx */ static void prot_commit_flush_ptes(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep, pte_t oldpte, pte_t ptent, int nr_ptes, int idx, bool set_write, struct mmu_gather *tlb) { /* * Advance the position in the batch by idx; note that if idx > 0, * then the nr_ptes passed here is <= batch size - idx. */ addr += idx * PAGE_SIZE; ptep += idx; oldpte = pte_advance_pfn(oldpte, idx); ptent = pte_advance_pfn(ptent, idx); if (set_write) ptent = pte_mkwrite(ptent, vma); modify_prot_commit_ptes(vma, addr, ptep, oldpte, ptent, nr_ptes); if (pte_needs_flush(oldpte, ptent)) tlb_flush_pte_range(tlb, addr, nr_ptes * PAGE_SIZE); } /* * Get max length of consecutive ptes pointing to PageAnonExclusive() pages or * !PageAnonExclusive() pages, starting from start_idx. Caller must enforce * that the ptes point to consecutive pages of the same anon large folio. */ static int page_anon_exclusive_sub_batch(int start_idx, int max_len, struct page *first_page, bool expected_anon_exclusive) { int idx; for (idx = start_idx + 1; idx < start_idx + max_len; ++idx) { if (expected_anon_exclusive != PageAnonExclusive(first_page + idx)) break; } return idx - start_idx; } /* * This function is a result of trying our very best to retain the * "avoid the write-fault handler" optimization. In can_change_pte_writable(), * if the vma is a private vma, and we cannot determine whether to change * the pte to writable just from the vma and the pte, we then need to look * at the actual page pointed to by the pte. Unfortunately, if we have a * batch of ptes pointing to consecutive pages of the same anon large folio, * the anon-exclusivity (or the negation) of the first page does not guarantee * the anon-exclusivity (or the negation) of the other pages corresponding to * the pte batch; hence in this case it is incorrect to decide to change or * not change the ptes to writable just by using information from the first * pte of the batch. Therefore, we must individually check all pages and * retrieve sub-batches. */ static void commit_anon_folio_batch(struct vm_area_struct *vma, struct folio *folio, struct page *first_page, unsigned long addr, pte_t *ptep, pte_t oldpte, pte_t ptent, int nr_ptes, struct mmu_gather *tlb) { bool expected_anon_exclusive; int sub_batch_idx = 0; int len; while (nr_ptes) { expected_anon_exclusive = PageAnonExclusive(first_page + sub_batch_idx); len = page_anon_exclusive_sub_batch(sub_batch_idx, nr_ptes, first_page, expected_anon_exclusive); prot_commit_flush_ptes(vma, addr, ptep, oldpte, ptent, len, sub_batch_idx, expected_anon_exclusive, tlb); sub_batch_idx += len; nr_ptes -= len; } } static void set_write_prot_commit_flush_ptes(struct vm_area_struct *vma, struct folio *folio, struct page *page, unsigned long addr, pte_t *ptep, pte_t oldpte, pte_t ptent, int nr_ptes, struct mmu_gather *tlb) { bool set_write; if (vma->vm_flags & VM_SHARED) { set_write = can_change_shared_pte_writable(vma, ptent); prot_commit_flush_ptes(vma, addr, ptep, oldpte, ptent, nr_ptes, /* idx = */ 0, set_write, tlb); return; } set_write = maybe_change_pte_writable(vma, ptent) && (folio && folio_test_anon(folio)); if (!set_write) { prot_commit_flush_ptes(vma, addr, ptep, oldpte, ptent, nr_ptes, /* idx = */ 0, set_write, tlb); return; } commit_anon_folio_batch(vma, folio, page, addr, ptep, oldpte, ptent, nr_ptes, tlb); } static long change_pte_range(struct mmu_gather *tlb, struct vm_area_struct *vma, pmd_t *pmd, unsigned long addr, unsigned long end, pgprot_t newprot, unsigned long cp_flags) { pte_t *pte, oldpte; spinlock_t *ptl; long pages = 0; bool is_private_single_threaded; bool prot_numa = cp_flags & MM_CP_PROT_NUMA; bool uffd_wp = cp_flags & MM_CP_UFFD_WP; bool uffd_wp_resolve = cp_flags & MM_CP_UFFD_WP_RESOLVE; int nr_ptes; tlb_change_page_size(tlb, PAGE_SIZE); pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl); if (!pte) return -EAGAIN; if (prot_numa) is_private_single_threaded = vma_is_single_threaded_private(vma); flush_tlb_batched_pending(vma->vm_mm); arch_enter_lazy_mmu_mode(); do { nr_ptes = 1; oldpte = ptep_get(pte); if (pte_present(oldpte)) { const fpb_t flags = FPB_RESPECT_SOFT_DIRTY | FPB_RESPECT_WRITE; int max_nr_ptes = (end - addr) >> PAGE_SHIFT; struct folio *folio = NULL; struct page *page; pte_t ptent; /* Already in the desired state. */ if (prot_numa && pte_protnone(oldpte)) continue; page = vm_normal_page(vma, addr, oldpte); if (page) folio = page_folio(page); /* * Avoid trapping faults against the zero or KSM * pages. See similar comment in change_huge_pmd. */ if (prot_numa && !folio_can_map_prot_numa(folio, vma, is_private_single_threaded)) { /* determine batch to skip */ nr_ptes = mprotect_folio_pte_batch(folio, pte, oldpte, max_nr_ptes, /* flags = */ 0); continue; } nr_ptes = mprotect_folio_pte_batch(folio, pte, oldpte, max_nr_ptes, flags); oldpte = modify_prot_start_ptes(vma, addr, pte, nr_ptes); ptent = pte_modify(oldpte, newprot); if (uffd_wp) ptent = pte_mkuffd_wp(ptent); else if (uffd_wp_resolve) ptent = pte_clear_uffd_wp(ptent); /* * In some writable, shared mappings, we might want * to catch actual write access -- see * vma_wants_writenotify(). * * In all writable, private mappings, we have to * properly handle COW. * * In both cases, we can sometimes still change PTEs * writable and avoid the write-fault handler, for * example, if a PTE is already dirty and no other * COW or special handling is required. */ if ((cp_flags & MM_CP_TRY_CHANGE_WRITABLE) && !pte_write(ptent)) set_write_prot_commit_flush_ptes(vma, folio, page, addr, pte, oldpte, ptent, nr_ptes, tlb); else prot_commit_flush_ptes(vma, addr, pte, oldpte, ptent, nr_ptes, /* idx = */ 0, /* set_write = */ false, tlb); pages += nr_ptes; } else if (pte_none(oldpte)) { /* * Nobody plays with any none ptes besides * userfaultfd when applying the protections. */ if (likely(!uffd_wp)) continue; if (userfaultfd_wp_use_markers(vma)) { /* * For file-backed mem, we need to be able to * wr-protect a none pte, because even if the * pte is none, the page/swap cache could * exist. Doing that by install a marker. */ set_pte_at(vma->vm_mm, addr, pte, make_pte_marker(PTE_MARKER_UFFD_WP)); pages++; } } else { softleaf_t entry = softleaf_from_pte(oldpte); pte_t newpte; if (softleaf_is_migration_write(entry)) { const struct folio *folio = softleaf_to_folio(entry); /* * A protection check is difficult so * just be safe and disable write */ if (folio_test_anon(folio)) entry = make_readable_exclusive_migration_entry( swp_offset(entry)); else entry = make_readable_migration_entry(swp_offset(entry)); newpte = swp_entry_to_pte(entry); if (pte_swp_soft_dirty(oldpte)) newpte = pte_swp_mksoft_dirty(newpte); } else if (softleaf_is_device_private_write(entry)) { /* * We do not preserve soft-dirtiness. See * copy_nonpresent_pte() for explanation. */ entry = make_readable_device_private_entry( swp_offset(entry)); newpte = swp_entry_to_pte(entry); if (pte_swp_uffd_wp(oldpte)) newpte = pte_swp_mkuffd_wp(newpte); } else if (softleaf_is_marker(entry)) { /* * Ignore error swap entries unconditionally, * because any access should sigbus/sigsegv * anyway. */ if (softleaf_is_poison_marker(entry) || softleaf_is_guard_marker(entry)) continue; /* * If this is uffd-wp pte marker and we'd like * to unprotect it, drop it; the next page * fault will trigger without uffd trapping. */ if (uffd_wp_resolve) { pte_clear(vma->vm_mm, addr, pte); pages++; } continue; } else { newpte = oldpte; } if (uffd_wp) newpte = pte_swp_mkuffd_wp(newpte); else if (uffd_wp_resolve) newpte = pte_swp_clear_uffd_wp(newpte); if (!pte_same(oldpte, newpte)) { set_pte_at(vma->vm_mm, addr, pte, newpte); pages++; } } } while (pte += nr_ptes, addr += nr_ptes * PAGE_SIZE, addr != end); arch_leave_lazy_mmu_mode(); pte_unmap_unlock(pte - 1, ptl); return pages; } /* * Return true if we want to split THPs into PTE mappings in change * protection procedure, false otherwise. */ static inline bool pgtable_split_needed(struct vm_area_struct *vma, unsigned long cp_flags) { /* * pte markers only resides in pte level, if we need pte markers, * we need to split. For example, we cannot wr-protect a file thp * (e.g. 2M shmem) because file thp is handled differently when * split by erasing the pmd so far. */ return (cp_flags & MM_CP_UFFD_WP) && !vma_is_anonymous(vma); } /* * Return true if we want to populate pgtables in change protection * procedure, false otherwise */ static inline bool pgtable_populate_needed(struct vm_area_struct *vma, unsigned long cp_flags) { /* If not within ioctl(UFFDIO_WRITEPROTECT), then don't bother */ if (!(cp_flags & MM_CP_UFFD_WP)) return false; /* Populate if the userfaultfd mode requires pte markers */ return userfaultfd_wp_use_markers(vma); } /* * Populate the pgtable underneath for whatever reason if requested. * When {pte|pmd|...}_alloc() failed we treat it the same way as pgtable * allocation failures during page faults by kicking OOM and returning * error. */ #define change_pmd_prepare(vma, pmd, cp_flags) \ ({ \ long err = 0; \ if (unlikely(pgtable_populate_needed(vma, cp_flags))) { \ if (pte_alloc(vma->vm_mm, pmd)) \ err = -ENOMEM; \ } \ err; \ }) /* * This is the general pud/p4d/pgd version of change_pmd_prepare(). We need to * have separate change_pmd_prepare() because pte_alloc() returns 0 on success, * while {pmd|pud|p4d}_alloc() returns the valid pointer on success. */ #define change_prepare(vma, high, low, addr, cp_flags) \ ({ \ long err = 0; \ if (unlikely(pgtable_populate_needed(vma, cp_flags))) { \ low##_t *p = low##_alloc(vma->vm_mm, high, addr); \ if (p == NULL) \ err = -ENOMEM; \ } \ err; \ }) static inline long change_pmd_range(struct mmu_gather *tlb, struct vm_area_struct *vma, pud_t *pud, unsigned long addr, unsigned long end, pgprot_t newprot, unsigned long cp_flags) { pmd_t *pmd; unsigned long next; long pages = 0; unsigned long nr_huge_updates = 0; pmd = pmd_offset(pud, addr); do { long ret; pmd_t _pmd; again: next = pmd_addr_end(addr, end); ret = change_pmd_prepare(vma, pmd, cp_flags); if (ret) { pages = ret; break; } if (pmd_none(*pmd)) goto next; _pmd = pmdp_get_lockless(pmd); if (pmd_is_huge(_pmd)) { if ((next - addr != HPAGE_PMD_SIZE) || pgtable_split_needed(vma, cp_flags)) { __split_huge_pmd(vma, pmd, addr, false); /* * For file-backed, the pmd could have been * cleared; make sure pmd populated if * necessary, then fall-through to pte level. */ ret = change_pmd_prepare(vma, pmd, cp_flags); if (ret) { pages = ret; break; } } else { ret = change_huge_pmd(tlb, vma, pmd, addr, newprot, cp_flags); if (ret) { if (ret == HPAGE_PMD_NR) { pages += HPAGE_PMD_NR; nr_huge_updates++; } /* huge pmd was handled */ goto next; } } /* fall through, the trans huge pmd just split */ } ret = change_pte_range(tlb, vma, pmd, addr, next, newprot, cp_flags); if (ret < 0) goto again; pages += ret; next: cond_resched(); } while (pmd++, addr = next, addr != end); if (nr_huge_updates) count_vm_numa_events(NUMA_HUGE_PTE_UPDATES, nr_huge_updates); return pages; } static inline long change_pud_range(struct mmu_gather *tlb, struct vm_area_struct *vma, p4d_t *p4d, unsigned long addr, unsigned long end, pgprot_t newprot, unsigned long cp_flags) { struct mmu_notifier_range range; pud_t *pudp, pud; unsigned long next; long pages = 0, ret; range.start = 0; pudp = pud_offset(p4d, addr); do { again: next = pud_addr_end(addr, end); ret = change_prepare(vma, pudp, pmd, addr, cp_flags); if (ret) { pages = ret; break; } pud = pudp_get(pudp); if (pud_none(pud)) continue; if (!range.start) { mmu_notifier_range_init(&range, MMU_NOTIFY_PROTECTION_VMA, 0, vma->vm_mm, addr, end); mmu_notifier_invalidate_range_start(&range); } if (pud_leaf(pud)) { if ((next - addr != PUD_SIZE) || pgtable_split_needed(vma, cp_flags)) { __split_huge_pud(vma, pudp, addr); goto again; } else { ret = change_huge_pud(tlb, vma, pudp, addr, newprot, cp_flags); if (ret == 0) goto again; /* huge pud was handled */ if (ret == HPAGE_PUD_NR) pages += HPAGE_PUD_NR; continue; } } pages += change_pmd_range(tlb, vma, pudp, addr, next, newprot, cp_flags); } while (pudp++, addr = next, addr != end); if (range.start) mmu_notifier_invalidate_range_end(&range); return pages; } static inline long change_p4d_range(struct mmu_gather *tlb, struct vm_area_struct *vma, pgd_t *pgd, unsigned long addr, unsigned long end, pgprot_t newprot, unsigned long cp_flags) { p4d_t *p4d; unsigned long next; long pages = 0, ret; p4d = p4d_offset(pgd, addr); do { next = p4d_addr_end(addr, end); ret = change_prepare(vma, p4d, pud, addr, cp_flags); if (ret) return ret; if (p4d_none_or_clear_bad(p4d)) continue; pages += change_pud_range(tlb, vma, p4d, addr, next, newprot, cp_flags); } while (p4d++, addr = next, addr != end); return pages; } static long change_protection_range(struct mmu_gather *tlb, struct vm_area_struct *vma, unsigned long addr, unsigned long end, pgprot_t newprot, unsigned long cp_flags) { struct mm_struct *mm = vma->vm_mm; pgd_t *pgd; unsigned long next; long pages = 0, ret; BUG_ON(addr >= end); pgd = pgd_offset(mm, addr); tlb_start_vma(tlb, vma); do { next = pgd_addr_end(addr, end); ret = change_prepare(vma, pgd, p4d, addr, cp_flags); if (ret) { pages = ret; break; } if (pgd_none_or_clear_bad(pgd)) continue; pages += change_p4d_range(tlb, vma, pgd, addr, next, newprot, cp_flags); } while (pgd++, addr = next, addr != end); tlb_end_vma(tlb, vma); return pages; } long change_protection(struct mmu_gather *tlb, struct vm_area_struct *vma, unsigned long start, unsigned long end, unsigned long cp_flags) { pgprot_t newprot = vma->vm_page_prot; long pages; BUG_ON((cp_flags & MM_CP_UFFD_WP_ALL) == MM_CP_UFFD_WP_ALL); #ifdef CONFIG_NUMA_BALANCING /* * Ordinary protection updates (mprotect, uffd-wp, softdirty tracking) * are expected to reflect their requirements via VMA flags such that * vma_set_page_prot() will adjust vma->vm_page_prot accordingly. */ if (cp_flags & MM_CP_PROT_NUMA) newprot = PAGE_NONE; #else WARN_ON_ONCE(cp_flags & MM_CP_PROT_NUMA); #endif if (is_vm_hugetlb_page(vma)) pages = hugetlb_change_protection(vma, start, end, newprot, cp_flags); else pages = change_protection_range(tlb, vma, start, end, newprot, cp_flags); return pages; } static int prot_none_pte_entry(pte_t *pte, unsigned long addr, unsigned long next, struct mm_walk *walk) { return pfn_modify_allowed(pte_pfn(ptep_get(pte)), *(pgprot_t *)(walk->private)) ? 0 : -EACCES; } static int prot_none_hugetlb_entry(pte_t *pte, unsigned long hmask, unsigned long addr, unsigned long next, struct mm_walk *walk) { return pfn_modify_allowed(pte_pfn(ptep_get(pte)), *(pgprot_t *)(walk->private)) ? 0 : -EACCES; } static int prot_none_test(unsigned long addr, unsigned long next, struct mm_walk *walk) { return 0; } static const struct mm_walk_ops prot_none_walk_ops = { .pte_entry = prot_none_pte_entry, .hugetlb_entry = prot_none_hugetlb_entry, .test_walk = prot_none_test, .walk_lock = PGWALK_WRLOCK, }; int mprotect_fixup(struct vma_iterator *vmi, struct mmu_gather *tlb, struct vm_area_struct *vma, struct vm_area_struct **pprev, unsigned long start, unsigned long end, vm_flags_t newflags) { struct mm_struct *mm = vma->vm_mm; vm_flags_t oldflags = READ_ONCE(vma->vm_flags); long nrpages = (end - start) >> PAGE_SHIFT; unsigned int mm_cp_flags = 0; unsigned long charged = 0; int error; if (vma_is_sealed(vma)) return -EPERM; if (newflags == oldflags) { *pprev = vma; return 0; } /* * Do PROT_NONE PFN permission checks here when we can still * bail out without undoing a lot of state. This is a rather * uncommon case, so doesn't need to be very optimized. */ if (arch_has_pfn_modify_check() && (oldflags & (VM_PFNMAP|VM_MIXEDMAP)) && (newflags & VM_ACCESS_FLAGS) == 0) { pgprot_t new_pgprot = vm_get_page_prot(newflags); error = walk_page_range(current->mm, start, end, &prot_none_walk_ops, &new_pgprot); if (error) return error; } /* * If we make a private mapping writable we increase our commit; * but (without finer accounting) cannot reduce our commit if we * make it unwritable again except in the anonymous case where no * anon_vma has yet to be assigned. * * hugetlb mapping were accounted for even if read-only so there is * no need to account for them here. */ if (newflags & VM_WRITE) { /* Check space limits when area turns into data. */ if (!may_expand_vm(mm, newflags, nrpages) && may_expand_vm(mm, oldflags, nrpages)) return -ENOMEM; if (!(oldflags & (VM_ACCOUNT|VM_WRITE|VM_HUGETLB| VM_SHARED|VM_NORESERVE))) { charged = nrpages; if (security_vm_enough_memory_mm(mm, charged)) return -ENOMEM; newflags |= VM_ACCOUNT; } } else if ((oldflags & VM_ACCOUNT) && vma_is_anonymous(vma) && !vma->anon_vma) { newflags &= ~VM_ACCOUNT; } vma = vma_modify_flags(vmi, *pprev, vma, start, end, &newflags); if (IS_ERR(vma)) { error = PTR_ERR(vma); goto fail; } *pprev = vma; /* * vm_flags and vm_page_prot are protected by the mmap_lock * held in write mode. */ vma_start_write(vma); vm_flags_reset_once(vma, newflags); if (vma_wants_manual_pte_write_upgrade(vma)) mm_cp_flags |= MM_CP_TRY_CHANGE_WRITABLE; vma_set_page_prot(vma); change_protection(tlb, vma, start, end, mm_cp_flags); if ((oldflags & VM_ACCOUNT) && !(newflags & VM_ACCOUNT)) vm_unacct_memory(nrpages); /* * Private VM_LOCKED VMA becoming writable: trigger COW to avoid major * fault on access. */ if ((oldflags & (VM_WRITE | VM_SHARED | VM_LOCKED)) == VM_LOCKED && (newflags & VM_WRITE)) { populate_vma_page_range(vma, start, end, NULL); } vm_stat_account(mm, oldflags, -nrpages); vm_stat_account(mm, newflags, nrpages); perf_event_mmap(vma); return 0; fail: vm_unacct_memory(charged); return error; } /* * pkey==-1 when doing a legacy mprotect() */ static int do_mprotect_pkey(unsigned long start, size_t len, unsigned long prot, int pkey) { unsigned long nstart, end, tmp, reqprot; struct vm_area_struct *vma, *prev; int error; const int grows = prot & (PROT_GROWSDOWN|PROT_GROWSUP); const bool rier = (current->personality & READ_IMPLIES_EXEC) && (prot & PROT_READ); struct mmu_gather tlb; struct vma_iterator vmi; start = untagged_addr(start); prot &= ~(PROT_GROWSDOWN|PROT_GROWSUP); if (grows == (PROT_GROWSDOWN|PROT_GROWSUP)) /* can't be both */ return -EINVAL; if (start & ~PAGE_MASK) return -EINVAL; if (!len) return 0; len = PAGE_ALIGN(len); end = start + len; if (end <= start) return -ENOMEM; if (!arch_validate_prot(prot, start)) return -EINVAL; reqprot = prot; if (mmap_write_lock_killable(current->mm)) return -EINTR; /* * If userspace did not allocate the pkey, do not let * them use it here. */ error = -EINVAL; if ((pkey != -1) && !mm_pkey_is_allocated(current->mm, pkey)) goto out; vma_iter_init(&vmi, current->mm, start); vma = vma_find(&vmi, end); error = -ENOMEM; if (!vma) goto out; if (unlikely(grows & PROT_GROWSDOWN)) { if (vma->vm_start >= end) goto out; start = vma->vm_start; error = -EINVAL; if (!(vma->vm_flags & VM_GROWSDOWN)) goto out; } else { if (vma->vm_start > start) goto out; if (unlikely(grows & PROT_GROWSUP)) { end = vma->vm_end; error = -EINVAL; if (!(vma->vm_flags & VM_GROWSUP)) goto out; } } prev = vma_prev(&vmi); if (start > vma->vm_start) prev = vma; tlb_gather_mmu(&tlb, current->mm); nstart = start; tmp = vma->vm_start; for_each_vma_range(vmi, vma, end) { vm_flags_t mask_off_old_flags; vm_flags_t newflags; int new_vma_pkey; if (vma->vm_start != tmp) { error = -ENOMEM; break; } /* Does the application expect PROT_READ to imply PROT_EXEC */ if (rier && (vma->vm_flags & VM_MAYEXEC)) prot |= PROT_EXEC; /* * Each mprotect() call explicitly passes r/w/x permissions. * If a permission is not passed to mprotect(), it must be * cleared from the VMA. */ mask_off_old_flags = VM_ACCESS_FLAGS | VM_FLAGS_CLEAR; new_vma_pkey = arch_override_mprotect_pkey(vma, prot, pkey); newflags = calc_vm_prot_bits(prot, new_vma_pkey); newflags |= (vma->vm_flags & ~mask_off_old_flags); /* newflags >> 4 shift VM_MAY% in place of VM_% */ if ((newflags & ~(newflags >> 4)) & VM_ACCESS_FLAGS) { error = -EACCES; break; } if (map_deny_write_exec(vma->vm_flags, newflags)) { error = -EACCES; break; } /* Allow architectures to sanity-check the new flags */ if (!arch_validate_flags(newflags)) { error = -EINVAL; break; } error = security_file_mprotect(vma, reqprot, prot); if (error) break; tmp = vma->vm_end; if (tmp > end) tmp = end; if (vma->vm_ops && vma->vm_ops->mprotect) { error = vma->vm_ops->mprotect(vma, nstart, tmp, newflags); if (error) break; } error = mprotect_fixup(&vmi, &tlb, vma, &prev, nstart, tmp, newflags); if (error) break; tmp = vma_iter_end(&vmi); nstart = tmp; prot = reqprot; } tlb_finish_mmu(&tlb); if (!error && tmp < end) error = -ENOMEM; out: mmap_write_unlock(current->mm); return error; } SYSCALL_DEFINE3(mprotect, unsigned long, start, size_t, len, unsigned long, prot) { return do_mprotect_pkey(start, len, prot, -1); } #ifdef CONFIG_ARCH_HAS_PKEYS SYSCALL_DEFINE4(pkey_mprotect, unsigned long, start, size_t, len, unsigned long, prot, int, pkey) { return do_mprotect_pkey(start, len, prot, pkey); } SYSCALL_DEFINE2(pkey_alloc, unsigned long, flags, unsigned long, init_val) { int pkey; int ret; /* No flags supported yet. */ if (flags) return -EINVAL; /* check for unsupported init values */ if (init_val & ~PKEY_ACCESS_MASK) return -EINVAL; mmap_write_lock(current->mm); pkey = mm_pkey_alloc(current->mm); ret = -ENOSPC; if (pkey == -1) goto out; ret = arch_set_user_pkey_access(current, pkey, init_val); if (ret) { mm_pkey_free(current->mm, pkey); goto out; } ret = pkey; out: mmap_write_unlock(current->mm); return ret; } SYSCALL_DEFINE1(pkey_free, int, pkey) { int ret; mmap_write_lock(current->mm); ret = mm_pkey_free(current->mm, pkey); mmap_write_unlock(current->mm); /* * We could provide warnings or errors if any VMA still * has the pkey set here. */ return ret; } #endif /* CONFIG_ARCH_HAS_PKEYS */
1 9 6 3 6 2 1 2 3 3 3 9 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 // SPDX-License-Identifier: GPL-2.0-only /* * Module for modifying the secmark field of the skb, for use by * security subsystems. * * Based on the nfmark match by: * (C) 1999-2001 Marc Boucher <marc@mbsi.ca> * * (C) 2006,2008 Red Hat, Inc., James Morris <jmorris@redhat.com> */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/module.h> #include <linux/security.h> #include <linux/skbuff.h> #include <linux/netfilter/x_tables.h> #include <linux/netfilter/xt_SECMARK.h> MODULE_LICENSE("GPL"); MODULE_AUTHOR("James Morris <jmorris@redhat.com>"); MODULE_DESCRIPTION("Xtables: packet security mark modification"); MODULE_ALIAS("ipt_SECMARK"); MODULE_ALIAS("ip6t_SECMARK"); static u8 mode; static unsigned int secmark_tg(struct sk_buff *skb, const struct xt_secmark_target_info_v1 *info) { u32 secmark = 0; switch (mode) { case SECMARK_MODE_SEL: secmark = info->secid; break; default: BUG(); } skb->secmark = secmark; return XT_CONTINUE; } static int checkentry_lsm(struct xt_secmark_target_info_v1 *info) { int err; info->secctx[SECMARK_SECCTX_MAX - 1] = '\0'; info->secid = 0; err = security_secctx_to_secid(info->secctx, strlen(info->secctx), &info->secid); if (err) { if (err == -EINVAL) pr_info_ratelimited("invalid security context \'%s\'\n", info->secctx); return err; } if (!info->secid) { pr_info_ratelimited("unable to map security context \'%s\'\n", info->secctx); return -ENOENT; } err = security_secmark_relabel_packet(info->secid); if (err) { pr_info_ratelimited("unable to obtain relabeling permission\n"); return err; } security_secmark_refcount_inc(); return 0; } static int secmark_tg_check(const char *table, struct xt_secmark_target_info_v1 *info) { int err; if (strcmp(table, "mangle") != 0 && strcmp(table, "security") != 0) { pr_info_ratelimited("only valid in \'mangle\' or \'security\' table, not \'%s\'\n", table); return -EINVAL; } if (mode && mode != info->mode) { pr_info_ratelimited("mode already set to %hu cannot mix with rules for mode %hu\n", mode, info->mode); return -EINVAL; } switch (info->mode) { case SECMARK_MODE_SEL: break; default: pr_info_ratelimited("invalid mode: %hu\n", info->mode); return -EINVAL; } err = checkentry_lsm(info); if (err) return err; if (!mode) mode = info->mode; return 0; } static void secmark_tg_destroy(const struct xt_tgdtor_param *par) { switch (mode) { case SECMARK_MODE_SEL: security_secmark_refcount_dec(); } } static int secmark_tg_check_v0(const struct xt_tgchk_param *par) { struct xt_secmark_target_info *info = par->targinfo; struct xt_secmark_target_info_v1 newinfo = { .mode = info->mode, }; int ret; memcpy(newinfo.secctx, info->secctx, SECMARK_SECCTX_MAX); ret = secmark_tg_check(par->table, &newinfo); info->secid = newinfo.secid; return ret; } static unsigned int secmark_tg_v0(struct sk_buff *skb, const struct xt_action_param *par) { const struct xt_secmark_target_info *info = par->targinfo; struct xt_secmark_target_info_v1 newinfo = { .secid = info->secid, }; return secmark_tg(skb, &newinfo); } static int secmark_tg_check_v1(const struct xt_tgchk_param *par) { return secmark_tg_check(par->table, par->targinfo); } static unsigned int secmark_tg_v1(struct sk_buff *skb, const struct xt_action_param *par) { return secmark_tg(skb, par->targinfo); } static struct xt_target secmark_tg_reg[] __read_mostly = { { .name = "SECMARK", .revision = 0, .family = NFPROTO_IPV4, .checkentry = secmark_tg_check_v0, .destroy = secmark_tg_destroy, .target = secmark_tg_v0, .targetsize = sizeof(struct xt_secmark_target_info), .me = THIS_MODULE, }, { .name = "SECMARK", .revision = 1, .family = NFPROTO_IPV4, .checkentry = secmark_tg_check_v1, .destroy = secmark_tg_destroy, .target = secmark_tg_v1, .targetsize = sizeof(struct xt_secmark_target_info_v1), .usersize = offsetof(struct xt_secmark_target_info_v1, secid), .me = THIS_MODULE, }, #if IS_ENABLED(CONFIG_IP6_NF_IPTABLES) { .name = "SECMARK", .revision = 0, .family = NFPROTO_IPV6, .checkentry = secmark_tg_check_v0, .destroy = secmark_tg_destroy, .target = secmark_tg_v0, .targetsize = sizeof(struct xt_secmark_target_info), .me = THIS_MODULE, }, { .name = "SECMARK", .revision = 1, .family = NFPROTO_IPV6, .checkentry = secmark_tg_check_v1, .destroy = secmark_tg_destroy, .target = secmark_tg_v1, .targetsize = sizeof(struct xt_secmark_target_info_v1), .usersize = offsetof(struct xt_secmark_target_info_v1, secid), .me = THIS_MODULE, }, #endif }; static int __init secmark_tg_init(void) { return xt_register_targets(secmark_tg_reg, ARRAY_SIZE(secmark_tg_reg)); } static void __exit secmark_tg_exit(void) { xt_unregister_targets(secmark_tg_reg, ARRAY_SIZE(secmark_tg_reg)); } module_init(secmark_tg_init); module_exit(secmark_tg_exit);
332 1977 1976 1978 1979 1111 1977 102 411 187 187 187 22 162 183 172 502 637 2 187 43 451 784 877 878 192 713 4 21 3 279 877 877 713 195 21 216 58 58 18 18 18 18 18 12 12 12 11 12 12 12 12 12 9 11 9 11 12 13 13 13 13 3 30 30 22 29 30 29 33 33 33 24 33 33 4 1 368 364 7 7 7 323 19 12 12 12 1 1 10 11 6 5 11 1 1 19 11 19 19 19 19 10 12 12 19 19 18 19 18 1187 1186 775 776 718 58 773 775 333 333 9 332 73 9 1987 73 1969 1187 1375 366 320 331 333 120 1 127 1 1 324 309 15 1382 1381 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 // SPDX-License-Identifier: GPL-2.0 /* Copyright (C) B.A.T.M.A.N. contributors: * * Marek Lindner, Simon Wunderlich */ #include "hard-interface.h" #include "main.h" #include <linux/atomic.h> #include <linux/byteorder/generic.h> #include <linux/compiler.h> #include <linux/container_of.h> #include <linux/errno.h> #include <linux/gfp.h> #include <linux/if.h> #include <linux/if_arp.h> #include <linux/if_ether.h> #include <linux/kref.h> #include <linux/limits.h> #include <linux/list.h> #include <linux/minmax.h> #include <linux/mutex.h> #include <linux/netdevice.h> #include <linux/notifier.h> #include <linux/printk.h> #include <linux/rculist.h> #include <linux/rtnetlink.h> #include <linux/slab.h> #include <linux/spinlock.h> #include <net/net_namespace.h> #include <net/rtnetlink.h> #include <uapi/linux/batadv_packet.h> #include "bat_v.h" #include "bridge_loop_avoidance.h" #include "distributed-arp-table.h" #include "gateway_client.h" #include "log.h" #include "mesh-interface.h" #include "originator.h" #include "send.h" #include "translation-table.h" /** * batadv_hardif_release() - release hard interface from lists and queue for * free after rcu grace period * @ref: kref pointer of the hard interface */ void batadv_hardif_release(struct kref *ref) { struct batadv_hard_iface *hard_iface; hard_iface = container_of(ref, struct batadv_hard_iface, refcount); netdev_put(hard_iface->net_dev, &hard_iface->dev_tracker); kfree_rcu(hard_iface, rcu); } /** * batadv_hardif_get_by_netdev() - Get hard interface object of a net_device * @net_dev: net_device to search for * * Return: batadv_hard_iface of net_dev (with increased refcnt), NULL on errors */ struct batadv_hard_iface * batadv_hardif_get_by_netdev(const struct net_device *net_dev) { struct batadv_hard_iface *hard_iface; rcu_read_lock(); list_for_each_entry_rcu(hard_iface, &batadv_hardif_list, list) { if (hard_iface->net_dev == net_dev && kref_get_unless_zero(&hard_iface->refcount)) goto out; } hard_iface = NULL; out: rcu_read_unlock(); return hard_iface; } /** * batadv_getlink_net() - return link net namespace (of use fallback) * @netdev: net_device to check * @fallback_net: return in case get_link_net is not available for @netdev * * Return: result of rtnl_link_ops->get_link_net or @fallback_net */ static struct net *batadv_getlink_net(const struct net_device *netdev, struct net *fallback_net) { if (!netdev->rtnl_link_ops) return fallback_net; if (!netdev->rtnl_link_ops->get_link_net) return fallback_net; return netdev->rtnl_link_ops->get_link_net(netdev); } /** * batadv_mutual_parents() - check if two devices are each others parent * @dev1: 1st net dev * @net1: 1st devices netns * @dev2: 2nd net dev * @net2: 2nd devices netns * * veth devices come in pairs and each is the parent of the other! * * Return: true if the devices are each others parent, otherwise false */ static bool batadv_mutual_parents(const struct net_device *dev1, struct net *net1, const struct net_device *dev2, struct net *net2) { int dev1_parent_iflink = dev_get_iflink(dev1); int dev2_parent_iflink = dev_get_iflink(dev2); const struct net *dev1_parent_net; const struct net *dev2_parent_net; dev1_parent_net = batadv_getlink_net(dev1, net1); dev2_parent_net = batadv_getlink_net(dev2, net2); if (!dev1_parent_iflink || !dev2_parent_iflink) return false; return (dev1_parent_iflink == dev2->ifindex) && (dev2_parent_iflink == dev1->ifindex) && net_eq(dev1_parent_net, net2) && net_eq(dev2_parent_net, net1); } /** * batadv_is_on_batman_iface() - check if a device is a batman iface descendant * @net_dev: the device to check * * If the user creates any virtual device on top of a batman-adv interface, it * is important to prevent this new interface from being used to create a new * mesh network (this behaviour would lead to a batman-over-batman * configuration). This function recursively checks all the fathers of the * device passed as argument looking for a batman-adv mesh interface. * * Return: true if the device is descendant of a batman-adv mesh interface (or * if it is a batman-adv interface itself), false otherwise */ static bool batadv_is_on_batman_iface(const struct net_device *net_dev) { struct net *net = dev_net(net_dev); struct net_device *parent_dev; struct net *parent_net; int iflink; bool ret; /* check if this is a batman-adv mesh interface */ if (batadv_meshif_is_valid(net_dev)) return true; iflink = dev_get_iflink(net_dev); if (iflink == 0) return false; parent_net = batadv_getlink_net(net_dev, net); /* iflink to itself, most likely physical device */ if (net == parent_net && iflink == net_dev->ifindex) return false; /* recurse over the parent device */ parent_dev = __dev_get_by_index((struct net *)parent_net, iflink); if (!parent_dev) { pr_warn("Cannot find parent device. Skipping batadv-on-batadv check for %s\n", net_dev->name); return false; } if (batadv_mutual_parents(net_dev, net, parent_dev, parent_net)) return false; ret = batadv_is_on_batman_iface(parent_dev); return ret; } static bool batadv_is_valid_iface(const struct net_device *net_dev) { if (net_dev->flags & IFF_LOOPBACK) return false; if (net_dev->type != ARPHRD_ETHER) return false; if (net_dev->addr_len != ETH_ALEN) return false; /* no batman over batman */ if (batadv_is_on_batman_iface(net_dev)) return false; return true; } /** * batadv_get_real_netdevice() - check if the given netdev struct is a virtual * interface on top of another 'real' interface * @netdev: the device to check * * Callers must hold the rtnl semaphore. You may want batadv_get_real_netdev() * instead of this. * * Return: the 'real' net device or the original net device and NULL in case * of an error. */ static struct net_device *batadv_get_real_netdevice(struct net_device *netdev) { struct batadv_hard_iface *hard_iface = NULL; struct net_device *real_netdev = NULL; struct net *real_net; struct net *net; int iflink; ASSERT_RTNL(); if (!netdev) return NULL; iflink = dev_get_iflink(netdev); if (iflink == 0) { dev_hold(netdev); return netdev; } hard_iface = batadv_hardif_get_by_netdev(netdev); if (!hard_iface || !hard_iface->mesh_iface) goto out; net = dev_net(hard_iface->mesh_iface); real_net = batadv_getlink_net(netdev, net); /* iflink to itself, most likely physical device */ if (net == real_net && netdev->ifindex == iflink) { real_netdev = netdev; dev_hold(real_netdev); goto out; } real_netdev = dev_get_by_index(real_net, iflink); out: batadv_hardif_put(hard_iface); return real_netdev; } /** * batadv_get_real_netdev() - check if the given net_device struct is a virtual * interface on top of another 'real' interface * @net_device: the device to check * * Return: the 'real' net device or the original net device and NULL in case * of an error. */ struct net_device *batadv_get_real_netdev(struct net_device *net_device) { struct net_device *real_netdev; rtnl_lock(); real_netdev = batadv_get_real_netdevice(net_device); rtnl_unlock(); return real_netdev; } /** * batadv_is_wext_netdev() - check if the given net_device struct is a * wext wifi interface * @net_device: the device to check * * Return: true if the net device is a wext wireless device, false * otherwise. */ static bool batadv_is_wext_netdev(struct net_device *net_device) { if (!net_device) return false; #ifdef CONFIG_WIRELESS_EXT /* pre-cfg80211 drivers have to implement WEXT, so it is possible to * check for wireless_handlers != NULL */ if (net_device->wireless_handlers) return true; #endif return false; } /** * batadv_is_cfg80211_netdev() - check if the given net_device struct is a * cfg80211 wifi interface * @net_device: the device to check * * Return: true if the net device is a cfg80211 wireless device, false * otherwise. */ static bool batadv_is_cfg80211_netdev(struct net_device *net_device) { if (!net_device) return false; #if IS_ENABLED(CONFIG_CFG80211) /* cfg80211 drivers have to set ieee80211_ptr */ if (net_device->ieee80211_ptr) return true; #endif return false; } /** * batadv_wifi_flags_evaluate() - calculate wifi flags for net_device * @net_device: the device to check * * Return: batadv_hard_iface_wifi_flags flags of the device */ static u32 batadv_wifi_flags_evaluate(struct net_device *net_device) { u32 wifi_flags = 0; struct net_device *real_netdev; if (batadv_is_wext_netdev(net_device)) wifi_flags |= BATADV_HARDIF_WIFI_WEXT_DIRECT; if (batadv_is_cfg80211_netdev(net_device)) wifi_flags |= BATADV_HARDIF_WIFI_CFG80211_DIRECT; real_netdev = batadv_get_real_netdevice(net_device); if (!real_netdev) return wifi_flags; if (real_netdev == net_device) goto out; if (batadv_is_wext_netdev(real_netdev)) wifi_flags |= BATADV_HARDIF_WIFI_WEXT_INDIRECT; if (batadv_is_cfg80211_netdev(real_netdev)) wifi_flags |= BATADV_HARDIF_WIFI_CFG80211_INDIRECT; out: dev_put(real_netdev); return wifi_flags; } /** * batadv_is_cfg80211_hardif() - check if the given hardif is a cfg80211 wifi * interface * @hard_iface: the device to check * * Return: true if the net device is a cfg80211 wireless device, false * otherwise. */ bool batadv_is_cfg80211_hardif(struct batadv_hard_iface *hard_iface) { u32 allowed_flags = 0; allowed_flags |= BATADV_HARDIF_WIFI_CFG80211_DIRECT; allowed_flags |= BATADV_HARDIF_WIFI_CFG80211_INDIRECT; return !!(hard_iface->wifi_flags & allowed_flags); } /** * batadv_is_wifi_hardif() - check if the given hardif is a wifi interface * @hard_iface: the device to check * * Return: true if the net device is a 802.11 wireless device, false otherwise. */ bool batadv_is_wifi_hardif(struct batadv_hard_iface *hard_iface) { if (!hard_iface) return false; return hard_iface->wifi_flags != 0; } /** * batadv_hardif_no_broadcast() - check whether (re)broadcast is necessary * @if_outgoing: the outgoing interface checked and considered for (re)broadcast * @orig_addr: the originator of this packet * @orig_neigh: originator address of the forwarder we just got the packet from * (NULL if we originated) * * Checks whether a packet needs to be (re)broadcasted on the given interface. * * Return: * BATADV_HARDIF_BCAST_NORECIPIENT: No neighbor on interface * BATADV_HARDIF_BCAST_DUPFWD: Just one neighbor, but it is the forwarder * BATADV_HARDIF_BCAST_DUPORIG: Just one neighbor, but it is the originator * BATADV_HARDIF_BCAST_OK: Several neighbors, must broadcast */ int batadv_hardif_no_broadcast(struct batadv_hard_iface *if_outgoing, u8 *orig_addr, u8 *orig_neigh) { struct batadv_hardif_neigh_node *hardif_neigh; struct hlist_node *first; int ret = BATADV_HARDIF_BCAST_OK; rcu_read_lock(); /* 0 neighbors -> no (re)broadcast */ first = rcu_dereference(hlist_first_rcu(&if_outgoing->neigh_list)); if (!first) { ret = BATADV_HARDIF_BCAST_NORECIPIENT; goto out; } /* >1 neighbors -> (re)broadcast */ if (rcu_dereference(hlist_next_rcu(first))) goto out; hardif_neigh = hlist_entry(first, struct batadv_hardif_neigh_node, list); /* 1 neighbor, is the originator -> no rebroadcast */ if (orig_addr && batadv_compare_eth(hardif_neigh->orig, orig_addr)) { ret = BATADV_HARDIF_BCAST_DUPORIG; /* 1 neighbor, is the one we received from -> no rebroadcast */ } else if (orig_neigh && batadv_compare_eth(hardif_neigh->orig, orig_neigh)) { ret = BATADV_HARDIF_BCAST_DUPFWD; } out: rcu_read_unlock(); return ret; } static struct batadv_hard_iface * batadv_hardif_get_active(struct net_device *mesh_iface) { struct batadv_hard_iface *hard_iface; struct list_head *iter; rcu_read_lock(); netdev_for_each_lower_private_rcu(mesh_iface, hard_iface, iter) { if (hard_iface->if_status == BATADV_IF_ACTIVE && kref_get_unless_zero(&hard_iface->refcount)) goto out; } hard_iface = NULL; out: rcu_read_unlock(); return hard_iface; } static void batadv_primary_if_update_addr(struct batadv_priv *bat_priv, struct batadv_hard_iface *oldif) { struct batadv_hard_iface *primary_if; primary_if = batadv_primary_if_get_selected(bat_priv); if (!primary_if) goto out; batadv_dat_init_own_addr(bat_priv, primary_if); batadv_bla_update_orig_address(bat_priv, primary_if, oldif); out: batadv_hardif_put(primary_if); } static void batadv_primary_if_select(struct batadv_priv *bat_priv, struct batadv_hard_iface *new_hard_iface) { struct batadv_hard_iface *curr_hard_iface; ASSERT_RTNL(); if (new_hard_iface) kref_get(&new_hard_iface->refcount); curr_hard_iface = rcu_replace_pointer(bat_priv->primary_if, new_hard_iface, 1); if (!new_hard_iface) goto out; bat_priv->algo_ops->iface.primary_set(new_hard_iface); batadv_primary_if_update_addr(bat_priv, curr_hard_iface); out: batadv_hardif_put(curr_hard_iface); } static bool batadv_hardif_is_iface_up(const struct batadv_hard_iface *hard_iface) { if (hard_iface->net_dev->flags & IFF_UP) return true; return false; } static void batadv_check_known_mac_addr(const struct batadv_hard_iface *hard_iface) { struct net_device *mesh_iface = hard_iface->mesh_iface; const struct batadv_hard_iface *tmp_hard_iface; struct list_head *iter; if (!mesh_iface) return; netdev_for_each_lower_private(mesh_iface, tmp_hard_iface, iter) { if (tmp_hard_iface == hard_iface) continue; if (tmp_hard_iface->if_status == BATADV_IF_NOT_IN_USE) continue; if (!batadv_compare_eth(tmp_hard_iface->net_dev->dev_addr, hard_iface->net_dev->dev_addr)) continue; pr_warn("The newly added mac address (%pM) already exists on: %s\n", hard_iface->net_dev->dev_addr, tmp_hard_iface->net_dev->name); pr_warn("It is strongly recommended to keep mac addresses unique to avoid problems!\n"); } } /** * batadv_hardif_recalc_extra_skbroom() - Recalculate skbuff extra head/tailroom * @mesh_iface: netdev struct of the mesh interface */ static void batadv_hardif_recalc_extra_skbroom(struct net_device *mesh_iface) { const struct batadv_hard_iface *hard_iface; unsigned short lower_header_len = ETH_HLEN; unsigned short lower_headroom = 0; unsigned short lower_tailroom = 0; unsigned short needed_headroom; struct list_head *iter; rcu_read_lock(); netdev_for_each_lower_private_rcu(mesh_iface, hard_iface, iter) { if (hard_iface->if_status == BATADV_IF_NOT_IN_USE) continue; lower_header_len = max_t(unsigned short, lower_header_len, hard_iface->net_dev->hard_header_len); lower_headroom = max_t(unsigned short, lower_headroom, hard_iface->net_dev->needed_headroom); lower_tailroom = max_t(unsigned short, lower_tailroom, hard_iface->net_dev->needed_tailroom); } rcu_read_unlock(); needed_headroom = lower_headroom + (lower_header_len - ETH_HLEN); needed_headroom += batadv_max_header_len(); /* fragmentation headers don't strip the unicast/... header */ needed_headroom += sizeof(struct batadv_frag_packet); mesh_iface->needed_headroom = needed_headroom; mesh_iface->needed_tailroom = lower_tailroom; } /** * batadv_hardif_min_mtu() - Calculate maximum MTU for mesh interface * @mesh_iface: netdev struct of the mesh interface * * Return: MTU for the mesh-interface (limited by the minimal MTU of all active * slave interfaces) */ int batadv_hardif_min_mtu(struct net_device *mesh_iface) { struct batadv_priv *bat_priv = netdev_priv(mesh_iface); const struct batadv_hard_iface *hard_iface; struct list_head *iter; int min_mtu = INT_MAX; rcu_read_lock(); netdev_for_each_lower_private_rcu(mesh_iface, hard_iface, iter) { if (hard_iface->if_status != BATADV_IF_ACTIVE && hard_iface->if_status != BATADV_IF_TO_BE_ACTIVATED) continue; min_mtu = min_t(int, hard_iface->net_dev->mtu, min_mtu); } rcu_read_unlock(); if (atomic_read(&bat_priv->fragmentation) == 0) goto out; /* with fragmentation enabled the maximum size of internally generated * packets such as translation table exchanges or tvlv containers, etc * has to be calculated */ min_mtu = min_t(int, min_mtu, BATADV_FRAG_MAX_FRAG_SIZE); min_mtu -= sizeof(struct batadv_frag_packet); min_mtu *= BATADV_FRAG_MAX_FRAGMENTS; out: /* report to the other components the maximum amount of bytes that * batman-adv can send over the wire (without considering the payload * overhead). For example, this value is used by TT to compute the * maximum local table size */ atomic_set(&bat_priv->packet_size_max, min_mtu); /* the real mesh-interface MTU is computed by removing the payload * overhead from the maximum amount of bytes that was just computed. */ return min_t(int, min_mtu - batadv_max_header_len(), BATADV_MAX_MTU); } /** * batadv_update_min_mtu() - Adjusts the MTU if a new interface with a smaller * MTU appeared * @mesh_iface: netdev struct of the mesh interface */ void batadv_update_min_mtu(struct net_device *mesh_iface) { struct batadv_priv *bat_priv = netdev_priv(mesh_iface); int limit_mtu; int mtu; mtu = batadv_hardif_min_mtu(mesh_iface); if (bat_priv->mtu_set_by_user) limit_mtu = bat_priv->mtu_set_by_user; else limit_mtu = ETH_DATA_LEN; mtu = min(mtu, limit_mtu); dev_set_mtu(mesh_iface, mtu); /* Check if the local translate table should be cleaned up to match a * new (and smaller) MTU. */ batadv_tt_local_resize_to_mtu(mesh_iface); } static void batadv_hardif_activate_interface(struct batadv_hard_iface *hard_iface) { struct batadv_priv *bat_priv; struct batadv_hard_iface *primary_if = NULL; if (hard_iface->if_status != BATADV_IF_INACTIVE) goto out; bat_priv = netdev_priv(hard_iface->mesh_iface); bat_priv->algo_ops->iface.update_mac(hard_iface); hard_iface->if_status = BATADV_IF_TO_BE_ACTIVATED; /* the first active interface becomes our primary interface or * the next active interface after the old primary interface was removed */ primary_if = batadv_primary_if_get_selected(bat_priv); if (!primary_if) batadv_primary_if_select(bat_priv, hard_iface); batadv_info(hard_iface->mesh_iface, "Interface activated: %s\n", hard_iface->net_dev->name); batadv_update_min_mtu(hard_iface->mesh_iface); if (bat_priv->algo_ops->iface.activate) bat_priv->algo_ops->iface.activate(hard_iface); out: batadv_hardif_put(primary_if); } static void batadv_hardif_deactivate_interface(struct batadv_hard_iface *hard_iface) { if (hard_iface->if_status != BATADV_IF_ACTIVE && hard_iface->if_status != BATADV_IF_TO_BE_ACTIVATED) return; hard_iface->if_status = BATADV_IF_INACTIVE; batadv_info(hard_iface->mesh_iface, "Interface deactivated: %s\n", hard_iface->net_dev->name); batadv_update_min_mtu(hard_iface->mesh_iface); } /** * batadv_hardif_enable_interface() - Enslave hard interface to mesh interface * @hard_iface: hard interface to add to mesh interface * @mesh_iface: netdev struct of the mesh interface * * Return: 0 on success or negative error number in case of failure */ int batadv_hardif_enable_interface(struct batadv_hard_iface *hard_iface, struct net_device *mesh_iface) { struct batadv_priv *bat_priv; __be16 ethertype = htons(ETH_P_BATMAN); int max_header_len = batadv_max_header_len(); unsigned int required_mtu; unsigned int hardif_mtu; int ret; hardif_mtu = READ_ONCE(hard_iface->net_dev->mtu); required_mtu = READ_ONCE(mesh_iface->mtu) + max_header_len; if (hardif_mtu < ETH_MIN_MTU + max_header_len) return -EINVAL; if (hard_iface->if_status != BATADV_IF_NOT_IN_USE) goto out; kref_get(&hard_iface->refcount); netdev_hold(mesh_iface, &hard_iface->meshif_dev_tracker, GFP_ATOMIC); hard_iface->mesh_iface = mesh_iface; bat_priv = netdev_priv(hard_iface->mesh_iface); ret = netdev_master_upper_dev_link(hard_iface->net_dev, mesh_iface, hard_iface, NULL, NULL); if (ret) goto err_dev; ret = bat_priv->algo_ops->iface.enable(hard_iface); if (ret < 0) goto err_upper; hard_iface->if_status = BATADV_IF_INACTIVE; kref_get(&hard_iface->refcount); hard_iface->batman_adv_ptype.type = ethertype; hard_iface->batman_adv_ptype.func = batadv_batman_skb_recv; hard_iface->batman_adv_ptype.dev = hard_iface->net_dev; dev_add_pack(&hard_iface->batman_adv_ptype); batadv_info(hard_iface->mesh_iface, "Adding interface: %s\n", hard_iface->net_dev->name); if (atomic_read(&bat_priv->fragmentation) && hardif_mtu < required_mtu) batadv_info(hard_iface->mesh_iface, "The MTU of interface %s is too small (%i) to handle the transport of batman-adv packets. Packets going over this interface will be fragmented on layer2 which could impact the performance. Setting the MTU to %i would solve the problem.\n", hard_iface->net_dev->name, hardif_mtu, required_mtu); if (!atomic_read(&bat_priv->fragmentation) && hardif_mtu < required_mtu) batadv_info(hard_iface->mesh_iface, "The MTU of interface %s is too small (%i) to handle the transport of batman-adv packets. If you experience problems getting traffic through try increasing the MTU to %i.\n", hard_iface->net_dev->name, hardif_mtu, required_mtu); batadv_check_known_mac_addr(hard_iface); if (batadv_hardif_is_iface_up(hard_iface)) batadv_hardif_activate_interface(hard_iface); else batadv_err(hard_iface->mesh_iface, "Not using interface %s (retrying later): interface not active\n", hard_iface->net_dev->name); batadv_hardif_recalc_extra_skbroom(mesh_iface); if (bat_priv->algo_ops->iface.enabled) bat_priv->algo_ops->iface.enabled(hard_iface); out: return 0; err_upper: netdev_upper_dev_unlink(hard_iface->net_dev, mesh_iface); err_dev: hard_iface->mesh_iface = NULL; netdev_put(mesh_iface, &hard_iface->meshif_dev_tracker); batadv_hardif_put(hard_iface); return ret; } /** * batadv_hardif_cnt() - get number of interfaces enslaved to mesh interface * @mesh_iface: mesh interface to check * * This function is only using RCU for locking - the result can therefore be * off when another function is modifying the list at the same time. The * caller can use the rtnl_lock to make sure that the count is accurate. * * Return: number of connected/enslaved hard interfaces */ static size_t batadv_hardif_cnt(struct net_device *mesh_iface) { struct batadv_hard_iface *hard_iface; struct list_head *iter; size_t count = 0; rcu_read_lock(); netdev_for_each_lower_private_rcu(mesh_iface, hard_iface, iter) count++; rcu_read_unlock(); return count; } /** * batadv_hardif_disable_interface() - Remove hard interface from mesh interface * @hard_iface: hard interface to be removed */ void batadv_hardif_disable_interface(struct batadv_hard_iface *hard_iface) { struct batadv_priv *bat_priv = netdev_priv(hard_iface->mesh_iface); struct batadv_hard_iface *primary_if = NULL; batadv_hardif_deactivate_interface(hard_iface); if (hard_iface->if_status != BATADV_IF_INACTIVE) goto out; batadv_info(hard_iface->mesh_iface, "Removing interface: %s\n", hard_iface->net_dev->name); dev_remove_pack(&hard_iface->batman_adv_ptype); batadv_hardif_put(hard_iface); primary_if = batadv_primary_if_get_selected(bat_priv); if (hard_iface == primary_if) { struct batadv_hard_iface *new_if; new_if = batadv_hardif_get_active(hard_iface->mesh_iface); batadv_primary_if_select(bat_priv, new_if); batadv_hardif_put(new_if); } bat_priv->algo_ops->iface.disable(hard_iface); hard_iface->if_status = BATADV_IF_NOT_IN_USE; /* delete all references to this hard_iface */ batadv_purge_orig_ref(bat_priv); batadv_purge_outstanding_packets(bat_priv, hard_iface); netdev_put(hard_iface->mesh_iface, &hard_iface->meshif_dev_tracker); netdev_upper_dev_unlink(hard_iface->net_dev, hard_iface->mesh_iface); batadv_hardif_recalc_extra_skbroom(hard_iface->mesh_iface); /* nobody uses this interface anymore */ if (batadv_hardif_cnt(hard_iface->mesh_iface) <= 1) batadv_gw_check_client_stop(bat_priv); hard_iface->mesh_iface = NULL; batadv_hardif_put(hard_iface); out: batadv_hardif_put(primary_if); } static struct batadv_hard_iface * batadv_hardif_add_interface(struct net_device *net_dev) { struct batadv_hard_iface *hard_iface; ASSERT_RTNL(); if (!batadv_is_valid_iface(net_dev)) return NULL; hard_iface = kzalloc(sizeof(*hard_iface), GFP_ATOMIC); if (!hard_iface) return NULL; netdev_hold(net_dev, &hard_iface->dev_tracker, GFP_ATOMIC); hard_iface->net_dev = net_dev; hard_iface->mesh_iface = NULL; hard_iface->if_status = BATADV_IF_NOT_IN_USE; INIT_LIST_HEAD(&hard_iface->list); INIT_HLIST_HEAD(&hard_iface->neigh_list); mutex_init(&hard_iface->bat_iv.ogm_buff_mutex); spin_lock_init(&hard_iface->neigh_list_lock); kref_init(&hard_iface->refcount); hard_iface->num_bcasts = BATADV_NUM_BCASTS_DEFAULT; hard_iface->wifi_flags = batadv_wifi_flags_evaluate(net_dev); if (batadv_is_wifi_hardif(hard_iface)) hard_iface->num_bcasts = BATADV_NUM_BCASTS_WIRELESS; atomic_set(&hard_iface->hop_penalty, 0); batadv_v_hardif_init(hard_iface); kref_get(&hard_iface->refcount); list_add_tail_rcu(&hard_iface->list, &batadv_hardif_list); batadv_hardif_generation++; return hard_iface; } static void batadv_hardif_remove_interface(struct batadv_hard_iface *hard_iface) { ASSERT_RTNL(); /* first deactivate interface */ if (hard_iface->if_status != BATADV_IF_NOT_IN_USE) batadv_hardif_disable_interface(hard_iface); if (hard_iface->if_status != BATADV_IF_NOT_IN_USE) return; hard_iface->if_status = BATADV_IF_TO_BE_REMOVED; batadv_hardif_put(hard_iface); } /** * batadv_hard_if_event_meshif() - Handle events for mesh interfaces * @event: NETDEV_* event to handle * @net_dev: net_device which generated an event * * Return: NOTIFY_* result */ static int batadv_hard_if_event_meshif(unsigned long event, struct net_device *net_dev) { struct batadv_priv *bat_priv; switch (event) { case NETDEV_REGISTER: bat_priv = netdev_priv(net_dev); batadv_meshif_create_vlan(bat_priv, BATADV_NO_FLAGS); break; } return NOTIFY_DONE; } static int batadv_hard_if_event(struct notifier_block *this, unsigned long event, void *ptr) { struct net_device *net_dev = netdev_notifier_info_to_dev(ptr); struct batadv_hard_iface *hard_iface; struct batadv_hard_iface *primary_if = NULL; struct batadv_priv *bat_priv; if (batadv_meshif_is_valid(net_dev)) return batadv_hard_if_event_meshif(event, net_dev); hard_iface = batadv_hardif_get_by_netdev(net_dev); if (!hard_iface && (event == NETDEV_REGISTER || event == NETDEV_POST_TYPE_CHANGE)) hard_iface = batadv_hardif_add_interface(net_dev); if (!hard_iface) goto out; switch (event) { case NETDEV_UP: batadv_hardif_activate_interface(hard_iface); break; case NETDEV_GOING_DOWN: case NETDEV_DOWN: batadv_hardif_deactivate_interface(hard_iface); break; case NETDEV_UNREGISTER: case NETDEV_PRE_TYPE_CHANGE: list_del_rcu(&hard_iface->list); batadv_hardif_generation++; batadv_hardif_remove_interface(hard_iface); break; case NETDEV_CHANGEMTU: if (hard_iface->mesh_iface) batadv_update_min_mtu(hard_iface->mesh_iface); break; case NETDEV_CHANGEADDR: if (hard_iface->if_status == BATADV_IF_NOT_IN_USE) goto hardif_put; batadv_check_known_mac_addr(hard_iface); bat_priv = netdev_priv(hard_iface->mesh_iface); bat_priv->algo_ops->iface.update_mac(hard_iface); primary_if = batadv_primary_if_get_selected(bat_priv); if (!primary_if) goto hardif_put; if (hard_iface == primary_if) batadv_primary_if_update_addr(bat_priv, NULL); break; case NETDEV_CHANGEUPPER: hard_iface->wifi_flags = batadv_wifi_flags_evaluate(net_dev); if (batadv_is_wifi_hardif(hard_iface)) hard_iface->num_bcasts = BATADV_NUM_BCASTS_WIRELESS; break; default: break; } hardif_put: batadv_hardif_put(hard_iface); out: batadv_hardif_put(primary_if); return NOTIFY_DONE; } struct notifier_block batadv_hard_if_notifier = { .notifier_call = batadv_hard_if_event, };
28 186 230 478 67 72 2 2 2 12495 18 52 1 1336 10025 42 18 11 9 31 8 5801 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _LINUX_SCHED_MM_H #define _LINUX_SCHED_MM_H #include <linux/kernel.h> #include <linux/atomic.h> #include <linux/sched.h> #include <linux/mm_types.h> #include <linux/gfp.h> #include <linux/sync_core.h> #include <linux/sched/coredump.h> /* * Routines for handling mm_structs */ extern struct mm_struct *mm_alloc(void); /** * mmgrab() - Pin a &struct mm_struct. * @mm: The &struct mm_struct to pin. * * Make sure that @mm will not get freed even after the owning task * exits. This doesn't guarantee that the associated address space * will still exist later on and mmget_not_zero() has to be used before * accessing it. * * This is a preferred way to pin @mm for a longer/unbounded amount * of time. * * Use mmdrop() to release the reference acquired by mmgrab(). * * See also <Documentation/mm/active_mm.rst> for an in-depth explanation * of &mm_struct.mm_count vs &mm_struct.mm_users. */ static inline void mmgrab(struct mm_struct *mm) { atomic_inc(&mm->mm_count); } static inline void smp_mb__after_mmgrab(void) { smp_mb__after_atomic(); } extern void __mmdrop(struct mm_struct *mm); static inline void mmdrop(struct mm_struct *mm) { /* * The implicit full barrier implied by atomic_dec_and_test() is * required by the membarrier system call before returning to * user-space, after storing to rq->curr. */ if (unlikely(atomic_dec_and_test(&mm->mm_count))) __mmdrop(mm); } #ifdef CONFIG_PREEMPT_RT /* * RCU callback for delayed mm drop. Not strictly RCU, but call_rcu() is * by far the least expensive way to do that. */ static inline void __mmdrop_delayed(struct rcu_head *rhp) { struct mm_struct *mm = container_of(rhp, struct mm_struct, delayed_drop); __mmdrop(mm); } /* * Invoked from finish_task_switch(). Delegates the heavy lifting on RT * kernels via RCU. */ static inline void mmdrop_sched(struct mm_struct *mm) { /* Provides a full memory barrier. See mmdrop() */ if (atomic_dec_and_test(&mm->mm_count)) call_rcu(&mm->delayed_drop, __mmdrop_delayed); } #else static inline void mmdrop_sched(struct mm_struct *mm) { mmdrop(mm); } #endif /* Helpers for lazy TLB mm refcounting */ static inline void mmgrab_lazy_tlb(struct mm_struct *mm) { if (IS_ENABLED(CONFIG_MMU_LAZY_TLB_REFCOUNT)) mmgrab(mm); } static inline void mmdrop_lazy_tlb(struct mm_struct *mm) { if (IS_ENABLED(CONFIG_MMU_LAZY_TLB_REFCOUNT)) { mmdrop(mm); } else { /* * mmdrop_lazy_tlb must provide a full memory barrier, see the * membarrier comment finish_task_switch which relies on this. */ smp_mb(); } } static inline void mmdrop_lazy_tlb_sched(struct mm_struct *mm) { if (IS_ENABLED(CONFIG_MMU_LAZY_TLB_REFCOUNT)) mmdrop_sched(mm); else smp_mb(); /* see mmdrop_lazy_tlb() above */ } /** * mmget() - Pin the address space associated with a &struct mm_struct. * @mm: The address space to pin. * * Make sure that the address space of the given &struct mm_struct doesn't * go away. This does not protect against parts of the address space being * modified or freed, however. * * Never use this function to pin this address space for an * unbounded/indefinite amount of time. * * Use mmput() to release the reference acquired by mmget(). * * See also <Documentation/mm/active_mm.rst> for an in-depth explanation * of &mm_struct.mm_count vs &mm_struct.mm_users. */ static inline void mmget(struct mm_struct *mm) { atomic_inc(&mm->mm_users); } static inline bool mmget_not_zero(struct mm_struct *mm) { return atomic_inc_not_zero(&mm->mm_users); } /* mmput gets rid of the mappings and all user-space */ extern void mmput(struct mm_struct *); #if defined(CONFIG_MMU) || defined(CONFIG_FUTEX_PRIVATE_HASH) /* same as above but performs the slow path from the async context. Can * be called from the atomic context as well */ void mmput_async(struct mm_struct *); #endif /* Grab a reference to a task's mm, if it is not already going away */ extern struct mm_struct *get_task_mm(struct task_struct *task); /* * Grab a reference to a task's mm, if it is not already going away * and ptrace_may_access with the mode parameter passed to it * succeeds. */ extern struct mm_struct *mm_access(struct task_struct *task, unsigned int mode); /* Remove the current tasks stale references to the old mm_struct on exit() */ extern void exit_mm_release(struct task_struct *, struct mm_struct *); /* Remove the current tasks stale references to the old mm_struct on exec() */ extern void exec_mm_release(struct task_struct *, struct mm_struct *); #ifdef CONFIG_MEMCG extern void mm_update_next_owner(struct mm_struct *mm); #else static inline void mm_update_next_owner(struct mm_struct *mm) { } #endif /* CONFIG_MEMCG */ #ifdef CONFIG_MMU #ifndef arch_get_mmap_end #define arch_get_mmap_end(addr, len, flags) (TASK_SIZE) #endif #ifndef arch_get_mmap_base #define arch_get_mmap_base(addr, base) (base) #endif extern void arch_pick_mmap_layout(struct mm_struct *mm, const struct rlimit *rlim_stack); unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr, unsigned long len, unsigned long pgoff, unsigned long flags, vm_flags_t vm_flags); unsigned long arch_get_unmapped_area_topdown(struct file *filp, unsigned long addr, unsigned long len, unsigned long pgoff, unsigned long flags, vm_flags_t); unsigned long mm_get_unmapped_area(struct file *filp, unsigned long addr, unsigned long len, unsigned long pgoff, unsigned long flags); unsigned long mm_get_unmapped_area_vmflags(struct file *filp, unsigned long addr, unsigned long len, unsigned long pgoff, unsigned long flags, vm_flags_t vm_flags); unsigned long generic_get_unmapped_area(struct file *filp, unsigned long addr, unsigned long len, unsigned long pgoff, unsigned long flags, vm_flags_t vm_flags); unsigned long generic_get_unmapped_area_topdown(struct file *filp, unsigned long addr, unsigned long len, unsigned long pgoff, unsigned long flags, vm_flags_t vm_flags); #else static inline void arch_pick_mmap_layout(struct mm_struct *mm, const struct rlimit *rlim_stack) {} #endif static inline bool in_vfork(struct task_struct *tsk) { bool ret; /* * need RCU to access ->real_parent if CLONE_VM was used along with * CLONE_PARENT. * * We check real_parent->mm == tsk->mm because CLONE_VFORK does not * imply CLONE_VM * * CLONE_VFORK can be used with CLONE_PARENT/CLONE_THREAD and thus * ->real_parent is not necessarily the task doing vfork(), so in * theory we can't rely on task_lock() if we want to dereference it. * * And in this case we can't trust the real_parent->mm == tsk->mm * check, it can be false negative. But we do not care, if init or * another oom-unkillable task does this it should blame itself. */ rcu_read_lock(); ret = tsk->vfork_done && rcu_dereference(tsk->real_parent)->mm == tsk->mm; rcu_read_unlock(); return ret; } /* * Applies per-task gfp context to the given allocation flags. * PF_MEMALLOC_NOIO implies GFP_NOIO * PF_MEMALLOC_NOFS implies GFP_NOFS * PF_MEMALLOC_PIN implies !GFP_MOVABLE */ static inline gfp_t current_gfp_context(gfp_t flags) { unsigned int pflags = READ_ONCE(current->flags); if (unlikely(pflags & (PF_MEMALLOC_NOIO | PF_MEMALLOC_NOFS | PF_MEMALLOC_PIN))) { /* * NOIO implies both NOIO and NOFS and it is a weaker context * so always make sure it makes precedence */ if (pflags & PF_MEMALLOC_NOIO) flags &= ~(__GFP_IO | __GFP_FS); else if (pflags & PF_MEMALLOC_NOFS) flags &= ~__GFP_FS; if (pflags & PF_MEMALLOC_PIN) flags &= ~__GFP_MOVABLE; } return flags; } #ifdef CONFIG_LOCKDEP extern void __fs_reclaim_acquire(unsigned long ip); extern void __fs_reclaim_release(unsigned long ip); extern void fs_reclaim_acquire(gfp_t gfp_mask); extern void fs_reclaim_release(gfp_t gfp_mask); #else static inline void __fs_reclaim_acquire(unsigned long ip) { } static inline void __fs_reclaim_release(unsigned long ip) { } static inline void fs_reclaim_acquire(gfp_t gfp_mask) { } static inline void fs_reclaim_release(gfp_t gfp_mask) { } #endif /* Any memory-allocation retry loop should use * memalloc_retry_wait(), and pass the flags for the most * constrained allocation attempt that might have failed. * This provides useful documentation of where loops are, * and a central place to fine tune the waiting as the MM * implementation changes. */ static inline void memalloc_retry_wait(gfp_t gfp_flags) { /* We use io_schedule_timeout because waiting for memory * typically included waiting for dirty pages to be * written out, which requires IO. */ __set_current_state(TASK_UNINTERRUPTIBLE); gfp_flags = current_gfp_context(gfp_flags); if (gfpflags_allow_blocking(gfp_flags) && !(gfp_flags & __GFP_NORETRY)) /* Probably waited already, no need for much more */ io_schedule_timeout(1); else /* Probably didn't wait, and has now released a lock, * so now is a good time to wait */ io_schedule_timeout(HZ/50); } /** * might_alloc - Mark possible allocation sites * @gfp_mask: gfp_t flags that would be used to allocate * * Similar to might_sleep() and other annotations, this can be used in functions * that might allocate, but often don't. Compiles to nothing without * CONFIG_LOCKDEP. Includes a conditional might_sleep() if @gfp allows blocking. */ static inline void might_alloc(gfp_t gfp_mask) { fs_reclaim_acquire(gfp_mask); fs_reclaim_release(gfp_mask); if (current->flags & PF_MEMALLOC) return; might_sleep_if(gfpflags_allow_blocking(gfp_mask)); } /** * memalloc_flags_save - Add a PF_* flag to current->flags, save old value * @flags: Flags to add. * * This allows PF_* flags to be conveniently added, irrespective of current * value, and then the old version restored with memalloc_flags_restore(). */ static inline unsigned memalloc_flags_save(unsigned flags) { unsigned oldflags = ~current->flags & flags; current->flags |= flags; return oldflags; } static inline void memalloc_flags_restore(unsigned flags) { current->flags &= ~flags; } /** * memalloc_noio_save - Marks implicit GFP_NOIO allocation scope. * * This functions marks the beginning of the GFP_NOIO allocation scope. * All further allocations will implicitly drop __GFP_IO flag and so * they are safe for the IO critical section from the allocation recursion * point of view. Use memalloc_noio_restore to end the scope with flags * returned by this function. * * Context: This function is safe to be used from any context. * Return: The saved flags to be passed to memalloc_noio_restore. */ static inline unsigned int memalloc_noio_save(void) { return memalloc_flags_save(PF_MEMALLOC_NOIO); } /** * memalloc_noio_restore - Ends the implicit GFP_NOIO scope. * @flags: Flags to restore. * * Ends the implicit GFP_NOIO scope started by memalloc_noio_save function. * Always make sure that the given flags is the return value from the * pairing memalloc_noio_save call. */ static inline void memalloc_noio_restore(unsigned int flags) { memalloc_flags_restore(flags); } /** * memalloc_nofs_save - Marks implicit GFP_NOFS allocation scope. * * This functions marks the beginning of the GFP_NOFS allocation scope. * All further allocations will implicitly drop __GFP_FS flag and so * they are safe for the FS critical section from the allocation recursion * point of view. Use memalloc_nofs_restore to end the scope with flags * returned by this function. * * Context: This function is safe to be used from any context. * Return: The saved flags to be passed to memalloc_nofs_restore. */ static inline unsigned int memalloc_nofs_save(void) { return memalloc_flags_save(PF_MEMALLOC_NOFS); } /** * memalloc_nofs_restore - Ends the implicit GFP_NOFS scope. * @flags: Flags to restore. * * Ends the implicit GFP_NOFS scope started by memalloc_nofs_save function. * Always make sure that the given flags is the return value from the * pairing memalloc_nofs_save call. */ static inline void memalloc_nofs_restore(unsigned int flags) { memalloc_flags_restore(flags); } /** * memalloc_noreclaim_save - Marks implicit __GFP_MEMALLOC scope. * * This function marks the beginning of the __GFP_MEMALLOC allocation scope. * All further allocations will implicitly add the __GFP_MEMALLOC flag, which * prevents entering reclaim and allows access to all memory reserves. This * should only be used when the caller guarantees the allocation will allow more * memory to be freed very shortly, i.e. it needs to allocate some memory in * the process of freeing memory, and cannot reclaim due to potential recursion. * * Users of this scope have to be extremely careful to not deplete the reserves * completely and implement a throttling mechanism which controls the * consumption of the reserve based on the amount of freed memory. Usage of a * pre-allocated pool (e.g. mempool) should be always considered before using * this scope. * * Individual allocations under the scope can opt out using __GFP_NOMEMALLOC * * Context: This function should not be used in an interrupt context as that one * does not give PF_MEMALLOC access to reserves. * See __gfp_pfmemalloc_flags(). * Return: The saved flags to be passed to memalloc_noreclaim_restore. */ static inline unsigned int memalloc_noreclaim_save(void) { return memalloc_flags_save(PF_MEMALLOC); } /** * memalloc_noreclaim_restore - Ends the implicit __GFP_MEMALLOC scope. * @flags: Flags to restore. * * Ends the implicit __GFP_MEMALLOC scope started by memalloc_noreclaim_save * function. Always make sure that the given flags is the return value from the * pairing memalloc_noreclaim_save call. */ static inline void memalloc_noreclaim_restore(unsigned int flags) { memalloc_flags_restore(flags); } /** * memalloc_pin_save - Marks implicit ~__GFP_MOVABLE scope. * * This function marks the beginning of the ~__GFP_MOVABLE allocation scope. * All further allocations will implicitly remove the __GFP_MOVABLE flag, which * will constraint the allocations to zones that allow long term pinning, i.e. * not ZONE_MOVABLE zones. * * Return: The saved flags to be passed to memalloc_pin_restore. */ static inline unsigned int memalloc_pin_save(void) { return memalloc_flags_save(PF_MEMALLOC_PIN); } /** * memalloc_pin_restore - Ends the implicit ~__GFP_MOVABLE scope. * @flags: Flags to restore. * * Ends the implicit ~__GFP_MOVABLE scope started by memalloc_pin_save function. * Always make sure that the given flags is the return value from the pairing * memalloc_pin_save call. */ static inline void memalloc_pin_restore(unsigned int flags) { memalloc_flags_restore(flags); } #ifdef CONFIG_MEMCG DECLARE_PER_CPU(struct mem_cgroup *, int_active_memcg); /** * set_active_memcg - Starts the remote memcg charging scope. * @memcg: memcg to charge. * * This function marks the beginning of the remote memcg charging scope. All the * __GFP_ACCOUNT allocations till the end of the scope will be charged to the * given memcg. * * Please, make sure that caller has a reference to the passed memcg structure, * so its lifetime is guaranteed to exceed the scope between two * set_active_memcg() calls. * * NOTE: This function can nest. Users must save the return value and * reset the previous value after their own charging scope is over. */ static inline struct mem_cgroup * set_active_memcg(struct mem_cgroup *memcg) { struct mem_cgroup *old; if (!in_task()) { old = this_cpu_read(int_active_memcg); this_cpu_write(int_active_memcg, memcg); } else { old = current->active_memcg; current->active_memcg = memcg; } return old; } #else static inline struct mem_cgroup * set_active_memcg(struct mem_cgroup *memcg) { return NULL; } #endif #ifdef CONFIG_MEMBARRIER enum { MEMBARRIER_STATE_PRIVATE_EXPEDITED_READY = (1U << 0), MEMBARRIER_STATE_PRIVATE_EXPEDITED = (1U << 1), MEMBARRIER_STATE_GLOBAL_EXPEDITED_READY = (1U << 2), MEMBARRIER_STATE_GLOBAL_EXPEDITED = (1U << 3), MEMBARRIER_STATE_PRIVATE_EXPEDITED_SYNC_CORE_READY = (1U << 4), MEMBARRIER_STATE_PRIVATE_EXPEDITED_SYNC_CORE = (1U << 5), MEMBARRIER_STATE_PRIVATE_EXPEDITED_RSEQ_READY = (1U << 6), MEMBARRIER_STATE_PRIVATE_EXPEDITED_RSEQ = (1U << 7), }; enum { MEMBARRIER_FLAG_SYNC_CORE = (1U << 0), MEMBARRIER_FLAG_RSEQ = (1U << 1), }; #ifdef CONFIG_ARCH_HAS_MEMBARRIER_CALLBACKS #include <asm/membarrier.h> #endif static inline void membarrier_mm_sync_core_before_usermode(struct mm_struct *mm) { /* * The atomic_read() below prevents CSE. The following should * help the compiler generate more efficient code on architectures * where sync_core_before_usermode() is a no-op. */ if (!IS_ENABLED(CONFIG_ARCH_HAS_SYNC_CORE_BEFORE_USERMODE)) return; if (current->mm != mm) return; if (likely(!(atomic_read(&mm->membarrier_state) & MEMBARRIER_STATE_PRIVATE_EXPEDITED_SYNC_CORE))) return; sync_core_before_usermode(); } extern void membarrier_exec_mmap(struct mm_struct *mm); extern void membarrier_update_current_mm(struct mm_struct *next_mm); #else #ifdef CONFIG_ARCH_HAS_MEMBARRIER_CALLBACKS static inline void membarrier_arch_switch_mm(struct mm_struct *prev, struct mm_struct *next, struct task_struct *tsk) { } #endif static inline void membarrier_exec_mmap(struct mm_struct *mm) { } static inline void membarrier_mm_sync_core_before_usermode(struct mm_struct *mm) { } static inline void membarrier_update_current_mm(struct mm_struct *next_mm) { } #endif #endif /* _LINUX_SCHED_MM_H */
38 38 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 /* * User address space access functions. * * For licencing details see kernel-base/COPYING */ #include <linux/uaccess.h> #include <linux/export.h> #include <linux/instrumented.h> #include <asm/tlbflush.h> /** * copy_from_user_nmi - NMI safe copy from user * @to: Pointer to the destination buffer * @from: Pointer to a user space address of the current task * @n: Number of bytes to copy * * Returns: The number of not copied bytes. 0 is success, i.e. all bytes copied * * Contrary to other copy_from_user() variants this function can be called * from NMI context. Despite the name it is not restricted to be called * from NMI context. It is safe to be called from any other context as * well. It disables pagefaults across the copy which means a fault will * abort the copy. * * For NMI context invocations this relies on the nested NMI work to allow * atomic faults from the NMI path; the nested NMI paths are careful to * preserve CR2. */ unsigned long copy_from_user_nmi(void *to, const void __user *from, unsigned long n) { unsigned long ret; if (!__access_ok(from, n)) return n; if (!nmi_uaccess_okay()) return n; /* * Even though this function is typically called from NMI/IRQ context * disable pagefaults so that its behaviour is consistent even when * called from other contexts. */ pagefault_disable(); instrument_copy_from_user_before(to, from, n); ret = raw_copy_from_user(to, from, n); instrument_copy_from_user_after(to, from, n, ret); pagefault_enable(); return ret; } EXPORT_SYMBOL_GPL(copy_from_user_nmi);
332 272 314 89 252 277 333 333 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 // SPDX-License-Identifier: GPL-2.0-only #include <linux/kernel.h> #include <linux/gcd.h> #include <linux/export.h> /* * This implements the binary GCD algorithm. (Often attributed to Stein, * but as Knuth has noted, appears in a first-century Chinese math text.) * * This is faster than the division-based algorithm even on x86, which * has decent hardware division. */ DEFINE_STATIC_KEY_TRUE(efficient_ffs_key); #if !defined(CONFIG_CPU_NO_EFFICIENT_FFS) /* If __ffs is available, the even/odd algorithm benchmarks slower. */ static unsigned long binary_gcd(unsigned long a, unsigned long b) { unsigned long r = a | b; b >>= __ffs(b); if (b == 1) return r & -r; for (;;) { a >>= __ffs(a); if (a == 1) return r & -r; if (a == b) return a << __ffs(r); if (a < b) swap(a, b); a -= b; } } #endif /* If normalization is done by loops, the even/odd algorithm is a win. */ /** * gcd - calculate and return the greatest common divisor of 2 unsigned longs * @a: first value * @b: second value */ unsigned long gcd(unsigned long a, unsigned long b) { unsigned long r = a | b; if (!a || !b) return r; #if !defined(CONFIG_CPU_NO_EFFICIENT_FFS) if (static_branch_likely(&efficient_ffs_key)) return binary_gcd(a, b); #endif /* Isolate lsbit of r */ r &= -r; while (!(b & r)) b >>= 1; if (b == r) return r; for (;;) { while (!(a & r)) a >>= 1; if (a == r) return r; if (a == b) return a; if (a < b) swap(a, b); a -= b; a >>= 1; if (a & r) a += b; a >>= 1; } } EXPORT_SYMBOL_GPL(gcd);
1 1 1 2 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 // SPDX-License-Identifier: GPL-2.0-or-later /* * kinect sensor device camera, gspca driver * * Copyright (C) 2011 Antonio Ospite <ospite@studenti.unina.it> * * Based on the OpenKinect project and libfreenect * http://openkinect.org/wiki/Init_Analysis * * Special thanks to Steven Toth and kernellabs.com for sponsoring a Kinect * sensor device which I tested the driver on. */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #define MODULE_NAME "kinect" #include "gspca.h" #define CTRL_TIMEOUT 500 MODULE_AUTHOR("Antonio Ospite <ospite@studenti.unina.it>"); MODULE_DESCRIPTION("GSPCA/Kinect Sensor Device USB Camera Driver"); MODULE_LICENSE("GPL"); static bool depth_mode; struct pkt_hdr { uint8_t magic[2]; uint8_t pad; uint8_t flag; uint8_t unk1; uint8_t seq; uint8_t unk2; uint8_t unk3; uint32_t timestamp; }; struct cam_hdr { uint8_t magic[2]; __le16 len; __le16 cmd; __le16 tag; }; /* specific webcam descriptor */ struct sd { struct gspca_dev gspca_dev; /* !! must be the first item */ uint16_t cam_tag; /* a sequence number for packets */ uint8_t stream_flag; /* to identify different stream types */ uint8_t obuf[0x400]; /* output buffer for control commands */ uint8_t ibuf[0x200]; /* input buffer for control commands */ }; #define MODE_640x480 0x0001 #define MODE_640x488 0x0002 #define MODE_1280x1024 0x0004 #define FORMAT_BAYER 0x0010 #define FORMAT_UYVY 0x0020 #define FORMAT_Y10B 0x0040 #define FPS_HIGH 0x0100 static const struct v4l2_pix_format depth_camera_mode[] = { {640, 480, V4L2_PIX_FMT_Y10BPACK, V4L2_FIELD_NONE, .bytesperline = 640 * 10 / 8, .sizeimage = 640 * 480 * 10 / 8, .colorspace = V4L2_COLORSPACE_SRGB, .priv = MODE_640x488 | FORMAT_Y10B}, }; static const struct v4l2_pix_format video_camera_mode[] = { {640, 480, V4L2_PIX_FMT_SGRBG8, V4L2_FIELD_NONE, .bytesperline = 640, .sizeimage = 640 * 480, .colorspace = V4L2_COLORSPACE_SRGB, .priv = MODE_640x480 | FORMAT_BAYER | FPS_HIGH}, {640, 480, V4L2_PIX_FMT_UYVY, V4L2_FIELD_NONE, .bytesperline = 640 * 2, .sizeimage = 640 * 480 * 2, .colorspace = V4L2_COLORSPACE_SRGB, .priv = MODE_640x480 | FORMAT_UYVY}, {1280, 1024, V4L2_PIX_FMT_SGRBG8, V4L2_FIELD_NONE, .bytesperline = 1280, .sizeimage = 1280 * 1024, .colorspace = V4L2_COLORSPACE_SRGB, .priv = MODE_1280x1024 | FORMAT_BAYER}, {640, 488, V4L2_PIX_FMT_Y10BPACK, V4L2_FIELD_NONE, .bytesperline = 640 * 10 / 8, .sizeimage = 640 * 488 * 10 / 8, .colorspace = V4L2_COLORSPACE_SRGB, .priv = MODE_640x488 | FORMAT_Y10B | FPS_HIGH}, {1280, 1024, V4L2_PIX_FMT_Y10BPACK, V4L2_FIELD_NONE, .bytesperline = 1280 * 10 / 8, .sizeimage = 1280 * 1024 * 10 / 8, .colorspace = V4L2_COLORSPACE_SRGB, .priv = MODE_1280x1024 | FORMAT_Y10B}, }; static int kinect_write(struct usb_device *udev, uint8_t *data, uint16_t wLength) { return usb_control_msg(udev, usb_sndctrlpipe(udev, 0), 0x00, USB_DIR_OUT | USB_TYPE_VENDOR | USB_RECIP_DEVICE, 0, 0, data, wLength, CTRL_TIMEOUT); } static int kinect_read(struct usb_device *udev, uint8_t *data, uint16_t wLength) { return usb_control_msg(udev, usb_rcvctrlpipe(udev, 0), 0x00, USB_DIR_IN | USB_TYPE_VENDOR | USB_RECIP_DEVICE, 0, 0, data, wLength, CTRL_TIMEOUT); } static int send_cmd(struct gspca_dev *gspca_dev, uint16_t cmd, void *cmdbuf, unsigned int cmd_len, void *replybuf, unsigned int reply_len) { struct sd *sd = (struct sd *) gspca_dev; struct usb_device *udev = gspca_dev->dev; int res, actual_len; uint8_t *obuf = sd->obuf; uint8_t *ibuf = sd->ibuf; struct cam_hdr *chdr = (void *)obuf; struct cam_hdr *rhdr = (void *)ibuf; if (cmd_len & 1 || cmd_len > (0x400 - sizeof(*chdr))) { pr_err("send_cmd: Invalid command length (0x%x)\n", cmd_len); return -1; } chdr->magic[0] = 0x47; chdr->magic[1] = 0x4d; chdr->cmd = cpu_to_le16(cmd); chdr->tag = cpu_to_le16(sd->cam_tag); chdr->len = cpu_to_le16(cmd_len / 2); memcpy(obuf+sizeof(*chdr), cmdbuf, cmd_len); res = kinect_write(udev, obuf, cmd_len + sizeof(*chdr)); gspca_dbg(gspca_dev, D_USBO, "Control cmd=%04x tag=%04x len=%04x: %d\n", cmd, sd->cam_tag, cmd_len, res); if (res < 0) { pr_err("send_cmd: Output control transfer failed (%d)\n", res); return res; } do { actual_len = kinect_read(udev, ibuf, 0x200); } while (actual_len == 0); gspca_dbg(gspca_dev, D_USBO, "Control reply: %d\n", actual_len); if (actual_len < (int)sizeof(*rhdr)) { pr_err("send_cmd: Input control transfer failed (%d)\n", actual_len); return actual_len < 0 ? actual_len : -EREMOTEIO; } actual_len -= sizeof(*rhdr); if (rhdr->magic[0] != 0x52 || rhdr->magic[1] != 0x42) { pr_err("send_cmd: Bad magic %02x %02x\n", rhdr->magic[0], rhdr->magic[1]); return -1; } if (rhdr->cmd != chdr->cmd) { pr_err("send_cmd: Bad cmd %02x != %02x\n", rhdr->cmd, chdr->cmd); return -1; } if (rhdr->tag != chdr->tag) { pr_err("send_cmd: Bad tag %04x != %04x\n", rhdr->tag, chdr->tag); return -1; } if (le16_to_cpu(rhdr->len) != (actual_len/2)) { pr_err("send_cmd: Bad len %04x != %04x\n", le16_to_cpu(rhdr->len), (int)(actual_len/2)); return -1; } if (actual_len > reply_len) { pr_warn("send_cmd: Data buffer is %d bytes long, but got %d bytes\n", reply_len, actual_len); memcpy(replybuf, ibuf+sizeof(*rhdr), reply_len); } else { memcpy(replybuf, ibuf+sizeof(*rhdr), actual_len); } sd->cam_tag++; return actual_len; } static int write_register(struct gspca_dev *gspca_dev, uint16_t reg, uint16_t data) { uint16_t reply[2]; __le16 cmd[2]; int res; cmd[0] = cpu_to_le16(reg); cmd[1] = cpu_to_le16(data); gspca_dbg(gspca_dev, D_USBO, "Write Reg 0x%04x <= 0x%02x\n", reg, data); res = send_cmd(gspca_dev, 0x03, cmd, 4, reply, 4); if (res < 0) return res; if (res != 2) { pr_warn("send_cmd returned %d [%04x %04x], 0000 expected\n", res, reply[0], reply[1]); } return 0; } /* this function is called at probe time */ static int sd_config_video(struct gspca_dev *gspca_dev, const struct usb_device_id *id) { struct sd *sd = (struct sd *) gspca_dev; struct cam *cam; sd->cam_tag = 0; sd->stream_flag = 0x80; cam = &gspca_dev->cam; cam->cam_mode = video_camera_mode; cam->nmodes = ARRAY_SIZE(video_camera_mode); gspca_dev->xfer_ep = 0x81; #if 0 /* Setting those values is not needed for video stream */ cam->npkt = 15; gspca_dev->pkt_size = 960 * 2; #endif return 0; } static int sd_config_depth(struct gspca_dev *gspca_dev, const struct usb_device_id *id) { struct sd *sd = (struct sd *) gspca_dev; struct cam *cam; sd->cam_tag = 0; sd->stream_flag = 0x70; cam = &gspca_dev->cam; cam->cam_mode = depth_camera_mode; cam->nmodes = ARRAY_SIZE(depth_camera_mode); gspca_dev->xfer_ep = 0x82; return 0; } /* this function is called at probe and resume time */ static int sd_init(struct gspca_dev *gspca_dev) { gspca_dbg(gspca_dev, D_PROBE, "Kinect Camera device.\n"); return 0; } static int sd_start_video(struct gspca_dev *gspca_dev) { int mode; uint8_t fmt_reg, fmt_val; uint8_t res_reg, res_val; uint8_t fps_reg, fps_val; uint8_t mode_val; mode = gspca_dev->cam.cam_mode[gspca_dev->curr_mode].priv; if (mode & FORMAT_Y10B) { fmt_reg = 0x19; res_reg = 0x1a; fps_reg = 0x1b; mode_val = 0x03; } else { fmt_reg = 0x0c; res_reg = 0x0d; fps_reg = 0x0e; mode_val = 0x01; } /* format */ if (mode & FORMAT_UYVY) fmt_val = 0x05; else fmt_val = 0x00; if (mode & MODE_1280x1024) res_val = 0x02; else res_val = 0x01; if (mode & FPS_HIGH) fps_val = 0x1e; else fps_val = 0x0f; /* turn off IR-reset function */ write_register(gspca_dev, 0x105, 0x00); /* Reset video stream */ write_register(gspca_dev, 0x05, 0x00); /* Due to some ridiculous condition in the firmware, we have to start * and stop the depth stream before the camera will hand us 1280x1024 * IR. This is a stupid workaround, but we've yet to find a better * solution. * * Thanks to Drew Fisher for figuring this out. */ if (mode & (FORMAT_Y10B | MODE_1280x1024)) { write_register(gspca_dev, 0x13, 0x01); write_register(gspca_dev, 0x14, 0x1e); write_register(gspca_dev, 0x06, 0x02); write_register(gspca_dev, 0x06, 0x00); } write_register(gspca_dev, fmt_reg, fmt_val); write_register(gspca_dev, res_reg, res_val); write_register(gspca_dev, fps_reg, fps_val); /* Start video stream */ write_register(gspca_dev, 0x05, mode_val); /* disable Hflip */ write_register(gspca_dev, 0x47, 0x00); return 0; } static int sd_start_depth(struct gspca_dev *gspca_dev) { /* turn off IR-reset function */ write_register(gspca_dev, 0x105, 0x00); /* reset depth stream */ write_register(gspca_dev, 0x06, 0x00); /* Depth Stream Format 0x03: 11 bit stream | 0x02: 10 bit */ write_register(gspca_dev, 0x12, 0x02); /* Depth Stream Resolution 1: standard (640x480) */ write_register(gspca_dev, 0x13, 0x01); /* Depth Framerate / 0x1e (30): 30 fps */ write_register(gspca_dev, 0x14, 0x1e); /* Depth Stream Control / 2: Open Depth Stream */ write_register(gspca_dev, 0x06, 0x02); /* disable depth hflip / LSB = 0: Smoothing Disabled */ write_register(gspca_dev, 0x17, 0x00); return 0; } static void sd_stopN_video(struct gspca_dev *gspca_dev) { /* reset video stream */ write_register(gspca_dev, 0x05, 0x00); } static void sd_stopN_depth(struct gspca_dev *gspca_dev) { /* reset depth stream */ write_register(gspca_dev, 0x06, 0x00); } static void sd_pkt_scan(struct gspca_dev *gspca_dev, u8 *__data, int len) { struct sd *sd = (struct sd *) gspca_dev; struct pkt_hdr *hdr = (void *)__data; uint8_t *data = __data + sizeof(*hdr); int datalen = len - sizeof(*hdr); uint8_t sof = sd->stream_flag | 1; uint8_t mof = sd->stream_flag | 2; uint8_t eof = sd->stream_flag | 5; if (len < 12) return; if (hdr->magic[0] != 'R' || hdr->magic[1] != 'B') { pr_warn("[Stream %02x] Invalid magic %02x%02x\n", sd->stream_flag, hdr->magic[0], hdr->magic[1]); return; } if (hdr->flag == sof) gspca_frame_add(gspca_dev, FIRST_PACKET, data, datalen); else if (hdr->flag == mof) gspca_frame_add(gspca_dev, INTER_PACKET, data, datalen); else if (hdr->flag == eof) gspca_frame_add(gspca_dev, LAST_PACKET, data, datalen); else pr_warn("Packet type not recognized...\n"); } /* sub-driver description */ static const struct sd_desc sd_desc_video = { .name = MODULE_NAME, .config = sd_config_video, .init = sd_init, .start = sd_start_video, .stopN = sd_stopN_video, .pkt_scan = sd_pkt_scan, /* .get_streamparm = sd_get_streamparm, .set_streamparm = sd_set_streamparm, */ }; static const struct sd_desc sd_desc_depth = { .name = MODULE_NAME, .config = sd_config_depth, .init = sd_init, .start = sd_start_depth, .stopN = sd_stopN_depth, .pkt_scan = sd_pkt_scan, /* .get_streamparm = sd_get_streamparm, .set_streamparm = sd_set_streamparm, */ }; /* -- module initialisation -- */ static const struct usb_device_id device_table[] = { {USB_DEVICE(0x045e, 0x02ae)}, {USB_DEVICE(0x045e, 0x02bf)}, {} }; MODULE_DEVICE_TABLE(usb, device_table); /* -- device connect -- */ static int sd_probe(struct usb_interface *intf, const struct usb_device_id *id) { if (depth_mode) return gspca_dev_probe(intf, id, &sd_desc_depth, sizeof(struct sd), THIS_MODULE); else return gspca_dev_probe(intf, id, &sd_desc_video, sizeof(struct sd), THIS_MODULE); } static struct usb_driver sd_driver = { .name = MODULE_NAME, .id_table = device_table, .probe = sd_probe, .disconnect = gspca_disconnect, #ifdef CONFIG_PM .suspend = gspca_suspend, .resume = gspca_resume, .reset_resume = gspca_resume, #endif }; module_usb_driver(sd_driver); module_param(depth_mode, bool, 0644); MODULE_PARM_DESC(depth_mode, "0=video 1=depth");
2 7 7 1 2 3 3 2 2 1 4 4 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 // SPDX-License-Identifier: GPL-2.0-only /* * * YeAH TCP * * For further details look at: * https://web.archive.org/web/20080316215752/http://wil.cs.caltech.edu/pfldnet2007/paper/YeAH_TCP.pdf * */ #include <linux/mm.h> #include <linux/module.h> #include <linux/skbuff.h> #include <linux/inet_diag.h> #include <net/tcp.h> #include "tcp_vegas.h" #define TCP_YEAH_ALPHA 80 /* number of packets queued at the bottleneck */ #define TCP_YEAH_GAMMA 1 /* fraction of queue to be removed per rtt */ #define TCP_YEAH_DELTA 3 /* log minimum fraction of cwnd to be removed on loss */ #define TCP_YEAH_EPSILON 1 /* log maximum fraction to be removed on early decongestion */ #define TCP_YEAH_PHY 8 /* maximum delta from base */ #define TCP_YEAH_RHO 16 /* minimum number of consecutive rtt to consider competition on loss */ #define TCP_YEAH_ZETA 50 /* minimum number of state switches to reset reno_count */ #define TCP_SCALABLE_AI_CNT 100U /* YeAH variables */ struct yeah { struct vegas vegas; /* must be first */ /* YeAH */ u32 lastQ; u32 doing_reno_now; u32 reno_count; u32 fast_count; }; static void tcp_yeah_init(struct sock *sk) { struct tcp_sock *tp = tcp_sk(sk); struct yeah *yeah = inet_csk_ca(sk); tcp_vegas_init(sk); yeah->doing_reno_now = 0; yeah->lastQ = 0; yeah->reno_count = 2; /* Ensure the MD arithmetic works. This is somewhat pedantic, * since I don't think we will see a cwnd this large. :) */ tp->snd_cwnd_clamp = min_t(u32, tp->snd_cwnd_clamp, 0xffffffff/128); } static void tcp_yeah_cong_avoid(struct sock *sk, u32 ack, u32 acked) { struct tcp_sock *tp = tcp_sk(sk); struct yeah *yeah = inet_csk_ca(sk); if (!tcp_is_cwnd_limited(sk)) return; if (tcp_in_slow_start(tp)) { acked = tcp_slow_start(tp, acked); if (!acked) goto do_vegas; } if (!yeah->doing_reno_now) { /* Scalable */ tcp_cong_avoid_ai(tp, min(tcp_snd_cwnd(tp), TCP_SCALABLE_AI_CNT), acked); } else { /* Reno */ tcp_cong_avoid_ai(tp, tcp_snd_cwnd(tp), acked); } /* The key players are v_vegas.beg_snd_una and v_beg_snd_nxt. * * These are so named because they represent the approximate values * of snd_una and snd_nxt at the beginning of the current RTT. More * precisely, they represent the amount of data sent during the RTT. * At the end of the RTT, when we receive an ACK for v_beg_snd_nxt, * we will calculate that (v_beg_snd_nxt - v_vegas.beg_snd_una) outstanding * bytes of data have been ACKed during the course of the RTT, giving * an "actual" rate of: * * (v_beg_snd_nxt - v_vegas.beg_snd_una) / (rtt duration) * * Unfortunately, v_vegas.beg_snd_una is not exactly equal to snd_una, * because delayed ACKs can cover more than one segment, so they * don't line up yeahly with the boundaries of RTTs. * * Another unfortunate fact of life is that delayed ACKs delay the * advance of the left edge of our send window, so that the number * of bytes we send in an RTT is often less than our cwnd will allow. * So we keep track of our cwnd separately, in v_beg_snd_cwnd. */ do_vegas: if (after(ack, yeah->vegas.beg_snd_nxt)) { /* We do the Vegas calculations only if we got enough RTT * samples that we can be reasonably sure that we got * at least one RTT sample that wasn't from a delayed ACK. * If we only had 2 samples total, * then that means we're getting only 1 ACK per RTT, which * means they're almost certainly delayed ACKs. * If we have 3 samples, we should be OK. */ if (yeah->vegas.cntRTT > 2) { u32 rtt, queue; u64 bw; /* We have enough RTT samples, so, using the Vegas * algorithm, we determine if we should increase or * decrease cwnd, and by how much. */ /* Pluck out the RTT we are using for the Vegas * calculations. This is the min RTT seen during the * last RTT. Taking the min filters out the effects * of delayed ACKs, at the cost of noticing congestion * a bit later. */ rtt = yeah->vegas.minRTT; /* Compute excess number of packets above bandwidth * Avoid doing full 64 bit divide. */ bw = tcp_snd_cwnd(tp); bw *= rtt - yeah->vegas.baseRTT; do_div(bw, rtt); queue = bw; if (queue > TCP_YEAH_ALPHA || rtt - yeah->vegas.baseRTT > (yeah->vegas.baseRTT / TCP_YEAH_PHY)) { if (queue > TCP_YEAH_ALPHA && tcp_snd_cwnd(tp) > yeah->reno_count) { u32 reduction = min(queue / TCP_YEAH_GAMMA , tcp_snd_cwnd(tp) >> TCP_YEAH_EPSILON); tcp_snd_cwnd_set(tp, tcp_snd_cwnd(tp) - reduction); tcp_snd_cwnd_set(tp, max(tcp_snd_cwnd(tp), yeah->reno_count)); tp->snd_ssthresh = tcp_snd_cwnd(tp); } if (yeah->reno_count <= 2) yeah->reno_count = max(tcp_snd_cwnd(tp)>>1, 2U); else yeah->reno_count++; yeah->doing_reno_now = min(yeah->doing_reno_now + 1, 0xffffffU); } else { yeah->fast_count++; if (yeah->fast_count > TCP_YEAH_ZETA) { yeah->reno_count = 2; yeah->fast_count = 0; } yeah->doing_reno_now = 0; } yeah->lastQ = queue; } /* Save the extent of the current window so we can use this * at the end of the next RTT. */ yeah->vegas.beg_snd_una = yeah->vegas.beg_snd_nxt; yeah->vegas.beg_snd_nxt = tp->snd_nxt; yeah->vegas.beg_snd_cwnd = tcp_snd_cwnd(tp); /* Wipe the slate clean for the next RTT. */ yeah->vegas.cntRTT = 0; yeah->vegas.minRTT = 0x7fffffff; } } static u32 tcp_yeah_ssthresh(struct sock *sk) { const struct tcp_sock *tp = tcp_sk(sk); struct yeah *yeah = inet_csk_ca(sk); u32 reduction; if (yeah->doing_reno_now < TCP_YEAH_RHO) { reduction = yeah->lastQ; reduction = min(reduction, max(tcp_snd_cwnd(tp)>>1, 2U)); reduction = max(reduction, tcp_snd_cwnd(tp) >> TCP_YEAH_DELTA); } else reduction = max(tcp_snd_cwnd(tp)>>1, 2U); yeah->fast_count = 0; yeah->reno_count = max(yeah->reno_count>>1, 2U); return max_t(int, tcp_snd_cwnd(tp) - reduction, 2); } static struct tcp_congestion_ops tcp_yeah __read_mostly = { .init = tcp_yeah_init, .ssthresh = tcp_yeah_ssthresh, .undo_cwnd = tcp_reno_undo_cwnd, .cong_avoid = tcp_yeah_cong_avoid, .set_state = tcp_vegas_state, .cwnd_event = tcp_vegas_cwnd_event, .get_info = tcp_vegas_get_info, .pkts_acked = tcp_vegas_pkts_acked, .owner = THIS_MODULE, .name = "yeah", }; static int __init tcp_yeah_register(void) { BUILD_BUG_ON(sizeof(struct yeah) > ICSK_CA_PRIV_SIZE); tcp_register_congestion_control(&tcp_yeah); return 0; } static void __exit tcp_yeah_unregister(void) { tcp_unregister_congestion_control(&tcp_yeah); } module_init(tcp_yeah_register); module_exit(tcp_yeah_unregister); MODULE_AUTHOR("Angelo P. Castellani"); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("YeAH TCP");
63 61 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 /* SPDX-License-Identifier: GPL-2.0 */ /* * Internal header to deal with irq_desc->status which will be renamed * to irq_desc->settings. */ enum { _IRQ_DEFAULT_INIT_FLAGS = IRQ_DEFAULT_INIT_FLAGS, _IRQ_PER_CPU = IRQ_PER_CPU, _IRQ_LEVEL = IRQ_LEVEL, _IRQ_NOPROBE = IRQ_NOPROBE, _IRQ_NOREQUEST = IRQ_NOREQUEST, _IRQ_NOTHREAD = IRQ_NOTHREAD, _IRQ_NOAUTOEN = IRQ_NOAUTOEN, _IRQ_NO_BALANCING = IRQ_NO_BALANCING, _IRQ_NESTED_THREAD = IRQ_NESTED_THREAD, _IRQ_PER_CPU_DEVID = IRQ_PER_CPU_DEVID, _IRQ_IS_POLLED = IRQ_IS_POLLED, _IRQ_DISABLE_UNLAZY = IRQ_DISABLE_UNLAZY, _IRQ_HIDDEN = IRQ_HIDDEN, _IRQ_NO_DEBUG = IRQ_NO_DEBUG, _IRQF_MODIFY_MASK = IRQF_MODIFY_MASK, }; #define IRQ_PER_CPU GOT_YOU_MORON #define IRQ_NO_BALANCING GOT_YOU_MORON #define IRQ_LEVEL GOT_YOU_MORON #define IRQ_NOPROBE GOT_YOU_MORON #define IRQ_NOREQUEST GOT_YOU_MORON #define IRQ_NOTHREAD GOT_YOU_MORON #define IRQ_NOAUTOEN GOT_YOU_MORON #define IRQ_NESTED_THREAD GOT_YOU_MORON #define IRQ_PER_CPU_DEVID GOT_YOU_MORON #define IRQ_IS_POLLED GOT_YOU_MORON #define IRQ_DISABLE_UNLAZY GOT_YOU_MORON #define IRQ_HIDDEN GOT_YOU_MORON #define IRQ_NO_DEBUG GOT_YOU_MORON #undef IRQF_MODIFY_MASK #define IRQF_MODIFY_MASK GOT_YOU_MORON static inline void irq_settings_clr_and_set(struct irq_desc *desc, u32 clr, u32 set) { desc->status_use_accessors &= ~(clr & _IRQF_MODIFY_MASK); desc->status_use_accessors |= (set & _IRQF_MODIFY_MASK); } static inline bool irq_settings_is_per_cpu(struct irq_desc *desc) { return desc->status_use_accessors & _IRQ_PER_CPU; } static inline bool irq_settings_is_per_cpu_devid(struct irq_desc *desc) { return desc->status_use_accessors & _IRQ_PER_CPU_DEVID; } static inline void irq_settings_set_per_cpu(struct irq_desc *desc) { desc->status_use_accessors |= _IRQ_PER_CPU; } static inline void irq_settings_set_no_balancing(struct irq_desc *desc) { desc->status_use_accessors |= _IRQ_NO_BALANCING; } static inline bool irq_settings_has_no_balance_set(struct irq_desc *desc) { return desc->status_use_accessors & _IRQ_NO_BALANCING; } static inline u32 irq_settings_get_trigger_mask(struct irq_desc *desc) { return desc->status_use_accessors & IRQ_TYPE_SENSE_MASK; } static inline void irq_settings_set_trigger_mask(struct irq_desc *desc, u32 mask) { desc->status_use_accessors &= ~IRQ_TYPE_SENSE_MASK; desc->status_use_accessors |= mask & IRQ_TYPE_SENSE_MASK; } static inline bool irq_settings_is_level(struct irq_desc *desc) { return desc->status_use_accessors & _IRQ_LEVEL; } static inline void irq_settings_clr_level(struct irq_desc *desc) { desc->status_use_accessors &= ~_IRQ_LEVEL; } static inline void irq_settings_set_level(struct irq_desc *desc) { desc->status_use_accessors |= _IRQ_LEVEL; } static inline bool irq_settings_can_request(struct irq_desc *desc) { return !(desc->status_use_accessors & _IRQ_NOREQUEST); } static inline void irq_settings_clr_norequest(struct irq_desc *desc) { desc->status_use_accessors &= ~_IRQ_NOREQUEST; } static inline void irq_settings_set_norequest(struct irq_desc *desc) { desc->status_use_accessors |= _IRQ_NOREQUEST; } static inline bool irq_settings_can_thread(struct irq_desc *desc) { return !(desc->status_use_accessors & _IRQ_NOTHREAD); } static inline void irq_settings_clr_nothread(struct irq_desc *desc) { desc->status_use_accessors &= ~_IRQ_NOTHREAD; } static inline void irq_settings_set_nothread(struct irq_desc *desc) { desc->status_use_accessors |= _IRQ_NOTHREAD; } static inline bool irq_settings_can_probe(struct irq_desc *desc) { return !(desc->status_use_accessors & _IRQ_NOPROBE); } static inline void irq_settings_clr_noprobe(struct irq_desc *desc) { desc->status_use_accessors &= ~_IRQ_NOPROBE; } static inline void irq_settings_set_noprobe(struct irq_desc *desc) { desc->status_use_accessors |= _IRQ_NOPROBE; } static inline bool irq_settings_can_autoenable(struct irq_desc *desc) { return !(desc->status_use_accessors & _IRQ_NOAUTOEN); } static inline bool irq_settings_is_nested_thread(struct irq_desc *desc) { return desc->status_use_accessors & _IRQ_NESTED_THREAD; } static inline bool irq_settings_is_polled(struct irq_desc *desc) { return desc->status_use_accessors & _IRQ_IS_POLLED; } static inline bool irq_settings_disable_unlazy(struct irq_desc *desc) { return desc->status_use_accessors & _IRQ_DISABLE_UNLAZY; } static inline void irq_settings_clr_disable_unlazy(struct irq_desc *desc) { desc->status_use_accessors &= ~_IRQ_DISABLE_UNLAZY; } static inline bool irq_settings_is_hidden(struct irq_desc *desc) { return desc->status_use_accessors & _IRQ_HIDDEN; } static inline void irq_settings_set_no_debug(struct irq_desc *desc) { desc->status_use_accessors |= _IRQ_NO_DEBUG; } static inline bool irq_settings_no_debug(struct irq_desc *desc) { return desc->status_use_accessors & _IRQ_NO_DEBUG; }
315 5959 3848 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef MM_SLAB_H #define MM_SLAB_H #include <linux/reciprocal_div.h> #include <linux/list_lru.h> #include <linux/local_lock.h> #include <linux/random.h> #include <linux/kobject.h> #include <linux/sched/mm.h> #include <linux/memcontrol.h> #include <linux/kfence.h> #include <linux/kasan.h> /* * Internal slab definitions */ #ifdef CONFIG_64BIT # ifdef system_has_cmpxchg128 # define system_has_freelist_aba() system_has_cmpxchg128() # define try_cmpxchg_freelist try_cmpxchg128 # endif typedef u128 freelist_full_t; #else /* CONFIG_64BIT */ # ifdef system_has_cmpxchg64 # define system_has_freelist_aba() system_has_cmpxchg64() # define try_cmpxchg_freelist try_cmpxchg64 # endif typedef u64 freelist_full_t; #endif /* CONFIG_64BIT */ #if defined(system_has_freelist_aba) && !defined(CONFIG_HAVE_ALIGNED_STRUCT_PAGE) #undef system_has_freelist_aba #endif /* * Freelist pointer and counter to cmpxchg together, avoids the typical ABA * problems with cmpxchg of just a pointer. */ struct freelist_counters { union { struct { void *freelist; union { unsigned long counters; struct { unsigned inuse:16; unsigned objects:15; /* * If slab debugging is enabled then the * frozen bit can be reused to indicate * that the slab was corrupted */ unsigned frozen:1; #ifdef CONFIG_64BIT /* * Some optimizations use free bits in 'counters' field * to save memory. In case ->stride field is not available, * such optimizations are disabled. */ unsigned short stride; #endif }; }; }; #ifdef system_has_freelist_aba freelist_full_t freelist_counters; #endif }; }; /* Reuses the bits in struct page */ struct slab { memdesc_flags_t flags; struct kmem_cache *slab_cache; union { struct { struct list_head slab_list; /* Double-word boundary */ struct freelist_counters; }; struct rcu_head rcu_head; }; unsigned int __page_type; atomic_t __page_refcount; #ifdef CONFIG_SLAB_OBJ_EXT unsigned long obj_exts; #endif }; #define SLAB_MATCH(pg, sl) \ static_assert(offsetof(struct page, pg) == offsetof(struct slab, sl)) SLAB_MATCH(flags, flags); SLAB_MATCH(compound_head, slab_cache); /* Ensure bit 0 is clear */ SLAB_MATCH(_refcount, __page_refcount); #ifdef CONFIG_MEMCG SLAB_MATCH(memcg_data, obj_exts); #elif defined(CONFIG_SLAB_OBJ_EXT) SLAB_MATCH(_unused_slab_obj_exts, obj_exts); #endif #undef SLAB_MATCH static_assert(sizeof(struct slab) <= sizeof(struct page)); #if defined(system_has_freelist_aba) static_assert(IS_ALIGNED(offsetof(struct slab, freelist), sizeof(struct freelist_counters))); #endif /** * slab_folio - The folio allocated for a slab * @s: The slab. * * Slabs are allocated as folios that contain the individual objects and are * using some fields in the first struct page of the folio - those fields are * now accessed by struct slab. It is occasionally necessary to convert back to * a folio in order to communicate with the rest of the mm. Please use this * helper function instead of casting yourself, as the implementation may change * in the future. */ #define slab_folio(s) (_Generic((s), \ const struct slab *: (const struct folio *)s, \ struct slab *: (struct folio *)s)) /** * page_slab - Converts from struct page to its slab. * @page: A page which may or may not belong to a slab. * * Return: The slab which contains this page or NULL if the page does * not belong to a slab. This includes pages returned from large kmalloc. */ static inline struct slab *page_slab(const struct page *page) { unsigned long head; head = READ_ONCE(page->compound_head); if (head & 1) page = (struct page *)(head - 1); if (data_race(page->page_type >> 24) != PGTY_slab) page = NULL; return (struct slab *)page; } /** * slab_page - The first struct page allocated for a slab * @s: The slab. * * A convenience wrapper for converting slab to the first struct page of the * underlying folio, to communicate with code not yet converted to folio or * struct slab. */ #define slab_page(s) folio_page(slab_folio(s), 0) static inline void *slab_address(const struct slab *slab) { return folio_address(slab_folio(slab)); } static inline int slab_nid(const struct slab *slab) { return memdesc_nid(slab->flags); } static inline pg_data_t *slab_pgdat(const struct slab *slab) { return NODE_DATA(slab_nid(slab)); } static inline struct slab *virt_to_slab(const void *addr) { return page_slab(virt_to_page(addr)); } static inline int slab_order(const struct slab *slab) { return folio_order(slab_folio(slab)); } static inline size_t slab_size(const struct slab *slab) { return PAGE_SIZE << slab_order(slab); } /* * Word size structure that can be atomically updated or read and that * contains both the order and the number of objects that a slab of the * given order would contain. */ struct kmem_cache_order_objects { unsigned int x; }; /* * Slab cache management. */ struct kmem_cache { struct slub_percpu_sheaves __percpu *cpu_sheaves; /* Used for retrieving partial slabs, etc. */ slab_flags_t flags; unsigned long min_partial; unsigned int size; /* Object size including metadata */ unsigned int object_size; /* Object size without metadata */ struct reciprocal_value reciprocal_size; unsigned int offset; /* Free pointer offset */ unsigned int sheaf_capacity; struct kmem_cache_order_objects oo; /* Allocation and freeing of slabs */ struct kmem_cache_order_objects min; gfp_t allocflags; /* gfp flags to use on each alloc */ int refcount; /* Refcount for slab cache destroy */ void (*ctor)(void *object); /* Object constructor */ unsigned int inuse; /* Offset to metadata */ unsigned int align; /* Alignment */ unsigned int red_left_pad; /* Left redzone padding size */ const char *name; /* Name (only for display!) */ struct list_head list; /* List of slab caches */ #ifdef CONFIG_SYSFS struct kobject kobj; /* For sysfs */ #endif #ifdef CONFIG_SLAB_FREELIST_HARDENED unsigned long random; #endif #ifdef CONFIG_NUMA /* * Defragmentation by allocating from a remote node. */ unsigned int remote_node_defrag_ratio; #endif #ifdef CONFIG_SLAB_FREELIST_RANDOM unsigned int *random_seq; #endif #ifdef CONFIG_KASAN_GENERIC struct kasan_cache kasan_info; #endif #ifdef CONFIG_HARDENED_USERCOPY unsigned int useroffset; /* Usercopy region offset */ unsigned int usersize; /* Usercopy region size */ #endif #ifdef CONFIG_SLUB_STATS struct kmem_cache_stats __percpu *cpu_stats; #endif struct kmem_cache_node *node[MAX_NUMNODES]; }; /* * Every cache has !NULL s->cpu_sheaves but they may point to the * bootstrap_sheaf temporarily during init, or permanently for the boot caches * and caches with debugging enabled, or all caches with CONFIG_SLUB_TINY. This * helper distinguishes whether cache has real non-bootstrap sheaves. */ static inline bool cache_has_sheaves(struct kmem_cache *s) { /* Test CONFIG_SLUB_TINY for code elimination purposes */ return !IS_ENABLED(CONFIG_SLUB_TINY) && s->sheaf_capacity; } #if defined(CONFIG_SYSFS) && !defined(CONFIG_SLUB_TINY) #define SLAB_SUPPORTS_SYSFS 1 void sysfs_slab_unlink(struct kmem_cache *s); void sysfs_slab_release(struct kmem_cache *s); int sysfs_slab_alias(struct kmem_cache *s, const char *name); #else static inline void sysfs_slab_unlink(struct kmem_cache *s) { } static inline void sysfs_slab_release(struct kmem_cache *s) { } static inline int sysfs_slab_alias(struct kmem_cache *s, const char *name) { return 0; } #endif void *fixup_red_left(struct kmem_cache *s, void *p); static inline void *nearest_obj(struct kmem_cache *cache, const struct slab *slab, void *x) { void *object = x - (x - slab_address(slab)) % cache->size; void *last_object = slab_address(slab) + (slab->objects - 1) * cache->size; void *result = (unlikely(object > last_object)) ? last_object : object; result = fixup_red_left(cache, result); return result; } /* Determine object index from a given position */ static inline unsigned int __obj_to_index(const struct kmem_cache *cache, void *addr, void *obj) { return reciprocal_divide(kasan_reset_tag(obj) - addr, cache->reciprocal_size); } static inline unsigned int obj_to_index(const struct kmem_cache *cache, const struct slab *slab, void *obj) { if (is_kfence_address(obj)) return 0; return __obj_to_index(cache, slab_address(slab), obj); } static inline int objs_per_slab(const struct kmem_cache *cache, const struct slab *slab) { return slab->objects; } /* * State of the slab allocator. * * This is used to describe the states of the allocator during bootup. * Allocators use this to gradually bootstrap themselves. Most allocators * have the problem that the structures used for managing slab caches are * allocated from slab caches themselves. */ enum slab_state { DOWN, /* No slab functionality yet */ PARTIAL, /* SLUB: kmem_cache_node available */ UP, /* Slab caches usable but not all extras yet */ FULL /* Everything is working */ }; extern enum slab_state slab_state; /* The slab cache mutex protects the management structures during changes */ extern struct mutex slab_mutex; /* The list of all slab caches on the system */ extern struct list_head slab_caches; /* The slab cache that manages slab cache information */ extern struct kmem_cache *kmem_cache; /* A table of kmalloc cache names and sizes */ extern const struct kmalloc_info_struct { const char *name[NR_KMALLOC_TYPES]; unsigned int size; } kmalloc_info[]; /* Kmalloc array related functions */ void setup_kmalloc_cache_index_table(void); void create_kmalloc_caches(void); extern u8 kmalloc_size_index[24]; static inline unsigned int size_index_elem(unsigned int bytes) { return (bytes - 1) / 8; } /* * Find the kmem_cache structure that serves a given size of * allocation * * This assumes size is larger than zero and not larger than * KMALLOC_MAX_CACHE_SIZE and the caller must check that. */ static inline struct kmem_cache * kmalloc_slab(size_t size, kmem_buckets *b, gfp_t flags, unsigned long caller) { unsigned int index; if (!b) b = &kmalloc_caches[kmalloc_type(flags, caller)]; if (size <= 192) index = kmalloc_size_index[size_index_elem(size)]; else index = fls(size - 1); return (*b)[index]; } gfp_t kmalloc_fix_flags(gfp_t flags); /* Functions provided by the slab allocators */ int do_kmem_cache_create(struct kmem_cache *s, const char *name, unsigned int size, struct kmem_cache_args *args, slab_flags_t flags); void __init kmem_cache_init(void); extern void create_boot_cache(struct kmem_cache *, const char *name, unsigned int size, slab_flags_t flags, unsigned int useroffset, unsigned int usersize); int slab_unmergeable(struct kmem_cache *s); bool slab_args_unmergeable(struct kmem_cache_args *args, slab_flags_t flags); slab_flags_t kmem_cache_flags(slab_flags_t flags, const char *name); static inline bool is_kmalloc_cache(struct kmem_cache *s) { return (s->flags & SLAB_KMALLOC); } static inline bool is_kmalloc_normal(struct kmem_cache *s) { if (!is_kmalloc_cache(s)) return false; return !(s->flags & (SLAB_CACHE_DMA|SLAB_ACCOUNT|SLAB_RECLAIM_ACCOUNT)); } bool __kfree_rcu_sheaf(struct kmem_cache *s, void *obj); void flush_all_rcu_sheaves(void); void flush_rcu_sheaves_on_cache(struct kmem_cache *s); #define SLAB_CORE_FLAGS (SLAB_HWCACHE_ALIGN | SLAB_CACHE_DMA | \ SLAB_CACHE_DMA32 | SLAB_PANIC | \ SLAB_TYPESAFE_BY_RCU | SLAB_DEBUG_OBJECTS | \ SLAB_NOLEAKTRACE | SLAB_RECLAIM_ACCOUNT | \ SLAB_TEMPORARY | SLAB_ACCOUNT | \ SLAB_NO_USER_FLAGS | SLAB_KMALLOC | SLAB_NO_MERGE) #define SLAB_DEBUG_FLAGS (SLAB_RED_ZONE | SLAB_POISON | SLAB_STORE_USER | \ SLAB_TRACE | SLAB_CONSISTENCY_CHECKS) #define SLAB_FLAGS_PERMITTED (SLAB_CORE_FLAGS | SLAB_DEBUG_FLAGS) bool __kmem_cache_empty(struct kmem_cache *); int __kmem_cache_shutdown(struct kmem_cache *); void __kmem_cache_release(struct kmem_cache *); int __kmem_cache_shrink(struct kmem_cache *); void slab_kmem_cache_release(struct kmem_cache *); struct seq_file; struct file; struct slabinfo { unsigned long active_objs; unsigned long num_objs; unsigned long active_slabs; unsigned long num_slabs; unsigned long shared_avail; unsigned int limit; unsigned int batchcount; unsigned int shared; unsigned int objects_per_slab; unsigned int cache_order; }; void get_slabinfo(struct kmem_cache *s, struct slabinfo *sinfo); #ifdef CONFIG_SLUB_DEBUG #ifdef CONFIG_SLUB_DEBUG_ON DECLARE_STATIC_KEY_TRUE(slub_debug_enabled); #else DECLARE_STATIC_KEY_FALSE(slub_debug_enabled); #endif extern void print_tracking(struct kmem_cache *s, void *object); long validate_slab_cache(struct kmem_cache *s); static inline bool __slub_debug_enabled(void) { return static_branch_unlikely(&slub_debug_enabled); } #else static inline void print_tracking(struct kmem_cache *s, void *object) { } static inline bool __slub_debug_enabled(void) { return false; } #endif /* * Returns true if any of the specified slab_debug flags is enabled for the * cache. Use only for flags parsed by setup_slub_debug() as it also enables * the static key. */ static inline bool kmem_cache_debug_flags(struct kmem_cache *s, slab_flags_t flags) { if (IS_ENABLED(CONFIG_SLUB_DEBUG)) VM_WARN_ON_ONCE(!(flags & SLAB_DEBUG_FLAGS)); if (__slub_debug_enabled()) return s->flags & flags; return false; } #if IS_ENABLED(CONFIG_SLUB_DEBUG) && IS_ENABLED(CONFIG_KUNIT) bool slab_in_kunit_test(void); #else static inline bool slab_in_kunit_test(void) { return false; } #endif /* * slub is about to manipulate internal object metadata. This memory lies * outside the range of the allocated object, so accessing it would normally * be reported by kasan as a bounds error. metadata_access_enable() is used * to tell kasan that these accesses are OK. */ static inline void metadata_access_enable(void) { kasan_disable_current(); kmsan_disable_current(); } static inline void metadata_access_disable(void) { kmsan_enable_current(); kasan_enable_current(); } #ifdef CONFIG_SLAB_OBJ_EXT /* * slab_obj_exts - get the pointer to the slab object extension vector * associated with a slab. * @slab: a pointer to the slab struct * * Returns the address of the object extension vector associated with the slab, * or zero if no such vector has been associated yet. * Do not dereference the return value directly; use get/put_slab_obj_exts() * pair and slab_obj_ext() to access individual elements. * * Example usage: * * obj_exts = slab_obj_exts(slab); * if (obj_exts) { * get_slab_obj_exts(obj_exts); * obj_ext = slab_obj_ext(slab, obj_exts, obj_to_index(s, slab, obj)); * // do something with obj_ext * put_slab_obj_exts(obj_exts); * } * * Note that the get/put semantics does not involve reference counting. * Instead, it updates kasan/kmsan depth so that accesses to slabobj_ext * won't be reported as access violations. */ static inline unsigned long slab_obj_exts(struct slab *slab) { unsigned long obj_exts = READ_ONCE(slab->obj_exts); #ifdef CONFIG_MEMCG /* * obj_exts should be either NULL, a valid pointer with * MEMCG_DATA_OBJEXTS bit set or be equal to OBJEXTS_ALLOC_FAIL. */ VM_BUG_ON_PAGE(obj_exts && !(obj_exts & MEMCG_DATA_OBJEXTS) && obj_exts != OBJEXTS_ALLOC_FAIL, slab_page(slab)); VM_BUG_ON_PAGE(obj_exts & MEMCG_DATA_KMEM, slab_page(slab)); #endif return obj_exts & ~OBJEXTS_FLAGS_MASK; } static inline void get_slab_obj_exts(unsigned long obj_exts) { VM_WARN_ON_ONCE(!obj_exts); metadata_access_enable(); } static inline void put_slab_obj_exts(unsigned long obj_exts) { metadata_access_disable(); } #ifdef CONFIG_64BIT static inline void slab_set_stride(struct slab *slab, unsigned short stride) { slab->stride = stride; } static inline unsigned short slab_get_stride(struct slab *slab) { return slab->stride; } #else static inline void slab_set_stride(struct slab *slab, unsigned short stride) { VM_WARN_ON_ONCE(stride != sizeof(struct slabobj_ext)); } static inline unsigned short slab_get_stride(struct slab *slab) { return sizeof(struct slabobj_ext); } #endif /* * slab_obj_ext - get the pointer to the slab object extension metadata * associated with an object in a slab. * @slab: a pointer to the slab struct * @obj_exts: a pointer to the object extension vector * @index: an index of the object * * Returns a pointer to the object extension associated with the object. * Must be called within a section covered by get/put_slab_obj_exts(). */ static inline struct slabobj_ext *slab_obj_ext(struct slab *slab, unsigned long obj_exts, unsigned int index) { struct slabobj_ext *obj_ext; VM_WARN_ON_ONCE(obj_exts != slab_obj_exts(slab)); obj_ext = (struct slabobj_ext *)(obj_exts + slab_get_stride(slab) * index); return kasan_reset_tag(obj_ext); } int alloc_slab_obj_exts(struct slab *slab, struct kmem_cache *s, gfp_t gfp, bool new_slab); #else /* CONFIG_SLAB_OBJ_EXT */ static inline unsigned long slab_obj_exts(struct slab *slab) { return 0; } static inline struct slabobj_ext *slab_obj_ext(struct slab *slab, unsigned long obj_exts, unsigned int index) { return NULL; } static inline void slab_set_stride(struct slab *slab, unsigned int stride) { } static inline unsigned int slab_get_stride(struct slab *slab) { return 0; } #endif /* CONFIG_SLAB_OBJ_EXT */ static inline enum node_stat_item cache_vmstat_idx(struct kmem_cache *s) { return (s->flags & SLAB_RECLAIM_ACCOUNT) ? NR_SLAB_RECLAIMABLE_B : NR_SLAB_UNRECLAIMABLE_B; } #ifdef CONFIG_MEMCG bool __memcg_slab_post_alloc_hook(struct kmem_cache *s, struct list_lru *lru, gfp_t flags, size_t size, void **p); void __memcg_slab_free_hook(struct kmem_cache *s, struct slab *slab, void **p, int objects, unsigned long obj_exts); #endif void kvfree_rcu_cb(struct rcu_head *head); static inline unsigned int large_kmalloc_order(const struct page *page) { return page[1].flags.f & 0xff; } static inline size_t large_kmalloc_size(const struct page *page) { return PAGE_SIZE << large_kmalloc_order(page); } #ifdef CONFIG_SLUB_DEBUG void dump_unreclaimable_slab(void); #else static inline void dump_unreclaimable_slab(void) { } #endif void ___cache_free(struct kmem_cache *cache, void *x, unsigned long addr); #ifdef CONFIG_SLAB_FREELIST_RANDOM int cache_random_seq_create(struct kmem_cache *cachep, unsigned int count, gfp_t gfp); void cache_random_seq_destroy(struct kmem_cache *cachep); #else static inline int cache_random_seq_create(struct kmem_cache *cachep, unsigned int count, gfp_t gfp) { return 0; } static inline void cache_random_seq_destroy(struct kmem_cache *cachep) { } #endif /* CONFIG_SLAB_FREELIST_RANDOM */ static inline bool slab_want_init_on_alloc(gfp_t flags, struct kmem_cache *c) { if (static_branch_maybe(CONFIG_INIT_ON_ALLOC_DEFAULT_ON, &init_on_alloc)) { if (c->ctor) return false; if (c->flags & (SLAB_TYPESAFE_BY_RCU | SLAB_POISON)) return flags & __GFP_ZERO; return true; } return flags & __GFP_ZERO; } static inline bool slab_want_init_on_free(struct kmem_cache *c) { if (static_branch_maybe(CONFIG_INIT_ON_FREE_DEFAULT_ON, &init_on_free)) return !(c->ctor || (c->flags & (SLAB_TYPESAFE_BY_RCU | SLAB_POISON))); return false; } #if defined(CONFIG_DEBUG_FS) && defined(CONFIG_SLUB_DEBUG) void debugfs_slab_release(struct kmem_cache *); #else static inline void debugfs_slab_release(struct kmem_cache *s) { } #endif #ifdef CONFIG_PRINTK #define KS_ADDRS_COUNT 16 struct kmem_obj_info { void *kp_ptr; struct slab *kp_slab; void *kp_objp; unsigned long kp_data_offset; struct kmem_cache *kp_slab_cache; void *kp_ret; void *kp_stack[KS_ADDRS_COUNT]; void *kp_free_stack[KS_ADDRS_COUNT]; }; void __kmem_obj_info(struct kmem_obj_info *kpp, void *object, struct slab *slab); #endif void __check_heap_object(const void *ptr, unsigned long n, const struct slab *slab, bool to_user); void defer_free_barrier(void); static inline bool slub_debug_orig_size(struct kmem_cache *s) { return (kmem_cache_debug_flags(s, SLAB_STORE_USER) && (s->flags & SLAB_KMALLOC)); } #ifdef CONFIG_SLUB_DEBUG void skip_orig_size_check(struct kmem_cache *s, const void *object); #endif #endif /* MM_SLAB_H */
2 2 27 14 22 21 3 22 4 22 2 2 2 40 1 39 45 45 6 9 16 16 15 16 15 15 1 15 2 2 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 // SPDX-License-Identifier: GPL-2.0 /* * linux/fs/minix/bitmap.c * * Copyright (C) 1991, 1992 Linus Torvalds */ /* * Modified for 680x0 by Hamish Macdonald * Fixed for 680x0 by Andreas Schwab */ /* bitmap.c contains the code that handles the inode and block bitmaps */ #include "minix.h" #include <linux/buffer_head.h> #include <linux/bitops.h> #include <linux/sched.h> static DEFINE_SPINLOCK(bitmap_lock); /* * bitmap consists of blocks filled with 16bit words * bit set == busy, bit clear == free * endianness is a mess, but for counting zero bits it really doesn't matter... */ static __u32 count_free(struct buffer_head *map[], unsigned blocksize, __u32 numbits) { __u32 sum = 0; unsigned blocks = DIV_ROUND_UP(numbits, blocksize * 8); while (blocks--) { unsigned words = blocksize / 2; __u16 *p = (__u16 *)(*map++)->b_data; while (words--) sum += 16 - hweight16(*p++); } return sum; } void minix_free_block(struct inode *inode, unsigned long block) { struct super_block *sb = inode->i_sb; struct minix_sb_info *sbi = minix_sb(sb); struct buffer_head *bh; int k = sb->s_blocksize_bits + 3; unsigned long bit, zone; if (block < sbi->s_firstdatazone || block >= sbi->s_nzones) { printk("Trying to free block not in datazone\n"); return; } zone = block - sbi->s_firstdatazone + 1; bit = zone & ((1<<k) - 1); zone >>= k; if (zone >= sbi->s_zmap_blocks) { printk("minix_free_block: nonexistent bitmap buffer\n"); return; } bh = sbi->s_zmap[zone]; spin_lock(&bitmap_lock); if (!minix_test_and_clear_bit(bit, bh->b_data)) printk("minix_free_block (%s:%lu): bit already cleared\n", sb->s_id, block); spin_unlock(&bitmap_lock); mark_buffer_dirty(bh); return; } int minix_new_block(struct inode * inode) { struct minix_sb_info *sbi = minix_sb(inode->i_sb); int bits_per_zone = 8 * inode->i_sb->s_blocksize; int i; for (i = 0; i < sbi->s_zmap_blocks; i++) { struct buffer_head *bh = sbi->s_zmap[i]; int j; spin_lock(&bitmap_lock); j = minix_find_first_zero_bit(bh->b_data, bits_per_zone); if (j < bits_per_zone) { minix_set_bit(j, bh->b_data); spin_unlock(&bitmap_lock); mark_buffer_dirty(bh); j += i * bits_per_zone + sbi->s_firstdatazone-1; if (j < sbi->s_firstdatazone || j >= sbi->s_nzones) break; return j; } spin_unlock(&bitmap_lock); } return 0; } unsigned long minix_count_free_blocks(struct super_block *sb) { struct minix_sb_info *sbi = minix_sb(sb); u32 bits = sbi->s_nzones - sbi->s_firstdatazone + 1; return (count_free(sbi->s_zmap, sb->s_blocksize, bits) << sbi->s_log_zone_size); } struct minix_inode * minix_V1_raw_inode(struct super_block *sb, ino_t ino, struct buffer_head **bh) { int block; struct minix_sb_info *sbi = minix_sb(sb); struct minix_inode *p; if (!ino || ino > sbi->s_ninodes) { printk("Bad inode number on dev %s: %ld is out of range\n", sb->s_id, (long)ino); return NULL; } ino--; block = 2 + sbi->s_imap_blocks + sbi->s_zmap_blocks + ino / MINIX_INODES_PER_BLOCK; *bh = sb_bread(sb, block); if (!*bh) { printk("Unable to read inode block\n"); return NULL; } p = (void *)(*bh)->b_data; return p + ino % MINIX_INODES_PER_BLOCK; } struct minix2_inode * minix_V2_raw_inode(struct super_block *sb, ino_t ino, struct buffer_head **bh) { int block; struct minix_sb_info *sbi = minix_sb(sb); struct minix2_inode *p; int minix2_inodes_per_block = sb->s_blocksize / sizeof(struct minix2_inode); *bh = NULL; if (!ino || ino > sbi->s_ninodes) { printk("Bad inode number on dev %s: %ld is out of range\n", sb->s_id, (long)ino); return NULL; } ino--; block = 2 + sbi->s_imap_blocks + sbi->s_zmap_blocks + ino / minix2_inodes_per_block; *bh = sb_bread(sb, block); if (!*bh) { printk("Unable to read inode block\n"); return NULL; } p = (void *)(*bh)->b_data; return p + ino % minix2_inodes_per_block; } /* Clear the link count and mode of a deleted inode on disk. */ static void minix_clear_inode(struct inode *inode) { struct buffer_head *bh = NULL; if (INODE_VERSION(inode) == MINIX_V1) { struct minix_inode *raw_inode; raw_inode = minix_V1_raw_inode(inode->i_sb, inode->i_ino, &bh); if (raw_inode) { raw_inode->i_nlinks = 0; raw_inode->i_mode = 0; } } else { struct minix2_inode *raw_inode; raw_inode = minix_V2_raw_inode(inode->i_sb, inode->i_ino, &bh); if (raw_inode) { raw_inode->i_nlinks = 0; raw_inode->i_mode = 0; } } if (bh) { mark_buffer_dirty(bh); brelse (bh); } } void minix_free_inode(struct inode * inode) { struct super_block *sb = inode->i_sb; struct minix_sb_info *sbi = minix_sb(inode->i_sb); struct buffer_head *bh; int k = sb->s_blocksize_bits + 3; unsigned long ino, bit; ino = inode->i_ino; if (ino < 1 || ino > sbi->s_ninodes) { printk("minix_free_inode: inode 0 or nonexistent inode\n"); return; } bit = ino & ((1<<k) - 1); ino >>= k; if (ino >= sbi->s_imap_blocks) { printk("minix_free_inode: nonexistent imap in superblock\n"); return; } minix_clear_inode(inode); /* clear on-disk copy */ bh = sbi->s_imap[ino]; spin_lock(&bitmap_lock); if (!minix_test_and_clear_bit(bit, bh->b_data)) printk("minix_free_inode: bit %lu already cleared\n", bit); spin_unlock(&bitmap_lock); mark_buffer_dirty(bh); } struct inode *minix_new_inode(const struct inode *dir, umode_t mode) { struct super_block *sb = dir->i_sb; struct minix_sb_info *sbi = minix_sb(sb); struct inode *inode = new_inode(sb); struct buffer_head * bh; int bits_per_zone = 8 * sb->s_blocksize; unsigned long j; int i; if (!inode) return ERR_PTR(-ENOMEM); j = bits_per_zone; bh = NULL; spin_lock(&bitmap_lock); for (i = 0; i < sbi->s_imap_blocks; i++) { bh = sbi->s_imap[i]; j = minix_find_first_zero_bit(bh->b_data, bits_per_zone); if (j < bits_per_zone) break; } if (!bh || j >= bits_per_zone) { spin_unlock(&bitmap_lock); iput(inode); return ERR_PTR(-ENOSPC); } if (minix_test_and_set_bit(j, bh->b_data)) { /* shouldn't happen */ spin_unlock(&bitmap_lock); printk("minix_new_inode: bit already set\n"); iput(inode); return ERR_PTR(-ENOSPC); } spin_unlock(&bitmap_lock); mark_buffer_dirty(bh); j += i * bits_per_zone; if (!j || j > sbi->s_ninodes) { iput(inode); return ERR_PTR(-ENOSPC); } inode_init_owner(&nop_mnt_idmap, inode, dir, mode); inode->i_ino = j; simple_inode_init_ts(inode); inode->i_blocks = 0; memset(&minix_i(inode)->u, 0, sizeof(minix_i(inode)->u)); insert_inode_hash(inode); mark_inode_dirty(inode); return inode; } unsigned long minix_count_free_inodes(struct super_block *sb) { struct minix_sb_info *sbi = minix_sb(sb); u32 bits = sbi->s_ninodes + 1; return count_free(sbi->s_imap, sb->s_blocksize, bits); }
5 5 3 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 // SPDX-License-Identifier: GPL-2.0-or-later /* * Spanning tree protocol; BPDU handling * Linux ethernet bridge * * Authors: * Lennert Buytenhek <buytenh@gnu.org> */ #include <linux/kernel.h> #include <linux/netfilter_bridge.h> #include <linux/etherdevice.h> #include <linux/llc.h> #include <linux/slab.h> #include <linux/pkt_sched.h> #include <net/net_namespace.h> #include <net/llc.h> #include <net/llc_pdu.h> #include <net/stp.h> #include <linux/unaligned.h> #include "br_private.h" #include "br_private_stp.h" #define STP_HZ 256 #define LLC_RESERVE sizeof(struct llc_pdu_un) static int br_send_bpdu_finish(struct net *net, struct sock *sk, struct sk_buff *skb) { return dev_queue_xmit(skb); } static void br_send_bpdu(struct net_bridge_port *p, const unsigned char *data, int length) { struct sk_buff *skb; skb = dev_alloc_skb(length+LLC_RESERVE); if (!skb) return; skb->dev = p->dev; skb->protocol = htons(ETH_P_802_2); skb->priority = TC_PRIO_CONTROL; skb_reserve(skb, LLC_RESERVE); __skb_put_data(skb, data, length); llc_pdu_header_init(skb, LLC_PDU_TYPE_U, LLC_SAP_BSPAN, LLC_SAP_BSPAN, LLC_PDU_CMD); llc_pdu_init_as_ui_cmd(skb); llc_mac_hdr_init(skb, p->dev->dev_addr, p->br->group_addr); skb_reset_mac_header(skb); NF_HOOK(NFPROTO_BRIDGE, NF_BR_LOCAL_OUT, dev_net(p->dev), NULL, skb, NULL, skb->dev, br_send_bpdu_finish); } static inline void br_set_ticks(unsigned char *dest, int j) { unsigned long ticks = (STP_HZ * j)/ HZ; put_unaligned_be16(ticks, dest); } static inline int br_get_ticks(const unsigned char *src) { unsigned long ticks = get_unaligned_be16(src); return DIV_ROUND_UP(ticks * HZ, STP_HZ); } /* called under bridge lock */ void br_send_config_bpdu(struct net_bridge_port *p, struct br_config_bpdu *bpdu) { unsigned char buf[35]; if (p->br->stp_enabled != BR_KERNEL_STP) return; buf[0] = 0; buf[1] = 0; buf[2] = 0; buf[3] = BPDU_TYPE_CONFIG; buf[4] = (bpdu->topology_change ? 0x01 : 0) | (bpdu->topology_change_ack ? 0x80 : 0); buf[5] = bpdu->root.prio[0]; buf[6] = bpdu->root.prio[1]; buf[7] = bpdu->root.addr[0]; buf[8] = bpdu->root.addr[1]; buf[9] = bpdu->root.addr[2]; buf[10] = bpdu->root.addr[3]; buf[11] = bpdu->root.addr[4]; buf[12] = bpdu->root.addr[5]; buf[13] = (bpdu->root_path_cost >> 24) & 0xFF; buf[14] = (bpdu->root_path_cost >> 16) & 0xFF; buf[15] = (bpdu->root_path_cost >> 8) & 0xFF; buf[16] = bpdu->root_path_cost & 0xFF; buf[17] = bpdu->bridge_id.prio[0]; buf[18] = bpdu->bridge_id.prio[1]; buf[19] = bpdu->bridge_id.addr[0]; buf[20] = bpdu->bridge_id.addr[1]; buf[21] = bpdu->bridge_id.addr[2]; buf[22] = bpdu->bridge_id.addr[3]; buf[23] = bpdu->bridge_id.addr[4]; buf[24] = bpdu->bridge_id.addr[5]; buf[25] = (bpdu->port_id >> 8) & 0xFF; buf[26] = bpdu->port_id & 0xFF; br_set_ticks(buf+27, bpdu->message_age); br_set_ticks(buf+29, bpdu->max_age); br_set_ticks(buf+31, bpdu->hello_time); br_set_ticks(buf+33, bpdu->forward_delay); br_send_bpdu(p, buf, 35); p->stp_xstats.tx_bpdu++; } /* called under bridge lock */ void br_send_tcn_bpdu(struct net_bridge_port *p) { unsigned char buf[4]; if (p->br->stp_enabled != BR_KERNEL_STP) return; buf[0] = 0; buf[1] = 0; buf[2] = 0; buf[3] = BPDU_TYPE_TCN; br_send_bpdu(p, buf, 4); p->stp_xstats.tx_tcn++; } /* * Called from llc. * * NO locks, but rcu_read_lock */ void br_stp_rcv(const struct stp_proto *proto, struct sk_buff *skb, struct net_device *dev) { struct net_bridge_port *p; struct net_bridge *br; const unsigned char *buf; if (!pskb_may_pull(skb, 4)) goto err; /* compare of protocol id and version */ buf = skb->data; if (buf[0] != 0 || buf[1] != 0 || buf[2] != 0) goto err; p = br_port_get_check_rcu(dev); if (!p) goto err; br = p->br; spin_lock(&br->lock); if (br->stp_enabled != BR_KERNEL_STP) goto out; if (!(br->dev->flags & IFF_UP)) goto out; if (p->state == BR_STATE_DISABLED) goto out; if (!ether_addr_equal(eth_hdr(skb)->h_dest, br->group_addr)) goto out; if (p->flags & BR_BPDU_GUARD) { br_notice(br, "BPDU received on blocked port %u(%s)\n", (unsigned int) p->port_no, p->dev->name); br_stp_disable_port(p); goto out; } buf = skb_pull(skb, 3); if (buf[0] == BPDU_TYPE_CONFIG) { struct br_config_bpdu bpdu; if (!pskb_may_pull(skb, 32)) goto out; buf = skb->data; bpdu.topology_change = (buf[1] & 0x01) ? 1 : 0; bpdu.topology_change_ack = (buf[1] & 0x80) ? 1 : 0; bpdu.root.prio[0] = buf[2]; bpdu.root.prio[1] = buf[3]; bpdu.root.addr[0] = buf[4]; bpdu.root.addr[1] = buf[5]; bpdu.root.addr[2] = buf[6]; bpdu.root.addr[3] = buf[7]; bpdu.root.addr[4] = buf[8]; bpdu.root.addr[5] = buf[9]; bpdu.root_path_cost = (buf[10] << 24) | (buf[11] << 16) | (buf[12] << 8) | buf[13]; bpdu.bridge_id.prio[0] = buf[14]; bpdu.bridge_id.prio[1] = buf[15]; bpdu.bridge_id.addr[0] = buf[16]; bpdu.bridge_id.addr[1] = buf[17]; bpdu.bridge_id.addr[2] = buf[18]; bpdu.bridge_id.addr[3] = buf[19]; bpdu.bridge_id.addr[4] = buf[20]; bpdu.bridge_id.addr[5] = buf[21]; bpdu.port_id = (buf[22] << 8) | buf[23]; bpdu.message_age = br_get_ticks(buf+24); bpdu.max_age = br_get_ticks(buf+26); bpdu.hello_time = br_get_ticks(buf+28); bpdu.forward_delay = br_get_ticks(buf+30); if (bpdu.message_age > bpdu.max_age) { if (net_ratelimit()) br_notice(p->br, "port %u config from %pM" " (message_age %ul > max_age %ul)\n", p->port_no, eth_hdr(skb)->h_source, bpdu.message_age, bpdu.max_age); goto out; } br_received_config_bpdu(p, &bpdu); } else if (buf[0] == BPDU_TYPE_TCN) { br_received_tcn_bpdu(p); } out: spin_unlock(&br->lock); err: kfree_skb(skb); }
27 5 26 22 20 4 26 1 24 4 1 26 26 26 4 4 4 1 1 1 42 1 40 1 37 1 5 2 1 34 13 23 23 23 23 19 2 4 2 43 43 41 38 38 7 18 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 // SPDX-License-Identifier: GPL-2.0-only #include "netlink.h" #include "common.h" #include "bitset.h" /* LINKMODES_GET */ struct linkmodes_req_info { struct ethnl_req_info base; }; struct linkmodes_reply_data { struct ethnl_reply_data base; struct ethtool_link_ksettings ksettings; struct ethtool_link_settings *lsettings; bool peer_empty; }; #define LINKMODES_REPDATA(__reply_base) \ container_of(__reply_base, struct linkmodes_reply_data, base) const struct nla_policy ethnl_linkmodes_get_policy[] = { [ETHTOOL_A_LINKMODES_HEADER] = NLA_POLICY_NESTED(ethnl_header_policy), }; static int linkmodes_prepare_data(const struct ethnl_req_info *req_base, struct ethnl_reply_data *reply_base, const struct genl_info *info) { struct linkmodes_reply_data *data = LINKMODES_REPDATA(reply_base); struct net_device *dev = reply_base->dev; int ret; data->lsettings = &data->ksettings.base; ret = ethnl_ops_begin(dev); if (ret < 0) return ret; ret = __ethtool_get_link_ksettings(dev, &data->ksettings); if (ret < 0) { GENL_SET_ERR_MSG(info, "failed to retrieve link settings"); goto out; } if (!dev->ethtool_ops->cap_link_lanes_supported) data->ksettings.lanes = 0; data->peer_empty = bitmap_empty(data->ksettings.link_modes.lp_advertising, __ETHTOOL_LINK_MODE_MASK_NBITS); out: ethnl_ops_complete(dev); return ret; } static int linkmodes_reply_size(const struct ethnl_req_info *req_base, const struct ethnl_reply_data *reply_base) { const struct linkmodes_reply_data *data = LINKMODES_REPDATA(reply_base); const struct ethtool_link_ksettings *ksettings = &data->ksettings; const struct ethtool_link_settings *lsettings = &ksettings->base; bool compact = req_base->flags & ETHTOOL_FLAG_COMPACT_BITSETS; int len, ret; len = nla_total_size(sizeof(u8)) /* LINKMODES_AUTONEG */ + nla_total_size(sizeof(u32)) /* LINKMODES_SPEED */ + nla_total_size(sizeof(u32)) /* LINKMODES_LANES */ + nla_total_size(sizeof(u8)) /* LINKMODES_DUPLEX */ + nla_total_size(sizeof(u8)) /* LINKMODES_RATE_MATCHING */ + 0; ret = ethnl_bitset_size(ksettings->link_modes.advertising, ksettings->link_modes.supported, __ETHTOOL_LINK_MODE_MASK_NBITS, link_mode_names, compact); if (ret < 0) return ret; len += ret; if (!data->peer_empty) { ret = ethnl_bitset_size(ksettings->link_modes.lp_advertising, NULL, __ETHTOOL_LINK_MODE_MASK_NBITS, link_mode_names, compact); if (ret < 0) return ret; len += ret; } if (lsettings->master_slave_cfg != MASTER_SLAVE_CFG_UNSUPPORTED) len += nla_total_size(sizeof(u8)); if (lsettings->master_slave_state != MASTER_SLAVE_STATE_UNSUPPORTED) len += nla_total_size(sizeof(u8)); return len; } static int linkmodes_fill_reply(struct sk_buff *skb, const struct ethnl_req_info *req_base, const struct ethnl_reply_data *reply_base) { const struct linkmodes_reply_data *data = LINKMODES_REPDATA(reply_base); const struct ethtool_link_ksettings *ksettings = &data->ksettings; const struct ethtool_link_settings *lsettings = &ksettings->base; bool compact = req_base->flags & ETHTOOL_FLAG_COMPACT_BITSETS; int ret; if (nla_put_u8(skb, ETHTOOL_A_LINKMODES_AUTONEG, lsettings->autoneg)) return -EMSGSIZE; ret = ethnl_put_bitset(skb, ETHTOOL_A_LINKMODES_OURS, ksettings->link_modes.advertising, ksettings->link_modes.supported, __ETHTOOL_LINK_MODE_MASK_NBITS, link_mode_names, compact); if (ret < 0) return -EMSGSIZE; if (!data->peer_empty) { ret = ethnl_put_bitset(skb, ETHTOOL_A_LINKMODES_PEER, ksettings->link_modes.lp_advertising, NULL, __ETHTOOL_LINK_MODE_MASK_NBITS, link_mode_names, compact); if (ret < 0) return -EMSGSIZE; } if (nla_put_u32(skb, ETHTOOL_A_LINKMODES_SPEED, lsettings->speed) || nla_put_u8(skb, ETHTOOL_A_LINKMODES_DUPLEX, lsettings->duplex)) return -EMSGSIZE; if (ksettings->lanes && nla_put_u32(skb, ETHTOOL_A_LINKMODES_LANES, ksettings->lanes)) return -EMSGSIZE; if (lsettings->master_slave_cfg != MASTER_SLAVE_CFG_UNSUPPORTED && nla_put_u8(skb, ETHTOOL_A_LINKMODES_MASTER_SLAVE_CFG, lsettings->master_slave_cfg)) return -EMSGSIZE; if (lsettings->master_slave_state != MASTER_SLAVE_STATE_UNSUPPORTED && nla_put_u8(skb, ETHTOOL_A_LINKMODES_MASTER_SLAVE_STATE, lsettings->master_slave_state)) return -EMSGSIZE; if (nla_put_u8(skb, ETHTOOL_A_LINKMODES_RATE_MATCHING, lsettings->rate_matching)) return -EMSGSIZE; return 0; } /* LINKMODES_SET */ const struct nla_policy ethnl_linkmodes_set_policy[] = { [ETHTOOL_A_LINKMODES_HEADER] = NLA_POLICY_NESTED(ethnl_header_policy), [ETHTOOL_A_LINKMODES_AUTONEG] = { .type = NLA_U8 }, [ETHTOOL_A_LINKMODES_OURS] = { .type = NLA_NESTED }, [ETHTOOL_A_LINKMODES_SPEED] = { .type = NLA_U32 }, [ETHTOOL_A_LINKMODES_DUPLEX] = { .type = NLA_U8 }, [ETHTOOL_A_LINKMODES_MASTER_SLAVE_CFG] = { .type = NLA_U8 }, [ETHTOOL_A_LINKMODES_LANES] = NLA_POLICY_RANGE(NLA_U32, 1, 8), }; /* Set advertised link modes to all supported modes matching requested speed, * lanes and duplex values. Called when autonegotiation is on, speed, lanes or * duplex is requested but no link mode change. This is done in userspace with * ioctl() interface, move it into kernel for netlink. * Returns true if advertised modes bitmap was modified. */ static bool ethnl_auto_linkmodes(struct ethtool_link_ksettings *ksettings, bool req_speed, bool req_lanes, bool req_duplex) { unsigned long *advertising = ksettings->link_modes.advertising; unsigned long *supported = ksettings->link_modes.supported; DECLARE_BITMAP(old_adv, __ETHTOOL_LINK_MODE_MASK_NBITS); unsigned int i; bitmap_copy(old_adv, advertising, __ETHTOOL_LINK_MODE_MASK_NBITS); for (i = 0; i < __ETHTOOL_LINK_MODE_MASK_NBITS; i++) { const struct link_mode_info *info = &link_mode_params[i]; if (info->speed == SPEED_UNKNOWN) continue; if (test_bit(i, supported) && (!req_speed || info->speed == ksettings->base.speed) && (!req_lanes || info->lanes == ksettings->lanes) && (!req_duplex || info->duplex == ksettings->base.duplex)) set_bit(i, advertising); else clear_bit(i, advertising); } return !bitmap_equal(old_adv, advertising, __ETHTOOL_LINK_MODE_MASK_NBITS); } static bool ethnl_validate_master_slave_cfg(u8 cfg) { switch (cfg) { case MASTER_SLAVE_CFG_MASTER_PREFERRED: case MASTER_SLAVE_CFG_SLAVE_PREFERRED: case MASTER_SLAVE_CFG_MASTER_FORCE: case MASTER_SLAVE_CFG_SLAVE_FORCE: return true; } return false; } static int ethnl_check_linkmodes(struct genl_info *info, struct nlattr **tb) { const struct nlattr *master_slave_cfg, *lanes_cfg; master_slave_cfg = tb[ETHTOOL_A_LINKMODES_MASTER_SLAVE_CFG]; if (master_slave_cfg && !ethnl_validate_master_slave_cfg(nla_get_u8(master_slave_cfg))) { NL_SET_ERR_MSG_ATTR(info->extack, master_slave_cfg, "master/slave value is invalid"); return -EOPNOTSUPP; } lanes_cfg = tb[ETHTOOL_A_LINKMODES_LANES]; if (lanes_cfg && !is_power_of_2(nla_get_u32(lanes_cfg))) { NL_SET_ERR_MSG_ATTR(info->extack, lanes_cfg, "lanes value is invalid"); return -EINVAL; } return 0; } static int ethnl_update_linkmodes(struct genl_info *info, struct nlattr **tb, struct ethtool_link_ksettings *ksettings, bool *mod, const struct net_device *dev) { struct ethtool_link_settings *lsettings = &ksettings->base; bool req_speed, req_lanes, req_duplex; const struct nlattr *master_slave_cfg, *lanes_cfg; int ret; master_slave_cfg = tb[ETHTOOL_A_LINKMODES_MASTER_SLAVE_CFG]; if (master_slave_cfg) { if (lsettings->master_slave_cfg == MASTER_SLAVE_CFG_UNSUPPORTED) { NL_SET_ERR_MSG_ATTR(info->extack, master_slave_cfg, "master/slave configuration not supported by device"); return -EOPNOTSUPP; } } *mod = false; req_speed = tb[ETHTOOL_A_LINKMODES_SPEED]; req_lanes = tb[ETHTOOL_A_LINKMODES_LANES]; req_duplex = tb[ETHTOOL_A_LINKMODES_DUPLEX]; ethnl_update_u8(&lsettings->autoneg, tb[ETHTOOL_A_LINKMODES_AUTONEG], mod); lanes_cfg = tb[ETHTOOL_A_LINKMODES_LANES]; if (lanes_cfg) { /* If autoneg is off and lanes parameter is not supported by the * driver, return an error. */ if (!lsettings->autoneg && !dev->ethtool_ops->cap_link_lanes_supported) { NL_SET_ERR_MSG_ATTR(info->extack, lanes_cfg, "lanes configuration not supported by device"); return -EOPNOTSUPP; } } else if (!lsettings->autoneg && ksettings->lanes) { /* If autoneg is off and lanes parameter is not passed from user but * it was defined previously then set the lanes parameter to 0. */ ksettings->lanes = 0; *mod = true; } ret = ethnl_update_bitset(ksettings->link_modes.advertising, __ETHTOOL_LINK_MODE_MASK_NBITS, tb[ETHTOOL_A_LINKMODES_OURS], link_mode_names, info->extack, mod); if (ret < 0) return ret; ethnl_update_u32(&lsettings->speed, tb[ETHTOOL_A_LINKMODES_SPEED], mod); ethnl_update_u32(&ksettings->lanes, lanes_cfg, mod); ethnl_update_u8(&lsettings->duplex, tb[ETHTOOL_A_LINKMODES_DUPLEX], mod); ethnl_update_u8(&lsettings->master_slave_cfg, master_slave_cfg, mod); if (!tb[ETHTOOL_A_LINKMODES_OURS] && lsettings->autoneg && (req_speed || req_lanes || req_duplex) && ethnl_auto_linkmodes(ksettings, req_speed, req_lanes, req_duplex)) *mod = true; return 0; } static int ethnl_set_linkmodes_validate(struct ethnl_req_info *req_info, struct genl_info *info) { const struct ethtool_ops *ops = req_info->dev->ethtool_ops; int ret; ret = ethnl_check_linkmodes(info, info->attrs); if (ret < 0) return ret; if (!ops->get_link_ksettings || !ops->set_link_ksettings) return -EOPNOTSUPP; return 1; } static int ethnl_set_linkmodes(struct ethnl_req_info *req_info, struct genl_info *info) { struct ethtool_link_ksettings ksettings = {}; struct net_device *dev = req_info->dev; struct nlattr **tb = info->attrs; bool mod = false; int ret; ret = __ethtool_get_link_ksettings(dev, &ksettings); if (ret < 0) { GENL_SET_ERR_MSG(info, "failed to retrieve link settings"); return ret; } ret = ethnl_update_linkmodes(info, tb, &ksettings, &mod, dev); if (ret < 0) return ret; if (!mod) return 0; ret = dev->ethtool_ops->set_link_ksettings(dev, &ksettings); if (ret < 0) { GENL_SET_ERR_MSG(info, "link settings update failed"); return ret; } return 1; } const struct ethnl_request_ops ethnl_linkmodes_request_ops = { .request_cmd = ETHTOOL_MSG_LINKMODES_GET, .reply_cmd = ETHTOOL_MSG_LINKMODES_GET_REPLY, .hdr_attr = ETHTOOL_A_LINKMODES_HEADER, .req_info_size = sizeof(struct linkmodes_req_info), .reply_data_size = sizeof(struct linkmodes_reply_data), .prepare_data = linkmodes_prepare_data, .reply_size = linkmodes_reply_size, .fill_reply = linkmodes_fill_reply, .set_validate = ethnl_set_linkmodes_validate, .set = ethnl_set_linkmodes, .set_ntf_cmd = ETHTOOL_MSG_LINKMODES_NTF, };
1575 1581 7 1585 1579 1579 1783 1761 8 6 6 22 18 4 2 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NET_DST_METADATA_H #define __NET_DST_METADATA_H 1 #include <linux/skbuff.h> #include <net/ip.h> #include <net/ip_tunnels.h> #include <net/macsec.h> #include <net/dst.h> enum metadata_type { METADATA_IP_TUNNEL, METADATA_HW_PORT_MUX, METADATA_MACSEC, METADATA_XFRM, }; struct hw_port_info { struct net_device *lower_dev; u32 port_id; }; struct macsec_info { sci_t sci; }; struct xfrm_md_info { u32 if_id; int link; struct dst_entry *dst_orig; }; struct metadata_dst { struct dst_entry dst; enum metadata_type type; union { struct ip_tunnel_info tun_info; struct hw_port_info port_info; struct macsec_info macsec_info; struct xfrm_md_info xfrm_info; } u; }; static inline struct metadata_dst *skb_metadata_dst(const struct sk_buff *skb) { struct metadata_dst *md_dst = (struct metadata_dst *) skb_dst(skb); if (md_dst && md_dst->dst.flags & DST_METADATA) return md_dst; return NULL; } static inline struct ip_tunnel_info * skb_tunnel_info(const struct sk_buff *skb) { struct metadata_dst *md_dst = skb_metadata_dst(skb); struct dst_entry *dst; if (md_dst && md_dst->type == METADATA_IP_TUNNEL) return &md_dst->u.tun_info; dst = skb_dst(skb); if (dst && dst->lwtstate && (dst->lwtstate->type == LWTUNNEL_ENCAP_IP || dst->lwtstate->type == LWTUNNEL_ENCAP_IP6)) return lwt_tun_info(dst->lwtstate); return NULL; } static inline struct xfrm_md_info *lwt_xfrm_info(struct lwtunnel_state *lwt) { return (struct xfrm_md_info *)lwt->data; } static inline struct xfrm_md_info *skb_xfrm_md_info(const struct sk_buff *skb) { struct metadata_dst *md_dst = skb_metadata_dst(skb); struct dst_entry *dst; if (md_dst && md_dst->type == METADATA_XFRM) return &md_dst->u.xfrm_info; dst = skb_dst(skb); if (dst && dst->lwtstate && dst->lwtstate->type == LWTUNNEL_ENCAP_XFRM) return lwt_xfrm_info(dst->lwtstate); return NULL; } static inline bool skb_valid_dst(const struct sk_buff *skb) { struct dst_entry *dst = skb_dst(skb); return dst && !(dst->flags & DST_METADATA); } static inline int skb_metadata_dst_cmp(const struct sk_buff *skb_a, const struct sk_buff *skb_b) { const struct metadata_dst *a, *b; if (!(skb_a->_skb_refdst | skb_b->_skb_refdst)) return 0; a = (const struct metadata_dst *) skb_dst(skb_a); b = (const struct metadata_dst *) skb_dst(skb_b); if (!a != !b || a->type != b->type) return 1; switch (a->type) { case METADATA_HW_PORT_MUX: return memcmp(&a->u.port_info, &b->u.port_info, sizeof(a->u.port_info)); case METADATA_IP_TUNNEL: return memcmp(&a->u.tun_info, &b->u.tun_info, sizeof(a->u.tun_info) + a->u.tun_info.options_len); case METADATA_MACSEC: return memcmp(&a->u.macsec_info, &b->u.macsec_info, sizeof(a->u.macsec_info)); case METADATA_XFRM: return memcmp(&a->u.xfrm_info, &b->u.xfrm_info, sizeof(a->u.xfrm_info)); default: return 1; } } void metadata_dst_free(struct metadata_dst *); struct metadata_dst *metadata_dst_alloc(u8 optslen, enum metadata_type type, gfp_t flags); void metadata_dst_free_percpu(struct metadata_dst __percpu *md_dst); struct metadata_dst __percpu * metadata_dst_alloc_percpu(u8 optslen, enum metadata_type type, gfp_t flags); static inline struct metadata_dst *tun_rx_dst(int md_size) { struct metadata_dst *tun_dst; tun_dst = metadata_dst_alloc(md_size, METADATA_IP_TUNNEL, GFP_ATOMIC); if (!tun_dst) return NULL; tun_dst->u.tun_info.options_len = 0; tun_dst->u.tun_info.mode = 0; return tun_dst; } static inline struct metadata_dst *tun_dst_unclone(struct sk_buff *skb) { struct metadata_dst *md_dst = skb_metadata_dst(skb); int md_size; struct metadata_dst *new_md; if (!md_dst || md_dst->type != METADATA_IP_TUNNEL) return ERR_PTR(-EINVAL); md_size = md_dst->u.tun_info.options_len; new_md = metadata_dst_alloc(md_size, METADATA_IP_TUNNEL, GFP_ATOMIC); if (!new_md) return ERR_PTR(-ENOMEM); memcpy(&new_md->u.tun_info, &md_dst->u.tun_info, sizeof(struct ip_tunnel_info) + md_size); #ifdef CONFIG_DST_CACHE /* Unclone the dst cache if there is one */ if (new_md->u.tun_info.dst_cache.cache) { int ret; ret = dst_cache_init(&new_md->u.tun_info.dst_cache, GFP_ATOMIC); if (ret) { metadata_dst_free(new_md); return ERR_PTR(ret); } } #endif skb_dst_drop(skb); skb_dst_set(skb, &new_md->dst); return new_md; } static inline struct ip_tunnel_info *skb_tunnel_info_unclone(struct sk_buff *skb) { struct metadata_dst *dst; dst = tun_dst_unclone(skb); if (IS_ERR(dst)) return NULL; return &dst->u.tun_info; } static inline struct metadata_dst *__ip_tun_set_dst(__be32 saddr, __be32 daddr, __u8 tos, __u8 ttl, __be16 tp_dst, const unsigned long *flags, __be64 tunnel_id, int md_size) { struct metadata_dst *tun_dst; tun_dst = tun_rx_dst(md_size); if (!tun_dst) return NULL; ip_tunnel_key_init(&tun_dst->u.tun_info.key, saddr, daddr, tos, ttl, 0, 0, tp_dst, tunnel_id, flags); return tun_dst; } static inline struct metadata_dst *ip_tun_rx_dst(struct sk_buff *skb, const unsigned long *flags, __be64 tunnel_id, int md_size) { const struct iphdr *iph = ip_hdr(skb); struct metadata_dst *tun_dst; tun_dst = __ip_tun_set_dst(iph->saddr, iph->daddr, iph->tos, iph->ttl, 0, flags, tunnel_id, md_size); if (tun_dst && (iph->frag_off & htons(IP_DF))) __set_bit(IP_TUNNEL_DONT_FRAGMENT_BIT, tun_dst->u.tun_info.key.tun_flags); return tun_dst; } static inline struct metadata_dst *__ipv6_tun_set_dst(const struct in6_addr *saddr, const struct in6_addr *daddr, __u8 tos, __u8 ttl, __be16 tp_dst, __be32 label, const unsigned long *flags, __be64 tunnel_id, int md_size) { struct metadata_dst *tun_dst; struct ip_tunnel_info *info; tun_dst = tun_rx_dst(md_size); if (!tun_dst) return NULL; info = &tun_dst->u.tun_info; info->mode = IP_TUNNEL_INFO_IPV6; ip_tunnel_flags_copy(info->key.tun_flags, flags); info->key.tun_id = tunnel_id; info->key.tp_src = 0; info->key.tp_dst = tp_dst; info->key.u.ipv6.src = *saddr; info->key.u.ipv6.dst = *daddr; info->key.tos = tos; info->key.ttl = ttl; info->key.label = label; return tun_dst; } static inline struct metadata_dst *ipv6_tun_rx_dst(struct sk_buff *skb, const unsigned long *flags, __be64 tunnel_id, int md_size) { const struct ipv6hdr *ip6h = ipv6_hdr(skb); return __ipv6_tun_set_dst(&ip6h->saddr, &ip6h->daddr, ipv6_get_dsfield(ip6h), ip6h->hop_limit, 0, ip6_flowlabel(ip6h), flags, tunnel_id, md_size); } #endif /* __NET_DST_METADATA_H */
5 5 233 234 234 234 5 3 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 /* SPDX-License-Identifier: GPL-2.0+ */ /* * Read-Copy Update mechanism for mutual exclusion, adapted for tracing. * * Copyright (C) 2020 Paul E. McKenney. */ #ifndef __LINUX_RCUPDATE_TRACE_H #define __LINUX_RCUPDATE_TRACE_H #include <linux/sched.h> #include <linux/rcupdate.h> #include <linux/cleanup.h> #ifdef CONFIG_TASKS_TRACE_RCU extern struct srcu_struct rcu_tasks_trace_srcu_struct; #endif // #ifdef CONFIG_TASKS_TRACE_RCU #if defined(CONFIG_DEBUG_LOCK_ALLOC) && defined(CONFIG_TASKS_TRACE_RCU) static inline int rcu_read_lock_trace_held(void) { return srcu_read_lock_held(&rcu_tasks_trace_srcu_struct); } #else // #if defined(CONFIG_DEBUG_LOCK_ALLOC) && defined(CONFIG_TASKS_TRACE_RCU) static inline int rcu_read_lock_trace_held(void) { return 1; } #endif // #else // #if defined(CONFIG_DEBUG_LOCK_ALLOC) && defined(CONFIG_TASKS_TRACE_RCU) #ifdef CONFIG_TASKS_TRACE_RCU /** * rcu_read_lock_tasks_trace - mark beginning of RCU-trace read-side critical section * * When synchronize_rcu_tasks_trace() is invoked by one task, then that * task is guaranteed to block until all other tasks exit their read-side * critical sections. Similarly, if call_rcu_trace() is invoked on one * task while other tasks are within RCU read-side critical sections, * invocation of the corresponding RCU callback is deferred until after * the all the other tasks exit their critical sections. * * For more details, please see the documentation for * srcu_read_lock_fast(). For a description of how implicit RCU * readers provide the needed ordering for architectures defining the * ARCH_WANTS_NO_INSTR Kconfig option (and thus promising never to trace * code where RCU is not watching), please see the __srcu_read_lock_fast() * (non-kerneldoc) header comment. Otherwise, the smp_mb() below provided * the needed ordering. */ static inline struct srcu_ctr __percpu *rcu_read_lock_tasks_trace(void) { struct srcu_ctr __percpu *ret = __srcu_read_lock_fast(&rcu_tasks_trace_srcu_struct); rcu_try_lock_acquire(&rcu_tasks_trace_srcu_struct.dep_map); if (!IS_ENABLED(CONFIG_TASKS_TRACE_RCU_NO_MB)) smp_mb(); // Provide ordering on noinstr-incomplete architectures. return ret; } /** * rcu_read_unlock_tasks_trace - mark end of RCU-trace read-side critical section * @scp: return value from corresponding rcu_read_lock_tasks_trace(). * * Pairs with the preceding call to rcu_read_lock_tasks_trace() that * returned the value passed in via scp. * * For more details, please see the documentation for rcu_read_unlock(). * For memory-ordering information, please see the header comment for the * rcu_read_lock_tasks_trace() function. */ static inline void rcu_read_unlock_tasks_trace(struct srcu_ctr __percpu *scp) { if (!IS_ENABLED(CONFIG_TASKS_TRACE_RCU_NO_MB)) smp_mb(); // Provide ordering on noinstr-incomplete architectures. __srcu_read_unlock_fast(&rcu_tasks_trace_srcu_struct, scp); srcu_lock_release(&rcu_tasks_trace_srcu_struct.dep_map); } /** * rcu_read_lock_trace - mark beginning of RCU-trace read-side critical section * * When synchronize_rcu_tasks_trace() is invoked by one task, then that * task is guaranteed to block until all other tasks exit their read-side * critical sections. Similarly, if call_rcu_trace() is invoked on one * task while other tasks are within RCU read-side critical sections, * invocation of the corresponding RCU callback is deferred until after * the all the other tasks exit their critical sections. * * For more details, please see the documentation for rcu_read_lock(). */ static inline void rcu_read_lock_trace(void) { struct task_struct *t = current; rcu_try_lock_acquire(&rcu_tasks_trace_srcu_struct.dep_map); if (t->trc_reader_nesting++) { // In case we interrupted a Tasks Trace RCU reader. return; } barrier(); // nesting before scp to protect against interrupt handler. t->trc_reader_scp = __srcu_read_lock_fast(&rcu_tasks_trace_srcu_struct); if (!IS_ENABLED(CONFIG_TASKS_TRACE_RCU_NO_MB)) smp_mb(); // Placeholder for more selective ordering } /** * rcu_read_unlock_trace - mark end of RCU-trace read-side critical section * * Pairs with a preceding call to rcu_read_lock_trace(), and nesting is * allowed. Invoking a rcu_read_unlock_trace() when there is no matching * rcu_read_lock_trace() is verboten, and will result in lockdep complaints. * * For more details, please see the documentation for rcu_read_unlock(). */ static inline void rcu_read_unlock_trace(void) { struct srcu_ctr __percpu *scp; struct task_struct *t = current; scp = t->trc_reader_scp; barrier(); // scp before nesting to protect against interrupt handler. if (!--t->trc_reader_nesting) { if (!IS_ENABLED(CONFIG_TASKS_TRACE_RCU_NO_MB)) smp_mb(); // Placeholder for more selective ordering __srcu_read_unlock_fast(&rcu_tasks_trace_srcu_struct, scp); } srcu_lock_release(&rcu_tasks_trace_srcu_struct.dep_map); } /** * call_rcu_tasks_trace() - Queue a callback trace task-based grace period * @rhp: structure to be used for queueing the RCU updates. * @func: actual callback function to be invoked after the grace period * * The callback function will be invoked some time after a trace rcu-tasks * grace period elapses, in other words after all currently executing * trace rcu-tasks read-side critical sections have completed. These * read-side critical sections are delimited by calls to rcu_read_lock_trace() * and rcu_read_unlock_trace(). * * See the description of call_rcu() for more detailed information on * memory ordering guarantees. */ static inline void call_rcu_tasks_trace(struct rcu_head *rhp, rcu_callback_t func) { call_srcu(&rcu_tasks_trace_srcu_struct, rhp, func); } /** * synchronize_rcu_tasks_trace - wait for a trace rcu-tasks grace period * * Control will return to the caller some time after a trace rcu-tasks * grace period has elapsed, in other words after all currently executing * trace rcu-tasks read-side critical sections have elapsed. These read-side * critical sections are delimited by calls to rcu_read_lock_trace() * and rcu_read_unlock_trace(). * * This is a very specialized primitive, intended only for a few uses in * tracing and other situations requiring manipulation of function preambles * and profiling hooks. The synchronize_rcu_tasks_trace() function is not * (yet) intended for heavy use from multiple CPUs. * * See the description of synchronize_rcu() for more detailed information * on memory ordering guarantees. */ static inline void synchronize_rcu_tasks_trace(void) { synchronize_srcu(&rcu_tasks_trace_srcu_struct); } /** * rcu_barrier_tasks_trace - Wait for in-flight call_rcu_tasks_trace() callbacks. * * Note that rcu_barrier_tasks_trace() is not obligated to actually wait, * for example, if there are no pending callbacks. */ static inline void rcu_barrier_tasks_trace(void) { srcu_barrier(&rcu_tasks_trace_srcu_struct); } /** * rcu_tasks_trace_expedite_current - Expedite the current Tasks Trace RCU grace period * * Cause the current Tasks Trace RCU grace period to become expedited. * The grace period following the current one might also be expedited. * If there is no current grace period, one might be created. If the * current grace period is currently sleeping, that sleep will complete * before expediting will take effect. */ static inline void rcu_tasks_trace_expedite_current(void) { srcu_expedite_current(&rcu_tasks_trace_srcu_struct); } // Placeholders to enable stepwise transition. void __init rcu_tasks_trace_suppress_unused(void); #else /* * The BPF JIT forms these addresses even when it doesn't call these * functions, so provide definitions that result in runtime errors. */ static inline void call_rcu_tasks_trace(struct rcu_head *rhp, rcu_callback_t func) { BUG(); } static inline void rcu_read_lock_trace(void) { BUG(); } static inline void rcu_read_unlock_trace(void) { BUG(); } #endif /* #ifdef CONFIG_TASKS_TRACE_RCU */ DEFINE_LOCK_GUARD_0(rcu_tasks_trace, rcu_read_lock_trace(), rcu_read_unlock_trace()) #endif /* __LINUX_RCUPDATE_TRACE_H */
44 44 44 44 44 44 44 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 // SPDX-License-Identifier: GPL-2.0-or-later /* * ip_vs_app.c: Application module support for IPVS * * Authors: Wensong Zhang <wensong@linuxvirtualserver.org> * * Most code here is taken from ip_masq_app.c in kernel 2.2. The difference * is that ip_vs_app module handles the reverse direction (incoming requests * and outgoing responses). * * IP_MASQ_APP application masquerading module * * Author: Juan Jose Ciarlante, <jjciarla@raiz.uncu.edu.ar> */ #define pr_fmt(fmt) "IPVS: " fmt #include <linux/module.h> #include <linux/kernel.h> #include <linux/skbuff.h> #include <linux/in.h> #include <linux/ip.h> #include <linux/netfilter.h> #include <linux/slab.h> #include <net/net_namespace.h> #include <net/protocol.h> #include <net/tcp.h> #include <linux/stat.h> #include <linux/proc_fs.h> #include <linux/seq_file.h> #include <linux/mutex.h> #include <net/ip_vs.h> EXPORT_SYMBOL(register_ip_vs_app); EXPORT_SYMBOL(unregister_ip_vs_app); EXPORT_SYMBOL(register_ip_vs_app_inc); static DEFINE_MUTEX(__ip_vs_app_mutex); /* * Get an ip_vs_app object */ static inline int ip_vs_app_get(struct ip_vs_app *app) { return try_module_get(app->module); } static inline void ip_vs_app_put(struct ip_vs_app *app) { module_put(app->module); } static void ip_vs_app_inc_destroy(struct ip_vs_app *inc) { kfree(inc->timeout_table); kfree(inc); } static void ip_vs_app_inc_rcu_free(struct rcu_head *head) { struct ip_vs_app *inc = container_of(head, struct ip_vs_app, rcu_head); ip_vs_app_inc_destroy(inc); } /* * Allocate/initialize app incarnation and register it in proto apps. */ static int ip_vs_app_inc_new(struct netns_ipvs *ipvs, struct ip_vs_app *app, __u16 proto, __u16 port) { struct ip_vs_protocol *pp; struct ip_vs_app *inc; int ret; if (!(pp = ip_vs_proto_get(proto))) return -EPROTONOSUPPORT; if (!pp->unregister_app) return -EOPNOTSUPP; inc = kmemdup(app, sizeof(*inc), GFP_KERNEL); if (!inc) return -ENOMEM; INIT_LIST_HEAD(&inc->p_list); INIT_LIST_HEAD(&inc->incs_list); inc->app = app; inc->port = htons(port); atomic_set(&inc->usecnt, 0); if (app->timeouts) { inc->timeout_table = ip_vs_create_timeout_table(app->timeouts, app->timeouts_size); if (!inc->timeout_table) { ret = -ENOMEM; goto out; } } ret = pp->register_app(ipvs, inc); if (ret) goto out; list_add(&inc->a_list, &app->incs_list); IP_VS_DBG(9, "%s App %s:%u registered\n", pp->name, inc->name, ntohs(inc->port)); return 0; out: ip_vs_app_inc_destroy(inc); return ret; } /* * Release app incarnation */ static void ip_vs_app_inc_release(struct netns_ipvs *ipvs, struct ip_vs_app *inc) { struct ip_vs_protocol *pp; if (!(pp = ip_vs_proto_get(inc->protocol))) return; if (pp->unregister_app) pp->unregister_app(ipvs, inc); IP_VS_DBG(9, "%s App %s:%u unregistered\n", pp->name, inc->name, ntohs(inc->port)); list_del(&inc->a_list); call_rcu(&inc->rcu_head, ip_vs_app_inc_rcu_free); } /* * Get reference to app inc (only called from softirq) * */ int ip_vs_app_inc_get(struct ip_vs_app *inc) { int result; result = ip_vs_app_get(inc->app); if (result) atomic_inc(&inc->usecnt); return result; } /* * Put the app inc (only called from timer or net softirq) */ void ip_vs_app_inc_put(struct ip_vs_app *inc) { atomic_dec(&inc->usecnt); ip_vs_app_put(inc->app); } /* * Register an application incarnation in protocol applications */ int register_ip_vs_app_inc(struct netns_ipvs *ipvs, struct ip_vs_app *app, __u16 proto, __u16 port) { int result; mutex_lock(&__ip_vs_app_mutex); result = ip_vs_app_inc_new(ipvs, app, proto, port); mutex_unlock(&__ip_vs_app_mutex); return result; } /* Register application for netns */ struct ip_vs_app *register_ip_vs_app(struct netns_ipvs *ipvs, struct ip_vs_app *app) { struct ip_vs_app *a; int err = 0; mutex_lock(&__ip_vs_app_mutex); /* increase the module use count */ if (!ip_vs_use_count_inc()) { err = -ENOENT; goto out_unlock; } list_for_each_entry(a, &ipvs->app_list, a_list) { if (!strcmp(app->name, a->name)) { err = -EEXIST; /* decrease the module use count */ ip_vs_use_count_dec(); goto out_unlock; } } a = kmemdup(app, sizeof(*app), GFP_KERNEL); if (!a) { err = -ENOMEM; /* decrease the module use count */ ip_vs_use_count_dec(); goto out_unlock; } INIT_LIST_HEAD(&a->incs_list); list_add(&a->a_list, &ipvs->app_list); out_unlock: mutex_unlock(&__ip_vs_app_mutex); return err ? ERR_PTR(err) : a; } /* * ip_vs_app unregistration routine * We are sure there are no app incarnations attached to services * Caller should use synchronize_rcu() or rcu_barrier() */ void unregister_ip_vs_app(struct netns_ipvs *ipvs, struct ip_vs_app *app) { struct ip_vs_app *a, *anxt, *inc, *nxt; mutex_lock(&__ip_vs_app_mutex); list_for_each_entry_safe(a, anxt, &ipvs->app_list, a_list) { if (app && strcmp(app->name, a->name)) continue; list_for_each_entry_safe(inc, nxt, &a->incs_list, a_list) { ip_vs_app_inc_release(ipvs, inc); } list_del(&a->a_list); kfree(a); /* decrease the module use count */ ip_vs_use_count_dec(); } mutex_unlock(&__ip_vs_app_mutex); } /* * Bind ip_vs_conn to its ip_vs_app (called by cp constructor) */ int ip_vs_bind_app(struct ip_vs_conn *cp, struct ip_vs_protocol *pp) { return pp->app_conn_bind(cp); } /* * Unbind cp from application incarnation (called by cp destructor) */ void ip_vs_unbind_app(struct ip_vs_conn *cp) { struct ip_vs_app *inc = cp->app; if (!inc) return; if (inc->unbind_conn) inc->unbind_conn(inc, cp); if (inc->done_conn) inc->done_conn(inc, cp); ip_vs_app_inc_put(inc); cp->app = NULL; } /* * Fixes th->seq based on ip_vs_seq info. */ static inline void vs_fix_seq(const struct ip_vs_seq *vseq, struct tcphdr *th) { __u32 seq = ntohl(th->seq); /* * Adjust seq with delta-offset for all packets after * the most recent resized pkt seq and with previous_delta offset * for all packets before most recent resized pkt seq. */ if (vseq->delta || vseq->previous_delta) { if(after(seq, vseq->init_seq)) { th->seq = htonl(seq + vseq->delta); IP_VS_DBG(9, "%s(): added delta (%d) to seq\n", __func__, vseq->delta); } else { th->seq = htonl(seq + vseq->previous_delta); IP_VS_DBG(9, "%s(): added previous_delta (%d) to seq\n", __func__, vseq->previous_delta); } } } /* * Fixes th->ack_seq based on ip_vs_seq info. */ static inline void vs_fix_ack_seq(const struct ip_vs_seq *vseq, struct tcphdr *th) { __u32 ack_seq = ntohl(th->ack_seq); /* * Adjust ack_seq with delta-offset for * the packets AFTER most recent resized pkt has caused a shift * for packets before most recent resized pkt, use previous_delta */ if (vseq->delta || vseq->previous_delta) { /* since ack_seq is the number of octet that is expected to receive next, so compare it with init_seq+delta */ if(after(ack_seq, vseq->init_seq+vseq->delta)) { th->ack_seq = htonl(ack_seq - vseq->delta); IP_VS_DBG(9, "%s(): subtracted delta " "(%d) from ack_seq\n", __func__, vseq->delta); } else { th->ack_seq = htonl(ack_seq - vseq->previous_delta); IP_VS_DBG(9, "%s(): subtracted " "previous_delta (%d) from ack_seq\n", __func__, vseq->previous_delta); } } } /* * Updates ip_vs_seq if pkt has been resized * Assumes already checked proto==IPPROTO_TCP and diff!=0. */ static inline void vs_seq_update(struct ip_vs_conn *cp, struct ip_vs_seq *vseq, unsigned int flag, __u32 seq, int diff) { /* spinlock is to keep updating cp->flags atomic */ spin_lock_bh(&cp->lock); if (!(cp->flags & flag) || after(seq, vseq->init_seq)) { vseq->previous_delta = vseq->delta; vseq->delta += diff; vseq->init_seq = seq; cp->flags |= flag; } spin_unlock_bh(&cp->lock); } static inline int app_tcp_pkt_out(struct ip_vs_conn *cp, struct sk_buff *skb, struct ip_vs_app *app, struct ip_vs_iphdr *ipvsh) { int diff; const unsigned int tcp_offset = ip_hdrlen(skb); struct tcphdr *th; __u32 seq; if (skb_ensure_writable(skb, tcp_offset + sizeof(*th))) return 0; th = (struct tcphdr *)(skb_network_header(skb) + tcp_offset); /* * Remember seq number in case this pkt gets resized */ seq = ntohl(th->seq); /* * Fix seq stuff if flagged as so. */ if (cp->flags & IP_VS_CONN_F_OUT_SEQ) vs_fix_seq(&cp->out_seq, th); if (cp->flags & IP_VS_CONN_F_IN_SEQ) vs_fix_ack_seq(&cp->in_seq, th); /* * Call private output hook function */ if (app->pkt_out == NULL) return 1; if (!app->pkt_out(app, cp, skb, &diff, ipvsh)) return 0; /* * Update ip_vs seq stuff if len has changed. */ if (diff != 0) vs_seq_update(cp, &cp->out_seq, IP_VS_CONN_F_OUT_SEQ, seq, diff); return 1; } /* * Output pkt hook. Will call bound ip_vs_app specific function * called by ipvs packet handler, assumes previously checked cp!=NULL * returns false if it can't handle packet (oom) */ int ip_vs_app_pkt_out(struct ip_vs_conn *cp, struct sk_buff *skb, struct ip_vs_iphdr *ipvsh) { struct ip_vs_app *app; /* * check if application module is bound to * this ip_vs_conn. */ if ((app = cp->app) == NULL) return 1; /* TCP is complicated */ if (cp->protocol == IPPROTO_TCP) return app_tcp_pkt_out(cp, skb, app, ipvsh); /* * Call private output hook function */ if (app->pkt_out == NULL) return 1; return app->pkt_out(app, cp, skb, NULL, ipvsh); } static inline int app_tcp_pkt_in(struct ip_vs_conn *cp, struct sk_buff *skb, struct ip_vs_app *app, struct ip_vs_iphdr *ipvsh) { int diff; const unsigned int tcp_offset = ip_hdrlen(skb); struct tcphdr *th; __u32 seq; if (skb_ensure_writable(skb, tcp_offset + sizeof(*th))) return 0; th = (struct tcphdr *)(skb_network_header(skb) + tcp_offset); /* * Remember seq number in case this pkt gets resized */ seq = ntohl(th->seq); /* * Fix seq stuff if flagged as so. */ if (cp->flags & IP_VS_CONN_F_IN_SEQ) vs_fix_seq(&cp->in_seq, th); if (cp->flags & IP_VS_CONN_F_OUT_SEQ) vs_fix_ack_seq(&cp->out_seq, th); /* * Call private input hook function */ if (app->pkt_in == NULL) return 1; if (!app->pkt_in(app, cp, skb, &diff, ipvsh)) return 0; /* * Update ip_vs seq stuff if len has changed. */ if (diff != 0) vs_seq_update(cp, &cp->in_seq, IP_VS_CONN_F_IN_SEQ, seq, diff); return 1; } /* * Input pkt hook. Will call bound ip_vs_app specific function * called by ipvs packet handler, assumes previously checked cp!=NULL. * returns false if can't handle packet (oom). */ int ip_vs_app_pkt_in(struct ip_vs_conn *cp, struct sk_buff *skb, struct ip_vs_iphdr *ipvsh) { struct ip_vs_app *app; /* * check if application module is bound to * this ip_vs_conn. */ if ((app = cp->app) == NULL) return 1; /* TCP is complicated */ if (cp->protocol == IPPROTO_TCP) return app_tcp_pkt_in(cp, skb, app, ipvsh); /* * Call private input hook function */ if (app->pkt_in == NULL) return 1; return app->pkt_in(app, cp, skb, NULL, ipvsh); } #ifdef CONFIG_PROC_FS /* * /proc/net/ip_vs_app entry function */ static struct ip_vs_app *ip_vs_app_idx(struct netns_ipvs *ipvs, loff_t pos) { struct ip_vs_app *app, *inc; list_for_each_entry(app, &ipvs->app_list, a_list) { list_for_each_entry(inc, &app->incs_list, a_list) { if (pos-- == 0) return inc; } } return NULL; } static void *ip_vs_app_seq_start(struct seq_file *seq, loff_t *pos) { struct net *net = seq_file_net(seq); struct netns_ipvs *ipvs = net_ipvs(net); mutex_lock(&__ip_vs_app_mutex); return *pos ? ip_vs_app_idx(ipvs, *pos - 1) : SEQ_START_TOKEN; } static void *ip_vs_app_seq_next(struct seq_file *seq, void *v, loff_t *pos) { struct ip_vs_app *inc, *app; struct list_head *e; struct net *net = seq_file_net(seq); struct netns_ipvs *ipvs = net_ipvs(net); ++*pos; if (v == SEQ_START_TOKEN) return ip_vs_app_idx(ipvs, 0); inc = v; app = inc->app; if ((e = inc->a_list.next) != &app->incs_list) return list_entry(e, struct ip_vs_app, a_list); /* go on to next application */ for (e = app->a_list.next; e != &ipvs->app_list; e = e->next) { app = list_entry(e, struct ip_vs_app, a_list); list_for_each_entry(inc, &app->incs_list, a_list) { return inc; } } return NULL; } static void ip_vs_app_seq_stop(struct seq_file *seq, void *v) { mutex_unlock(&__ip_vs_app_mutex); } static int ip_vs_app_seq_show(struct seq_file *seq, void *v) { if (v == SEQ_START_TOKEN) seq_puts(seq, "prot port usecnt name\n"); else { const struct ip_vs_app *inc = v; seq_printf(seq, "%-3s %-7u %-6d %-17s\n", ip_vs_proto_name(inc->protocol), ntohs(inc->port), atomic_read(&inc->usecnt), inc->name); } return 0; } static const struct seq_operations ip_vs_app_seq_ops = { .start = ip_vs_app_seq_start, .next = ip_vs_app_seq_next, .stop = ip_vs_app_seq_stop, .show = ip_vs_app_seq_show, }; #endif int __net_init ip_vs_app_net_init(struct netns_ipvs *ipvs) { INIT_LIST_HEAD(&ipvs->app_list); #ifdef CONFIG_PROC_FS if (!proc_create_net("ip_vs_app", 0, ipvs->net->proc_net, &ip_vs_app_seq_ops, sizeof(struct seq_net_private))) return -ENOMEM; #endif return 0; } void __net_exit ip_vs_app_net_cleanup(struct netns_ipvs *ipvs) { unregister_ip_vs_app(ipvs, NULL /* all */); #ifdef CONFIG_PROC_FS remove_proc_entry("ip_vs_app", ipvs->net->proc_net); #endif }
4 5 85 98 97 96 98 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 // SPDX-License-Identifier: GPL-2.0-or-later /* * Key-agreement Protocol Primitives (KPP) * * Copyright (c) 2016, Intel Corporation * Authors: Salvatore Benedetto <salvatore.benedetto@intel.com> */ #include <crypto/internal/kpp.h> #include <linux/cryptouser.h> #include <linux/errno.h> #include <linux/kernel.h> #include <linux/module.h> #include <linux/seq_file.h> #include <linux/string.h> #include <net/netlink.h> #include "internal.h" static int __maybe_unused crypto_kpp_report( struct sk_buff *skb, struct crypto_alg *alg) { struct crypto_report_kpp rkpp; memset(&rkpp, 0, sizeof(rkpp)); strscpy(rkpp.type, "kpp", sizeof(rkpp.type)); return nla_put(skb, CRYPTOCFGA_REPORT_KPP, sizeof(rkpp), &rkpp); } static void __maybe_unused crypto_kpp_show(struct seq_file *m, struct crypto_alg *alg) { seq_puts(m, "type : kpp\n"); } static void crypto_kpp_exit_tfm(struct crypto_tfm *tfm) { struct crypto_kpp *kpp = __crypto_kpp_tfm(tfm); struct kpp_alg *alg = crypto_kpp_alg(kpp); alg->exit(kpp); } static int crypto_kpp_init_tfm(struct crypto_tfm *tfm) { struct crypto_kpp *kpp = __crypto_kpp_tfm(tfm); struct kpp_alg *alg = crypto_kpp_alg(kpp); if (alg->exit) kpp->base.exit = crypto_kpp_exit_tfm; if (alg->init) return alg->init(kpp); return 0; } static void crypto_kpp_free_instance(struct crypto_instance *inst) { struct kpp_instance *kpp = kpp_instance(inst); kpp->free(kpp); } static const struct crypto_type crypto_kpp_type = { .extsize = crypto_alg_extsize, .init_tfm = crypto_kpp_init_tfm, .free = crypto_kpp_free_instance, #ifdef CONFIG_PROC_FS .show = crypto_kpp_show, #endif #if IS_ENABLED(CONFIG_CRYPTO_USER) .report = crypto_kpp_report, #endif .maskclear = ~CRYPTO_ALG_TYPE_MASK, .maskset = CRYPTO_ALG_TYPE_MASK, .type = CRYPTO_ALG_TYPE_KPP, .tfmsize = offsetof(struct crypto_kpp, base), .algsize = offsetof(struct kpp_alg, base), }; struct crypto_kpp *crypto_alloc_kpp(const char *alg_name, u32 type, u32 mask) { return crypto_alloc_tfm(alg_name, &crypto_kpp_type, type, mask); } EXPORT_SYMBOL_GPL(crypto_alloc_kpp); int crypto_grab_kpp(struct crypto_kpp_spawn *spawn, struct crypto_instance *inst, const char *name, u32 type, u32 mask) { spawn->base.frontend = &crypto_kpp_type; return crypto_grab_spawn(&spawn->base, inst, name, type, mask); } EXPORT_SYMBOL_GPL(crypto_grab_kpp); int crypto_has_kpp(const char *alg_name, u32 type, u32 mask) { return crypto_type_has_alg(alg_name, &crypto_kpp_type, type, mask); } EXPORT_SYMBOL_GPL(crypto_has_kpp); static void kpp_prepare_alg(struct kpp_alg *alg) { struct crypto_alg *base = &alg->base; base->cra_type = &crypto_kpp_type; base->cra_flags &= ~CRYPTO_ALG_TYPE_MASK; base->cra_flags |= CRYPTO_ALG_TYPE_KPP; } int crypto_register_kpp(struct kpp_alg *alg) { struct crypto_alg *base = &alg->base; kpp_prepare_alg(alg); return crypto_register_alg(base); } EXPORT_SYMBOL_GPL(crypto_register_kpp); void crypto_unregister_kpp(struct kpp_alg *alg) { crypto_unregister_alg(&alg->base); } EXPORT_SYMBOL_GPL(crypto_unregister_kpp); int kpp_register_instance(struct crypto_template *tmpl, struct kpp_instance *inst) { if (WARN_ON(!inst->free)) return -EINVAL; kpp_prepare_alg(&inst->alg); return crypto_register_instance(tmpl, kpp_crypto_instance(inst)); } EXPORT_SYMBOL_GPL(kpp_register_instance); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("Key-agreement Protocol Primitives");
28 28 7 23 24 7 31 24 12 30 10 22 6 23 28 6 1 33 31 7 23 23 4 6 28 26 7 7 7 20 22 3 19 22 20 21 10 3 3 6 6 27 27 27 6 22 22 22 22 5 2 4 4 3 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 // SPDX-License-Identifier: GPL-2.0-or-later /* * ip_vs_xmit.c: various packet transmitters for IPVS * * Authors: Wensong Zhang <wensong@linuxvirtualserver.org> * Julian Anastasov <ja@ssi.bg> * * Changes: * * Description of forwarding methods: * - all transmitters are called from LOCAL_IN (remote clients) and * LOCAL_OUT (local clients) but for ICMP can be called from FORWARD * - not all connections have destination server, for example, * connections in backup server when fwmark is used * - bypass connections use daddr from packet * - we can use dst without ref while sending in RCU section, we use * ref when returning NF_ACCEPT for NAT-ed packet via loopback * LOCAL_OUT rules: * - skb->dev is NULL, skb->protocol is not set (both are set in POST_ROUTING) * - skb->pkt_type is not set yet * - the only place where we can see skb->sk != NULL */ #define pr_fmt(fmt) "IPVS: " fmt #include <linux/kernel.h> #include <linux/slab.h> #include <linux/tcp.h> /* for tcphdr */ #include <net/ip.h> #include <net/gue.h> #include <net/gre.h> #include <net/tcp.h> /* for csum_tcpudp_magic */ #include <net/udp.h> #include <net/icmp.h> /* for icmp_send */ #include <net/route.h> /* for ip_route_output */ #include <net/ipv6.h> #include <net/ip6_route.h> #include <net/ip_tunnels.h> #include <net/ip6_checksum.h> #include <net/addrconf.h> #include <linux/icmpv6.h> #include <linux/netfilter.h> #include <linux/netfilter_ipv4.h> #include <net/ip_vs.h> enum { IP_VS_RT_MODE_LOCAL = 1, /* Allow local dest */ IP_VS_RT_MODE_NON_LOCAL = 2, /* Allow non-local dest */ IP_VS_RT_MODE_RDR = 4, /* Allow redirect from remote daddr to * local */ IP_VS_RT_MODE_CONNECT = 8, /* Always bind route to saddr */ IP_VS_RT_MODE_KNOWN_NH = 16,/* Route via remote addr */ IP_VS_RT_MODE_TUNNEL = 32,/* Tunnel mode */ }; static inline struct ip_vs_dest_dst *ip_vs_dest_dst_alloc(void) { return kmalloc(sizeof(struct ip_vs_dest_dst), GFP_ATOMIC); } static inline void ip_vs_dest_dst_free(struct ip_vs_dest_dst *dest_dst) { kfree(dest_dst); } /* * Destination cache to speed up outgoing route lookup */ static inline void __ip_vs_dst_set(struct ip_vs_dest *dest, struct ip_vs_dest_dst *dest_dst, struct dst_entry *dst, u32 dst_cookie) { struct ip_vs_dest_dst *old; old = rcu_dereference_protected(dest->dest_dst, lockdep_is_held(&dest->dst_lock)); if (dest_dst) { dest_dst->dst_cache = dst; dest_dst->dst_cookie = dst_cookie; } rcu_assign_pointer(dest->dest_dst, dest_dst); if (old) call_rcu(&old->rcu_head, ip_vs_dest_dst_rcu_free); } static inline struct ip_vs_dest_dst * __ip_vs_dst_check(struct ip_vs_dest *dest) { struct ip_vs_dest_dst *dest_dst = rcu_dereference(dest->dest_dst); struct dst_entry *dst; if (!dest_dst) return NULL; dst = dest_dst->dst_cache; if (READ_ONCE(dst->obsolete) && dst->ops->check(dst, dest_dst->dst_cookie) == NULL) return NULL; return dest_dst; } static inline bool __mtu_check_toobig_v6(const struct sk_buff *skb, u32 mtu) { if (IP6CB(skb)->frag_max_size) { /* frag_max_size tell us that, this packet have been * defragmented by netfilter IPv6 conntrack module. */ if (IP6CB(skb)->frag_max_size > mtu) return true; /* largest fragment violate MTU */ } else if (skb->len > mtu && !skb_is_gso(skb)) { return true; /* Packet size violate MTU size */ } return false; } /* Get route to daddr, optionally bind route to saddr */ static struct rtable *do_output_route4(struct net *net, __be32 daddr, int rt_mode, __be32 *ret_saddr) { struct flowi4 fl4; struct rtable *rt; memset(&fl4, 0, sizeof(fl4)); fl4.daddr = daddr; fl4.flowi4_flags = (rt_mode & IP_VS_RT_MODE_KNOWN_NH) ? FLOWI_FLAG_KNOWN_NH : 0; retry: rt = ip_route_output_key(net, &fl4); if (IS_ERR(rt)) { IP_VS_DBG_RL("ip_route_output error, dest: %pI4\n", &daddr); return NULL; } if (rt_mode & IP_VS_RT_MODE_CONNECT && fl4.saddr) { ip_rt_put(rt); flowi4_update_output(&fl4, 0, daddr, fl4.saddr); rt_mode = 0; goto retry; } if (ret_saddr) *ret_saddr = fl4.saddr; return rt; } #ifdef CONFIG_IP_VS_IPV6 static inline int __ip_vs_is_local_route6(struct rt6_info *rt) { return rt->dst.dev && rt->dst.dev->flags & IFF_LOOPBACK; } #endif static inline bool crosses_local_route_boundary(int skb_af, struct sk_buff *skb, int rt_mode, bool new_rt_is_local) { bool rt_mode_allow_local = !!(rt_mode & IP_VS_RT_MODE_LOCAL); bool rt_mode_allow_non_local = !!(rt_mode & IP_VS_RT_MODE_NON_LOCAL); bool rt_mode_allow_redirect = !!(rt_mode & IP_VS_RT_MODE_RDR); bool source_is_loopback; bool old_rt_is_local; #ifdef CONFIG_IP_VS_IPV6 if (skb_af == AF_INET6) { int addr_type = ipv6_addr_type(&ipv6_hdr(skb)->saddr); source_is_loopback = (!skb->dev || skb->dev->flags & IFF_LOOPBACK) && (addr_type & IPV6_ADDR_LOOPBACK); old_rt_is_local = __ip_vs_is_local_route6( dst_rt6_info(skb_dst(skb))); } else #endif { source_is_loopback = ipv4_is_loopback(ip_hdr(skb)->saddr); old_rt_is_local = skb_rtable(skb)->rt_flags & RTCF_LOCAL; } if (unlikely(new_rt_is_local)) { if (!rt_mode_allow_local) return true; if (!rt_mode_allow_redirect && !old_rt_is_local) return true; } else { if (!rt_mode_allow_non_local) return true; if (source_is_loopback) return true; } return false; } static inline void maybe_update_pmtu(int skb_af, struct sk_buff *skb, int mtu) { struct sock *sk = skb->sk; struct rtable *ort = skb_rtable(skb); if (!skb->dev && sk && sk_fullsock(sk)) ort->dst.ops->update_pmtu(&ort->dst, sk, NULL, mtu, true); } static inline bool ensure_mtu_is_adequate(struct netns_ipvs *ipvs, int skb_af, int rt_mode, struct ip_vs_iphdr *ipvsh, struct sk_buff *skb, int mtu) { #ifdef CONFIG_IP_VS_IPV6 if (skb_af == AF_INET6) { struct net *net = ipvs->net; if (unlikely(__mtu_check_toobig_v6(skb, mtu))) { if (!skb->dev) skb->dev = net->loopback_dev; /* only send ICMP too big on first fragment */ if (!ipvsh->fragoffs && !ip_vs_iph_icmp(ipvsh)) icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); IP_VS_DBG(1, "frag needed for %pI6c\n", &ipv6_hdr(skb)->saddr); return false; } } else #endif { /* If we're going to tunnel the packet and pmtu discovery * is disabled, we'll just fragment it anyway */ if ((rt_mode & IP_VS_RT_MODE_TUNNEL) && !sysctl_pmtu_disc(ipvs)) return true; if (unlikely(ip_hdr(skb)->frag_off & htons(IP_DF) && skb->len > mtu && !skb_is_gso(skb) && !ip_vs_iph_icmp(ipvsh))) { icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu)); IP_VS_DBG(1, "frag needed for %pI4\n", &ip_hdr(skb)->saddr); return false; } } return true; } static inline bool decrement_ttl(struct netns_ipvs *ipvs, int skb_af, struct sk_buff *skb) { struct net *net = ipvs->net; #ifdef CONFIG_IP_VS_IPV6 if (skb_af == AF_INET6) { struct dst_entry *dst = skb_dst(skb); /* check and decrement ttl */ if (ipv6_hdr(skb)->hop_limit <= 1) { struct inet6_dev *idev = __in6_dev_get_safely(skb->dev); /* Force OUTPUT device used as source address */ skb->dev = dst->dev; icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT, 0); IP6_INC_STATS(net, idev, IPSTATS_MIB_INHDRERRORS); return false; } /* don't propagate ttl change to cloned packets */ if (skb_ensure_writable(skb, sizeof(struct ipv6hdr))) return false; ipv6_hdr(skb)->hop_limit--; } else #endif { if (ip_hdr(skb)->ttl <= 1) { /* Tell the sender its packet died... */ IP_INC_STATS(net, IPSTATS_MIB_INHDRERRORS); icmp_send(skb, ICMP_TIME_EXCEEDED, ICMP_EXC_TTL, 0); return false; } /* don't propagate ttl change to cloned packets */ if (skb_ensure_writable(skb, sizeof(struct iphdr))) return false; /* Decrease ttl */ ip_decrease_ttl(ip_hdr(skb)); } return true; } /* Get route to destination or remote server */ static int __ip_vs_get_out_rt(struct netns_ipvs *ipvs, int skb_af, struct sk_buff *skb, struct ip_vs_dest *dest, __be32 daddr, int rt_mode, __be32 *ret_saddr, struct ip_vs_iphdr *ipvsh) { struct net *net = ipvs->net; struct ip_vs_dest_dst *dest_dst; struct rtable *rt; /* Route to the other host */ int mtu; int local, noref = 1; if (dest) { dest_dst = __ip_vs_dst_check(dest); if (likely(dest_dst)) rt = dst_rtable(dest_dst->dst_cache); else { dest_dst = ip_vs_dest_dst_alloc(); spin_lock_bh(&dest->dst_lock); if (!dest_dst) { __ip_vs_dst_set(dest, NULL, NULL, 0); spin_unlock_bh(&dest->dst_lock); goto err_unreach; } rt = do_output_route4(net, dest->addr.ip, rt_mode, &dest_dst->dst_saddr.ip); if (!rt) { __ip_vs_dst_set(dest, NULL, NULL, 0); spin_unlock_bh(&dest->dst_lock); ip_vs_dest_dst_free(dest_dst); goto err_unreach; } __ip_vs_dst_set(dest, dest_dst, &rt->dst, 0); spin_unlock_bh(&dest->dst_lock); IP_VS_DBG(10, "new dst %pI4, src %pI4, refcnt=%d\n", &dest->addr.ip, &dest_dst->dst_saddr.ip, rcuref_read(&rt->dst.__rcuref)); } if (ret_saddr) *ret_saddr = dest_dst->dst_saddr.ip; } else { noref = 0; /* For such unconfigured boxes avoid many route lookups * for performance reasons because we do not remember saddr */ rt_mode &= ~IP_VS_RT_MODE_CONNECT; rt = do_output_route4(net, daddr, rt_mode, ret_saddr); if (!rt) goto err_unreach; } local = (rt->rt_flags & RTCF_LOCAL) ? 1 : 0; if (unlikely(crosses_local_route_boundary(skb_af, skb, rt_mode, local))) { IP_VS_DBG_RL("We are crossing local and non-local addresses" " daddr=%pI4\n", &daddr); goto err_put; } if (unlikely(local)) { /* skb to local stack, preserve old route */ if (!noref) ip_rt_put(rt); return local; } if (!decrement_ttl(ipvs, skb_af, skb)) goto err_put; if (likely(!(rt_mode & IP_VS_RT_MODE_TUNNEL))) { mtu = dst_mtu(&rt->dst); } else { mtu = dst_mtu(&rt->dst) - sizeof(struct iphdr); if (!dest) goto err_put; if (dest->tun_type == IP_VS_CONN_F_TUNNEL_TYPE_GUE) { mtu -= sizeof(struct udphdr) + sizeof(struct guehdr); if ((dest->tun_flags & IP_VS_TUNNEL_ENCAP_FLAG_REMCSUM) && skb->ip_summed == CHECKSUM_PARTIAL) mtu -= GUE_PLEN_REMCSUM + GUE_LEN_PRIV; } else if (dest->tun_type == IP_VS_CONN_F_TUNNEL_TYPE_GRE) { IP_TUNNEL_DECLARE_FLAGS(tflags) = { }; if (dest->tun_flags & IP_VS_TUNNEL_ENCAP_FLAG_CSUM) __set_bit(IP_TUNNEL_CSUM_BIT, tflags); mtu -= gre_calc_hlen(tflags); } if (mtu < 68) { IP_VS_DBG_RL("%s(): mtu less than 68\n", __func__); goto err_put; } maybe_update_pmtu(skb_af, skb, mtu); } if (!ensure_mtu_is_adequate(ipvs, skb_af, rt_mode, ipvsh, skb, mtu)) goto err_put; skb_dst_drop(skb); if (noref) skb_dst_set_noref(skb, &rt->dst); else skb_dst_set(skb, &rt->dst); return local; err_put: if (!noref) ip_rt_put(rt); return -1; err_unreach: if (!skb->dev) skb->dev = skb_dst(skb)->dev; dst_link_failure(skb); return -1; } #ifdef CONFIG_IP_VS_IPV6 static struct dst_entry * __ip_vs_route_output_v6(struct net *net, struct in6_addr *daddr, struct in6_addr *ret_saddr, int do_xfrm, int rt_mode) { struct dst_entry *dst; struct flowi6 fl6 = { .daddr = *daddr, }; if (rt_mode & IP_VS_RT_MODE_KNOWN_NH) fl6.flowi6_flags = FLOWI_FLAG_KNOWN_NH; dst = ip6_route_output(net, NULL, &fl6); if (dst->error) goto out_err; if (!ret_saddr) return dst; if (ipv6_addr_any(&fl6.saddr) && ipv6_dev_get_saddr(net, ip6_dst_idev(dst)->dev, &fl6.daddr, 0, &fl6.saddr) < 0) goto out_err; if (do_xfrm) { dst = xfrm_lookup(net, dst, flowi6_to_flowi(&fl6), NULL, 0); if (IS_ERR(dst)) { dst = NULL; goto out_err; } } *ret_saddr = fl6.saddr; return dst; out_err: dst_release(dst); IP_VS_DBG_RL("ip6_route_output error, dest: %pI6\n", daddr); return NULL; } /* * Get route to destination or remote server */ static int __ip_vs_get_out_rt_v6(struct netns_ipvs *ipvs, int skb_af, struct sk_buff *skb, struct ip_vs_dest *dest, struct in6_addr *daddr, struct in6_addr *ret_saddr, struct ip_vs_iphdr *ipvsh, int do_xfrm, int rt_mode) { struct net *net = ipvs->net; struct ip_vs_dest_dst *dest_dst; struct rt6_info *rt; /* Route to the other host */ struct dst_entry *dst; int mtu; int local, noref = 1; if (dest) { dest_dst = __ip_vs_dst_check(dest); if (likely(dest_dst)) rt = dst_rt6_info(dest_dst->dst_cache); else { u32 cookie; dest_dst = ip_vs_dest_dst_alloc(); spin_lock_bh(&dest->dst_lock); if (!dest_dst) { __ip_vs_dst_set(dest, NULL, NULL, 0); spin_unlock_bh(&dest->dst_lock); goto err_unreach; } dst = __ip_vs_route_output_v6(net, &dest->addr.in6, &dest_dst->dst_saddr.in6, do_xfrm, rt_mode); if (!dst) { __ip_vs_dst_set(dest, NULL, NULL, 0); spin_unlock_bh(&dest->dst_lock); ip_vs_dest_dst_free(dest_dst); goto err_unreach; } rt = dst_rt6_info(dst); cookie = rt6_get_cookie(rt); __ip_vs_dst_set(dest, dest_dst, &rt->dst, cookie); spin_unlock_bh(&dest->dst_lock); IP_VS_DBG(10, "new dst %pI6, src %pI6, refcnt=%d\n", &dest->addr.in6, &dest_dst->dst_saddr.in6, rcuref_read(&rt->dst.__rcuref)); } if (ret_saddr) *ret_saddr = dest_dst->dst_saddr.in6; } else { noref = 0; dst = __ip_vs_route_output_v6(net, daddr, ret_saddr, do_xfrm, rt_mode); if (!dst) goto err_unreach; rt = dst_rt6_info(dst); } local = __ip_vs_is_local_route6(rt); if (unlikely(crosses_local_route_boundary(skb_af, skb, rt_mode, local))) { IP_VS_DBG_RL("We are crossing local and non-local addresses" " daddr=%pI6\n", daddr); goto err_put; } if (unlikely(local)) { /* skb to local stack, preserve old route */ if (!noref) dst_release(&rt->dst); return local; } if (!decrement_ttl(ipvs, skb_af, skb)) goto err_put; /* MTU checking */ if (likely(!(rt_mode & IP_VS_RT_MODE_TUNNEL))) mtu = dst_mtu(&rt->dst); else { mtu = dst_mtu(&rt->dst) - sizeof(struct ipv6hdr); if (!dest) goto err_put; if (dest->tun_type == IP_VS_CONN_F_TUNNEL_TYPE_GUE) { mtu -= sizeof(struct udphdr) + sizeof(struct guehdr); if ((dest->tun_flags & IP_VS_TUNNEL_ENCAP_FLAG_REMCSUM) && skb->ip_summed == CHECKSUM_PARTIAL) mtu -= GUE_PLEN_REMCSUM + GUE_LEN_PRIV; } else if (dest->tun_type == IP_VS_CONN_F_TUNNEL_TYPE_GRE) { IP_TUNNEL_DECLARE_FLAGS(tflags) = { }; if (dest->tun_flags & IP_VS_TUNNEL_ENCAP_FLAG_CSUM) __set_bit(IP_TUNNEL_CSUM_BIT, tflags); mtu -= gre_calc_hlen(tflags); } if (mtu < IPV6_MIN_MTU) { IP_VS_DBG_RL("%s(): mtu less than %d\n", __func__, IPV6_MIN_MTU); goto err_put; } maybe_update_pmtu(skb_af, skb, mtu); } if (!ensure_mtu_is_adequate(ipvs, skb_af, rt_mode, ipvsh, skb, mtu)) goto err_put; skb_dst_drop(skb); if (noref) skb_dst_set_noref(skb, &rt->dst); else skb_dst_set(skb, &rt->dst); return local; err_put: if (!noref) dst_release(&rt->dst); return -1; err_unreach: /* The ip6_link_failure function requires the dev field to be set * in order to get the net (further for the sake of fwmark * reflection). */ if (!skb->dev) skb->dev = skb_dst(skb)->dev; dst_link_failure(skb); return -1; } #endif /* return NF_ACCEPT to allow forwarding or other NF_xxx on error */ static inline int ip_vs_tunnel_xmit_prepare(struct sk_buff *skb, struct ip_vs_conn *cp) { int ret = NF_ACCEPT; skb->ipvs_property = 1; if (unlikely(cp->flags & IP_VS_CONN_F_NFCT)) ret = ip_vs_confirm_conntrack(skb); if (ret == NF_ACCEPT) { nf_reset_ct(skb); skb_forward_csum(skb); if (skb->dev) skb_clear_tstamp(skb); } return ret; } /* In the event of a remote destination, it's possible that we would have * matches against an old socket (particularly a TIME-WAIT socket). This * causes havoc down the line (ip_local_out et. al. expect regular sockets * and invalid memory accesses will happen) so simply drop the association * in this case. */ static inline void ip_vs_drop_early_demux_sk(struct sk_buff *skb) { /* If dev is set, the packet came from the LOCAL_IN callback and * not from a local TCP socket. */ if (skb->dev) skb_orphan(skb); } /* return NF_STOLEN (sent) or NF_ACCEPT if local=1 (not sent) */ static inline int ip_vs_nat_send_or_cont(int pf, struct sk_buff *skb, struct ip_vs_conn *cp, int local) { int ret = NF_STOLEN; skb->ipvs_property = 1; if (likely(!(cp->flags & IP_VS_CONN_F_NFCT))) ip_vs_notrack(skb); else ip_vs_update_conntrack(skb, cp, 1); /* Remove the early_demux association unless it's bound for the * exact same port and address on this host after translation. */ if (!local || cp->vport != cp->dport || !ip_vs_addr_equal(cp->af, &cp->vaddr, &cp->daddr)) ip_vs_drop_early_demux_sk(skb); if (!local) { skb_forward_csum(skb); if (skb->dev) skb_clear_tstamp(skb); NF_HOOK(pf, NF_INET_LOCAL_OUT, cp->ipvs->net, NULL, skb, NULL, skb_dst(skb)->dev, dst_output); } else ret = NF_ACCEPT; return ret; } /* return NF_STOLEN (sent) or NF_ACCEPT if local=1 (not sent) */ static inline int ip_vs_send_or_cont(int pf, struct sk_buff *skb, struct ip_vs_conn *cp, int local) { int ret = NF_STOLEN; skb->ipvs_property = 1; if (likely(!(cp->flags & IP_VS_CONN_F_NFCT))) ip_vs_notrack(skb); if (!local) { ip_vs_drop_early_demux_sk(skb); skb_forward_csum(skb); if (skb->dev) skb_clear_tstamp(skb); NF_HOOK(pf, NF_INET_LOCAL_OUT, cp->ipvs->net, NULL, skb, NULL, skb_dst(skb)->dev, dst_output); } else ret = NF_ACCEPT; return ret; } /* * NULL transmitter (do nothing except return NF_ACCEPT) */ int ip_vs_null_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, struct ip_vs_protocol *pp, struct ip_vs_iphdr *ipvsh) { /* we do not touch skb and do not need pskb ptr */ return ip_vs_send_or_cont(NFPROTO_IPV4, skb, cp, 1); } /* * Bypass transmitter * Let packets bypass the destination when the destination is not * available, it may be only used in transparent cache cluster. */ int ip_vs_bypass_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, struct ip_vs_protocol *pp, struct ip_vs_iphdr *ipvsh) { struct iphdr *iph = ip_hdr(skb); if (__ip_vs_get_out_rt(cp->ipvs, cp->af, skb, NULL, iph->daddr, IP_VS_RT_MODE_NON_LOCAL, NULL, ipvsh) < 0) goto tx_error; ip_send_check(iph); /* Another hack: avoid icmp_send in ip_fragment */ skb->ignore_df = 1; ip_vs_send_or_cont(NFPROTO_IPV4, skb, cp, 0); return NF_STOLEN; tx_error: kfree_skb(skb); return NF_STOLEN; } #ifdef CONFIG_IP_VS_IPV6 int ip_vs_bypass_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, struct ip_vs_protocol *pp, struct ip_vs_iphdr *ipvsh) { struct ipv6hdr *iph = ipv6_hdr(skb); if (__ip_vs_get_out_rt_v6(cp->ipvs, cp->af, skb, NULL, &iph->daddr, NULL, ipvsh, 0, IP_VS_RT_MODE_NON_LOCAL) < 0) goto tx_error; /* Another hack: avoid icmp_send in ip_fragment */ skb->ignore_df = 1; ip_vs_send_or_cont(NFPROTO_IPV6, skb, cp, 0); return NF_STOLEN; tx_error: kfree_skb(skb); return NF_STOLEN; } #endif /* * NAT transmitter (only for outside-to-inside nat forwarding) * Not used for related ICMP */ int ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, struct ip_vs_protocol *pp, struct ip_vs_iphdr *ipvsh) { struct rtable *rt; /* Route to the other host */ int local, rc, was_input; /* check if it is a connection of no-client-port */ if (unlikely(cp->flags & IP_VS_CONN_F_NO_CPORT)) { __be16 _pt, *p; p = skb_header_pointer(skb, ipvsh->len, sizeof(_pt), &_pt); if (p == NULL) goto tx_error; ip_vs_conn_fill_cport(cp, *p); IP_VS_DBG(10, "filled cport=%d\n", ntohs(*p)); } was_input = rt_is_input_route(skb_rtable(skb)); local = __ip_vs_get_out_rt(cp->ipvs, cp->af, skb, cp->dest, cp->daddr.ip, IP_VS_RT_MODE_LOCAL | IP_VS_RT_MODE_NON_LOCAL | IP_VS_RT_MODE_RDR, NULL, ipvsh); if (local < 0)