// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * IPVS:        Round-Robin Scheduling module
 *
 * Authors:     Wensong Zhang <wensong@linuxvirtualserver.org>
 *              Peter Kese <peter.kese@ijs.si>
 *
 * Fixes/Changes:
 *     Wensong Zhang            :     changed the ip_vs_rr_schedule to return dest
 *     Julian Anastasov         :     fixed the NULL pointer access bug in debugging
 *     Wensong Zhang            :     changed some cosmetic things for debugging
 *     Wensong Zhang            :     changed for the d-linked destination list
 *     Wensong Zhang            :     added the ip_vs_rr_update_svc
 *     Wensong Zhang            :     quiesce any dest with weight=0
 */

#define KMSG_COMPONENT "IPVS"
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt

#include <linux/module.h>
#include <linux/kernel.h>

#include <net/ip_vs.h>

static int ip_vs_rr_init_svc(struct ip_vs_service *svc)
{
	svc->sched_data = &svc->destinations;
	return 0;
}

static int ip_vs_rr_del_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest)
{
	struct list_head *p;

	spin_lock_bh(&svc->sched_lock);
	p = (struct list_head *) svc->sched_data;
	/* dest is already unlinked, so p->prev is not valid but
	 * p->next is valid, use it to reach previous entry.
	 */
	if (p == &dest->n_list)
		svc->sched_data = p->next->prev;
	spin_unlock_bh(&svc->sched_lock);
	return 0;
}

/*
 * Round-Robin Scheduling
 */
static struct ip_vs_dest *
ip_vs_rr_schedule(struct ip_vs_service *svc, const struct sk_buff *skb,
		  struct ip_vs_iphdr *iph)
{
	struct list_head *p;
	struct ip_vs_dest *dest, *last;
	int pass = 0;

	IP_VS_DBG(6, "%s(): Scheduling...\n", __func__);

	spin_lock_bh(&svc->sched_lock);
	p = (struct list_head *) svc->sched_data;
	last = dest = list_entry(p, struct ip_vs_dest, n_list);

	do {
		list_for_each_entry_continue_rcu(dest,
						 &svc->destinations,
						 n_list) {
			if (!(dest->flags & IP_VS_DEST_F_OVERLOAD) &&
			    atomic_read(&dest->weight) > 0)
				/* HIT */
				goto out;
			if (dest == last)
				goto stop;
		}
		pass++;
		/* Previous dest could be unlinked, do not loop forever.
		 * If we stay at head there is no need for 2nd pass.
		 */
	} while (pass < 2 && p != &svc->destinations);

stop:
	spin_unlock_bh(&svc->sched_lock);
	ip_vs_scheduler_err(svc, "no destination available");
	return NULL;

out:
	svc->sched_data = &dest->n_list;
	spin_unlock_bh(&svc->sched_lock);
	IP_VS_DBG_BUF(6, "RR: server %s:%u "
		      "activeconns %d refcnt %d weight %d\n",
		      IP_VS_DBG_ADDR(dest->af, &dest->addr), ntohs(dest->port),
		      atomic_read(&dest->activeconns),
		      refcount_read(&dest->refcnt),
		      atomic_read(&dest->weight));

	return dest;
}

static struct ip_vs_scheduler ip_vs_rr_scheduler = {
	.name =			"rr",			/* name */
	.refcnt =		ATOMIC_INIT(0),
	.module =		THIS_MODULE,
	.n_list =		LIST_HEAD_INIT(ip_vs_rr_scheduler.n_list),
	.init_service =		ip_vs_rr_init_svc,
	.add_dest =		NULL,
	.del_dest =		ip_vs_rr_del_dest,
	.schedule =		ip_vs_rr_schedule,
};

static int __init ip_vs_rr_init(void)
{
	return register_ip_vs_scheduler(&ip_vs_rr_scheduler);
}

static void __exit ip_vs_rr_cleanup(void)
{
	unregister_ip_vs_scheduler(&ip_vs_rr_scheduler);
	synchronize_rcu();
}

module_init(ip_vs_rr_init);
module_exit(ip_vs_rr_cleanup);
MODULE_DESCRIPTION("ipvs round-robin scheduler");
MODULE_LICENSE("GPL");
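/*
 * Illustrative sketch (not part of the original file): the same two-pass
 * round-robin selection modeled over a plain array in userspace C.  A
 * server is skipped while it is overloaded or quiesced (weight == 0);
 * the scan resumes after the previously chosen entry and gives up after
 * one full cycle.  All names below are hypothetical.
 */
#if 0
#include <stddef.h>

struct server {
	int weight;		/* 0 means quiesced */
	int overloaded;
};

/* Return the index of the next usable server, or -1 if none. */
static int rr_pick(const struct server *srv, size_t n, size_t *cursor)
{
	for (size_t tried = 0; tried < n; tried++) {
		size_t i = (*cursor + 1 + tried) % n;

		if (!srv[i].overloaded && srv[i].weight > 0) {
			*cursor = i;	/* remember position, like sched_data */
			return (int)i;
		}
	}
	return -1;		/* "no destination available" */
}
#endif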
// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2009 Rafael J. Wysocki <rjw@sisk.pl>, Novell Inc.
 *
 * This file contains power management functions related to interrupts.
 */

#include <linux/irq.h>
#include <linux/module.h>
#include <linux/interrupt.h>
#include <linux/suspend.h>
#include <linux/syscore_ops.h>

#include "internals.h"

bool irq_pm_check_wakeup(struct irq_desc *desc)
{
	if (irqd_is_wakeup_armed(&desc->irq_data)) {
		irqd_clear(&desc->irq_data, IRQD_WAKEUP_ARMED);
		desc->istate |= IRQS_SUSPENDED | IRQS_PENDING;
		desc->depth++;
		irq_disable(desc);
		pm_system_irq_wakeup(irq_desc_get_irq(desc));
		return true;
	}
	return false;
}

/*
 * Called from __setup_irq() with desc->lock held after @action has
 * been installed in the action chain.
 */
void irq_pm_install_action(struct irq_desc *desc, struct irqaction *action)
{
	desc->nr_actions++;

	if (action->flags & IRQF_FORCE_RESUME)
		desc->force_resume_depth++;

	WARN_ON_ONCE(desc->force_resume_depth &&
		     desc->force_resume_depth != desc->nr_actions);

	if (action->flags & IRQF_NO_SUSPEND)
		desc->no_suspend_depth++;
	else if (action->flags & IRQF_COND_SUSPEND)
		desc->cond_suspend_depth++;

	WARN_ON_ONCE(desc->no_suspend_depth &&
		     (desc->no_suspend_depth + desc->cond_suspend_depth) != desc->nr_actions);
}

/*
 * Called from __free_irq() with desc->lock held after @action has
 * been removed from the action chain.
 */
void irq_pm_remove_action(struct irq_desc *desc, struct irqaction *action)
{
	desc->nr_actions--;

	if (action->flags & IRQF_FORCE_RESUME)
		desc->force_resume_depth--;

	if (action->flags & IRQF_NO_SUSPEND)
		desc->no_suspend_depth--;
	else if (action->flags & IRQF_COND_SUSPEND)
		desc->cond_suspend_depth--;
}

static bool suspend_device_irq(struct irq_desc *desc)
{
	unsigned long chipflags = irq_desc_get_chip(desc)->flags;
	struct irq_data *irqd = &desc->irq_data;

	if (!desc->action || irq_desc_is_chained(desc) ||
	    desc->no_suspend_depth)
		return false;

	if (irqd_is_wakeup_set(irqd)) {
		irqd_set(irqd, IRQD_WAKEUP_ARMED);

		if ((chipflags & IRQCHIP_ENABLE_WAKEUP_ON_SUSPEND) &&
		    irqd_irq_disabled(irqd)) {
			/*
			 * Interrupt marked for wakeup is in disabled state.
			 * Enable interrupt here to unmask/enable in irqchip
			 * to be able to resume with such interrupts.
			 */
			__enable_irq(desc);
			irqd_set(irqd, IRQD_IRQ_ENABLED_ON_SUSPEND);
		}
		/*
		 * We return true here to force the caller to issue
		 * synchronize_irq(). We need to make sure that the
		 * IRQD_WAKEUP_ARMED is visible before we return from
		 * suspend_device_irqs().
		 */
		return true;
	}

	desc->istate |= IRQS_SUSPENDED;
	__disable_irq(desc);

	/*
	 * Hardware which has no wakeup source configuration facility
	 * requires that the non wakeup interrupts are masked at the
	 * chip level. The chip implementation indicates that with
	 * IRQCHIP_MASK_ON_SUSPEND.
	 */
	if (chipflags & IRQCHIP_MASK_ON_SUSPEND)
		mask_irq(desc);

	return true;
}
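/*
 * Illustrative sketch (not part of the original file): how a driver's
 * request_irq() flags feed the accounting above.  IRQF_NO_SUSPEND keeps
 * the line enabled across suspend_device_irqs(); enable_irq_wake() marks
 * the line as a wakeup source so suspend_device_irq() arms it instead of
 * disabling it.  The device and handler names are hypothetical.
 */
#if 0
static irqreturn_t my_handler(int irq, void *dev_id)
{
	return IRQ_HANDLED;
}

static int my_probe_irqs(unsigned int timer_irq, unsigned int wake_irq,
			 void *dev)
{
	int ret;

	/* Stays enabled during suspend; must not also be a wakeup source. */
	ret = request_irq(timer_irq, my_handler, IRQF_NO_SUSPEND,
			  "my-timer", dev);
	if (ret)
		return ret;

	/* Normal line, but armed for system wakeup while suspended. */
	ret = request_irq(wake_irq, my_handler, 0, "my-wake", dev);
	if (ret)
		return ret;
	return enable_irq_wake(wake_irq);
}
#endif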
/**
 * suspend_device_irqs - disable all currently enabled interrupt lines
 *
 * During system-wide suspend or hibernation device drivers need to be
 * prevented from receiving interrupts and this function is provided
 * for this purpose.
 *
 * So we disable all interrupts and mark them IRQS_SUSPENDED except
 * for those which are unused, those which are marked as not
 * suspendable via an interrupt request with the flag IRQF_NO_SUSPEND
 * set and those which are marked as active wakeup sources.
 *
 * The active wakeup sources are handled by the flow handler entry
 * code which checks for the IRQD_WAKEUP_ARMED flag, suspends the
 * interrupt and notifies the pm core about the wakeup.
 */
void suspend_device_irqs(void)
{
	struct irq_desc *desc;
	int irq;

	for_each_irq_desc(irq, desc) {
		unsigned long flags;
		bool sync;

		if (irq_settings_is_nested_thread(desc))
			continue;
		raw_spin_lock_irqsave(&desc->lock, flags);
		sync = suspend_device_irq(desc);
		raw_spin_unlock_irqrestore(&desc->lock, flags);

		if (sync)
			synchronize_irq(irq);
	}
}

static void resume_irq(struct irq_desc *desc)
{
	struct irq_data *irqd = &desc->irq_data;

	irqd_clear(irqd, IRQD_WAKEUP_ARMED);

	if (irqd_is_enabled_on_suspend(irqd)) {
		/*
		 * Interrupt marked for wakeup was enabled during suspend
		 * entry. Disable such interrupts to restore them back to
		 * original state.
		 */
		__disable_irq(desc);
		irqd_clear(irqd, IRQD_IRQ_ENABLED_ON_SUSPEND);
	}

	if (desc->istate & IRQS_SUSPENDED)
		goto resume;

	/* Force resume the interrupt? */
	if (!desc->force_resume_depth)
		return;

	/* Pretend that it got disabled! */
	desc->depth++;
	irq_state_set_disabled(desc);
	irq_state_set_masked(desc);
resume:
	desc->istate &= ~IRQS_SUSPENDED;
	__enable_irq(desc);
}

static void resume_irqs(bool want_early)
{
	struct irq_desc *desc;
	int irq;

	for_each_irq_desc(irq, desc) {
		unsigned long flags;
		bool is_early = desc->action &&
			desc->action->flags & IRQF_EARLY_RESUME;

		if (!is_early && want_early)
			continue;
		if (irq_settings_is_nested_thread(desc))
			continue;

		raw_spin_lock_irqsave(&desc->lock, flags);
		resume_irq(desc);
		raw_spin_unlock_irqrestore(&desc->lock, flags);
	}
}

/**
 * rearm_wake_irq - rearm a wakeup interrupt line after signaling wakeup
 * @irq: Interrupt to rearm
 */
void rearm_wake_irq(unsigned int irq)
{
	unsigned long flags;
	struct irq_desc *desc = irq_get_desc_buslock(irq, &flags, IRQ_GET_DESC_CHECK_GLOBAL);

	if (!desc)
		return;

	if (!(desc->istate & IRQS_SUSPENDED) ||
	    !irqd_is_wakeup_set(&desc->irq_data))
		goto unlock;

	desc->istate &= ~IRQS_SUSPENDED;
	irqd_set(&desc->irq_data, IRQD_WAKEUP_ARMED);
	__enable_irq(desc);

unlock:
	irq_put_desc_busunlock(desc, flags);
}
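/*
 * Illustrative sketch (not part of the original file): the resume side
 * runs in two phases.  Lines whose first action carries IRQF_EARLY_RESUME
 * are re-enabled from syscore resume, everything else waits for
 * resume_device_irqs().  A minimal model of that filtering over
 * hypothetical descriptors:
 */
#if 0
struct fake_desc {
	unsigned long flags;	/* e.g. IRQF_EARLY_RESUME */
	int suspended;
};

static void fake_resume_irqs(struct fake_desc *d, int n, int want_early)
{
	for (int i = 0; i < n; i++) {
		int is_early = d[i].flags & IRQF_EARLY_RESUME;

		if (!is_early && want_early)
			continue;	/* early pass skips normal lines */
		d[i].suspended = 0;	/* normal pass visits every line */
	}
}
#endif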
/**
 * irq_pm_syscore_resume - enable interrupt lines early
 *
 * Enable all interrupt lines with %IRQF_EARLY_RESUME set.
 */
static void irq_pm_syscore_resume(void)
{
	resume_irqs(true);
}

static struct syscore_ops irq_pm_syscore_ops = {
	.resume		= irq_pm_syscore_resume,
};

static int __init irq_pm_init_ops(void)
{
	register_syscore_ops(&irq_pm_syscore_ops);
	return 0;
}

device_initcall(irq_pm_init_ops);

/**
 * resume_device_irqs - enable interrupt lines disabled by suspend_device_irqs()
 *
 * Enable all non-%IRQF_EARLY_RESUME interrupt lines previously
 * disabled by suspend_device_irqs() that have the IRQS_SUSPENDED flag
 * set as well as those with %IRQF_FORCE_RESUME.
 */
void resume_device_irqs(void)
{
	resume_irqs(false);
}
// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) Sistina Software, Inc.  1997-2003 All rights reserved.
 * Copyright (C) 2004-2006 Red Hat, Inc.  All rights reserved.
 */

#include <linux/spinlock.h>
#include <linux/completion.h>
#include <linux/buffer_head.h>
#include <linux/blkdev.h>
#include <linux/gfs2_ondisk.h>
#include <linux/crc32.h>
#include <linux/iomap.h>
#include <linux/ktime.h>

#include "gfs2.h"
#include "incore.h"
#include "bmap.h"
#include "glock.h"
#include "inode.h"
#include "meta_io.h"
#include "quota.h"
#include "rgrp.h"
#include "log.h"
#include "super.h"
#include "trans.h"
#include "dir.h"
#include "util.h"
#include "aops.h"
#include "trace_gfs2.h"

/* This doesn't need to be that large as max 64 bit pointers in a 4k
 * block is 512, so __u16 is fine for that. It saves stack space to
 * keep it small.
 */
struct metapath {
	struct buffer_head *mp_bh[GFS2_MAX_META_HEIGHT];
	__u16 mp_list[GFS2_MAX_META_HEIGHT];
	int mp_fheight; /* find_metapath height */
	int mp_aheight; /* actual height (lookup height) */
};

static int punch_hole(struct gfs2_inode *ip, u64 offset, u64 length);
/**
 * gfs2_unstuffer_folio - unstuff a stuffed inode into a block cached by a folio
 * @ip: the inode
 * @dibh: the dinode buffer
 * @block: the block number that was allocated
 * @folio: The folio.
 *
 * Returns: errno
 */
static int gfs2_unstuffer_folio(struct gfs2_inode *ip, struct buffer_head *dibh,
			       u64 block, struct folio *folio)
{
	struct inode *inode = &ip->i_inode;

	if (!folio_test_uptodate(folio)) {
		void *kaddr = kmap_local_folio(folio, 0);
		u64 dsize = i_size_read(inode);

		memcpy(kaddr, dibh->b_data + sizeof(struct gfs2_dinode), dsize);
		memset(kaddr + dsize, 0, folio_size(folio) - dsize);
		kunmap_local(kaddr);

		folio_mark_uptodate(folio);
	}

	if (gfs2_is_jdata(ip)) {
		struct buffer_head *bh = folio_buffers(folio);

		if (!bh)
			bh = create_empty_buffers(folio,
				BIT(inode->i_blkbits), BIT(BH_Uptodate));

		if (!buffer_mapped(bh))
			map_bh(bh, inode->i_sb, block);

		set_buffer_uptodate(bh);
		gfs2_trans_add_data(ip->i_gl, bh);
	} else {
		folio_mark_dirty(folio);
		gfs2_ordered_add_inode(ip);
	}

	return 0;
}

static int __gfs2_unstuff_inode(struct gfs2_inode *ip, struct folio *folio)
{
	struct buffer_head *bh, *dibh;
	struct gfs2_dinode *di;
	u64 block = 0;
	int isdir = gfs2_is_dir(ip);
	int error;

	error = gfs2_meta_inode_buffer(ip, &dibh);
	if (error)
		return error;

	if (i_size_read(&ip->i_inode)) {
		/* Get a free block, fill it with the stuffed data,
		   and write it out to disk */

		unsigned int n = 1;
		error = gfs2_alloc_blocks(ip, &block, &n, 0);
		if (error)
			goto out_brelse;
		if (isdir) {
			gfs2_trans_remove_revoke(GFS2_SB(&ip->i_inode), block, 1);
			error = gfs2_dir_get_new_buffer(ip, block, &bh);
			if (error)
				goto out_brelse;
			gfs2_buffer_copy_tail(bh, sizeof(struct gfs2_meta_header),
					      dibh, sizeof(struct gfs2_dinode));
			brelse(bh);
		} else {
			error = gfs2_unstuffer_folio(ip, dibh, block, folio);
			if (error)
				goto out_brelse;
		}
	}

	/*  Set up the pointer to the new block  */

	gfs2_trans_add_meta(ip->i_gl, dibh);
	di = (struct gfs2_dinode *)dibh->b_data;
	gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode));

	if (i_size_read(&ip->i_inode)) {
		*(__be64 *)(di + 1) = cpu_to_be64(block);
		gfs2_add_inode_blocks(&ip->i_inode, 1);
		di->di_blocks = cpu_to_be64(gfs2_get_inode_blocks(&ip->i_inode));
	}

	ip->i_height = 1;
	di->di_height = cpu_to_be16(1);

out_brelse:
	brelse(dibh);
	return error;
}

/**
 * gfs2_unstuff_dinode - Unstuff a dinode when the data has grown too big
 * @ip: The GFS2 inode to unstuff
 *
 * This routine unstuffs a dinode and returns it to a "normal" state such
 * that the height can be grown in the traditional way.
 *
 * Returns: errno
 */
int gfs2_unstuff_dinode(struct gfs2_inode *ip)
{
	struct inode *inode = &ip->i_inode;
	struct folio *folio;
	int error;

	down_write(&ip->i_rw_mutex);
	folio = filemap_grab_folio(inode->i_mapping, 0);
	error = PTR_ERR(folio);
	if (IS_ERR(folio))
		goto out;
	error = __gfs2_unstuff_inode(ip, folio);
	folio_unlock(folio);
	folio_put(folio);
out:
	up_write(&ip->i_rw_mutex);
	return error;
}
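/*
 * Illustrative note (not part of the original file): a "stuffed" inode
 * keeps its data in the dinode block itself, directly after the on-disk
 * dinode header.  With a hypothetical 4096-byte block, and assuming
 * sizeof(struct gfs2_dinode) == 232, the stuffed capacity works out to
 * 4096 - 232 = 3864 bytes; the first write past that point triggers the
 * unstuffing above.
 */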
/**
 * find_metapath - Find path through the metadata tree
 * @sdp: The superblock
 * @block: The disk block to look up
 * @mp: The metapath to return the result in
 * @height: The pre-calculated height of the metadata tree
 *
 *   This routine returns a struct metapath structure that defines a path
 *   through the metadata of inode "ip" to get to block "block".
 *
 *   Example:
 *   Given:  "ip" is a height 3 file, "offset" is 101342453, and this is a
 *   filesystem with a blocksize of 4096.
 *
 *   find_metapath() would return a struct metapath structure set to:
 *   mp_fheight = 3, mp_list[0] = 0, mp_list[1] = 48, and mp_list[2] = 165.
 *
 *   That means that in order to get to the block containing the byte at
 *   offset 101342453, we would load the indirect block pointed to by pointer
 *   0 in the dinode.  We would then load the indirect block pointed to by
 *   pointer 48 in that indirect block.  We would then load the data block
 *   pointed to by pointer 165 in that indirect block.
 *
 *   ----------------------------------------
 *   | Dinode |                             |
 *   |        |                            4|
 *   |        |0 1 2 3 4 5                 9|
 *   |        |                            6|
 *   ----------------------------------------
 *                |
 *                |
 *                V
 *   ----------------------------------------
 *   | Indirect Block                       |
 *   |                                     5|
 *   |            4 4 4 4 4 5 5           1|
 *   |0           5 6 7 8 9 0 1           2|
 *   ----------------------------------------
 *                |
 *                |
 *                V
 *   ----------------------------------------
 *   | Indirect Block                       |
 *   |                 1 1 1 1 1           5|
 *   |                 6 6 6 6 6           1|
 *   |0                3 4 5 6 7           2|
 *   ----------------------------------------
 *                |
 *                |
 *                V
 *   ----------------------------------------
 *   | Data block containing offset        |
 *   |            101342453                |
 *   |                                     |
 *   |                                     |
 *   ----------------------------------------
 */
static void find_metapath(const struct gfs2_sbd *sdp, u64 block,
			  struct metapath *mp, unsigned int height)
{
	unsigned int i;

	mp->mp_fheight = height;
	for (i = height; i--;)
		mp->mp_list[i] = do_div(block, sdp->sd_inptrs);
}
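/*
 * Illustrative sketch (not part of the original file): the decomposition
 * behind the example above, done in plain C.  With 4096-byte blocks there
 * are 512 pointers per indirect block (sd_inptrs in the real code; the
 * constant here is an assumption for the sketch).  Byte offset 101342453
 * falls in logical block 101342453 / 4096 = 24741, and peeling off the
 * remainders bottom-up yields mp_list = {0, 48, 165}:
 */
#if 0
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t block = 101342453ULL / 4096;	/* 24741 */
	const uint64_t inptrs = 512;		/* pointers per indirect block */
	unsigned int height = 3, i;
	uint16_t list[3];

	for (i = height; i--;) {	/* same loop shape as find_metapath() */
		list[i] = block % inptrs;
		block /= inptrs;
	}
	printf("%u %u %u\n", list[0], list[1], list[2]);	/* 0 48 165 */
	return 0;
}
#endif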
static inline unsigned int metapath_branch_start(const struct metapath *mp)
{
	if (mp->mp_list[0] == 0)
		return 2;
	return 1;
}

/**
 * metaptr1 - Return the first possible metadata pointer in a metapath buffer
 * @height: The metadata height (0 = dinode)
 * @mp: The metapath
 */
static inline __be64 *metaptr1(unsigned int height, const struct metapath *mp)
{
	struct buffer_head *bh = mp->mp_bh[height];
	if (height == 0)
		return ((__be64 *)(bh->b_data + sizeof(struct gfs2_dinode)));
	return ((__be64 *)(bh->b_data + sizeof(struct gfs2_meta_header)));
}

/**
 * metapointer - Return pointer to start of metadata in a buffer
 * @height: The metadata height (0 = dinode)
 * @mp: The metapath
 *
 * Return a pointer to the block number of the next height of the metadata
 * tree given a buffer containing the pointer to the current height of the
 * metadata tree.
 */
static inline __be64 *metapointer(unsigned int height, const struct metapath *mp)
{
	__be64 *p = metaptr1(height, mp);
	return p + mp->mp_list[height];
}

static inline const __be64 *metaend(unsigned int height, const struct metapath *mp)
{
	const struct buffer_head *bh = mp->mp_bh[height];
	return (const __be64 *)(bh->b_data + bh->b_size);
}

static void clone_metapath(struct metapath *clone, struct metapath *mp)
{
	unsigned int hgt;

	*clone = *mp;
	for (hgt = 0; hgt < mp->mp_aheight; hgt++)
		get_bh(clone->mp_bh[hgt]);
}

static void gfs2_metapath_ra(struct gfs2_glock *gl, __be64 *start, __be64 *end)
{
	const __be64 *t;

	for (t = start; t < end; t++) {
		struct buffer_head *rabh;

		if (!*t)
			continue;

		rabh = gfs2_getbuf(gl, be64_to_cpu(*t), CREATE);
		if (trylock_buffer(rabh)) {
			if (!buffer_uptodate(rabh)) {
				rabh->b_end_io = end_buffer_read_sync;
				submit_bh(REQ_OP_READ | REQ_RAHEAD | REQ_META |
					  REQ_PRIO, rabh);
				continue;
			}
			unlock_buffer(rabh);
		}
		brelse(rabh);
	}
}

static inline struct buffer_head *
metapath_dibh(struct metapath *mp)
{
	return mp->mp_bh[0];
}

static int __fillup_metapath(struct gfs2_inode *ip, struct metapath *mp,
			     unsigned int x, unsigned int h)
{
	for (; x < h; x++) {
		__be64 *ptr = metapointer(x, mp);
		u64 dblock = be64_to_cpu(*ptr);
		int ret;

		if (!dblock)
			break;
		ret = gfs2_meta_buffer(ip, GFS2_METATYPE_IN, dblock, &mp->mp_bh[x + 1]);
		if (ret)
			return ret;
	}
	mp->mp_aheight = x + 1;
	return 0;
}

/**
 * lookup_metapath - Walk the metadata tree to a specific point
 * @ip: The inode
 * @mp: The metapath
 *
 * Assumes that the inode's buffer has already been looked up and
 * hooked onto mp->mp_bh[0] and that the metapath has been initialised
 * by find_metapath().
 *
 * If this function encounters part of the tree which has not been
 * allocated, it returns the current height of the tree at the point
 * at which it found the unallocated block. Blocks which are found are
 * added to the mp->mp_bh[] list.
 *
 * Returns: error
 */
static int lookup_metapath(struct gfs2_inode *ip, struct metapath *mp)
{
	return __fillup_metapath(ip, mp, 0, ip->i_height - 1);
}

/**
 * fillup_metapath - fill up buffers for the metadata path to a specific height
 * @ip: The inode
 * @mp: The metapath
 * @h: The height to which it should be mapped
 *
 * Similar to lookup_metapath, but does lookups for a range of heights
 *
 * Returns: error or the number of buffers filled
 */
static int fillup_metapath(struct gfs2_inode *ip, struct metapath *mp, int h)
{
	unsigned int x = 0;
	int ret;

	if (h) {
		/* find the first buffer we need to look up. */
		for (x = h - 1; x > 0; x--) {
			if (mp->mp_bh[x])
				break;
		}
	}
	ret = __fillup_metapath(ip, mp, x, h);
	if (ret)
		return ret;
	return mp->mp_aheight - x - 1;
}
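/*
 * Illustrative note (not part of the original file): after
 * lookup_metapath(), mp_aheight records how far the walk actually got.
 * mp_aheight == ip->i_height means every level is allocated and
 * mp_bh[i_height - 1] holds the buffer containing the data pointer;
 * mp_aheight < ip->i_height means an unallocated (hole) region was hit
 * at that level, which is the condition __gfs2_iomap_get below uses to
 * branch to do_alloc.
 */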
static sector_t metapath_to_block(struct gfs2_sbd *sdp, struct metapath *mp)
{
	sector_t factor = 1, block = 0;
	int hgt;

	for (hgt = mp->mp_fheight - 1; hgt >= 0; hgt--) {
		if (hgt < mp->mp_aheight)
			block += mp->mp_list[hgt] * factor;
		factor *= sdp->sd_inptrs;
	}
	return block;
}

static void release_metapath(struct metapath *mp)
{
	int i;

	for (i = 0; i < GFS2_MAX_META_HEIGHT; i++) {
		if (mp->mp_bh[i] == NULL)
			break;
		brelse(mp->mp_bh[i]);
		mp->mp_bh[i] = NULL;
	}
}

/**
 * gfs2_extent_length - Returns length of an extent of blocks
 * @bh: The metadata block
 * @ptr: Current position in @bh
 * @eob: Set to 1 if we hit "end of block"
 *
 * Returns: The length of the extent (minimum of one block)
 */
static inline unsigned int gfs2_extent_length(struct buffer_head *bh, __be64 *ptr, int *eob)
{
	const __be64 *end = (__be64 *)(bh->b_data + bh->b_size);
	const __be64 *first = ptr;
	u64 d = be64_to_cpu(*ptr);

	*eob = 0;
	do {
		ptr++;
		if (ptr >= end)
			break;
		d++;
	} while(be64_to_cpu(*ptr) == d);
	if (ptr >= end)
		*eob = 1;
	return ptr - first;
}
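/*
 * Illustrative note (not part of the original file): gfs2_extent_length()
 * counts how many on-disk pointers in a row are consecutive.  For a
 * pointer array holding (in decimal) 100, 101, 102, 105, ... with @ptr at
 * the first entry, d starts at 100 and matches 101 and 102, so the
 * function returns 3 with *eob == 0; the mapping then covers three
 * physically contiguous blocks in a single iomap extent.
 */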
enum walker_status { WALK_STOP, WALK_FOLLOW, WALK_CONTINUE };

/*
 * gfs2_metadata_walker - walk an indirect block
 * @mp: Metapath to indirect block
 * @ptrs: Number of pointers to look at
 *
 * When returning WALK_FOLLOW, the walker must update @mp to point at the right
 * indirect block to follow.
 */
typedef enum walker_status (*gfs2_metadata_walker)(struct metapath *mp,
						   unsigned int ptrs);

/*
 * gfs2_walk_metadata - walk a tree of indirect blocks
 * @inode: The inode
 * @mp: Starting point of walk
 * @max_len: Maximum number of blocks to walk
 * @walker: Called during the walk
 *
 * Returns 1 if the walk was stopped by @walker, 0 if we went past @max_len or
 * past the end of metadata, and a negative error code otherwise.
 */
static int gfs2_walk_metadata(struct inode *inode, struct metapath *mp,
		u64 max_len, gfs2_metadata_walker walker)
{
	struct gfs2_inode *ip = GFS2_I(inode);
	struct gfs2_sbd *sdp = GFS2_SB(inode);
	u64 factor = 1;
	unsigned int hgt;
	int ret;

	/*
	 * The walk starts in the lowest allocated indirect block, which may be
	 * before the position indicated by @mp.  Adjust @max_len accordingly
	 * to avoid a short walk.
	 */
	for (hgt = mp->mp_fheight - 1; hgt >= mp->mp_aheight; hgt--) {
		max_len += mp->mp_list[hgt] * factor;
		mp->mp_list[hgt] = 0;
		factor *= sdp->sd_inptrs;
	}

	for (;;) {
		u16 start = mp->mp_list[hgt];
		enum walker_status status;
		unsigned int ptrs;
		u64 len;

		/* Walk indirect block. */
		ptrs = (hgt >= 1 ? sdp->sd_inptrs : sdp->sd_diptrs) - start;
		len = ptrs * factor;
		if (len > max_len)
			ptrs = DIV_ROUND_UP_ULL(max_len, factor);
		status = walker(mp, ptrs);
		switch (status) {
		case WALK_STOP:
			return 1;
		case WALK_FOLLOW:
			BUG_ON(mp->mp_aheight == mp->mp_fheight);
			ptrs = mp->mp_list[hgt] - start;
			len = ptrs * factor;
			break;
		case WALK_CONTINUE:
			break;
		}
		if (len >= max_len)
			break;
		max_len -= len;
		if (status == WALK_FOLLOW)
			goto fill_up_metapath;

lower_metapath:
		/* Decrease height of metapath. */
		brelse(mp->mp_bh[hgt]);
		mp->mp_bh[hgt] = NULL;
		mp->mp_list[hgt] = 0;
		if (!hgt)
			break;
		hgt--;
		factor *= sdp->sd_inptrs;

		/* Advance in metadata tree. */
		(mp->mp_list[hgt])++;
		if (hgt) {
			if (mp->mp_list[hgt] >= sdp->sd_inptrs)
				goto lower_metapath;
		} else {
			if (mp->mp_list[hgt] >= sdp->sd_diptrs)
				break;
		}

fill_up_metapath:
		/* Increase height of metapath. */
		ret = fillup_metapath(ip, mp, ip->i_height - 1);
		if (ret < 0)
			return ret;
		hgt += ret;
		for (; ret; ret--)
			do_div(factor, sdp->sd_inptrs);
		mp->mp_aheight = hgt + 1;
	}
	return 0;
}

static enum walker_status gfs2_hole_walker(struct metapath *mp,
					   unsigned int ptrs)
{
	const __be64 *start, *ptr, *end;
	unsigned int hgt;

	hgt = mp->mp_aheight - 1;
	start = metapointer(hgt, mp);
	end = start + ptrs;

	for (ptr = start; ptr < end; ptr++) {
		if (*ptr) {
			mp->mp_list[hgt] += ptr - start;
			if (mp->mp_aheight == mp->mp_fheight)
				return WALK_STOP;
			return WALK_FOLLOW;
		}
	}
	return WALK_CONTINUE;
}

/**
 * gfs2_hole_size - figure out the size of a hole
 * @inode: The inode
 * @lblock: The logical starting block number
 * @len: How far to look (in blocks)
 * @mp: The metapath at lblock
 * @iomap: The iomap to store the hole size in
 *
 * This function modifies @mp.
 *
 * Returns: errno on error
 */
static int gfs2_hole_size(struct inode *inode, sector_t lblock, u64 len,
			  struct metapath *mp, struct iomap *iomap)
{
	struct metapath clone;
	u64 hole_size;
	int ret;

	clone_metapath(&clone, mp);
	ret = gfs2_walk_metadata(inode, &clone, len, gfs2_hole_walker);
	if (ret < 0)
		goto out;

	if (ret == 1)
		hole_size = metapath_to_block(GFS2_SB(inode), &clone) - lblock;
	else
		hole_size = len;
	iomap->length = hole_size << inode->i_blkbits;
	ret = 0;

out:
	release_metapath(&clone);
	return ret;
}

static inline void gfs2_indirect_init(struct metapath *mp,
				      struct gfs2_glock *gl, unsigned int i,
				      unsigned offset, u64 bn)
{
	__be64 *ptr = (__be64 *)(mp->mp_bh[i - 1]->b_data +
		       ((i > 1) ? sizeof(struct gfs2_meta_header) :
				 sizeof(struct gfs2_dinode)));
	BUG_ON(i < 1);
	BUG_ON(mp->mp_bh[i] != NULL);
	mp->mp_bh[i] = gfs2_meta_new(gl, bn);
	gfs2_trans_add_meta(gl, mp->mp_bh[i]);
	gfs2_metatype_set(mp->mp_bh[i], GFS2_METATYPE_IN, GFS2_FORMAT_IN);
	gfs2_buffer_clear_tail(mp->mp_bh[i], sizeof(struct gfs2_meta_header));
	ptr += offset;
	*ptr = cpu_to_be64(bn);
}

enum alloc_state {
	ALLOC_DATA = 0,
	ALLOC_GROW_DEPTH = 1,
	ALLOC_GROW_HEIGHT = 2,
	/* ALLOC_UNSTUFF = 3,   TBD and rather complicated */
};
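/*
 * Illustrative note (not part of the original file): the allocation state
 * machine in __gfs2_iomap_alloc below only ever moves downwards through
 * these states:
 *
 *   ALLOC_GROW_HEIGHT  ->  ALLOC_GROW_DEPTH  ->  ALLOC_DATA
 *
 * first growing the tree height (new top-level indirect blocks), then
 * branching down to the target height, then filling in the data block
 * pointers.  A request whose bottom indirect block already exists enters
 * directly at ALLOC_DATA.
 */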
/**
 * __gfs2_iomap_alloc - Build a metadata tree of the requested height
 * @inode: The GFS2 inode
 * @iomap: The iomap structure
 * @mp: The metapath, with proper height information calculated
 *
 * In this routine we may have to alloc:
 *   i) Indirect blocks to grow the metadata tree height
 *  ii) Indirect blocks to fill in lower part of the metadata tree
 * iii) Data blocks
 *
 * This function is called after __gfs2_iomap_get, which works out the
 * total number of blocks which we need via gfs2_alloc_size.
 *
 * We then do the actual allocation asking for an extent at a time (if
 * enough contiguous free blocks are available, there will only be one
 * allocation request per call) and use the state machine to initialise
 * the blocks in order.
 *
 * Right now, this function will allocate at most one indirect block
 * worth of data -- with a default block size of 4K, that's slightly
 * less than 2M.  If this limitation is ever removed to allow huge
 * allocations, we would probably still want to limit the iomap size we
 * return to avoid stalling other tasks during huge writes; the next
 * iomap iteration would then find the blocks already allocated.
 *
 * Returns: errno on error
 */
static int __gfs2_iomap_alloc(struct inode *inode, struct iomap *iomap,
			      struct metapath *mp)
{
	struct gfs2_inode *ip = GFS2_I(inode);
	struct gfs2_sbd *sdp = GFS2_SB(inode);
	struct buffer_head *dibh = metapath_dibh(mp);
	u64 bn;
	unsigned n, i, blks, alloced = 0, iblks = 0, branch_start = 0;
	size_t dblks = iomap->length >> inode->i_blkbits;
	const unsigned end_of_metadata = mp->mp_fheight - 1;
	int ret;
	enum alloc_state state;
	__be64 *ptr;
	__be64 zero_bn = 0;

	BUG_ON(mp->mp_aheight < 1);
	BUG_ON(dibh == NULL);
	BUG_ON(dblks < 1);

	gfs2_trans_add_meta(ip->i_gl, dibh);

	down_write(&ip->i_rw_mutex);

	if (mp->mp_fheight == mp->mp_aheight) {
		/* Bottom indirect block exists */
		state = ALLOC_DATA;
	} else {
		/* Need to allocate indirect blocks */
		if (mp->mp_fheight == ip->i_height) {
			/* Writing into existing tree, extend tree down */
			iblks = mp->mp_fheight - mp->mp_aheight;
			state = ALLOC_GROW_DEPTH;
		} else {
			/* Building up tree height */
			state = ALLOC_GROW_HEIGHT;
			iblks = mp->mp_fheight - ip->i_height;
			branch_start = metapath_branch_start(mp);
			iblks += (mp->mp_fheight - branch_start);
		}
	}

	/* start of the second part of the function (state machine) */

	blks = dblks + iblks;
	i = mp->mp_aheight;
	do {
		n = blks - alloced;
		ret = gfs2_alloc_blocks(ip, &bn, &n, 0);
		if (ret)
			goto out;
		alloced += n;
		if (state != ALLOC_DATA || gfs2_is_jdata(ip))
			gfs2_trans_remove_revoke(sdp, bn, n);
		switch (state) {
		/* Growing height of tree */
		case ALLOC_GROW_HEIGHT:
			if (i == 1) {
				ptr = (__be64 *)(dibh->b_data +
						 sizeof(struct gfs2_dinode));
				zero_bn = *ptr;
			}
			for (; i - 1 < mp->mp_fheight - ip->i_height && n > 0;
			     i++, n--)
				gfs2_indirect_init(mp, ip->i_gl, i, 0, bn++);
			if (i - 1 == mp->mp_fheight - ip->i_height) {
				i--;
				gfs2_buffer_copy_tail(mp->mp_bh[i],
						sizeof(struct gfs2_meta_header),
						dibh, sizeof(struct gfs2_dinode));
				gfs2_buffer_clear_tail(dibh,
						sizeof(struct gfs2_dinode) +
						sizeof(__be64));
				ptr = (__be64 *)(mp->mp_bh[i]->b_data +
					sizeof(struct gfs2_meta_header));
				*ptr = zero_bn;
				state = ALLOC_GROW_DEPTH;
				for (i = branch_start; i < mp->mp_fheight; i++) {
					if (mp->mp_bh[i] == NULL)
						break;
					brelse(mp->mp_bh[i]);
					mp->mp_bh[i] = NULL;
				}
				i = branch_start;
			}
			if (n == 0)
				break;
			fallthrough;	/* To branching from existing tree */
		case ALLOC_GROW_DEPTH:
			if (i > 1 && i < mp->mp_fheight)
				gfs2_trans_add_meta(ip->i_gl, mp->mp_bh[i-1]);
			for (; i < mp->mp_fheight && n > 0; i++, n--)
				gfs2_indirect_init(mp, ip->i_gl, i,
						   mp->mp_list[i-1], bn++);
			if (i == mp->mp_fheight)
				state = ALLOC_DATA;
			if (n == 0)
				break;
			fallthrough;	/* To tree complete, adding data blocks */
		case ALLOC_DATA:
			BUG_ON(n > dblks);
			BUG_ON(mp->mp_bh[end_of_metadata] == NULL);
			gfs2_trans_add_meta(ip->i_gl, mp->mp_bh[end_of_metadata]);
			dblks = n;
			ptr = metapointer(end_of_metadata, mp);
			iomap->addr = bn << inode->i_blkbits;
			iomap->flags |= IOMAP_F_MERGED | IOMAP_F_NEW;
			while (n-- > 0)
				*ptr++ = cpu_to_be64(bn++);
			break;
		}
	} while (iomap->addr == IOMAP_NULL_ADDR);

	iomap->type = IOMAP_MAPPED;
	iomap->length = (u64)dblks << inode->i_blkbits;
	ip->i_height = mp->mp_fheight;
	gfs2_add_inode_blocks(&ip->i_inode, alloced);
	gfs2_dinode_out(ip, dibh->b_data);
out:
	up_write(&ip->i_rw_mutex);
	return ret;
}
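/*
 * Illustrative arithmetic (not part of the original file): the "slightly
 * less than 2M" figure above.  With 4096-byte blocks, an indirect block
 * holds (4096 - sizeof(struct gfs2_meta_header)) / 8 pointers; assuming a
 * 24-byte header, that is 509 pointers, so one indirect block maps at
 * most 509 * 4096 = 2084864 bytes, just under 2 MiB.
 */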
#define IOMAP_F_GFS2_BOUNDARY IOMAP_F_PRIVATE

/**
 * gfs2_alloc_size - Compute the maximum allocation size
 * @inode: The inode
 * @mp: The metapath
 * @size: Requested size in blocks
 *
 * Compute the maximum size of the next allocation at @mp.
 *
 * Returns: size in blocks
 */
static u64 gfs2_alloc_size(struct inode *inode, struct metapath *mp, u64 size)
{
	struct gfs2_inode *ip = GFS2_I(inode);
	struct gfs2_sbd *sdp = GFS2_SB(inode);
	const __be64 *first, *ptr, *end;

	/*
	 * For writes to stuffed files, this function is called twice via
	 * __gfs2_iomap_get, before and after unstuffing.  The size we return
	 * the first time needs to be large enough to get the reservation and
	 * allocation sizes right.  The size we return the second time must
	 * be exact or else __gfs2_iomap_alloc won't do the right thing.
	 */

	if (gfs2_is_stuffed(ip) || mp->mp_fheight != mp->mp_aheight) {
		unsigned int maxsize = mp->mp_fheight > 1 ?
			sdp->sd_inptrs : sdp->sd_diptrs;
		maxsize -= mp->mp_list[mp->mp_fheight - 1];
		if (size > maxsize)
			size = maxsize;
		return size;
	}

	first = metapointer(ip->i_height - 1, mp);
	end = metaend(ip->i_height - 1, mp);
	if (end - first > size)
		end = first + size;
	for (ptr = first; ptr < end; ptr++) {
		if (*ptr)
			break;
	}
	return ptr - first;
}

/**
 * __gfs2_iomap_get - Map blocks from an inode to disk blocks
 * @inode: The inode
 * @pos: Starting position in bytes
 * @length: Length to map, in bytes
 * @flags: iomap flags
 * @iomap: The iomap structure
 * @mp: The metapath
 *
 * Returns: errno
 */
static int __gfs2_iomap_get(struct inode *inode, loff_t pos, loff_t length,
			    unsigned flags, struct iomap *iomap,
			    struct metapath *mp)
{
	struct gfs2_inode *ip = GFS2_I(inode);
	struct gfs2_sbd *sdp = GFS2_SB(inode);
	loff_t size = i_size_read(inode);
	__be64 *ptr;
	sector_t lblock;
	sector_t lblock_stop;
	int ret;
	int eob;
	u64 len;
	struct buffer_head *dibh = NULL, *bh;
	u8 height;

	if (!length)
		return -EINVAL;

	down_read(&ip->i_rw_mutex);

	ret = gfs2_meta_inode_buffer(ip, &dibh);
	if (ret)
		goto unlock;
	mp->mp_bh[0] = dibh;

	if (gfs2_is_stuffed(ip)) {
		if (flags & IOMAP_WRITE) {
			loff_t max_size = gfs2_max_stuffed_size(ip);

			if (pos + length > max_size)
				goto unstuff;
			iomap->length = max_size;
		} else {
			if (pos >= size) {
				if (flags & IOMAP_REPORT) {
					ret = -ENOENT;
					goto unlock;
				} else {
					iomap->offset = pos;
					iomap->length = length;
					goto hole_found;
				}
			}
			iomap->length = size;
		}
		iomap->addr = (ip->i_no_addr << inode->i_blkbits) +
			      sizeof(struct gfs2_dinode);
		iomap->type = IOMAP_INLINE;
		iomap->inline_data = dibh->b_data + sizeof(struct gfs2_dinode);
		goto out;
	}

unstuff:
	lblock = pos >> inode->i_blkbits;
	iomap->offset = lblock << inode->i_blkbits;
	lblock_stop = (pos + length - 1) >> inode->i_blkbits;
	len = lblock_stop - lblock + 1;
	iomap->length = len << inode->i_blkbits;

	height = ip->i_height;
	while ((lblock + 1) * sdp->sd_sb.sb_bsize > sdp->sd_heightsize[height])
		height++;
	find_metapath(sdp, lblock, mp, height);
	if (height > ip->i_height || gfs2_is_stuffed(ip))
		goto do_alloc;

	ret = lookup_metapath(ip, mp);
	if (ret)
		goto unlock;

	if (mp->mp_aheight != ip->i_height)
		goto do_alloc;

	ptr = metapointer(ip->i_height - 1, mp);
	if (*ptr == 0)
		goto do_alloc;

	bh = mp->mp_bh[ip->i_height - 1];
	len = gfs2_extent_length(bh, ptr, &eob);

	iomap->addr = be64_to_cpu(*ptr) << inode->i_blkbits;
	iomap->length = len << inode->i_blkbits;
	iomap->type = IOMAP_MAPPED;
	iomap->flags |= IOMAP_F_MERGED;
	if (eob)
		iomap->flags |= IOMAP_F_GFS2_BOUNDARY;

out:
	iomap->bdev = inode->i_sb->s_bdev;
unlock:
	up_read(&ip->i_rw_mutex);
	return ret;

do_alloc:
	if (flags & IOMAP_REPORT) {
		if (pos >= size)
			ret = -ENOENT;
		else if (height == ip->i_height)
			ret = gfs2_hole_size(inode, lblock, len, mp, iomap);
		else
			iomap->length = size - iomap->offset;
	} else if (flags & IOMAP_WRITE) {
		u64 alloc_size;

		if (flags & IOMAP_DIRECT)
			goto out;	/* (see gfs2_file_direct_write) */
		len = gfs2_alloc_size(inode, mp, len);
		alloc_size = len << inode->i_blkbits;
		if (alloc_size < iomap->length)
			iomap->length = alloc_size;
	} else {
		if (pos < size && height == ip->i_height)
			ret = gfs2_hole_size(inode, lblock, len, mp, iomap);
	}
hole_found:
	iomap->addr = IOMAP_NULL_ADDR;
	iomap->type = IOMAP_HOLE;
	goto out;
}

static struct folio *
gfs2_iomap_get_folio(struct iomap_iter *iter, loff_t pos, unsigned len)
{
	struct inode *inode = iter->inode;
	unsigned int blockmask = i_blocksize(inode) - 1;
	struct gfs2_sbd *sdp = GFS2_SB(inode);
	unsigned int blocks;
	struct folio *folio;
	int status;

	blocks = ((pos & blockmask) + len + blockmask) >> inode->i_blkbits;
	status = gfs2_trans_begin(sdp, RES_DINODE + blocks, 0);
	if (status)
		return ERR_PTR(status);

	folio = iomap_get_folio(iter, pos, len);
	if (IS_ERR(folio))
		gfs2_trans_end(sdp);
	return folio;
}

static void gfs2_iomap_put_folio(struct inode *inode, loff_t pos,
				 unsigned copied, struct folio *folio)
{
	struct gfs2_trans *tr = current->journal_info;
	struct gfs2_inode *ip = GFS2_I(inode);
	struct gfs2_sbd *sdp = GFS2_SB(inode);

	if (!gfs2_is_stuffed(ip))
		gfs2_trans_add_databufs(ip, folio, offset_in_folio(folio, pos),
					copied);

	folio_unlock(folio);
	folio_put(folio);

	if (tr->tr_num_buf_new)
		__mark_inode_dirty(inode, I_DIRTY_DATASYNC);

	gfs2_trans_end(sdp);
}

static const struct iomap_folio_ops gfs2_iomap_folio_ops = {
	.get_folio = gfs2_iomap_get_folio,
	.put_folio = gfs2_iomap_put_folio,
};

static int gfs2_iomap_begin_write(struct inode *inode, loff_t pos,
				  loff_t length, unsigned flags,
				  struct iomap *iomap,
				  struct metapath *mp)
{
	struct gfs2_inode *ip = GFS2_I(inode);
	struct gfs2_sbd *sdp = GFS2_SB(inode);
	bool unstuff;
	int ret;

	unstuff = gfs2_is_stuffed(ip) &&
		  pos + length > gfs2_max_stuffed_size(ip);

	if (unstuff || iomap->type == IOMAP_HOLE) {
		unsigned int data_blocks, ind_blocks;
		struct gfs2_alloc_parms ap = {};
		unsigned int rblocks;
		struct gfs2_trans *tr;

		gfs2_write_calc_reserv(ip, iomap->length, &data_blocks,
				       &ind_blocks);
		ap.target = data_blocks + ind_blocks;
		ret = gfs2_quota_lock_check(ip, &ap);
		if (ret)
			return ret;

		ret = gfs2_inplace_reserve(ip, &ap);
		if (ret)
			goto out_qunlock;

		rblocks = RES_DINODE + ind_blocks;
		if (gfs2_is_jdata(ip))
			rblocks += data_blocks;
		if (ind_blocks || data_blocks)
			rblocks += RES_STATFS + RES_QUOTA;
		if (inode == sdp->sd_rindex)
			rblocks += 2 * RES_STATFS;
		rblocks += gfs2_rg_blocks(ip, data_blocks + ind_blocks);

		ret = gfs2_trans_begin(sdp, rblocks,
				       iomap->length >> inode->i_blkbits);
		if (ret)
			goto out_trans_fail;

		if (unstuff) {
			ret = gfs2_unstuff_dinode(ip);
			if (ret)
				goto out_trans_end;
			release_metapath(mp);
			ret = __gfs2_iomap_get(inode, iomap->offset,
					       iomap->length, flags, iomap, mp);
			if (ret)
				goto out_trans_end;
		}

		if (iomap->type == IOMAP_HOLE) {
			ret = __gfs2_iomap_alloc(inode, iomap, mp);
			if (ret) {
				gfs2_trans_end(sdp);
				gfs2_inplace_release(ip);
				punch_hole(ip, iomap->offset, iomap->length);
				goto out_qunlock;
			}
		}

		tr = current->journal_info;
		if (tr->tr_num_buf_new)
			__mark_inode_dirty(inode, I_DIRTY_DATASYNC);

		gfs2_trans_end(sdp);
	}

	if (gfs2_is_stuffed(ip) || gfs2_is_jdata(ip))
		iomap->folio_ops = &gfs2_iomap_folio_ops;
	return 0;

out_trans_end:
	gfs2_trans_end(sdp);
out_trans_fail:
	gfs2_inplace_release(ip);
out_qunlock:
	gfs2_quota_unlock(ip);
	return ret;
}
static int gfs2_iomap_begin(struct inode *inode, loff_t pos, loff_t length,
			    unsigned flags, struct iomap *iomap,
			    struct iomap *srcmap)
{
	struct gfs2_inode *ip = GFS2_I(inode);
	struct metapath mp = { .mp_aheight = 1, };
	int ret;

	if (gfs2_is_jdata(ip))
		iomap->flags |= IOMAP_F_BUFFER_HEAD;

	trace_gfs2_iomap_start(ip, pos, length, flags);
	ret = __gfs2_iomap_get(inode, pos, length, flags, iomap, &mp);
	if (ret)
		goto out_unlock;

	switch(flags & (IOMAP_WRITE | IOMAP_ZERO)) {
	case IOMAP_WRITE:
		if (flags & IOMAP_DIRECT) {
			/*
			 * Silently fall back to buffered I/O for stuffed files
			 * or if we've got a hole (see gfs2_file_direct_write).
			 */
			if (iomap->type != IOMAP_MAPPED)
				ret = -ENOTBLK;
			goto out_unlock;
		}
		break;
	case IOMAP_ZERO:
		if (iomap->type == IOMAP_HOLE)
			goto out_unlock;
		break;
	default:
		goto out_unlock;
	}

	ret = gfs2_iomap_begin_write(inode, pos, length, flags, iomap, &mp);

out_unlock:
	release_metapath(&mp);
	trace_gfs2_iomap_end(ip, iomap, ret);
	return ret;
}

static int gfs2_iomap_end(struct inode *inode, loff_t pos, loff_t length,
			  ssize_t written, unsigned flags, struct iomap *iomap)
{
	struct gfs2_inode *ip = GFS2_I(inode);
	struct gfs2_sbd *sdp = GFS2_SB(inode);

	switch (flags & (IOMAP_WRITE | IOMAP_ZERO)) {
	case IOMAP_WRITE:
		if (flags & IOMAP_DIRECT)
			return 0;
		break;
	case IOMAP_ZERO:
		if (iomap->type == IOMAP_HOLE)
			return 0;
		break;
	default:
		return 0;
	}

	if (!gfs2_is_stuffed(ip))
		gfs2_ordered_add_inode(ip);

	if (inode == sdp->sd_rindex)
		adjust_fs_space(inode);

	gfs2_inplace_release(ip);

	if (ip->i_qadata && ip->i_qadata->qa_qd_num)
		gfs2_quota_unlock(ip);

	if (length != written && (iomap->flags & IOMAP_F_NEW)) {
		/* Deallocate blocks that were just allocated. */
		loff_t hstart = round_up(pos + written, i_blocksize(inode));
		loff_t hend = iomap->offset + iomap->length;

		if (hstart < hend) {
			truncate_pagecache_range(inode, hstart, hend - 1);
			punch_hole(ip, hstart, hend - hstart);
		}
	}

	if (unlikely(!written))
		return 0;

	if (iomap->flags & IOMAP_F_SIZE_CHANGED)
		mark_inode_dirty(inode);
	set_bit(GLF_DIRTY, &ip->i_gl->gl_flags);
	return 0;
}

const struct iomap_ops gfs2_iomap_ops = {
	.iomap_begin = gfs2_iomap_begin,
	.iomap_end = gfs2_iomap_end,
};
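/*
 * Illustrative sketch (not part of the original file): gfs2_iomap_ops is
 * what plugs the begin/end callbacks above into the generic iomap
 * machinery.  gfs2_block_zero_range() below is one in-file user; another
 * typical use, sketched here after what gfs2 does elsewhere, is mapping a
 * logical block for the legacy bmap() interface:
 */
#if 0
static sector_t my_bmap(struct address_space *mapping, sector_t lblock)
{
	/* iomap_bmap() drives ->iomap_begin/->iomap_end for one block. */
	return iomap_bmap(mapping, lblock, &gfs2_iomap_ops);
}
#endif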
/**
 * gfs2_block_map - Map one or more blocks of an inode to a disk block
 * @inode: The inode
 * @lblock: The logical block number
 * @bh_map: The bh to be mapped
 * @create: True if it's ok to alloc blocks to satisfy the request
 *
 * The size of the requested mapping is defined in bh_map->b_size.
 *
 * Clears buffer_mapped(bh_map) and leaves bh_map->b_size unchanged
 * when @lblock is not mapped.  Sets buffer_mapped(bh_map) and
 * bh_map->b_size to indicate the size of the mapping when @lblock and
 * successive blocks are mapped, up to the requested size.
 *
 * Sets buffer_boundary() if a read of metadata will be required
 * before the next block can be mapped. Sets buffer_new() if new
 * blocks were allocated.
 *
 * Returns: errno
 */
int gfs2_block_map(struct inode *inode, sector_t lblock,
		   struct buffer_head *bh_map, int create)
{
	struct gfs2_inode *ip = GFS2_I(inode);
	loff_t pos = (loff_t)lblock << inode->i_blkbits;
	loff_t length = bh_map->b_size;
	struct iomap iomap = { };
	int ret;

	clear_buffer_mapped(bh_map);
	clear_buffer_new(bh_map);
	clear_buffer_boundary(bh_map);
	trace_gfs2_bmap(ip, bh_map, lblock, create, 1);

	if (!create)
		ret = gfs2_iomap_get(inode, pos, length, &iomap);
	else
		ret = gfs2_iomap_alloc(inode, pos, length, &iomap);
	if (ret)
		goto out;

	if (iomap.length > bh_map->b_size) {
		iomap.length = bh_map->b_size;
		iomap.flags &= ~IOMAP_F_GFS2_BOUNDARY;
	}
	if (iomap.addr != IOMAP_NULL_ADDR)
		map_bh(bh_map, inode->i_sb, iomap.addr >> inode->i_blkbits);
	bh_map->b_size = iomap.length;
	if (iomap.flags & IOMAP_F_GFS2_BOUNDARY)
		set_buffer_boundary(bh_map);
	if (iomap.flags & IOMAP_F_NEW)
		set_buffer_new(bh_map);

out:
	trace_gfs2_bmap(ip, bh_map, lblock, create, ret);
	return ret;
}

int gfs2_get_extent(struct inode *inode, u64 lblock, u64 *dblock,
		    unsigned int *extlen)
{
	unsigned int blkbits = inode->i_blkbits;
	struct iomap iomap = { };
	unsigned int len;
	int ret;

	ret = gfs2_iomap_get(inode, lblock << blkbits, *extlen << blkbits,
			     &iomap);
	if (ret)
		return ret;
	if (iomap.type != IOMAP_MAPPED)
		return -EIO;
	*dblock = iomap.addr >> blkbits;
	len = iomap.length >> blkbits;
	if (len < *extlen)
		*extlen = len;
	return 0;
}

int gfs2_alloc_extent(struct inode *inode, u64 lblock, u64 *dblock,
		      unsigned int *extlen, bool *new)
{
	unsigned int blkbits = inode->i_blkbits;
	struct iomap iomap = { };
	unsigned int len;
	int ret;

	ret = gfs2_iomap_alloc(inode, lblock << blkbits, *extlen << blkbits,
			       &iomap);
	if (ret)
		return ret;
	if (iomap.type != IOMAP_MAPPED)
		return -EIO;
	*dblock = iomap.addr >> blkbits;
	len = iomap.length >> blkbits;
	if (len < *extlen)
		*extlen = len;
	*new = iomap.flags & IOMAP_F_NEW;
	return 0;
}

/*
 * NOTE: Never call gfs2_block_zero_range with an open transaction because it
 * uses iomap write to perform its actions, which begin their own transactions
 * (iomap_begin, get_folio, etc.)
 */
static int gfs2_block_zero_range(struct inode *inode, loff_t from,
				 unsigned int length)
{
	BUG_ON(current->journal_info);
	return iomap_zero_range(inode, from, length, NULL, &gfs2_iomap_ops);
}

#define GFS2_JTRUNC_REVOKES 8192
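/*
 * Illustrative arithmetic (not part of the original file): with
 * GFS2_JTRUNC_REVOKES == 8192 and a hypothetical 4096-byte filesystem
 * block size, gfs2_journaled_truncate() below caps each truncate
 * transaction at max_chunk = 8192 * 4096 bytes = 32 MiB of page cache,
 * splitting larger truncates into multiple transactions so the revoke
 * count per transaction stays bounded.
 */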
/**
 * gfs2_journaled_truncate - Wrapper for truncate_pagecache for jdata files
 * @inode: The inode being truncated
 * @oldsize: The original (larger) size
 * @newsize: The new smaller size
 *
 * With jdata files, we have to journal a revoke for each block which is
 * truncated. As a result, we need to split this into separate transactions
 * if the number of pages being truncated gets too large.
 */
static int gfs2_journaled_truncate(struct inode *inode, u64 oldsize, u64 newsize)
{
	struct gfs2_sbd *sdp = GFS2_SB(inode);
	u64 max_chunk = GFS2_JTRUNC_REVOKES * sdp->sd_vfs->s_blocksize;
	u64 chunk;
	int error;

	while (oldsize != newsize) {
		struct gfs2_trans *tr;
		unsigned int offs;

		chunk = oldsize - newsize;
		if (chunk > max_chunk)
			chunk = max_chunk;

		offs = oldsize & ~PAGE_MASK;
		if (offs && chunk > PAGE_SIZE)
			chunk = offs + ((chunk - offs) & PAGE_MASK);

		truncate_pagecache(inode, oldsize - chunk);
		oldsize -= chunk;

		tr = current->journal_info;
		if (!test_bit(TR_TOUCHED, &tr->tr_flags))
			continue;

		gfs2_trans_end(sdp);
		error = gfs2_trans_begin(sdp, RES_DINODE, GFS2_JTRUNC_REVOKES);
		if (error)
			return error;
	}

	return 0;
}

static int trunc_start(struct inode *inode, u64 newsize)
{
	struct gfs2_inode *ip = GFS2_I(inode);
	struct gfs2_sbd *sdp = GFS2_SB(inode);
	struct buffer_head *dibh = NULL;
	int journaled = gfs2_is_jdata(ip);
	u64 oldsize = inode->i_size;
	int error;

	if (!gfs2_is_stuffed(ip)) {
		unsigned int blocksize = i_blocksize(inode);
		unsigned int offs = newsize & (blocksize - 1);
		if (offs) {
			error = gfs2_block_zero_range(inode, newsize,
						      blocksize - offs);
			if (error)
				return error;
		}
	}
	if (journaled)
		error = gfs2_trans_begin(sdp, RES_DINODE + RES_JDATA, GFS2_JTRUNC_REVOKES);
	else
		error = gfs2_trans_begin(sdp, RES_DINODE, 0);
	if (error)
		return error;

	error = gfs2_meta_inode_buffer(ip, &dibh);
	if (error)
		goto out;

	gfs2_trans_add_meta(ip->i_gl, dibh);

	if (gfs2_is_stuffed(ip))
		gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode) + newsize);
	else
		ip->i_diskflags |= GFS2_DIF_TRUNC_IN_PROG;

	i_size_write(inode, newsize);
	inode_set_mtime_to_ts(&ip->i_inode,
			      inode_set_ctime_current(&ip->i_inode));
	gfs2_dinode_out(ip, dibh->b_data);

	if (journaled)
		error = gfs2_journaled_truncate(inode, oldsize, newsize);
	else
		truncate_pagecache(inode, newsize);

out:
	brelse(dibh);
	if (current->journal_info)
		gfs2_trans_end(sdp);
	return error;
}

int gfs2_iomap_get(struct inode *inode, loff_t pos, loff_t length,
		   struct iomap *iomap)
{
	struct metapath mp = { .mp_aheight = 1, };
	int ret;

	ret = __gfs2_iomap_get(inode, pos, length, 0, iomap, &mp);
	release_metapath(&mp);
	return ret;
}

int gfs2_iomap_alloc(struct inode *inode, loff_t pos, loff_t length,
		     struct iomap *iomap)
{
	struct metapath mp = { .mp_aheight = 1, };
	int ret;

	ret = __gfs2_iomap_get(inode, pos, length, IOMAP_WRITE, iomap, &mp);
	if (!ret && iomap->type == IOMAP_HOLE)
		ret = __gfs2_iomap_alloc(inode, iomap, &mp);
	release_metapath(&mp);
	return ret;
}

/**
 * sweep_bh_for_rgrps - find an rgrp in a meta buffer and free blocks therein
 * @ip: inode
 * @rd_gh: holder of resource group glock
 * @bh: buffer head to sweep
 * @start: starting point in bh
 * @end: end point in bh
 * @meta: true if bh points to metadata (rather than data)
 * @btotal: place to keep count of total blocks freed
 *
 * We sweep a metadata buffer (provided by the metapath) for blocks we need to
 * free, and free them all.  However, we do it one rgrp at a time.  If this
 * block has references to multiple rgrps, we break it into individual
 * transactions.  This allows other processes to use the rgrps while we're
 * focused on a single one, for better concurrency / performance.
 * At every transaction boundary, we rewrite the inode into the journal.
 * That way the bitmaps are kept consistent with the inode and we can recover
 * if we're interrupted by power-outages.
 *
 * Returns: 0, or return code if an error occurred.
 *          *btotal has the total number of blocks freed
 */
static int sweep_bh_for_rgrps(struct gfs2_inode *ip, struct gfs2_holder *rd_gh,
			      struct buffer_head *bh, __be64 *start, __be64 *end,
			      bool meta, u32 *btotal)
{
	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
	struct gfs2_rgrpd *rgd;
	struct gfs2_trans *tr;
	__be64 *p;
	int blks_outside_rgrp;
	u64 bn, bstart, isize_blks;
	s64 blen; /* needs to be s64 or gfs2_add_inode_blocks breaks */
	int ret = 0;
	bool buf_in_tr = false; /* buffer was added to transaction */

more_rgrps:
	rgd = NULL;
	if (gfs2_holder_initialized(rd_gh)) {
		rgd = gfs2_glock2rgrp(rd_gh->gh_gl);
		gfs2_assert_withdraw(sdp,
			     gfs2_glock_is_locked_by_me(rd_gh->gh_gl));
	}
	blks_outside_rgrp = 0;
	bstart = 0;
	blen = 0;

	for (p = start; p < end; p++) {
		if (!*p)
			continue;
		bn = be64_to_cpu(*p);

		if (rgd) {
			if (!rgrp_contains_block(rgd, bn)) {
				blks_outside_rgrp++;
				continue;
			}
		} else {
			rgd = gfs2_blk2rgrpd(sdp, bn, true);
			if (unlikely(!rgd)) {
				ret = -EIO;
				goto out;
			}
			ret = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE,
						 LM_FLAG_NODE_SCOPE, rd_gh);
			if (ret)
				goto out;

			/* Must be done with the rgrp glock held: */
			if (gfs2_rs_active(&ip->i_res) &&
			    rgd == ip->i_res.rs_rgd)
				gfs2_rs_deltree(&ip->i_res);
		}

		/* The size of our transactions will be unknown until we
		   actually process all the metadata blocks that relate to
		   the rgrp. So we estimate. We know it can't be more than
		   the dinode's i_blocks and we don't want to exceed the
		   journal flush threshold, sd_log_thresh2. */
		if (current->journal_info == NULL) {
			unsigned int jblocks_rqsted, revokes;

			jblocks_rqsted = rgd->rd_length + RES_DINODE +
				RES_INDIRECT;
			isize_blks = gfs2_get_inode_blocks(&ip->i_inode);
			if (isize_blks > atomic_read(&sdp->sd_log_thresh2))
				jblocks_rqsted +=
					atomic_read(&sdp->sd_log_thresh2);
			else
				jblocks_rqsted += isize_blks;
			revokes = jblocks_rqsted;
			if (meta)
				revokes += end - start;
			else if (ip->i_depth)
				revokes += sdp->sd_inptrs;
			ret = gfs2_trans_begin(sdp, jblocks_rqsted, revokes);
			if (ret)
				goto out_unlock;
			down_write(&ip->i_rw_mutex);
		}
		/* check if we will exceed the transaction blocks requested */
		tr = current->journal_info;
		if (tr->tr_num_buf_new + RES_STATFS +
		    RES_QUOTA >= atomic_read(&sdp->sd_log_thresh2)) {
			/* We set blks_outside_rgrp to ensure the loop will
			   be repeated for the same rgrp, but with a new
			   transaction. */
			blks_outside_rgrp++;
			/* This next part is tricky. If the buffer was added
			   to the transaction, we've already set some block
			   pointers to 0, so we better follow through and free
			   them, or we will introduce corruption (so break).
			   This may be impossible, or at least rare, but I
			   decided to cover the case regardless.

			   If the buffer was not added to the transaction
			   (this call), doing so would exceed our transaction
			   size, so we need to end the transaction and start a
			   new one (so goto). */
			if (buf_in_tr)
				break;
			goto out_unlock;
		}

		gfs2_trans_add_meta(ip->i_gl, bh);
		buf_in_tr = true;
		*p = 0;
		if (bstart + blen == bn) {
			blen++;
			continue;
		}
		if (bstart) {
			__gfs2_free_blocks(ip, rgd, bstart, (u32)blen, meta);
			(*btotal) += blen;
			gfs2_add_inode_blocks(&ip->i_inode, -blen);
		}
		bstart = bn;
		blen = 1;
	}
	if (bstart) {
		__gfs2_free_blocks(ip, rgd, bstart, (u32)blen, meta);
		(*btotal) += blen;
		gfs2_add_inode_blocks(&ip->i_inode, -blen);
	}
out_unlock:
	if (!ret && blks_outside_rgrp) { /* If buffer still has non-zero blocks
					    outside the rgrp we just processed,
					    do it all over again. */
		if (current->journal_info) {
			struct buffer_head *dibh;

			ret = gfs2_meta_inode_buffer(ip, &dibh);
			if (ret)
				goto out;

			/* Every transaction boundary, we rewrite the dinode
			   to keep its di_blocks current in case of failure. */
			inode_set_mtime_to_ts(&ip->i_inode,
					      inode_set_ctime_current(&ip->i_inode));
			gfs2_trans_add_meta(ip->i_gl, dibh);
			gfs2_dinode_out(ip, dibh->b_data);
			brelse(dibh);
			up_write(&ip->i_rw_mutex);
			gfs2_trans_end(sdp);
			buf_in_tr = false;
		}
		gfs2_glock_dq_uninit(rd_gh);
		cond_resched();
		goto more_rgrps;
	}
out:
	return ret;
}

static bool mp_eq_to_hgt(struct metapath *mp, __u16 *list, unsigned int h)
{
	if (memcmp(mp->mp_list, list, h * sizeof(mp->mp_list[0])))
		return false;
	return true;
}

/**
 * find_nonnull_ptr - find a non-null pointer given a metapath and height
 * @sdp: The superblock
 * @mp: starting metapath
 * @h: desired height to search
 * @end_list: See punch_hole().
 * @end_aligned: See punch_hole().
 *
 * Assumes the metapath is valid (with buffers) out to height h.
 * Returns: true if a non-null pointer was found in the metapath buffer
 *          false if all remaining pointers are NULL in the buffer
 */
static bool find_nonnull_ptr(struct gfs2_sbd *sdp, struct metapath *mp,
			     unsigned int h,
			     __u16 *end_list, unsigned int end_aligned)
{
	struct buffer_head *bh = mp->mp_bh[h];
	__be64 *first, *ptr, *end;

	first = metaptr1(h, mp);
	ptr = first + mp->mp_list[h];
	end = (__be64 *)(bh->b_data + bh->b_size);
	if (end_list && mp_eq_to_hgt(mp, end_list, h)) {
		bool keep_end = h < end_aligned;
		end = first + end_list[h] + keep_end;
	}

	while (ptr < end) {
		if (*ptr) { /* if we have a non-null pointer */
			mp->mp_list[h] = ptr - first;
			h++;
			if (h < GFS2_MAX_META_HEIGHT)
				mp->mp_list[h] = 0;
			return true;
		}
		ptr++;
	}
	return false;
}

enum dealloc_states {
	DEALLOC_MP_FULL = 0,    /* Strip a metapath with all buffers read in */
	DEALLOC_MP_LOWER = 1,   /* lower the metapath strip height */
	DEALLOC_FILL_MP = 2,	/* Fill in the metapath to the given height. */
	DEALLOC_DONE = 3,       /* process complete */
};

static inline void
metapointer_range(struct metapath *mp, int height,
		  __u16 *start_list, unsigned int start_aligned,
		  __u16 *end_list, unsigned int end_aligned,
		  __be64 **start, __be64 **end)
{
	struct buffer_head *bh = mp->mp_bh[height];
	__be64 *first;

	first = metaptr1(height, mp);
	*start = first;
	if (mp_eq_to_hgt(mp, start_list, height)) {
		bool keep_start = height < start_aligned;
		*start = first + start_list[height] + keep_start;
	}
	*end = (__be64 *)(bh->b_data + bh->b_size);
	if (end_list && mp_eq_to_hgt(mp, end_list, height)) {
		bool keep_end = height < end_aligned;
		*end = first + end_list[height] + keep_end;
	}
}

static inline bool walk_done(struct gfs2_sbd *sdp,
			     struct metapath *mp, int height,
			     __u16 *end_list, unsigned int end_aligned)
{
	__u16 end;

	if (end_list) {
		bool keep_end = height < end_aligned;
		if (!mp_eq_to_hgt(mp, end_list, height))
			return false;
		end = end_list[height] + keep_end;
	} else
		end = (height > 0) ? sdp->sd_inptrs : sdp->sd_diptrs;
	return mp->mp_list[height] >= end;
}

/**
 * punch_hole - deallocate blocks in a file
 * @ip: inode to truncate
 * @offset: the start of the hole
 * @length: the size of the hole (or 0 for truncate)
 *
 * Punch a hole into a file or truncate a file at a given position.  This
 * function operates in whole blocks (@offset and @length are rounded
 * accordingly); partially filled blocks must be cleared otherwise.
 *
 * This function works from the bottom up, and from the right to the left. In
 * other words, it strips off the highest layer (data) before stripping any of
 * the metadata.
Doing it this way is best in case the operation is interrupted * by power failure, etc. The dinode is rewritten in every transaction to * guarantee integrity. */ static int punch_hole(struct gfs2_inode *ip, u64 offset, u64 length) { struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); u64 maxsize = sdp->sd_heightsize[ip->i_height]; struct metapath mp = {}; struct buffer_head *dibh, *bh; struct gfs2_holder rd_gh; unsigned int bsize_shift = sdp->sd_sb.sb_bsize_shift; unsigned int bsize = 1 << bsize_shift; u64 lblock = (offset + bsize - 1) >> bsize_shift; __u16 start_list[GFS2_MAX_META_HEIGHT]; __u16 __end_list[GFS2_MAX_META_HEIGHT], *end_list = NULL; unsigned int start_aligned, end_aligned; unsigned int strip_h = ip->i_height - 1; u32 btotal = 0; int ret, state; int mp_h; /* metapath buffers are read in to this height */ u64 prev_bnr = 0; __be64 *start, *end; if (offset + bsize - 1 >= maxsize) { /* * The starting point lies beyond the allocated metadata; * there are no blocks to deallocate. */ return 0; } /* * The start position of the hole is defined by lblock, start_list, and * start_aligned. The end position of the hole is defined by lend, * end_list, and end_aligned. * * start_aligned and end_aligned define down to which height the start * and end positions are aligned to the metadata tree (i.e., the * position is a multiple of the metadata granularity at the height * above). This determines at which heights additional meta pointers * needs to be preserved for the remaining data. */ if (length) { u64 end_offset = offset + length; u64 lend; /* * Clip the end at the maximum file size for the given height: * that's how far the metadata goes; files bigger than that * will have additional layers of indirection. */ if (end_offset > maxsize) end_offset = maxsize; lend = end_offset >> bsize_shift; if (lblock >= lend) return 0; find_metapath(sdp, lend, &mp, ip->i_height); end_list = __end_list; memcpy(end_list, mp.mp_list, sizeof(mp.mp_list)); for (mp_h = ip->i_height - 1; mp_h > 0; mp_h--) { if (end_list[mp_h]) break; } end_aligned = mp_h; } find_metapath(sdp, lblock, &mp, ip->i_height); memcpy(start_list, mp.mp_list, sizeof(start_list)); for (mp_h = ip->i_height - 1; mp_h > 0; mp_h--) { if (start_list[mp_h]) break; } start_aligned = mp_h; ret = gfs2_meta_inode_buffer(ip, &dibh); if (ret) return ret; mp.mp_bh[0] = dibh; ret = lookup_metapath(ip, &mp); if (ret) goto out_metapath; /* issue read-ahead on metadata */ for (mp_h = 0; mp_h < mp.mp_aheight - 1; mp_h++) { metapointer_range(&mp, mp_h, start_list, start_aligned, end_list, end_aligned, &start, &end); gfs2_metapath_ra(ip->i_gl, start, end); } if (mp.mp_aheight == ip->i_height) state = DEALLOC_MP_FULL; /* We have a complete metapath */ else state = DEALLOC_FILL_MP; /* deal with partial metapath */ ret = gfs2_rindex_update(sdp); if (ret) goto out_metapath; ret = gfs2_quota_hold(ip, NO_UID_QUOTA_CHANGE, NO_GID_QUOTA_CHANGE); if (ret) goto out_metapath; gfs2_holder_mark_uninitialized(&rd_gh); mp_h = strip_h; while (state != DEALLOC_DONE) { switch (state) { /* Truncate a full metapath at the given strip height. * Note that strip_h == mp_h in order to be in this state. */ case DEALLOC_MP_FULL: bh = mp.mp_bh[mp_h]; gfs2_assert_withdraw(sdp, bh); if (gfs2_assert_withdraw(sdp, prev_bnr != bh->b_blocknr)) { fs_emerg(sdp, "inode %llu, block:%llu, i_h:%u, " "s_h:%u, mp_h:%u\n", (unsigned long long)ip->i_no_addr, prev_bnr, ip->i_height, strip_h, mp_h); } prev_bnr = bh->b_blocknr; if (gfs2_metatype_check(sdp, bh, (mp_h ? 
GFS2_METATYPE_IN : GFS2_METATYPE_DI))) { ret = -EIO; goto out; } /* * Below, passing end_aligned as 0 gives us the * metapointer range excluding the end point: the end * point is the first metapath we must not deallocate! */ metapointer_range(&mp, mp_h, start_list, start_aligned, end_list, 0 /* end_aligned */, &start, &end); ret = sweep_bh_for_rgrps(ip, &rd_gh, mp.mp_bh[mp_h], start, end, mp_h != ip->i_height - 1, &btotal); /* If we hit an error or just swept dinode buffer, just exit. */ if (ret || !mp_h) { state = DEALLOC_DONE; break; } state = DEALLOC_MP_LOWER; break; /* lower the metapath strip height */ case DEALLOC_MP_LOWER: /* We're done with the current buffer, so release it, unless it's the dinode buffer. Then back up to the previous pointer. */ if (mp_h) { brelse(mp.mp_bh[mp_h]); mp.mp_bh[mp_h] = NULL; } /* If we can't get any lower in height, we've stripped off all we can. Next step is to back up and start stripping the previous level of metadata. */ if (mp_h == 0) { strip_h--; memcpy(mp.mp_list, start_list, sizeof(start_list)); mp_h = strip_h; state = DEALLOC_FILL_MP; break; } mp.mp_list[mp_h] = 0; mp_h--; /* search one metadata height down */ mp.mp_list[mp_h]++; if (walk_done(sdp, &mp, mp_h, end_list, end_aligned)) break; /* Here we've found a part of the metapath that is not * allocated. We need to search at that height for the * next non-null pointer. */ if (find_nonnull_ptr(sdp, &mp, mp_h, end_list, end_aligned)) { state = DEALLOC_FILL_MP; mp_h++; } /* No more non-null pointers at this height. Back up to the previous height and try again. */ break; /* loop around in the same state */ /* Fill the metapath with buffers to the given height. */ case DEALLOC_FILL_MP: /* Fill the buffers out to the current height. */ ret = fillup_metapath(ip, &mp, mp_h); if (ret < 0) goto out; /* On the first pass, issue read-ahead on metadata. */ if (mp.mp_aheight > 1 && strip_h == ip->i_height - 1) { unsigned int height = mp.mp_aheight - 1; /* No read-ahead for data blocks. */ if (mp.mp_aheight - 1 == strip_h) height--; for (; height >= mp.mp_aheight - ret; height--) { metapointer_range(&mp, height, start_list, start_aligned, end_list, end_aligned, &start, &end); gfs2_metapath_ra(ip->i_gl, start, end); } } /* If buffers found for the entire strip height */ if (mp.mp_aheight - 1 == strip_h) { state = DEALLOC_MP_FULL; break; } if (mp.mp_aheight < ip->i_height) /* We have a partial height */ mp_h = mp.mp_aheight - 1; /* If we find a non-null block pointer, crawl a bit higher up in the metapath and try again, otherwise we need to look lower for a new starting point. 
*/ if (find_nonnull_ptr(sdp, &mp, mp_h, end_list, end_aligned)) mp_h++; else state = DEALLOC_MP_LOWER; break; } } if (btotal) { if (current->journal_info == NULL) { ret = gfs2_trans_begin(sdp, RES_DINODE + RES_STATFS + RES_QUOTA, 0); if (ret) goto out; down_write(&ip->i_rw_mutex); } gfs2_statfs_change(sdp, 0, +btotal, 0); gfs2_quota_change(ip, -(s64)btotal, ip->i_inode.i_uid, ip->i_inode.i_gid); inode_set_mtime_to_ts(&ip->i_inode, inode_set_ctime_current(&ip->i_inode)); gfs2_trans_add_meta(ip->i_gl, dibh); gfs2_dinode_out(ip, dibh->b_data); up_write(&ip->i_rw_mutex); gfs2_trans_end(sdp); } out: if (gfs2_holder_initialized(&rd_gh)) gfs2_glock_dq_uninit(&rd_gh); if (current->journal_info) { up_write(&ip->i_rw_mutex); gfs2_trans_end(sdp); cond_resched(); } gfs2_quota_unhold(ip); out_metapath: release_metapath(&mp); return ret; } static int trunc_end(struct gfs2_inode *ip) { struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); struct buffer_head *dibh; int error; error = gfs2_trans_begin(sdp, RES_DINODE, 0); if (error) return error; down_write(&ip->i_rw_mutex); error = gfs2_meta_inode_buffer(ip, &dibh); if (error) goto out; if (!i_size_read(&ip->i_inode)) { ip->i_height = 0; ip->i_goal = ip->i_no_addr; gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode)); gfs2_ordered_del_inode(ip); } inode_set_mtime_to_ts(&ip->i_inode, inode_set_ctime_current(&ip->i_inode)); ip->i_diskflags &= ~GFS2_DIF_TRUNC_IN_PROG; gfs2_trans_add_meta(ip->i_gl, dibh); gfs2_dinode_out(ip, dibh->b_data); brelse(dibh); out: up_write(&ip->i_rw_mutex); gfs2_trans_end(sdp); return error; } /** * do_shrink - make a file smaller * @inode: the inode * @newsize: the size to make the file * * Called with an exclusive lock on @inode. The @size must * be equal to or smaller than the current inode size. * * Returns: errno */ static int do_shrink(struct inode *inode, u64 newsize) { struct gfs2_inode *ip = GFS2_I(inode); int error; error = trunc_start(inode, newsize); if (error < 0) return error; if (gfs2_is_stuffed(ip)) return 0; error = punch_hole(ip, newsize, 0); if (error == 0) error = trunc_end(ip); return error; } /** * do_grow - Touch and update inode size * @inode: The inode * @size: The new size * * This function updates the timestamps on the inode and * may also increase the size of the inode. This function * must not be called with @size any smaller than the current * inode size. * * Although it is not strictly required to unstuff files here, * earlier versions of GFS2 have a bug in the stuffed file reading * code which will result in a buffer overrun if the size is larger * than the max stuffed file size. In order to prevent this from * occurring, such files are unstuffed, but in other cases we can * just update the inode size directly. * * Returns: 0 on success, or -ve on error */ static int do_grow(struct inode *inode, u64 size) { struct gfs2_inode *ip = GFS2_I(inode); struct gfs2_sbd *sdp = GFS2_SB(inode); struct gfs2_alloc_parms ap = { .target = 1, }; struct buffer_head *dibh; int error; int unstuff = 0; if (gfs2_is_stuffed(ip) && size > gfs2_max_stuffed_size(ip)) { error = gfs2_quota_lock_check(ip, &ap); if (error) return error; error = gfs2_inplace_reserve(ip, &ap); if (error) goto do_grow_qunlock; unstuff = 1; } error = gfs2_trans_begin(sdp, RES_DINODE + RES_STATFS + RES_RG_BIT + (unstuff && gfs2_is_jdata(ip) ? RES_JDATA : 0) + (sdp->sd_args.ar_quota == GFS2_QUOTA_OFF ? 
0 : RES_QUOTA), 0); if (error) goto do_grow_release; if (unstuff) { error = gfs2_unstuff_dinode(ip); if (error) goto do_end_trans; } error = gfs2_meta_inode_buffer(ip, &dibh); if (error) goto do_end_trans; truncate_setsize(inode, size); inode_set_mtime_to_ts(&ip->i_inode, inode_set_ctime_current(&ip->i_inode)); gfs2_trans_add_meta(ip->i_gl, dibh); gfs2_dinode_out(ip, dibh->b_data); brelse(dibh); do_end_trans: gfs2_trans_end(sdp); do_grow_release: if (unstuff) { gfs2_inplace_release(ip); do_grow_qunlock: gfs2_quota_unlock(ip); } return error; } /** * gfs2_setattr_size - make a file a given size * @inode: the inode * @newsize: the size to make the file * * The file size can grow, shrink, or stay the same size. This * is called holding i_rwsem and an exclusive glock on the inode * in question. * * Returns: errno */ int gfs2_setattr_size(struct inode *inode, u64 newsize) { struct gfs2_inode *ip = GFS2_I(inode); int ret; BUG_ON(!S_ISREG(inode->i_mode)); ret = inode_newsize_ok(inode, newsize); if (ret) return ret; inode_dio_wait(inode); ret = gfs2_qa_get(ip); if (ret) goto out; if (newsize >= inode->i_size) { ret = do_grow(inode, newsize); goto out; } ret = do_shrink(inode, newsize); out: gfs2_rs_delete(ip); gfs2_qa_put(ip); return ret; } int gfs2_truncatei_resume(struct gfs2_inode *ip) { int error; error = punch_hole(ip, i_size_read(&ip->i_inode), 0); if (!error) error = trunc_end(ip); return error; } int gfs2_file_dealloc(struct gfs2_inode *ip) { return punch_hole(ip, 0, 0); } /** * gfs2_free_journal_extents - Free cached journal bmap info * @jd: The journal * */ void gfs2_free_journal_extents(struct gfs2_jdesc *jd) { struct gfs2_journal_extent *jext; while(!list_empty(&jd->extent_list)) { jext = list_first_entry(&jd->extent_list, struct gfs2_journal_extent, list); list_del(&jext->list); kfree(jext); } } /** * gfs2_add_jextent - Add or merge a new extent to extent cache * @jd: The journal descriptor * @lblock: The logical block at start of new extent * @dblock: The physical block at start of new extent * @blocks: Size of extent in fs blocks * * Returns: 0 on success or -ENOMEM */ static int gfs2_add_jextent(struct gfs2_jdesc *jd, u64 lblock, u64 dblock, u64 blocks) { struct gfs2_journal_extent *jext; if (!list_empty(&jd->extent_list)) { jext = list_last_entry(&jd->extent_list, struct gfs2_journal_extent, list); if ((jext->dblock + jext->blocks) == dblock) { jext->blocks += blocks; return 0; } } jext = kzalloc(sizeof(struct gfs2_journal_extent), GFP_NOFS); if (jext == NULL) return -ENOMEM; jext->dblock = dblock; jext->lblock = lblock; jext->blocks = blocks; list_add_tail(&jext->list, &jd->extent_list); jd->nr_extents++; return 0; } /** * gfs2_map_journal_extents - Cache journal bmap info * @sdp: The super block * @jd: The journal to map * * Create a reusable "extent" mapping from all logical * blocks to all physical blocks for the given journal. This will save * us time when writing journal blocks. Most journals will have only one * extent that maps all their logical blocks. That's because gfs2.mkfs * arranges the journal blocks sequentially to maximize performance. * So the extent would map the first block for the entire file length. * However, gfs2_jadd can happen while file activity is happening, so * those journals may not be sequential. Less likely is the case where * the users created their own journals by mounting the metafs and * laying it out. But it's still possible. These journals might have * several extents. 
* * Returns: 0 on success, or error on failure */ int gfs2_map_journal_extents(struct gfs2_sbd *sdp, struct gfs2_jdesc *jd) { u64 lblock = 0; u64 lblock_stop; struct gfs2_inode *ip = GFS2_I(jd->jd_inode); struct buffer_head bh; unsigned int shift = sdp->sd_sb.sb_bsize_shift; u64 size; int rc; ktime_t start, end; start = ktime_get(); lblock_stop = i_size_read(jd->jd_inode) >> shift; size = (lblock_stop - lblock) << shift; jd->nr_extents = 0; WARN_ON(!list_empty(&jd->extent_list)); do { bh.b_state = 0; bh.b_blocknr = 0; bh.b_size = size; rc = gfs2_block_map(jd->jd_inode, lblock, &bh, 0); if (rc || !buffer_mapped(&bh)) goto fail; rc = gfs2_add_jextent(jd, lblock, bh.b_blocknr, bh.b_size >> shift); if (rc) goto fail; size -= bh.b_size; lblock += (bh.b_size >> ip->i_inode.i_blkbits); } while(size > 0); end = ktime_get(); fs_info(sdp, "journal %d mapped with %u extents in %lldms\n", jd->jd_jid, jd->nr_extents, ktime_ms_delta(end, start)); return 0; fail: fs_warn(sdp, "error %d mapping journal %u at offset %llu (extent %u)\n", rc, jd->jd_jid, (unsigned long long)(i_size_read(jd->jd_inode) - size), jd->nr_extents); fs_warn(sdp, "bmap=%d lblock=%llu block=%llu, state=0x%08lx, size=%llu\n", rc, (unsigned long long)lblock, (unsigned long long)bh.b_blocknr, bh.b_state, (unsigned long long)bh.b_size); gfs2_free_journal_extents(jd); return rc; } /** * gfs2_write_alloc_required - figure out if a write will require an allocation * @ip: the file being written to * @offset: the offset to write to * @len: the number of bytes being written * * Returns: 1 if an alloc is required, 0 otherwise */ int gfs2_write_alloc_required(struct gfs2_inode *ip, u64 offset, unsigned int len) { struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); struct buffer_head bh; unsigned int shift; u64 lblock, lblock_stop, size; u64 end_of_file; if (!len) return 0; if (gfs2_is_stuffed(ip)) { if (offset + len > gfs2_max_stuffed_size(ip)) return 1; return 0; } shift = sdp->sd_sb.sb_bsize_shift; BUG_ON(gfs2_is_dir(ip)); end_of_file = (i_size_read(&ip->i_inode) + sdp->sd_sb.sb_bsize - 1) >> shift; lblock = offset >> shift; lblock_stop = (offset + len + sdp->sd_sb.sb_bsize - 1) >> shift; if (lblock_stop > end_of_file && ip != GFS2_I(sdp->sd_rindex)) return 1; size = (lblock_stop - lblock) << shift; do { bh.b_state = 0; bh.b_size = size; gfs2_block_map(&ip->i_inode, lblock, &bh, 0); if (!buffer_mapped(&bh)) return 1; size -= bh.b_size; lblock += (bh.b_size >> ip->i_inode.i_blkbits); } while(size > 0); return 0; } static int stuffed_zero_range(struct inode *inode, loff_t offset, loff_t length) { struct gfs2_inode *ip = GFS2_I(inode); struct buffer_head *dibh; int error; if (offset >= inode->i_size) return 0; if (offset + length > inode->i_size) length = inode->i_size - offset; error = gfs2_meta_inode_buffer(ip, &dibh); if (error) return error; gfs2_trans_add_meta(ip->i_gl, dibh); memset(dibh->b_data + sizeof(struct gfs2_dinode) + offset, 0, length); brelse(dibh); return 0; } static int gfs2_journaled_truncate_range(struct inode *inode, loff_t offset, loff_t length) { struct gfs2_sbd *sdp = GFS2_SB(inode); loff_t max_chunk = GFS2_JTRUNC_REVOKES * sdp->sd_vfs->s_blocksize; int error; while (length) { struct gfs2_trans *tr; loff_t chunk; unsigned int offs; chunk = length; if (chunk > max_chunk) chunk = max_chunk; offs = offset & ~PAGE_MASK; if (offs && chunk > PAGE_SIZE) chunk = offs + ((chunk - offs) & PAGE_MASK); truncate_pagecache_range(inode, offset, chunk); offset += chunk; length -= chunk; tr = current->journal_info; if (!test_bit(TR_TOUCHED, 
&tr->tr_flags))
			continue;

		gfs2_trans_end(sdp);
		error = gfs2_trans_begin(sdp, RES_DINODE, GFS2_JTRUNC_REVOKES);
		if (error)
			return error;
	}

	return 0;
}

int __gfs2_punch_hole(struct file *file, loff_t offset, loff_t length)
{
	struct inode *inode = file_inode(file);
	struct gfs2_inode *ip = GFS2_I(inode);
	struct gfs2_sbd *sdp = GFS2_SB(inode);
	unsigned int blocksize = i_blocksize(inode);
	loff_t start, end;
	int error;

	if (!gfs2_is_stuffed(ip)) {
		unsigned int start_off, end_len;

		start_off = offset & (blocksize - 1);
		end_len = (offset + length) & (blocksize - 1);
		if (start_off) {
			unsigned int len = length;
			if (length > blocksize - start_off)
				len = blocksize - start_off;
			error = gfs2_block_zero_range(inode, offset, len);
			if (error)
				goto out;
			if (start_off + length < blocksize)
				end_len = 0;
		}
		if (end_len) {
			error = gfs2_block_zero_range(inode,
				offset + length - end_len, end_len);
			if (error)
				goto out;
		}
	}

	start = round_down(offset, blocksize);
	end = round_up(offset + length, blocksize) - 1;
	error = filemap_write_and_wait_range(inode->i_mapping, start, end);
	if (error)
		return error;

	if (gfs2_is_jdata(ip))
		error = gfs2_trans_begin(sdp, RES_DINODE + 2 * RES_JDATA,
					 GFS2_JTRUNC_REVOKES);
	else
		error = gfs2_trans_begin(sdp, RES_DINODE, 0);
	if (error)
		return error;

	if (gfs2_is_stuffed(ip)) {
		error = stuffed_zero_range(inode, offset, length);
		if (error)
			goto out;
	}

	if (gfs2_is_jdata(ip)) {
		BUG_ON(!current->journal_info);
		gfs2_journaled_truncate_range(inode, offset, length);
	} else
		truncate_pagecache_range(inode, offset, offset + length - 1);

	file_update_time(file);
	mark_inode_dirty(inode);

	if (current->journal_info)
		gfs2_trans_end(sdp);

	if (!gfs2_is_stuffed(ip))
		error = punch_hole(ip, offset, length);
out:
	if (current->journal_info)
		gfs2_trans_end(sdp);
	return error;
}

static int gfs2_map_blocks(struct iomap_writepage_ctx *wpc, struct inode *inode,
		loff_t offset, unsigned int len)
{
	int ret;

	if (WARN_ON_ONCE(gfs2_is_stuffed(GFS2_I(inode))))
		return -EIO;

	if (offset >= wpc->iomap.offset &&
	    offset < wpc->iomap.offset + wpc->iomap.length)
		return 0;

	memset(&wpc->iomap, 0, sizeof(wpc->iomap));
	ret = gfs2_iomap_get(inode, offset, INT_MAX, &wpc->iomap);
	return ret;
}

const struct iomap_writeback_ops gfs2_writeback_ops = {
	.map_blocks		= gfs2_map_blocks,
};
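/*
 * Illustrative sketch (not part of gfs2): the chunk-size rule applied by
 * gfs2_journaled_truncate() and gfs2_journaled_truncate_range() above,
 * pulled out as a standalone helper so the arithmetic is easy to follow.
 * With 4 KiB blocks, GFS2_JTRUNC_REVOKES (8192) caps each transaction at
 * 8192 * 4096 bytes = 32 MiB of page cache, and a chunk that starts
 * mid-page is trimmed so that the remaining size still ends on a page
 * boundary. All SKETCH_* names are invented for this example.
 */
#include <stdint.h>

#define SKETCH_JTRUNC_REVOKES	8192
#define SKETCH_PAGE_SIZE	4096ULL
#define SKETCH_PAGE_MASK	(~(SKETCH_PAGE_SIZE - 1))

static uint64_t sketch_truncate_chunk(uint64_t oldsize, uint64_t newsize,
				      uint64_t blocksize)
{
	uint64_t max_chunk = SKETCH_JTRUNC_REVOKES * blocksize;
	uint64_t chunk = oldsize - newsize;
	uint64_t offs = oldsize & ~SKETCH_PAGE_MASK;	/* offset within page */

	if (chunk > max_chunk)
		chunk = max_chunk;
	/* Trim so oldsize - chunk lands on a page boundary when oldsize
	 * sits mid-page, mirroring the kernel loop above.
	 */
	if (offs && chunk > SKETCH_PAGE_SIZE)
		chunk = offs + ((chunk - offs) & SKETCH_PAGE_MASK);
	return chunk;
}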
// SPDX-License-Identifier: GPL-2.0-or-later
/* In-software asymmetric public-key crypto subtype
 *
 * See Documentation/crypto/asymmetric-keys.rst
 *
 * Copyright (C) 2012 Red Hat, Inc. All Rights Reserved.
 * Written by David Howells (dhowells@redhat.com)
 */

#define pr_fmt(fmt) "PKEY: "fmt
#include <crypto/akcipher.h>
#include <crypto/public_key.h>
#include <crypto/sig.h>
#include <keys/asymmetric-subtype.h>
#include <linux/asn1.h>
#include <linux/err.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/seq_file.h>
#include <linux/slab.h>
#include <linux/string.h>

MODULE_DESCRIPTION("In-software asymmetric public-key subtype");
MODULE_AUTHOR("Red Hat, Inc.");
MODULE_LICENSE("GPL");

/*
 * Provide a part of a description of the key for /proc/keys.
 */
static void public_key_describe(const struct key *asymmetric_key,
				struct seq_file *m)
{
	struct public_key *key = asymmetric_key->payload.data[asym_crypto];

	if (key)
		seq_printf(m, "%s.%s", key->id_type, key->pkey_algo);
}

/*
 * Destroy a public key algorithm key.
 */
void public_key_free(struct public_key *key)
{
	if (key) {
		kfree_sensitive(key->key);
		kfree(key->params);
		kfree(key);
	}
}
EXPORT_SYMBOL_GPL(public_key_free);

/*
 * Destroy a public key algorithm key.
 */
static void public_key_destroy(void *payload0, void *payload3)
{
	public_key_free(payload0);
	public_key_signature_free(payload3);
}

/*
 * Given a public_key, and an encoding and hash_algo to be used for signing
 * and/or verification with that key, determine the name of the corresponding
 * akcipher algorithm. Also check that encoding and hash_algo are allowed.
*/ static int software_key_determine_akcipher(const struct public_key *pkey, const char *encoding, const char *hash_algo, char alg_name[CRYPTO_MAX_ALG_NAME], bool *sig, enum kernel_pkey_operation op) { int n; *sig = true; if (!encoding) return -EINVAL; if (strcmp(pkey->pkey_algo, "rsa") == 0) { /* * RSA signatures usually use EMSA-PKCS1-1_5 [RFC3447 sec 8.2]. */ if (strcmp(encoding, "pkcs1") == 0) { *sig = op == kernel_pkey_sign || op == kernel_pkey_verify; if (!*sig) { /* * For encrypt/decrypt, hash_algo is not used * but allowed to be set for historic reasons. */ n = snprintf(alg_name, CRYPTO_MAX_ALG_NAME, "pkcs1pad(%s)", pkey->pkey_algo); } else { if (!hash_algo) hash_algo = "none"; n = snprintf(alg_name, CRYPTO_MAX_ALG_NAME, "pkcs1(%s,%s)", pkey->pkey_algo, hash_algo); } return n >= CRYPTO_MAX_ALG_NAME ? -EINVAL : 0; } if (strcmp(encoding, "raw") != 0) return -EINVAL; /* * Raw RSA cannot differentiate between different hash * algorithms. */ if (hash_algo) return -EINVAL; *sig = false; } else if (strncmp(pkey->pkey_algo, "ecdsa", 5) == 0) { if (strcmp(encoding, "x962") != 0 && strcmp(encoding, "p1363") != 0) return -EINVAL; /* * ECDSA signatures are taken over a raw hash, so they don't * differentiate between different hash algorithms. That means * that the verifier should hard-code a specific hash algorithm. * Unfortunately, in practice ECDSA is used with multiple SHAs, * so we have to allow all of them and not just one. */ if (!hash_algo) return -EINVAL; if (strcmp(hash_algo, "sha1") != 0 && strcmp(hash_algo, "sha224") != 0 && strcmp(hash_algo, "sha256") != 0 && strcmp(hash_algo, "sha384") != 0 && strcmp(hash_algo, "sha512") != 0 && strcmp(hash_algo, "sha3-256") != 0 && strcmp(hash_algo, "sha3-384") != 0 && strcmp(hash_algo, "sha3-512") != 0) return -EINVAL; n = snprintf(alg_name, CRYPTO_MAX_ALG_NAME, "%s(%s)", encoding, pkey->pkey_algo); return n >= CRYPTO_MAX_ALG_NAME ? -EINVAL : 0; } else if (strcmp(pkey->pkey_algo, "ecrdsa") == 0) { if (strcmp(encoding, "raw") != 0) return -EINVAL; if (!hash_algo) return -EINVAL; if (strcmp(hash_algo, "streebog256") != 0 && strcmp(hash_algo, "streebog512") != 0) return -EINVAL; } else { /* Unknown public key algorithm */ return -ENOPKG; } if (strscpy(alg_name, pkey->pkey_algo, CRYPTO_MAX_ALG_NAME) < 0) return -EINVAL; return 0; } static u8 *pkey_pack_u32(u8 *dst, u32 val) { memcpy(dst, &val, sizeof(val)); return dst + sizeof(val); } /* * Query information about a key. 
*/ static int software_key_query(const struct kernel_pkey_params *params, struct kernel_pkey_query *info) { struct crypto_akcipher *tfm; struct public_key *pkey = params->key->payload.data[asym_crypto]; char alg_name[CRYPTO_MAX_ALG_NAME]; struct crypto_sig *sig; u8 *key, *ptr; int ret, len; bool issig; ret = software_key_determine_akcipher(pkey, params->encoding, params->hash_algo, alg_name, &issig, kernel_pkey_sign); if (ret < 0) return ret; key = kmalloc(pkey->keylen + sizeof(u32) * 2 + pkey->paramlen, GFP_KERNEL); if (!key) return -ENOMEM; memcpy(key, pkey->key, pkey->keylen); ptr = key + pkey->keylen; ptr = pkey_pack_u32(ptr, pkey->algo); ptr = pkey_pack_u32(ptr, pkey->paramlen); memcpy(ptr, pkey->params, pkey->paramlen); if (issig) { sig = crypto_alloc_sig(alg_name, 0, 0); if (IS_ERR(sig)) { ret = PTR_ERR(sig); goto error_free_key; } if (pkey->key_is_private) ret = crypto_sig_set_privkey(sig, key, pkey->keylen); else ret = crypto_sig_set_pubkey(sig, key, pkey->keylen); if (ret < 0) goto error_free_tfm; len = crypto_sig_keysize(sig); info->max_sig_size = crypto_sig_maxsize(sig); info->max_data_size = crypto_sig_digestsize(sig); info->supported_ops = KEYCTL_SUPPORTS_VERIFY; if (pkey->key_is_private) info->supported_ops |= KEYCTL_SUPPORTS_SIGN; if (strcmp(params->encoding, "pkcs1") == 0) { info->supported_ops |= KEYCTL_SUPPORTS_ENCRYPT; if (pkey->key_is_private) info->supported_ops |= KEYCTL_SUPPORTS_DECRYPT; } } else { tfm = crypto_alloc_akcipher(alg_name, 0, 0); if (IS_ERR(tfm)) { ret = PTR_ERR(tfm); goto error_free_key; } if (pkey->key_is_private) ret = crypto_akcipher_set_priv_key(tfm, key, pkey->keylen); else ret = crypto_akcipher_set_pub_key(tfm, key, pkey->keylen); if (ret < 0) goto error_free_tfm; len = crypto_akcipher_maxsize(tfm); info->max_sig_size = len; info->max_data_size = len; info->supported_ops = KEYCTL_SUPPORTS_ENCRYPT; if (pkey->key_is_private) info->supported_ops |= KEYCTL_SUPPORTS_DECRYPT; } info->key_size = len * 8; info->max_enc_size = len; info->max_dec_size = len; ret = 0; error_free_tfm: if (issig) crypto_free_sig(sig); else crypto_free_akcipher(tfm); error_free_key: kfree_sensitive(key); pr_devel("<==%s() = %d\n", __func__, ret); return ret; } /* * Do encryption, decryption and signing ops. 
*/ static int software_key_eds_op(struct kernel_pkey_params *params, const void *in, void *out) { const struct public_key *pkey = params->key->payload.data[asym_crypto]; char alg_name[CRYPTO_MAX_ALG_NAME]; struct crypto_akcipher *tfm; struct crypto_sig *sig; char *key, *ptr; bool issig; int ksz; int ret; pr_devel("==>%s()\n", __func__); ret = software_key_determine_akcipher(pkey, params->encoding, params->hash_algo, alg_name, &issig, params->op); if (ret < 0) return ret; key = kmalloc(pkey->keylen + sizeof(u32) * 2 + pkey->paramlen, GFP_KERNEL); if (!key) return -ENOMEM; memcpy(key, pkey->key, pkey->keylen); ptr = key + pkey->keylen; ptr = pkey_pack_u32(ptr, pkey->algo); ptr = pkey_pack_u32(ptr, pkey->paramlen); memcpy(ptr, pkey->params, pkey->paramlen); if (issig) { sig = crypto_alloc_sig(alg_name, 0, 0); if (IS_ERR(sig)) { ret = PTR_ERR(sig); goto error_free_key; } if (pkey->key_is_private) ret = crypto_sig_set_privkey(sig, key, pkey->keylen); else ret = crypto_sig_set_pubkey(sig, key, pkey->keylen); if (ret) goto error_free_tfm; ksz = crypto_sig_keysize(sig); } else { tfm = crypto_alloc_akcipher(alg_name, 0, 0); if (IS_ERR(tfm)) { ret = PTR_ERR(tfm); goto error_free_key; } if (pkey->key_is_private) ret = crypto_akcipher_set_priv_key(tfm, key, pkey->keylen); else ret = crypto_akcipher_set_pub_key(tfm, key, pkey->keylen); if (ret) goto error_free_tfm; ksz = crypto_akcipher_maxsize(tfm); } ret = -EINVAL; /* Perform the encryption calculation. */ switch (params->op) { case kernel_pkey_encrypt: if (issig) break; ret = crypto_akcipher_sync_encrypt(tfm, in, params->in_len, out, params->out_len); break; case kernel_pkey_decrypt: if (issig) break; ret = crypto_akcipher_sync_decrypt(tfm, in, params->in_len, out, params->out_len); break; case kernel_pkey_sign: if (!issig) break; ret = crypto_sig_sign(sig, in, params->in_len, out, params->out_len); break; default: BUG(); } if (ret == 0) ret = ksz; error_free_tfm: if (issig) crypto_free_sig(sig); else crypto_free_akcipher(tfm); error_free_key: kfree_sensitive(key); pr_devel("<==%s() = %d\n", __func__, ret); return ret; } /* * Verify a signature using a public key. */ int public_key_verify_signature(const struct public_key *pkey, const struct public_key_signature *sig) { char alg_name[CRYPTO_MAX_ALG_NAME]; struct crypto_sig *tfm; char *key, *ptr; bool issig; int ret; pr_devel("==>%s()\n", __func__); BUG_ON(!pkey); BUG_ON(!sig); BUG_ON(!sig->s); /* * If the signature specifies a public key algorithm, it *must* match * the key's actual public key algorithm. * * Small exception: ECDSA signatures don't specify the curve, but ECDSA * keys do. So the strings can mismatch slightly in that case: * "ecdsa-nist-*" for the key, but "ecdsa" for the signature. 
 */
	if (sig->pkey_algo) {
		if (strcmp(pkey->pkey_algo, sig->pkey_algo) != 0 &&
		    (strncmp(pkey->pkey_algo, "ecdsa-", 6) != 0 ||
		     strcmp(sig->pkey_algo, "ecdsa") != 0))
			return -EKEYREJECTED;
	}

	ret = software_key_determine_akcipher(pkey, sig->encoding,
					      sig->hash_algo, alg_name,
					      &issig, kernel_pkey_verify);
	if (ret < 0)
		return ret;

	tfm = crypto_alloc_sig(alg_name, 0, 0);
	if (IS_ERR(tfm))
		return PTR_ERR(tfm);

	key = kmalloc(pkey->keylen + sizeof(u32) * 2 + pkey->paramlen,
		      GFP_KERNEL);
	if (!key) {
		ret = -ENOMEM;
		goto error_free_tfm;
	}

	memcpy(key, pkey->key, pkey->keylen);
	ptr = key + pkey->keylen;
	ptr = pkey_pack_u32(ptr, pkey->algo);
	ptr = pkey_pack_u32(ptr, pkey->paramlen);
	memcpy(ptr, pkey->params, pkey->paramlen);

	if (pkey->key_is_private)
		ret = crypto_sig_set_privkey(tfm, key, pkey->keylen);
	else
		ret = crypto_sig_set_pubkey(tfm, key, pkey->keylen);
	if (ret)
		goto error_free_key;

	ret = crypto_sig_verify(tfm, sig->s, sig->s_size,
				sig->digest, sig->digest_size);

error_free_key:
	kfree_sensitive(key);
error_free_tfm:
	crypto_free_sig(tfm);
	pr_devel("<==%s() = %d\n", __func__, ret);
	if (WARN_ON_ONCE(ret > 0))
		ret = -EINVAL;
	return ret;
}
EXPORT_SYMBOL_GPL(public_key_verify_signature);

static int public_key_verify_signature_2(const struct key *key,
					 const struct public_key_signature *sig)
{
	const struct public_key *pk = key->payload.data[asym_crypto];
	return public_key_verify_signature(pk, sig);
}

/*
 * Public key algorithm asymmetric key subtype
 */
struct asymmetric_key_subtype public_key_subtype = {
	.owner			= THIS_MODULE,
	.name			= "public_key",
	.name_len		= sizeof("public_key") - 1,
	.describe		= public_key_describe,
	.destroy		= public_key_destroy,
	.query			= software_key_query,
	.eds_op			= software_key_eds_op,
	.verify_signature	= public_key_verify_signature_2,
};
EXPORT_SYMBOL_GPL(public_key_subtype);
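/*
 * Illustrative sketch (not from the kernel tree): worked examples of the
 * algorithm-name mapping that software_key_determine_akcipher() above
 * builds for the crypto API. The format strings mirror the ones in that
 * function; the concrete pkey_algo/encoding/hash_algo triples below are
 * assumptions chosen for illustration.
 *
 *   rsa + "pkcs1" + "sha256", sign/verify -> "pkcs1(rsa,sha256)"
 *   rsa + "pkcs1", encrypt/decrypt        -> "pkcs1pad(rsa)"
 *   ecdsa-nist-p256 + "x962" + any SHA    -> "x962(ecdsa-nist-p256)"
 *   ecrdsa + "raw" + "streebog256"        -> "ecrdsa" (bare algorithm name)
 */
#include <stdio.h>

static void sketch_alg_names(void)
{
	char alg[128];

	/* Signing/verification with PKCS#1 padding encodes the hash name. */
	snprintf(alg, sizeof(alg), "pkcs1(%s,%s)", "rsa", "sha256");
	printf("%s\n", alg);	/* pkcs1(rsa,sha256) */

	/* Encryption/decryption uses the padding-only template. */
	snprintf(alg, sizeof(alg), "pkcs1pad(%s)", "rsa");
	printf("%s\n", alg);	/* pkcs1pad(rsa) */

	/* ECDSA wraps the key's algorithm name in the signature encoding. */
	snprintf(alg, sizeof(alg), "%s(%s)", "x962", "ecdsa-nist-p256");
	printf("%s\n", alg);	/* x962(ecdsa-nist-p256) */
}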
// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * HID driver for ELECOM devices:
 * - BM084 Bluetooth Mouse
 * - EX-G Trackballs (M-XT3DRBK, M-XT3URBK, M-XT4DRBK)
 * - DEFT Trackballs (M-DT1DRBK, M-DT1URBK, M-DT2DRBK, M-DT2URBK)
 * - HUGE Trackballs (M-HT1DRBK, M-HT1URBK)
 *
 * Copyright (c) 2010 Richard Nauber <Richard.Nauber@gmail.com>
 * Copyright (c) 2016 Yuxuan Shui <yshuiv7@gmail.com>
 * Copyright (c) 2017 Diego Elio Pettenò <flameeyes@flameeyes.eu>
 * Copyright (c) 2017 Alex Manoussakis <amanou@gnu.org>
 * Copyright (c) 2017 Tomasz Kramkowski <tk@the-tk.com>
 * Copyright (c) 2020 YOSHIOKA Takuma <lo48576@hard-wi.red>
 * Copyright (c) 2022 Takahiro Fujii <fujii@xaxxi.net>
 */

#include <linux/device.h>
#include <linux/hid.h>
#include <linux/module.h>

#include "hid-ids.h"

/*
 * Certain ELECOM mice misreport their button count, meaning that they only
 * work correctly with the ELECOM mouse assistant software, which is
 * unavailable for Linux. Four extra INPUT reports and a FEATURE report are
 * described by the report descriptor, but it does not appear that these
 * enable software to control what the extra buttons map to. The only simple
 * and straightforward solution seems to involve fixing up the report
 * descriptor.
 */
#define MOUSE_BUTTONS_MAX 8
static void mouse_button_fixup(struct hid_device *hdev,
			       __u8 *rdesc, unsigned int rsize,
			       unsigned int button_bit_count,
			       unsigned int padding_bit,
			       unsigned int button_report_size,
			       unsigned int button_usage_maximum, int nbuttons)
{
	if (rsize < 32 || rdesc[button_bit_count] != 0x95 ||
	    rdesc[button_report_size] != 0x75 ||
	    rdesc[button_report_size + 1] != 0x01 ||
	    rdesc[button_usage_maximum] != 0x29 || rdesc[padding_bit] != 0x75)
		return;
	hid_info(hdev, "Fixing up Elecom mouse button count\n");
	nbuttons = clamp(nbuttons, 0, MOUSE_BUTTONS_MAX);
	rdesc[button_bit_count + 1] = nbuttons;
	rdesc[button_usage_maximum + 1] = nbuttons;
	rdesc[padding_bit + 1] = MOUSE_BUTTONS_MAX - nbuttons;
}

static const __u8 *elecom_report_fixup(struct hid_device *hdev, __u8 *rdesc,
				       unsigned int *rsize)
{
	switch (hdev->product) {
	case USB_DEVICE_ID_ELECOM_BM084:
		/* The BM084 Bluetooth mouse includes a non-existent horizontal
		 * wheel in the HID descriptor.
		 */
		if (*rsize >= 48 && rdesc[46] == 0x05 && rdesc[47] == 0x0c) {
			hid_info(hdev, "Fixing up Elecom BM084 report descriptor\n");
			rdesc[47] = 0x00;
		}
		break;
	case USB_DEVICE_ID_ELECOM_M_XGL20DLBK:
		/*
		 * Report descriptor format:
		 * 20: button bit count
		 * 28: padding bit count
		 * 22: button report size
		 * 14: button usage maximum
		 */
		mouse_button_fixup(hdev, rdesc, *rsize, 20, 28, 22, 14, 8);
		break;
	case USB_DEVICE_ID_ELECOM_M_XT3URBK:
	case USB_DEVICE_ID_ELECOM_M_XT3DRBK:
	case USB_DEVICE_ID_ELECOM_M_XT4DRBK:
		/*
		 * Report descriptor format:
		 * 12: button bit count
		 * 30: padding bit count
		 * 14: button report size
		 * 20: button usage maximum
		 */
		mouse_button_fixup(hdev, rdesc, *rsize, 12, 30, 14, 20, 6);
		break;
	case USB_DEVICE_ID_ELECOM_M_DT1URBK:
	case USB_DEVICE_ID_ELECOM_M_DT1DRBK:
	case USB_DEVICE_ID_ELECOM_M_HT1URBK:
	case USB_DEVICE_ID_ELECOM_M_HT1DRBK_010D:
		/*
		 * Report descriptor format:
		 * 12: button bit count
		 * 30: padding bit count
		 * 14: button report size
		 * 20: button usage maximum
		 */
		mouse_button_fixup(hdev, rdesc, *rsize, 12, 30, 14, 20, 8);
		break;
	case USB_DEVICE_ID_ELECOM_M_HT1DRBK_011C:
		/*
		 * Report descriptor format:
		 * 22: button bit count
		 * 30: padding bit count
		 * 24: button report size
		 * 16: button usage maximum
		 */
		mouse_button_fixup(hdev, rdesc, *rsize, 22, 30, 24, 16, 8);
		break;
	}
	return rdesc;
}

static const struct hid_device_id elecom_devices[] = {
	{ HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_ELECOM, USB_DEVICE_ID_ELECOM_BM084) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_ELECOM, USB_DEVICE_ID_ELECOM_M_XGL20DLBK) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_ELECOM, USB_DEVICE_ID_ELECOM_M_XT3URBK) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_ELECOM, USB_DEVICE_ID_ELECOM_M_XT3DRBK) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_ELECOM, USB_DEVICE_ID_ELECOM_M_XT4DRBK) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_ELECOM, USB_DEVICE_ID_ELECOM_M_DT1URBK) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_ELECOM, USB_DEVICE_ID_ELECOM_M_DT1DRBK) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_ELECOM, USB_DEVICE_ID_ELECOM_M_HT1URBK) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_ELECOM, USB_DEVICE_ID_ELECOM_M_HT1DRBK_010D) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_ELECOM, USB_DEVICE_ID_ELECOM_M_HT1DRBK_011C) },
	{ }
};
MODULE_DEVICE_TABLE(hid, elecom_devices);

static struct hid_driver elecom_driver = {
	.name = "elecom",
	.id_table = elecom_devices,
	.report_fixup = elecom_report_fixup
};

module_hid_driver(elecom_driver);

MODULE_DESCRIPTION("HID driver for ELECOM devices");
MODULE_LICENSE("GPL");
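/*
 * Illustrative sketch (not part of the driver): the byte-level effect of
 * mouse_button_fixup() above on a hypothetical button field declaring
 * 5 buttons plus 3 padding bits. The offsets in this toy descriptor are
 * invented; the real per-device offsets are the ones passed to
 * mouse_button_fixup() in elecom_report_fixup(). After the fixup, the
 * button count and usage maximum grow to 8 and the padding shrinks to 0,
 * so the total report length is unchanged.
 */
#include <stdint.h>

static uint8_t sketch_rdesc[] = {
	0x29, 0x05,	/* [0] Usage Maximum (5)		*/
	0x95, 0x05,	/* [2] Report Count (5), button bits	*/
	0x75, 0x01,	/* [4] Report Size (1)			*/
	0x75, 0x03,	/* [6] Report Size (3), padding bits	*/
};

static void sketch_button_fixup(int nbuttons)
{
	sketch_rdesc[1] = nbuttons;	 /* usage maximum    -> 8 */
	sketch_rdesc[3] = nbuttons;	 /* button bit count -> 8 */
	sketch_rdesc[7] = 8 - nbuttons;	 /* padding bits     -> 0 */
}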
// SPDX-License-Identifier: GPL-2.0-only
/* Copyright (C) 2003-2013 Jozsef Kadlecsik <kadlec@netfilter.org> */

/* Kernel module implementing an IP set type: the hash:ip,port,ip type */

#include <linux/jhash.h>
#include <linux/module.h>
#include <linux/ip.h>
#include <linux/skbuff.h>
#include <linux/errno.h>
#include <linux/random.h>
#include <net/ip.h>
#include <net/ipv6.h>
#include <net/netlink.h>
#include <net/tcp.h>

#include <linux/netfilter.h>
#include <linux/netfilter/ipset/pfxlen.h>
#include <linux/netfilter/ipset/ip_set.h>
#include <linux/netfilter/ipset/ip_set_getport.h>
#include <linux/netfilter/ipset/ip_set_hash.h>

#define IPSET_TYPE_REV_MIN	0
/*				1    SCTP and UDPLITE support added */
/*				2    Counters support added */
/*				3    Comments support added */
/*				4    Forceadd support added */
/*				5    skbinfo support added */
#define IPSET_TYPE_REV_MAX	6 /* bucketsize, initval support added */

MODULE_LICENSE("GPL");
MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@netfilter.org>");
IP_SET_MODULE_DESC("hash:ip,port,ip", IPSET_TYPE_REV_MIN, IPSET_TYPE_REV_MAX);
MODULE_ALIAS("ip_set_hash:ip,port,ip");

/* Type specific function prefix */
#define HTYPE		hash_ipportip

/* IPv4 variant */

/* Member elements */
struct hash_ipportip4_elem {
	__be32 ip;
	__be32 ip2;
	__be16 port;
	u8 proto;
	u8 padding;
};

static bool
hash_ipportip4_data_equal(const struct hash_ipportip4_elem *ip1,
			  const struct hash_ipportip4_elem *ip2,
			  u32 *multi)
{
	return ip1->ip == ip2->ip &&
	       ip1->ip2 == ip2->ip2 &&
	       ip1->port == ip2->port &&
	       ip1->proto == ip2->proto;
}

static bool
hash_ipportip4_data_list(struct sk_buff *skb,
			 const struct hash_ipportip4_elem *data)
{
	if (nla_put_ipaddr4(skb, IPSET_ATTR_IP, data->ip) ||
	    nla_put_ipaddr4(skb, IPSET_ATTR_IP2, data->ip2) ||
	    nla_put_net16(skb, IPSET_ATTR_PORT, data->port) ||
	    nla_put_u8(skb, IPSET_ATTR_PROTO, data->proto))
		goto nla_put_failure;
	return false;

nla_put_failure:
	return true;
}

static void
hash_ipportip4_data_next(struct hash_ipportip4_elem *next,
			 const struct hash_ipportip4_elem
*d) { next->ip = d->ip; next->port = d->port; } /* Common functions */ #define MTYPE hash_ipportip4 #define HOST_MASK 32 #include "ip_set_hash_gen.h" static int hash_ipportip4_kadt(struct ip_set *set, const struct sk_buff *skb, const struct xt_action_param *par, enum ipset_adt adt, struct ip_set_adt_opt *opt) { ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_ipportip4_elem e = { .ip = 0 }; struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set); if (!ip_set_get_ip4_port(skb, opt->flags & IPSET_DIM_TWO_SRC, &e.port, &e.proto)) return -EINVAL; ip4addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &e.ip); ip4addrptr(skb, opt->flags & IPSET_DIM_THREE_SRC, &e.ip2); return adtfn(set, &e, &ext, &opt->ext, opt->cmdflags); } static int hash_ipportip4_uadt(struct ip_set *set, struct nlattr *tb[], enum ipset_adt adt, u32 *lineno, u32 flags, bool retried) { struct hash_ipportip4 *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_ipportip4_elem e = { .ip = 0 }; struct ip_set_ext ext = IP_SET_INIT_UEXT(set); u32 ip, ip_to = 0, p = 0, port, port_to, i = 0; bool with_ports = false; int ret; if (tb[IPSET_ATTR_LINENO]) *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]); if (unlikely(!tb[IPSET_ATTR_IP] || !tb[IPSET_ATTR_IP2] || !ip_set_attr_netorder(tb, IPSET_ATTR_PORT) || !ip_set_optattr_netorder(tb, IPSET_ATTR_PORT_TO))) return -IPSET_ERR_PROTOCOL; ret = ip_set_get_ipaddr4(tb[IPSET_ATTR_IP], &e.ip); if (ret) return ret; ret = ip_set_get_extensions(set, tb, &ext); if (ret) return ret; ret = ip_set_get_ipaddr4(tb[IPSET_ATTR_IP2], &e.ip2); if (ret) return ret; e.port = nla_get_be16(tb[IPSET_ATTR_PORT]); if (tb[IPSET_ATTR_PROTO]) { e.proto = nla_get_u8(tb[IPSET_ATTR_PROTO]); with_ports = ip_set_proto_with_ports(e.proto); if (e.proto == 0) return -IPSET_ERR_INVALID_PROTO; } else { return -IPSET_ERR_MISSING_PROTO; } if (!(with_ports || e.proto == IPPROTO_ICMP)) e.port = 0; if (adt == IPSET_TEST || !(tb[IPSET_ATTR_IP_TO] || tb[IPSET_ATTR_CIDR] || tb[IPSET_ATTR_PORT_TO])) { ret = adtfn(set, &e, &ext, &ext, flags); return ip_set_eexist(ret, flags) ? 0 : ret; } ip_to = ip = ntohl(e.ip); if (tb[IPSET_ATTR_IP_TO]) { ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP_TO], &ip_to); if (ret) return ret; if (ip > ip_to) swap(ip, ip_to); } else if (tb[IPSET_ATTR_CIDR]) { u8 cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]); if (!cidr || cidr > HOST_MASK) return -IPSET_ERR_INVALID_CIDR; ip_set_mask_from_to(ip, ip_to, cidr); } port_to = port = ntohs(e.port); if (with_ports && tb[IPSET_ATTR_PORT_TO]) { port_to = ip_set_get_h16(tb[IPSET_ATTR_PORT_TO]); if (port > port_to) swap(port, port_to); } if (retried) ip = ntohl(h->next.ip); for (; ip <= ip_to; ip++) { p = retried && ip == ntohl(h->next.ip) ? 
ntohs(h->next.port) : port; for (; p <= port_to; p++, i++) { e.ip = htonl(ip); e.port = htons(p); if (i > IPSET_MAX_RANGE) { hash_ipportip4_data_next(&h->next, &e); return -ERANGE; } ret = adtfn(set, &e, &ext, &ext, flags); if (ret && !ip_set_eexist(ret, flags)) return ret; ret = 0; } } return ret; } /* IPv6 variant */ struct hash_ipportip6_elem { union nf_inet_addr ip; union nf_inet_addr ip2; __be16 port; u8 proto; u8 padding; }; /* Common functions */ static bool hash_ipportip6_data_equal(const struct hash_ipportip6_elem *ip1, const struct hash_ipportip6_elem *ip2, u32 *multi) { return ipv6_addr_equal(&ip1->ip.in6, &ip2->ip.in6) && ipv6_addr_equal(&ip1->ip2.in6, &ip2->ip2.in6) && ip1->port == ip2->port && ip1->proto == ip2->proto; } static bool hash_ipportip6_data_list(struct sk_buff *skb, const struct hash_ipportip6_elem *data) { if (nla_put_ipaddr6(skb, IPSET_ATTR_IP, &data->ip.in6) || nla_put_ipaddr6(skb, IPSET_ATTR_IP2, &data->ip2.in6) || nla_put_net16(skb, IPSET_ATTR_PORT, data->port) || nla_put_u8(skb, IPSET_ATTR_PROTO, data->proto)) goto nla_put_failure; return false; nla_put_failure: return true; } static void hash_ipportip6_data_next(struct hash_ipportip6_elem *next, const struct hash_ipportip6_elem *d) { next->port = d->port; } #undef MTYPE #undef HOST_MASK #define MTYPE hash_ipportip6 #define HOST_MASK 128 #define IP_SET_EMIT_CREATE #include "ip_set_hash_gen.h" static int hash_ipportip6_kadt(struct ip_set *set, const struct sk_buff *skb, const struct xt_action_param *par, enum ipset_adt adt, struct ip_set_adt_opt *opt) { ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_ipportip6_elem e = { .ip = { .all = { 0 } } }; struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set); if (!ip_set_get_ip6_port(skb, opt->flags & IPSET_DIM_TWO_SRC, &e.port, &e.proto)) return -EINVAL; ip6addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &e.ip.in6); ip6addrptr(skb, opt->flags & IPSET_DIM_THREE_SRC, &e.ip2.in6); return adtfn(set, &e, &ext, &opt->ext, opt->cmdflags); } static int hash_ipportip6_uadt(struct ip_set *set, struct nlattr *tb[], enum ipset_adt adt, u32 *lineno, u32 flags, bool retried) { const struct hash_ipportip6 *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_ipportip6_elem e = { .ip = { .all = { 0 } } }; struct ip_set_ext ext = IP_SET_INIT_UEXT(set); u32 port, port_to; bool with_ports = false; int ret; if (tb[IPSET_ATTR_LINENO]) *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]); if (unlikely(!tb[IPSET_ATTR_IP] || !tb[IPSET_ATTR_IP2] || !ip_set_attr_netorder(tb, IPSET_ATTR_PORT) || !ip_set_optattr_netorder(tb, IPSET_ATTR_PORT_TO))) return -IPSET_ERR_PROTOCOL; if (unlikely(tb[IPSET_ATTR_IP_TO])) return -IPSET_ERR_HASH_RANGE_UNSUPPORTED; if (unlikely(tb[IPSET_ATTR_CIDR])) { u8 cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]); if (cidr != HOST_MASK) return -IPSET_ERR_INVALID_CIDR; } ret = ip_set_get_ipaddr6(tb[IPSET_ATTR_IP], &e.ip); if (ret) return ret; ret = ip_set_get_extensions(set, tb, &ext); if (ret) return ret; ret = ip_set_get_ipaddr6(tb[IPSET_ATTR_IP2], &e.ip2); if (ret) return ret; e.port = nla_get_be16(tb[IPSET_ATTR_PORT]); if (tb[IPSET_ATTR_PROTO]) { e.proto = nla_get_u8(tb[IPSET_ATTR_PROTO]); with_ports = ip_set_proto_with_ports(e.proto); if (e.proto == 0) return -IPSET_ERR_INVALID_PROTO; } else { return -IPSET_ERR_MISSING_PROTO; } if (!(with_ports || e.proto == IPPROTO_ICMPV6)) e.port = 0; if (adt == IPSET_TEST || !with_ports || !tb[IPSET_ATTR_PORT_TO]) { ret = adtfn(set, &e, &ext, &ext, flags); return ip_set_eexist(ret, flags) ? 
	       0 : ret;
	}

	port = ntohs(e.port);
	port_to = ip_set_get_h16(tb[IPSET_ATTR_PORT_TO]);
	if (port > port_to)
		swap(port, port_to);

	if (retried)
		port = ntohs(h->next.port);
	for (; port <= port_to; port++) {
		e.port = htons(port);
		ret = adtfn(set, &e, &ext, &ext, flags);

		if (ret && !ip_set_eexist(ret, flags))
			return ret;

		ret = 0;
	}
	return ret;
}

static struct ip_set_type hash_ipportip_type __read_mostly = {
	.name		= "hash:ip,port,ip",
	.protocol	= IPSET_PROTOCOL,
	.features	= IPSET_TYPE_IP | IPSET_TYPE_PORT | IPSET_TYPE_IP2,
	.dimension	= IPSET_DIM_THREE,
	.family		= NFPROTO_UNSPEC,
	.revision_min	= IPSET_TYPE_REV_MIN,
	.revision_max	= IPSET_TYPE_REV_MAX,
	.create_flags[IPSET_TYPE_REV_MAX] = IPSET_CREATE_FLAG_BUCKETSIZE,
	.create		= hash_ipportip_create,
	.create_policy	= {
		[IPSET_ATTR_HASHSIZE]	= { .type = NLA_U32 },
		[IPSET_ATTR_MAXELEM]	= { .type = NLA_U32 },
		[IPSET_ATTR_INITVAL]	= { .type = NLA_U32 },
		[IPSET_ATTR_BUCKETSIZE]	= { .type = NLA_U8 },
		[IPSET_ATTR_RESIZE]	= { .type = NLA_U8 },
		[IPSET_ATTR_TIMEOUT]	= { .type = NLA_U32 },
		[IPSET_ATTR_CADT_FLAGS]	= { .type = NLA_U32 },
	},
	.adt_policy	= {
		[IPSET_ATTR_IP]		= { .type = NLA_NESTED },
		[IPSET_ATTR_IP_TO]	= { .type = NLA_NESTED },
		[IPSET_ATTR_IP2]	= { .type = NLA_NESTED },
		[IPSET_ATTR_PORT]	= { .type = NLA_U16 },
		[IPSET_ATTR_PORT_TO]	= { .type = NLA_U16 },
		[IPSET_ATTR_CIDR]	= { .type = NLA_U8 },
		[IPSET_ATTR_PROTO]	= { .type = NLA_U8 },
		[IPSET_ATTR_TIMEOUT]	= { .type = NLA_U32 },
		[IPSET_ATTR_LINENO]	= { .type = NLA_U32 },
		[IPSET_ATTR_BYTES]	= { .type = NLA_U64 },
		[IPSET_ATTR_PACKETS]	= { .type = NLA_U64 },
		[IPSET_ATTR_COMMENT]	= { .type = NLA_NUL_STRING,
					    .len  = IPSET_MAX_COMMENT_SIZE },
		[IPSET_ATTR_SKBMARK]	= { .type = NLA_U64 },
		[IPSET_ATTR_SKBPRIO]	= { .type = NLA_U32 },
		[IPSET_ATTR_SKBQUEUE]	= { .type = NLA_U16 },
	},
	.me		= THIS_MODULE,
};

static int __init
hash_ipportip_init(void)
{
	return ip_set_type_register(&hash_ipportip_type);
}

static void __exit
hash_ipportip_fini(void)
{
	rcu_barrier();
	ip_set_type_unregister(&hash_ipportip_type);
}

module_init(hash_ipportip_init);
module_exit(hash_ipportip_fini);
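/*
 * Illustrative sketch (not part of ipset): the range expansion that
 * hash_ipportip4_uadt() above performs for a single userspace "add". An
 * IP range (or CIDR) combined with a port range produces one set element
 * per (ip, port) pair, so e.g. 192.168.0.0/30 with ports 80-81 creates
 * 4 * 2 = 8 elements; the third (second-IP) dimension stays fixed. This
 * helper only counts the pairs the kernel's nested loops would visit.
 */
#include <stdint.h>

static uint64_t sketch_count_elements(uint32_t ip_from, uint32_t ip_to,
				      uint16_t port_from, uint16_t port_to)
{
	/* Mirrors the "for (; ip <= ip_to; ...)" / "for (; p <= port_to; ...)"
	 * nesting in hash_ipportip4_uadt(); there, -ERANGE aborts the
	 * expansion once more than IPSET_MAX_RANGE pairs have been visited.
	 */
	return ((uint64_t)ip_to - ip_from + 1) *
	       ((uint64_t)port_to - port_from + 1);
}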
// SPDX-License-Identifier: GPL-2.0-only
/*
 * Fence mechanism for dma-buf and to allow for asynchronous dma access
 *
 * Copyright (C) 2012 Canonical Ltd
 * Copyright (C) 2012 Texas Instruments
 *
 * Authors:
 * Rob Clark <robdclark@gmail.com>
 * Maarten Lankhorst <maarten.lankhorst@canonical.com>
 */

#include <linux/slab.h>
#include <linux/export.h>
#include <linux/atomic.h>
#include <linux/dma-fence.h>
#include <linux/sched/signal.h>
#include <linux/seq_file.h>

#define CREATE_TRACE_POINTS
#include <trace/events/dma_fence.h>

EXPORT_TRACEPOINT_SYMBOL(dma_fence_emit);
EXPORT_TRACEPOINT_SYMBOL(dma_fence_enable_signal);
EXPORT_TRACEPOINT_SYMBOL(dma_fence_signaled);

static DEFINE_SPINLOCK(dma_fence_stub_lock);
static struct dma_fence dma_fence_stub;

/*
 * fence context counter: each execution context should have its own
 * fence context, this allows checking if fences belong to the same
 * context or not. One device can have multiple separate contexts,
 * and they're used if some engine can run independently of another.
 */
static atomic64_t dma_fence_context_counter = ATOMIC64_INIT(1);

/**
 * DOC: DMA fences overview
 *
 * DMA fences, represented by &struct dma_fence, are the kernel internal
 * synchronization primitive for DMA operations like GPU rendering, video
 * encoding/decoding, or displaying buffers on a screen.
 *
 * A fence is initialized using dma_fence_init() and completed using
 * dma_fence_signal(). Fences are associated with a context, allocated through
 * dma_fence_context_alloc(), and all fences on the same context are
 * fully ordered.
 *
 * Since the purpose of fences is to facilitate cross-device and
 * cross-application synchronization, there are multiple ways to use one:
 *
 * - Individual fences can be exposed as a &sync_file, accessed as a file
 *   descriptor from userspace, created by calling sync_file_create(). This is
 *   called explicit fencing, since userspace passes around explicit
 *   synchronization points.
 *
 * - Some subsystems also have their own explicit fencing primitives, like
 *   &drm_syncobj. Compared to &sync_file, a &drm_syncobj allows the underlying
 *   fence to be updated.
 *
 * - Then there's also implicit fencing, where the synchronization points are
 *   implicitly passed around as part of shared &dma_buf instances. Such
 *   implicit fences are stored in &struct dma_resv through the
 *   &dma_buf.resv pointer.
 */

/**
 * DOC: fence cross-driver contract
 *
 * Since &dma_fence provides a cross-driver contract, all drivers must follow
 * the same rules:
 *
 * * Fences must complete in a reasonable time. Fences which represent kernels
 *   and shaders submitted by userspace, which could run forever, must be backed
 *   up by timeout and gpu hang recovery code. Minimally that code must prevent
 *   further command submission and force complete all in-flight fences, e.g.
 *   when the driver or hardware do not support gpu reset, or if the gpu reset
 *   failed for some reason.
Ideally the driver supports gpu recovery which only * affects the offending userspace context, and no other userspace * submissions. * * * Drivers may have different ideas of what completion within a reasonable * time means. Some hang recovery code uses a fixed timeout, others a mix * between observing forward progress and increasingly strict timeouts. * Drivers should not try to second guess timeout handling of fences from * other drivers. * * * To ensure there's no deadlocks of dma_fence_wait() against other locks * drivers should annotate all code required to reach dma_fence_signal(), * which completes the fences, with dma_fence_begin_signalling() and * dma_fence_end_signalling(). * * * Drivers are allowed to call dma_fence_wait() while holding dma_resv_lock(). * This means any code required for fence completion cannot acquire a * &dma_resv lock. Note that this also pulls in the entire established * locking hierarchy around dma_resv_lock() and dma_resv_unlock(). * * * Drivers are allowed to call dma_fence_wait() from their &shrinker * callbacks. This means any code required for fence completion cannot * allocate memory with GFP_KERNEL. * * * Drivers are allowed to call dma_fence_wait() from their &mmu_notifier * respectively &mmu_interval_notifier callbacks. This means any code required * for fence completion cannot allocate memory with GFP_NOFS or GFP_NOIO. * Only GFP_ATOMIC is permissible, which might fail. * * Note that only GPU drivers have a reasonable excuse for both requiring * &mmu_interval_notifier and &shrinker callbacks at the same time as having to * track asynchronous compute work using &dma_fence. No driver outside of * drivers/gpu should ever call dma_fence_wait() in such contexts. */ static const char *dma_fence_stub_get_name(struct dma_fence *fence) { return "stub"; } static const struct dma_fence_ops dma_fence_stub_ops = { .get_driver_name = dma_fence_stub_get_name, .get_timeline_name = dma_fence_stub_get_name, }; /** * dma_fence_get_stub - return a signaled fence * * Return a stub fence which is already signaled. The fence's * timestamp corresponds to the first time after boot this * function is called. */ struct dma_fence *dma_fence_get_stub(void) { spin_lock(&dma_fence_stub_lock); if (!dma_fence_stub.ops) { dma_fence_init(&dma_fence_stub, &dma_fence_stub_ops, &dma_fence_stub_lock, 0, 0); set_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &dma_fence_stub.flags); dma_fence_signal_locked(&dma_fence_stub); } spin_unlock(&dma_fence_stub_lock); return dma_fence_get(&dma_fence_stub); } EXPORT_SYMBOL(dma_fence_get_stub); /** * dma_fence_allocate_private_stub - return a private, signaled fence * @timestamp: timestamp when the fence was signaled * * Return a newly allocated and signaled stub fence. */ struct dma_fence *dma_fence_allocate_private_stub(ktime_t timestamp) { struct dma_fence *fence; fence = kzalloc(sizeof(*fence), GFP_KERNEL); if (fence == NULL) return NULL; dma_fence_init(fence, &dma_fence_stub_ops, &dma_fence_stub_lock, 0, 0); set_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &fence->flags); dma_fence_signal_timestamp(fence, timestamp); return fence; } EXPORT_SYMBOL(dma_fence_allocate_private_stub); /** * dma_fence_context_alloc - allocate an array of fence contexts * @num: amount of contexts to allocate * * This function will return the first index of the number of fence contexts * allocated. The fence context is used for setting &dma_fence.context to a * unique number by passing the context to dma_fence_init(). 
*/ u64 dma_fence_context_alloc(unsigned num) { WARN_ON(!num); return atomic64_fetch_add(num, &dma_fence_context_counter); } EXPORT_SYMBOL(dma_fence_context_alloc); /** * DOC: fence signalling annotation * * Proving correctness of all the kernel code around &dma_fence through code * review and testing is tricky for a few reasons: * * * It is a cross-driver contract, and therefore all drivers must follow the * same rules for lock nesting order, calling contexts for various functions * and anything else significant for in-kernel interfaces. But it is also * impossible to test all drivers in a single machine, hence brute-force N vs. * N testing of all combinations is impossible. Even just limiting to the * possible combinations is infeasible. * * * There is an enormous amount of driver code involved. For render drivers * there's the tail of command submission, after fences are published, * scheduler code, interrupt and workers to process job completion, * and timeout, gpu reset and gpu hang recovery code. Plus for integration * with core mm with have &mmu_notifier, respectively &mmu_interval_notifier, * and &shrinker. For modesetting drivers there's the commit tail functions * between when fences for an atomic modeset are published, and when the * corresponding vblank completes, including any interrupt processing and * related workers. Auditing all that code, across all drivers, is not * feasible. * * * Due to how many other subsystems are involved and the locking hierarchies * this pulls in there is extremely thin wiggle-room for driver-specific * differences. &dma_fence interacts with almost all of the core memory * handling through page fault handlers via &dma_resv, dma_resv_lock() and * dma_resv_unlock(). On the other side it also interacts through all * allocation sites through &mmu_notifier and &shrinker. * * Furthermore lockdep does not handle cross-release dependencies, which means * any deadlocks between dma_fence_wait() and dma_fence_signal() can't be caught * at runtime with some quick testing. The simplest example is one thread * waiting on a &dma_fence while holding a lock:: * * lock(A); * dma_fence_wait(B); * unlock(A); * * while the other thread is stuck trying to acquire the same lock, which * prevents it from signalling the fence the previous thread is stuck waiting * on:: * * lock(A); * unlock(A); * dma_fence_signal(B); * * By manually annotating all code relevant to signalling a &dma_fence we can * teach lockdep about these dependencies, which also helps with the validation * headache since now lockdep can check all the rules for us:: * * cookie = dma_fence_begin_signalling(); * lock(A); * unlock(A); * dma_fence_signal(B); * dma_fence_end_signalling(cookie); * * For using dma_fence_begin_signalling() and dma_fence_end_signalling() to * annotate critical sections the following rules need to be observed: * * * All code necessary to complete a &dma_fence must be annotated, from the * point where a fence is accessible to other threads, to the point where * dma_fence_signal() is called. Un-annotated code can contain deadlock issues, * and due to the very strict rules and many corner cases it is infeasible to * catch these just with review or normal stress testing. * * * &struct dma_resv deserves a special note, since the readers are only * protected by rcu. This means the signalling critical section starts as soon * as the new fences are installed, even before dma_resv_unlock() is called. 
* * * The only exception are fast paths and opportunistic signalling code, which * calls dma_fence_signal() purely as an optimization, but is not required to * guarantee completion of a &dma_fence. The usual example is a wait IOCTL * which calls dma_fence_signal(), while the mandatory completion path goes * through a hardware interrupt and possible job completion worker. * * * To aid composability of code, the annotations can be freely nested, as long * as the overall locking hierarchy is consistent. The annotations also work * both in interrupt and process context. Due to implementation details this * requires that callers pass an opaque cookie from * dma_fence_begin_signalling() to dma_fence_end_signalling(). * * * Validation against the cross driver contract is implemented by priming * lockdep with the relevant hierarchy at boot-up. This means even just * testing with a single device is enough to validate a driver, at least as * far as deadlocks with dma_fence_wait() against dma_fence_signal() are * concerned. */ #ifdef CONFIG_LOCKDEP static struct lockdep_map dma_fence_lockdep_map = { .name = "dma_fence_map" }; /** * dma_fence_begin_signalling - begin a critical DMA fence signalling section * * Drivers should use this to annotate the beginning of any code section * required to eventually complete &dma_fence by calling dma_fence_signal(). * * The end of these critical sections are annotated with * dma_fence_end_signalling(). * * Returns: * * Opaque cookie needed by the implementation, which needs to be passed to * dma_fence_end_signalling(). */ bool dma_fence_begin_signalling(void) { /* explicitly nesting ... */ if (lock_is_held_type(&dma_fence_lockdep_map, 1)) return true; /* rely on might_sleep check for soft/hardirq locks */ if (in_atomic()) return true; /* ... and non-recursive successful read_trylock */ lock_acquire(&dma_fence_lockdep_map, 0, 1, 1, 1, NULL, _RET_IP_); return false; } EXPORT_SYMBOL(dma_fence_begin_signalling); /** * dma_fence_end_signalling - end a critical DMA fence signalling section * @cookie: opaque cookie from dma_fence_begin_signalling() * * Closes a critical section annotation opened by dma_fence_begin_signalling(). */ void dma_fence_end_signalling(bool cookie) { if (cookie) return; lock_release(&dma_fence_lockdep_map, _RET_IP_); } EXPORT_SYMBOL(dma_fence_end_signalling); void __dma_fence_might_wait(void) { bool tmp; tmp = lock_is_held_type(&dma_fence_lockdep_map, 1); if (tmp) lock_release(&dma_fence_lockdep_map, _THIS_IP_); lock_map_acquire(&dma_fence_lockdep_map); lock_map_release(&dma_fence_lockdep_map); if (tmp) lock_acquire(&dma_fence_lockdep_map, 0, 1, 1, 1, NULL, _THIS_IP_); } #endif /** * dma_fence_signal_timestamp_locked - signal completion of a fence * @fence: the fence to signal * @timestamp: fence signal timestamp in kernel's CLOCK_MONOTONIC time domain * * Signal completion for software callbacks on a fence, this will unblock * dma_fence_wait() calls and run all the callbacks added with * dma_fence_add_callback(). Can be called multiple times, but since a fence * can only go from the unsignaled to the signaled state and not back, it will * only be effective the first time. Set the timestamp provided as the fence * signal timestamp. * * Unlike dma_fence_signal_timestamp(), this function must be called with * &dma_fence.lock held. * * Returns 0 on success and a negative error value when @fence has been * signalled already. 
*/ int dma_fence_signal_timestamp_locked(struct dma_fence *fence, ktime_t timestamp) { struct dma_fence_cb *cur, *tmp; struct list_head cb_list; lockdep_assert_held(fence->lock); if (unlikely(test_and_set_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags))) return -EINVAL; /* Stash the cb_list before replacing it with the timestamp */ list_replace(&fence->cb_list, &cb_list); fence->timestamp = timestamp; set_bit(DMA_FENCE_FLAG_TIMESTAMP_BIT, &fence->flags); trace_dma_fence_signaled(fence); list_for_each_entry_safe(cur, tmp, &cb_list, node) { INIT_LIST_HEAD(&cur->node); cur->func(fence, cur); } return 0; } EXPORT_SYMBOL(dma_fence_signal_timestamp_locked); /** * dma_fence_signal_timestamp - signal completion of a fence * @fence: the fence to signal * @timestamp: fence signal timestamp in kernel's CLOCK_MONOTONIC time domain * * Signal completion for software callbacks on a fence, this will unblock * dma_fence_wait() calls and run all the callbacks added with * dma_fence_add_callback(). Can be called multiple times, but since a fence * can only go from the unsignaled to the signaled state and not back, it will * only be effective the first time. Set the timestamp provided as the fence * signal timestamp. * * Returns 0 on success and a negative error value when @fence has been * signalled already. */ int dma_fence_signal_timestamp(struct dma_fence *fence, ktime_t timestamp) { unsigned long flags; int ret; if (WARN_ON(!fence)) return -EINVAL; spin_lock_irqsave(fence->lock, flags); ret = dma_fence_signal_timestamp_locked(fence, timestamp); spin_unlock_irqrestore(fence->lock, flags); return ret; } EXPORT_SYMBOL(dma_fence_signal_timestamp); /** * dma_fence_signal_locked - signal completion of a fence * @fence: the fence to signal * * Signal completion for software callbacks on a fence, this will unblock * dma_fence_wait() calls and run all the callbacks added with * dma_fence_add_callback(). Can be called multiple times, but since a fence * can only go from the unsignaled to the signaled state and not back, it will * only be effective the first time. * * Unlike dma_fence_signal(), this function must be called with &dma_fence.lock * held. * * Returns 0 on success and a negative error value when @fence has been * signalled already. */ int dma_fence_signal_locked(struct dma_fence *fence) { return dma_fence_signal_timestamp_locked(fence, ktime_get()); } EXPORT_SYMBOL(dma_fence_signal_locked); /** * dma_fence_signal - signal completion of a fence * @fence: the fence to signal * * Signal completion for software callbacks on a fence, this will unblock * dma_fence_wait() calls and run all the callbacks added with * dma_fence_add_callback(). Can be called multiple times, but since a fence * can only go from the unsignaled to the signaled state and not back, it will * only be effective the first time. * * Returns 0 on success and a negative error value when @fence has been * signalled already. 
 */
int dma_fence_signal(struct dma_fence *fence)
{
	unsigned long flags;
	int ret;
	bool tmp;

	if (WARN_ON(!fence))
		return -EINVAL;

	tmp = dma_fence_begin_signalling();

	spin_lock_irqsave(fence->lock, flags);
	ret = dma_fence_signal_timestamp_locked(fence, ktime_get());
	spin_unlock_irqrestore(fence->lock, flags);

	dma_fence_end_signalling(tmp);

	return ret;
}
EXPORT_SYMBOL(dma_fence_signal);

/**
 * dma_fence_wait_timeout - sleep until the fence gets signaled
 * or until timeout elapses
 * @fence: the fence to wait on
 * @intr: if true, do an interruptible wait
 * @timeout: timeout value in jiffies, or MAX_SCHEDULE_TIMEOUT
 *
 * Returns -ERESTARTSYS if interrupted, 0 if the wait timed out, or the
 * remaining timeout in jiffies on success. Other error values may be
 * returned on custom implementations.
 *
 * Performs a synchronous wait on this fence. It is assumed the caller
 * directly or indirectly (buf-mgr between reservation and committing)
 * holds a reference to the fence, otherwise the fence might be
 * freed before return, resulting in undefined behavior.
 *
 * See also dma_fence_wait() and dma_fence_wait_any_timeout().
 */
signed long
dma_fence_wait_timeout(struct dma_fence *fence, bool intr, signed long timeout)
{
	signed long ret;

	if (WARN_ON(timeout < 0))
		return -EINVAL;

	might_sleep();

	__dma_fence_might_wait();

	dma_fence_enable_sw_signaling(fence);

	trace_dma_fence_wait_start(fence);
	if (fence->ops->wait)
		ret = fence->ops->wait(fence, intr, timeout);
	else
		ret = dma_fence_default_wait(fence, intr, timeout);
	trace_dma_fence_wait_end(fence);
	return ret;
}
EXPORT_SYMBOL(dma_fence_wait_timeout);

/**
 * dma_fence_release - default release function for fences
 * @kref: &dma_fence.refcount
 *
 * This is the default release function for &dma_fence. Drivers shouldn't call
 * this directly, but instead call dma_fence_put().
 */
void dma_fence_release(struct kref *kref)
{
	struct dma_fence *fence =
		container_of(kref, struct dma_fence, refcount);

	trace_dma_fence_destroy(fence);

	if (WARN(!list_empty(&fence->cb_list) &&
		 !test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags),
		 "Fence %s:%s:%llx:%llx released with pending signals!\n",
		 fence->ops->get_driver_name(fence),
		 fence->ops->get_timeline_name(fence),
		 fence->context, fence->seqno)) {
		unsigned long flags;

		/*
		 * Failed to signal before release, likely a refcounting issue.
		 *
		 * This should never happen, but if it does make sure that we
		 * don't leave chains dangling. We set the error flag first
		 * so that the callbacks know this signal is due to an error.
		 */
		spin_lock_irqsave(fence->lock, flags);
		fence->error = -EDEADLK;
		dma_fence_signal_locked(fence);
		spin_unlock_irqrestore(fence->lock, flags);
	}

	if (fence->ops->release)
		fence->ops->release(fence);
	else
		dma_fence_free(fence);
}
EXPORT_SYMBOL(dma_fence_release);

/**
 * dma_fence_free - default release function for &dma_fence.
 * @fence: fence to release
 *
 * This is the default implementation for &dma_fence_ops.release. It calls
 * kfree_rcu() on @fence.
*/ void dma_fence_free(struct dma_fence *fence) { kfree_rcu(fence, rcu); } EXPORT_SYMBOL(dma_fence_free); static bool __dma_fence_enable_signaling(struct dma_fence *fence) { bool was_set; lockdep_assert_held(fence->lock); was_set = test_and_set_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &fence->flags); if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags)) return false; if (!was_set && fence->ops->enable_signaling) { trace_dma_fence_enable_signal(fence); if (!fence->ops->enable_signaling(fence)) { dma_fence_signal_locked(fence); return false; } } return true; } /** * dma_fence_enable_sw_signaling - enable signaling on fence * @fence: the fence to enable * * This will request for sw signaling to be enabled, to make the fence * complete as soon as possible. This calls &dma_fence_ops.enable_signaling * internally. */ void dma_fence_enable_sw_signaling(struct dma_fence *fence) { unsigned long flags; spin_lock_irqsave(fence->lock, flags); __dma_fence_enable_signaling(fence); spin_unlock_irqrestore(fence->lock, flags); } EXPORT_SYMBOL(dma_fence_enable_sw_signaling); /** * dma_fence_add_callback - add a callback to be called when the fence * is signaled * @fence: the fence to wait on * @cb: the callback to register * @func: the function to call * * Add a software callback to the fence. The caller should keep a reference to * the fence. * * @cb will be initialized by dma_fence_add_callback(), no initialization * by the caller is required. Any number of callbacks can be registered * to a fence, but a callback can only be registered to one fence at a time. * * If fence is already signaled, this function will return -ENOENT (and * *not* call the callback). * * Note that the callback can be called from an atomic context or irq context. * * Returns 0 in case of success, -ENOENT if the fence is already signaled * and -EINVAL in case of error. */ int dma_fence_add_callback(struct dma_fence *fence, struct dma_fence_cb *cb, dma_fence_func_t func) { unsigned long flags; int ret = 0; if (WARN_ON(!fence || !func)) return -EINVAL; if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags)) { INIT_LIST_HEAD(&cb->node); return -ENOENT; } spin_lock_irqsave(fence->lock, flags); if (__dma_fence_enable_signaling(fence)) { cb->func = func; list_add_tail(&cb->node, &fence->cb_list); } else { INIT_LIST_HEAD(&cb->node); ret = -ENOENT; } spin_unlock_irqrestore(fence->lock, flags); return ret; } EXPORT_SYMBOL(dma_fence_add_callback); /** * dma_fence_get_status - returns the status upon completion * @fence: the dma_fence to query * * This wraps dma_fence_get_status_locked() to return the error status * condition on a signaled fence. See dma_fence_get_status_locked() for more * details. * * Returns 0 if the fence has not yet been signaled, 1 if the fence has * been signaled without an error condition, or a negative error code * if the fence has been completed in err. */ int dma_fence_get_status(struct dma_fence *fence) { unsigned long flags; int status; spin_lock_irqsave(fence->lock, flags); status = dma_fence_get_status_locked(fence); spin_unlock_irqrestore(fence->lock, flags); return status; } EXPORT_SYMBOL(dma_fence_get_status); /** * dma_fence_remove_callback - remove a callback from the signaling list * @fence: the fence to wait on * @cb: the callback to remove * * Remove a previously queued callback from the fence. This function returns * true if the callback is successfully removed, or false if the fence has * already been signaled. 
* * *WARNING*: * Cancelling a callback should only be done if you really know what you're * doing, since deadlocks and race conditions could occur all too easily. For * this reason, it should only ever be done on hardware lockup recovery, * with a reference held to the fence. * * Behaviour is undefined if @cb has not been added to @fence using * dma_fence_add_callback() beforehand. */ bool dma_fence_remove_callback(struct dma_fence *fence, struct dma_fence_cb *cb) { unsigned long flags; bool ret; spin_lock_irqsave(fence->lock, flags); ret = !list_empty(&cb->node); if (ret) list_del_init(&cb->node); spin_unlock_irqrestore(fence->lock, flags); return ret; } EXPORT_SYMBOL(dma_fence_remove_callback); struct default_wait_cb { struct dma_fence_cb base; struct task_struct *task; }; static void dma_fence_default_wait_cb(struct dma_fence *fence, struct dma_fence_cb *cb) { struct default_wait_cb *wait = container_of(cb, struct default_wait_cb, base); wake_up_state(wait->task, TASK_NORMAL); } /** * dma_fence_default_wait - default sleep until the fence gets signaled * or until timeout elapses * @fence: the fence to wait on * @intr: if true, do an interruptible wait * @timeout: timeout value in jiffies, or MAX_SCHEDULE_TIMEOUT * * Returns -ERESTARTSYS if interrupted, 0 if the wait timed out, or the * remaining timeout in jiffies on success. If timeout is zero the value one is * returned if the fence is already signaled for consistency with other * functions taking a jiffies timeout. */ signed long dma_fence_default_wait(struct dma_fence *fence, bool intr, signed long timeout) { struct default_wait_cb cb; unsigned long flags; signed long ret = timeout ? timeout : 1; spin_lock_irqsave(fence->lock, flags); if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags)) goto out; if (intr && signal_pending(current)) { ret = -ERESTARTSYS; goto out; } if (!timeout) { ret = 0; goto out; } cb.base.func = dma_fence_default_wait_cb; cb.task = current; list_add(&cb.base.node, &fence->cb_list); while (!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags) && ret > 0) { if (intr) __set_current_state(TASK_INTERRUPTIBLE); else __set_current_state(TASK_UNINTERRUPTIBLE); spin_unlock_irqrestore(fence->lock, flags); ret = schedule_timeout(ret); spin_lock_irqsave(fence->lock, flags); if (ret > 0 && intr && signal_pending(current)) ret = -ERESTARTSYS; } if (!list_empty(&cb.base.node)) list_del(&cb.base.node); __set_current_state(TASK_RUNNING); out: spin_unlock_irqrestore(fence->lock, flags); return ret; } EXPORT_SYMBOL(dma_fence_default_wait); static bool dma_fence_test_signaled_any(struct dma_fence **fences, uint32_t count, uint32_t *idx) { int i; for (i = 0; i < count; ++i) { struct dma_fence *fence = fences[i]; if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags)) { if (idx) *idx = i; return true; } } return false; } /** * dma_fence_wait_any_timeout - sleep until any fence gets signaled * or until timeout elapses * @fences: array of fences to wait on * @count: number of fences to wait on * @intr: if true, do an interruptible wait * @timeout: timeout value in jiffies, or MAX_SCHEDULE_TIMEOUT * @idx: used to store the first signaled fence index, meaningful only on * positive return * * Returns -EINVAL on custom fence wait implementation, -ERESTARTSYS if * interrupted, 0 if the wait timed out, or the remaining timeout in jiffies * on success. * * Synchronous waits for the first fence in the array to be signaled. 
The * caller needs to hold a reference to all fences in the array, otherwise a * fence might be freed before return, resulting in undefined behavior. * * See also dma_fence_wait() and dma_fence_wait_timeout(). */ signed long dma_fence_wait_any_timeout(struct dma_fence **fences, uint32_t count, bool intr, signed long timeout, uint32_t *idx) { struct default_wait_cb *cb; signed long ret = timeout; unsigned i; if (WARN_ON(!fences || !count || timeout < 0)) return -EINVAL; if (timeout == 0) { for (i = 0; i < count; ++i) if (dma_fence_is_signaled(fences[i])) { if (idx) *idx = i; return 1; } return 0; } cb = kcalloc(count, sizeof(struct default_wait_cb), GFP_KERNEL); if (cb == NULL) { ret = -ENOMEM; goto err_free_cb; } for (i = 0; i < count; ++i) { struct dma_fence *fence = fences[i]; cb[i].task = current; if (dma_fence_add_callback(fence, &cb[i].base, dma_fence_default_wait_cb)) { /* This fence is already signaled */ if (idx) *idx = i; goto fence_rm_cb; } } while (ret > 0) { if (intr) set_current_state(TASK_INTERRUPTIBLE); else set_current_state(TASK_UNINTERRUPTIBLE); if (dma_fence_test_signaled_any(fences, count, idx)) break; ret = schedule_timeout(ret); if (ret > 0 && intr && signal_pending(current)) ret = -ERESTARTSYS; } __set_current_state(TASK_RUNNING); fence_rm_cb: while (i-- > 0) dma_fence_remove_callback(fences[i], &cb[i].base); err_free_cb: kfree(cb); return ret; } EXPORT_SYMBOL(dma_fence_wait_any_timeout); /** * DOC: deadline hints * * In an ideal world, it would be possible to pipeline a workload sufficiently * that a utilization based device frequency governor could arrive at a minimum * frequency that meets the requirements of the use-case, in order to minimize * power consumption. But in the real world there are many workloads which * defy this ideal. For example, but not limited to: * * * Workloads that ping-pong between device and CPU, with alternating periods * of CPU waiting for device, and device waiting on CPU. This can result in * devfreq and cpufreq seeing idle time in their respective domains and in * result reduce frequency. * * * Workloads that interact with a periodic time based deadline, such as double * buffered GPU rendering vs vblank sync'd page flipping. In this scenario, * missing a vblank deadline results in an *increase* in idle time on the GPU * (since it has to wait an additional vblank period), sending a signal to * the GPU's devfreq to reduce frequency, when in fact the opposite is what is * needed. * * To this end, deadline hint(s) can be set on a &dma_fence via &dma_fence_set_deadline * (or indirectly via userspace facing ioctls like &sync_set_deadline). * The deadline hint provides a way for the waiting driver, or userspace, to * convey an appropriate sense of urgency to the signaling driver. * * A deadline hint is given in absolute ktime (CLOCK_MONOTONIC for userspace * facing APIs). The time could either be some point in the future (such as * the vblank based deadline for page-flipping, or the start of a compositor's * composition cycle), or the current time to indicate an immediate deadline * hint (Ie. forward progress cannot be made until this fence is signaled). * * Multiple deadlines may be set on a given fence, even in parallel. See the * documentation for &dma_fence_ops.set_deadline. * * The deadline hint is just that, a hint. The driver that created the fence * may react by increasing frequency, making different scheduling choices, etc. * Or doing nothing at all. 
 */

/**
 * dma_fence_set_deadline - set desired fence-wait deadline hint
 * @fence:    the fence that is to be waited on
 * @deadline: the time by which the waiter hopes for the fence to be
 *            signaled
 *
 * Give the fence signaler a hint about an upcoming deadline, such as
 * vblank, by which point the waiter would prefer the fence to be
 * signaled. This is intended to give feedback to the fence signaler
 * to aid in power management decisions, such as boosting GPU frequency
 * if a periodic vblank deadline is approaching but the fence is not
 * yet signaled.
 */
void dma_fence_set_deadline(struct dma_fence *fence, ktime_t deadline)
{
	if (fence->ops->set_deadline && !dma_fence_is_signaled(fence))
		fence->ops->set_deadline(fence, deadline);
}
EXPORT_SYMBOL(dma_fence_set_deadline);

/**
 * dma_fence_describe - Dump fence description into seq_file
 * @fence: the fence to describe
 * @seq: the seq_file to put the textual description into
 *
 * Dump a textual description of the fence and its state into the seq_file.
 */
void dma_fence_describe(struct dma_fence *fence, struct seq_file *seq)
{
	seq_printf(seq, "%s %s seq %llu %ssignalled\n",
		   fence->ops->get_driver_name(fence),
		   fence->ops->get_timeline_name(fence),
		   fence->seqno, dma_fence_is_signaled(fence) ? "" : "un");
}
EXPORT_SYMBOL(dma_fence_describe);

/**
 * dma_fence_init - Initialize a custom fence.
 * @fence: the fence to initialize
 * @ops: the dma_fence_ops for operations on this fence
 * @lock: the irqsafe spinlock to use for locking this fence
 * @context: the execution context this fence is run on
 * @seqno: a linear increasing sequence number for this context
 *
 * Initializes an allocated fence, the caller doesn't have to keep its
 * refcount after committing with this fence, but it will need to hold a
 * refcount again if &dma_fence_ops.enable_signaling gets called.
 *
 * context and seqno are used for easy comparison between fences, allowing
 * to check which fence is later by simply using dma_fence_later().
 */
void
dma_fence_init(struct dma_fence *fence, const struct dma_fence_ops *ops,
	       spinlock_t *lock, u64 context, u64 seqno)
{
	BUG_ON(!lock);
	BUG_ON(!ops || !ops->get_driver_name || !ops->get_timeline_name);

	kref_init(&fence->refcount);
	fence->ops = ops;
	INIT_LIST_HEAD(&fence->cb_list);
	fence->lock = lock;
	fence->context = context;
	fence->seqno = seqno;
	fence->flags = 0UL;
	fence->error = 0;

	trace_dma_fence_init(fence);
}
EXPORT_SYMBOL(dma_fence_init);
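/*
 * A minimal sketch of the driver-side lifecycle documented above, on a
 * hypothetical "demo" timeline. demo_fence_ops, demo_lock and demo_run()
 * are illustrative names, not part of the dma-fence API; only the
 * dma_fence_*() calls are real.
 */
static DEFINE_SPINLOCK(demo_lock);

static const char *demo_get_name(struct dma_fence *fence)
{
	return "demo";
}

static const struct dma_fence_ops demo_fence_ops = {
	.get_driver_name	= demo_get_name,
	.get_timeline_name	= demo_get_name,
};

static int demo_run(void)
{
	struct dma_fence *fence;
	u64 context = dma_fence_context_alloc(1);

	fence = kzalloc(sizeof(*fence), GFP_KERNEL);
	if (!fence)
		return -ENOMEM;

	/* seqno 1 on a fresh context; fences on one context are ordered */
	dma_fence_init(fence, &demo_fence_ops, &demo_lock, context, 1);

	/* hardware (or, in this sketch, the CPU) completes the work ... */
	dma_fence_signal(fence);

	/* ... so a bounded wait returns the remaining jiffies, not 0 */
	WARN_ON(dma_fence_wait_timeout(fence, false, HZ) <= 0);

	/* last reference: dma_fence_release() frees via kfree_rcu() */
	dma_fence_put(fence);
	return 0;
}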
// SPDX-License-Identifier: GPL-2.0
/*
 * Xsens MT USB driver
 *
 * Copyright (C) 2013 Xsens <info@xsens.com>
 */

#include <linux/kernel.h>
#include <linux/tty.h>
#include <linux/module.h>
#include <linux/usb.h>
#include <linux/usb/serial.h>
#include <linux/uaccess.h>

#define XSENS_VID 0x2639

#define MTi_10_IMU_PID		0x0001
#define MTi_20_VRU_PID		0x0002
#define MTi_30_AHRS_PID		0x0003
#define MTi_100_IMU_PID		0x0011
#define MTi_200_VRU_PID		0x0012
#define MTi_300_AHRS_PID	0x0013
#define MTi_G_700_GPS_INS_PID	0x0017

static const struct usb_device_id id_table[] = {
	{ USB_DEVICE(XSENS_VID, MTi_10_IMU_PID) },
	{ USB_DEVICE(XSENS_VID, MTi_20_VRU_PID) },
	{ USB_DEVICE(XSENS_VID, MTi_30_AHRS_PID) },
	{ USB_DEVICE(XSENS_VID, MTi_100_IMU_PID) },
	{ USB_DEVICE(XSENS_VID, MTi_200_VRU_PID) },
	{ USB_DEVICE(XSENS_VID, MTi_300_AHRS_PID) },
	{ USB_DEVICE(XSENS_VID, MTi_G_700_GPS_INS_PID) },
	{ },
};
MODULE_DEVICE_TABLE(usb, id_table);

static int xsens_mt_probe(struct usb_serial *serial,
			  const struct usb_device_id *id)
{
	if (serial->interface->cur_altsetting->desc.bInterfaceNumber == 1)
		return 0;

	return -ENODEV;
}

static struct usb_serial_driver xsens_mt_device = {
	.driver = {
		.name = "xsens_mt",
	},
	.id_table = id_table,
	.num_ports = 1,

	.probe = xsens_mt_probe,
};

static struct usb_serial_driver * const serial_drivers[] = {
	&xsens_mt_device, NULL
};

module_usb_serial_driver(serial_drivers, id_table);

MODULE_AUTHOR("Frans Klaver <frans.klaver@xsens.com>");
MODULE_DESCRIPTION("USB-serial driver for Xsens motion trackers");
MODULE_LICENSE("GPL v2");
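/*
 * Supporting an additional tracker would be a two-line change, sketched
 * below with a hypothetical product ID (MTi_600_PID and its value are
 * illustrative, not a real device):
 *
 *	#define MTi_600_PID		0x0020
 *
 *	static const struct usb_device_id id_table[] = {
 *		...
 *		{ USB_DEVICE(XSENS_VID, MTi_600_PID) },
 *		{ },
 *	};
 *
 * The probe callback above keeps the binding restricted to interface
 * number 1, which carries the serial endpoints on these devices.
 */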
/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		Definitions for the IP router.
 *
 * Version:	@(#)route.h	1.0.4	05/27/93
 *
 * Authors:	Ross Biro
 *		Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
 * Fixes:
 *		Alan Cox	:	Reformatted. Added ip_rt_local()
 *		Alan Cox	:	Support for TCP parameters.
 *		Alexey Kuznetsov:	Major changes for new routing code.
* Mike McLagan : Routing by source * Robert Olsson : Added rt_cache statistics */ #ifndef _ROUTE_H #define _ROUTE_H #include <net/dst.h> #include <net/inetpeer.h> #include <net/flow.h> #include <net/inet_sock.h> #include <net/ip_fib.h> #include <net/arp.h> #include <net/ndisc.h> #include <net/inet_dscp.h> #include <net/sock.h> #include <linux/in_route.h> #include <linux/rtnetlink.h> #include <linux/rcupdate.h> #include <linux/route.h> #include <linux/ip.h> #include <linux/cache.h> #include <linux/security.h> static inline __u8 ip_sock_rt_scope(const struct sock *sk) { if (sock_flag(sk, SOCK_LOCALROUTE)) return RT_SCOPE_LINK; return RT_SCOPE_UNIVERSE; } static inline __u8 ip_sock_rt_tos(const struct sock *sk) { return READ_ONCE(inet_sk(sk)->tos) & INET_DSCP_MASK; } struct ip_tunnel_info; struct fib_nh; struct fib_info; struct uncached_list; struct rtable { struct dst_entry dst; int rt_genid; unsigned int rt_flags; __u16 rt_type; __u8 rt_is_input; __u8 rt_uses_gateway; int rt_iif; u8 rt_gw_family; /* Info on neighbour */ union { __be32 rt_gw4; struct in6_addr rt_gw6; }; /* Miscellaneous cached information */ u32 rt_mtu_locked:1, rt_pmtu:31; }; #define dst_rtable(_ptr) container_of_const(_ptr, struct rtable, dst) /** * skb_rtable - Returns the skb &rtable * @skb: buffer */ static inline struct rtable *skb_rtable(const struct sk_buff *skb) { return dst_rtable(skb_dst(skb)); } static inline bool rt_is_input_route(const struct rtable *rt) { return rt->rt_is_input != 0; } static inline bool rt_is_output_route(const struct rtable *rt) { return rt->rt_is_input == 0; } static inline __be32 rt_nexthop(const struct rtable *rt, __be32 daddr) { if (rt->rt_gw_family == AF_INET) return rt->rt_gw4; return daddr; } struct ip_rt_acct { __u32 o_bytes; __u32 o_packets; __u32 i_bytes; __u32 i_packets; }; struct rt_cache_stat { unsigned int in_slow_tot; unsigned int in_slow_mc; unsigned int in_no_route; unsigned int in_brd; unsigned int in_martian_dst; unsigned int in_martian_src; unsigned int out_slow_tot; unsigned int out_slow_mc; }; extern struct ip_rt_acct __percpu *ip_rt_acct; struct in_device; int ip_rt_init(void); void rt_cache_flush(struct net *net); void rt_flush_dev(struct net_device *dev); static inline void inet_sk_init_flowi4(const struct inet_sock *inet, struct flowi4 *fl4) { const struct ip_options_rcu *ip4_opt; const struct sock *sk; __be32 daddr; rcu_read_lock(); ip4_opt = rcu_dereference(inet->inet_opt); /* Source routing option overrides the socket destination address */ if (ip4_opt && ip4_opt->opt.srr) daddr = ip4_opt->opt.faddr; else daddr = inet->inet_daddr; rcu_read_unlock(); sk = &inet->sk; flowi4_init_output(fl4, sk->sk_bound_dev_if, READ_ONCE(sk->sk_mark), ip_sock_rt_tos(sk), ip_sock_rt_scope(sk), sk->sk_protocol, inet_sk_flowi_flags(sk), daddr, inet->inet_saddr, inet->inet_dport, inet->inet_sport, sk->sk_uid); security_sk_classify_flow(sk, flowi4_to_flowi_common(fl4)); } struct rtable *ip_route_output_key_hash(struct net *net, struct flowi4 *flp, const struct sk_buff *skb); struct rtable *ip_route_output_key_hash_rcu(struct net *net, struct flowi4 *flp, struct fib_result *res, const struct sk_buff *skb); static inline struct rtable *__ip_route_output_key(struct net *net, struct flowi4 *flp) { return ip_route_output_key_hash(net, flp, NULL); } struct rtable *ip_route_output_flow(struct net *, struct flowi4 *flp, const struct sock *sk); struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_orig); static inline struct rtable *ip_route_output_key(struct net *net, 
struct flowi4 *flp) { return ip_route_output_flow(net, flp, NULL); } /* Simplistic IPv4 route lookup function. * This is only suitable for some particular use cases: since the flowi4 * structure is only partially set, it may bypass some fib-rules. */ static inline struct rtable *ip_route_output(struct net *net, __be32 daddr, __be32 saddr, dscp_t dscp, int oif, __u8 scope) { struct flowi4 fl4 = { .flowi4_oif = oif, .flowi4_tos = inet_dscp_to_dsfield(dscp), .flowi4_scope = scope, .daddr = daddr, .saddr = saddr, }; return ip_route_output_key(net, &fl4); } static inline struct rtable *ip_route_output_ports(struct net *net, struct flowi4 *fl4, const struct sock *sk, __be32 daddr, __be32 saddr, __be16 dport, __be16 sport, __u8 proto, __u8 tos, int oif) { flowi4_init_output(fl4, oif, sk ? READ_ONCE(sk->sk_mark) : 0, tos, sk ? ip_sock_rt_scope(sk) : RT_SCOPE_UNIVERSE, proto, sk ? inet_sk_flowi_flags(sk) : 0, daddr, saddr, dport, sport, sock_net_uid(net, sk)); if (sk) security_sk_classify_flow(sk, flowi4_to_flowi_common(fl4)); return ip_route_output_flow(net, fl4, sk); } enum skb_drop_reason ip_mc_validate_source(struct sk_buff *skb, __be32 daddr, __be32 saddr, dscp_t dscp, struct net_device *dev, struct in_device *in_dev, u32 *itag); enum skb_drop_reason ip_route_input_noref(struct sk_buff *skb, __be32 daddr, __be32 saddr, dscp_t dscp, struct net_device *dev); enum skb_drop_reason ip_route_use_hint(struct sk_buff *skb, __be32 daddr, __be32 saddr, dscp_t dscp, struct net_device *dev, const struct sk_buff *hint); static inline enum skb_drop_reason ip_route_input(struct sk_buff *skb, __be32 dst, __be32 src, dscp_t dscp, struct net_device *devin) { enum skb_drop_reason reason; rcu_read_lock(); reason = ip_route_input_noref(skb, dst, src, dscp, devin); if (!reason) { skb_dst_force(skb); if (!skb_dst(skb)) reason = SKB_DROP_REASON_NOT_SPECIFIED; } rcu_read_unlock(); return reason; } void ipv4_update_pmtu(struct sk_buff *skb, struct net *net, u32 mtu, int oif, u8 protocol); void ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu); void ipv4_redirect(struct sk_buff *skb, struct net *net, int oif, u8 protocol); void ipv4_sk_redirect(struct sk_buff *skb, struct sock *sk); void ip_rt_send_redirect(struct sk_buff *skb); unsigned int inet_addr_type(struct net *net, __be32 addr); unsigned int inet_addr_type_table(struct net *net, __be32 addr, u32 tb_id); unsigned int inet_dev_addr_type(struct net *net, const struct net_device *dev, __be32 addr); unsigned int inet_addr_type_dev_table(struct net *net, const struct net_device *dev, __be32 addr); void ip_rt_multicast_event(struct in_device *); int ip_rt_ioctl(struct net *, unsigned int cmd, struct rtentry *rt); void ip_rt_get_source(u8 *src, struct sk_buff *skb, struct rtable *rt); struct rtable *rt_dst_alloc(struct net_device *dev, unsigned int flags, u16 type, bool noxfrm); struct rtable *rt_dst_clone(struct net_device *dev, struct rtable *rt); struct in_ifaddr; void fib_add_ifaddr(struct in_ifaddr *); void fib_del_ifaddr(struct in_ifaddr *, struct in_ifaddr *); void fib_modify_prefix_metric(struct in_ifaddr *ifa, u32 new_metric); void rt_add_uncached_list(struct rtable *rt); void rt_del_uncached_list(struct rtable *rt); int fib_dump_info_fnhe(struct sk_buff *skb, struct netlink_callback *cb, u32 table_id, struct fib_info *fi, int *fa_index, int fa_start, unsigned int flags); static inline void ip_rt_put(struct rtable *rt) { /* dst_release() accepts a NULL parameter. 
* We rely on dst being first structure in struct rtable */ BUILD_BUG_ON(offsetof(struct rtable, dst) != 0); dst_release(&rt->dst); } extern const __u8 ip_tos2prio[16]; static inline char rt_tos2priority(u8 tos) { return ip_tos2prio[IPTOS_TOS(tos)>>1]; } /* ip_route_connect() and ip_route_newports() work in tandem whilst * binding a socket for a new outgoing connection. * * In order to use IPSEC properly, we must, in the end, have a * route that was looked up using all available keys including source * and destination ports. * * However, if a source port needs to be allocated (the user specified * a wildcard source port) we need to obtain addressing information * in order to perform that allocation. * * So ip_route_connect() looks up a route using wildcarded source and * destination ports in the key, simply so that we can get a pair of * addresses to use for port allocation. * * Later, once the ports are allocated, ip_route_newports() will make * another route lookup if needed to make sure we catch any IPSEC * rules keyed on the port information. * * The callers allocate the flow key on their stack, and must pass in * the same flowi4 object to both the ip_route_connect() and the * ip_route_newports() calls. */ static inline void ip_route_connect_init(struct flowi4 *fl4, __be32 dst, __be32 src, int oif, u8 protocol, __be16 sport, __be16 dport, const struct sock *sk) { __u8 flow_flags = 0; if (inet_test_bit(TRANSPARENT, sk)) flow_flags |= FLOWI_FLAG_ANYSRC; flowi4_init_output(fl4, oif, READ_ONCE(sk->sk_mark), ip_sock_rt_tos(sk), ip_sock_rt_scope(sk), protocol, flow_flags, dst, src, dport, sport, sk->sk_uid); } static inline struct rtable *ip_route_connect(struct flowi4 *fl4, __be32 dst, __be32 src, int oif, u8 protocol, __be16 sport, __be16 dport, const struct sock *sk) { struct net *net = sock_net(sk); struct rtable *rt; ip_route_connect_init(fl4, dst, src, oif, protocol, sport, dport, sk); if (!dst || !src) { rt = __ip_route_output_key(net, fl4); if (IS_ERR(rt)) return rt; ip_rt_put(rt); flowi4_update_output(fl4, oif, fl4->daddr, fl4->saddr); } security_sk_classify_flow(sk, flowi4_to_flowi_common(fl4)); return ip_route_output_flow(net, fl4, sk); } static inline struct rtable *ip_route_newports(struct flowi4 *fl4, struct rtable *rt, __be16 orig_sport, __be16 orig_dport, __be16 sport, __be16 dport, const struct sock *sk) { if (sport != orig_sport || dport != orig_dport) { fl4->fl4_dport = dport; fl4->fl4_sport = sport; ip_rt_put(rt); flowi4_update_output(fl4, sk->sk_bound_dev_if, fl4->daddr, fl4->saddr); security_sk_classify_flow(sk, flowi4_to_flowi_common(fl4)); return ip_route_output_flow(sock_net(sk), fl4, sk); } return rt; } static inline int inet_iif(const struct sk_buff *skb) { struct rtable *rt = skb_rtable(skb); if (rt && rt->rt_iif) return rt->rt_iif; return skb->skb_iif; } static inline int ip4_dst_hoplimit(const struct dst_entry *dst) { int hoplimit = dst_metric_raw(dst, RTAX_HOPLIMIT); struct net *net = dev_net(dst->dev); if (hoplimit == 0) hoplimit = READ_ONCE(net->ipv4.sysctl_ip_default_ttl); return hoplimit; } static inline struct neighbour *ip_neigh_gw4(struct net_device *dev, __be32 daddr) { struct neighbour *neigh; neigh = __ipv4_neigh_lookup_noref(dev, (__force u32)daddr); if (unlikely(!neigh)) neigh = __neigh_create(&arp_tbl, &daddr, dev, false); return neigh; } static inline struct neighbour *ip_neigh_for_gw(struct rtable *rt, struct sk_buff *skb, bool *is_v6gw) { struct net_device *dev = rt->dst.dev; struct neighbour *neigh; if (likely(rt->rt_gw_family == AF_INET)) { 
		neigh = ip_neigh_gw4(dev, rt->rt_gw4);
	} else if (rt->rt_gw_family == AF_INET6) {
		neigh = ip_neigh_gw6(dev, &rt->rt_gw6);
		*is_v6gw = true;
	} else {
		neigh = ip_neigh_gw4(dev, ip_hdr(skb)->daddr);
	}
	return neigh;
}

#endif	/* _ROUTE_H */
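/*
 * A minimal sketch of an output-route lookup with the helpers declared
 * above; demo_lookup() and its parameters are hypothetical, the
 * ip_route_output(), rt_nexthop(), ip4_dst_hoplimit() and ip_rt_put()
 * calls match the declarations in this header.
 */
static int demo_lookup(struct net *net, __be32 daddr)
{
	struct rtable *rt;
	__be32 nh;

	/* wildcard source, default DSCP, no specific oif */
	rt = ip_route_output(net, daddr, 0, 0, 0, RT_SCOPE_UNIVERSE);
	if (IS_ERR(rt))
		return PTR_ERR(rt);

	nh = rt_nexthop(rt, daddr);	/* gateway if set, else daddr */
	pr_info("next hop %pI4, ttl %d\n", &nh, ip4_dst_hoplimit(&rt->dst));

	ip_rt_put(rt);			/* drops the dst reference */
	return 0;
}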
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _LINUX_PAGE_REF_H
#define _LINUX_PAGE_REF_H

#include <linux/atomic.h>
#include <linux/mm_types.h>
#include <linux/page-flags.h>
#include <linux/tracepoint-defs.h>

DECLARE_TRACEPOINT(page_ref_set);
DECLARE_TRACEPOINT(page_ref_mod);
DECLARE_TRACEPOINT(page_ref_mod_and_test);
DECLARE_TRACEPOINT(page_ref_mod_and_return);
DECLARE_TRACEPOINT(page_ref_mod_unless);
DECLARE_TRACEPOINT(page_ref_freeze);
DECLARE_TRACEPOINT(page_ref_unfreeze);

#ifdef CONFIG_DEBUG_PAGE_REF

/*
 * Ideally we would want to use the trace_<tracepoint>_enabled() helper
 * functions. But due to include header file issues, that is not
 * feasible. Instead we have to open code the static key functions.
 *
 * See trace_##name##_enabled(void) in include/linux/tracepoint.h
 */
#define page_ref_tracepoint_active(t) tracepoint_enabled(t)

extern void __page_ref_set(struct page *page, int v);
extern void __page_ref_mod(struct page *page, int v);
extern void __page_ref_mod_and_test(struct page *page, int v, int ret);
extern void __page_ref_mod_and_return(struct page *page, int v, int ret);
extern void __page_ref_mod_unless(struct page *page, int v, int u);
extern void __page_ref_freeze(struct page *page, int v, int ret);
extern void __page_ref_unfreeze(struct page *page, int v);

#else

#define page_ref_tracepoint_active(t) false

static inline void __page_ref_set(struct page *page, int v)
{
}
static inline void __page_ref_mod(struct page *page, int v)
{
}
static inline void __page_ref_mod_and_test(struct page *page, int v, int ret)
{
}
static inline void __page_ref_mod_and_return(struct page *page, int v, int ret)
{
}
static inline void __page_ref_mod_unless(struct page *page, int v, int u)
{
}
static inline void __page_ref_freeze(struct page *page, int v, int ret)
{
}
static inline void __page_ref_unfreeze(struct page *page, int v)
{
}

#endif

static inline int page_ref_count(const struct page *page)
{
	return atomic_read(&page->_refcount);
}

/**
 * folio_ref_count - The reference count on this folio.
 * @folio: The folio.
 *
 * The refcount is usually incremented by calls to folio_get() and
 * decremented by calls to folio_put().
Some typical users of the * folio refcount: * * - Each reference from a page table * - The page cache * - Filesystem private data * - The LRU list * - Pipes * - Direct IO which references this page in the process address space * * Return: The number of references to this folio. */ static inline int folio_ref_count(const struct folio *folio) { return page_ref_count(&folio->page); } static inline int page_count(const struct page *page) { return folio_ref_count(page_folio(page)); } static inline void set_page_count(struct page *page, int v) { atomic_set(&page->_refcount, v); if (page_ref_tracepoint_active(page_ref_set)) __page_ref_set(page, v); } static inline void folio_set_count(struct folio *folio, int v) { set_page_count(&folio->page, v); } /* * Setup the page count before being freed into the page allocator for * the first time (boot or memory hotplug) */ static inline void init_page_count(struct page *page) { set_page_count(page, 1); } static inline void page_ref_add(struct page *page, int nr) { atomic_add(nr, &page->_refcount); if (page_ref_tracepoint_active(page_ref_mod)) __page_ref_mod(page, nr); } static inline void folio_ref_add(struct folio *folio, int nr) { page_ref_add(&folio->page, nr); } static inline void page_ref_sub(struct page *page, int nr) { atomic_sub(nr, &page->_refcount); if (page_ref_tracepoint_active(page_ref_mod)) __page_ref_mod(page, -nr); } static inline void folio_ref_sub(struct folio *folio, int nr) { page_ref_sub(&folio->page, nr); } static inline int folio_ref_sub_return(struct folio *folio, int nr) { int ret = atomic_sub_return(nr, &folio->_refcount); if (page_ref_tracepoint_active(page_ref_mod_and_return)) __page_ref_mod_and_return(&folio->page, -nr, ret); return ret; } static inline void page_ref_inc(struct page *page) { atomic_inc(&page->_refcount); if (page_ref_tracepoint_active(page_ref_mod)) __page_ref_mod(page, 1); } static inline void folio_ref_inc(struct folio *folio) { page_ref_inc(&folio->page); } static inline void page_ref_dec(struct page *page) { atomic_dec(&page->_refcount); if (page_ref_tracepoint_active(page_ref_mod)) __page_ref_mod(page, -1); } static inline void folio_ref_dec(struct folio *folio) { page_ref_dec(&folio->page); } static inline int page_ref_sub_and_test(struct page *page, int nr) { int ret = atomic_sub_and_test(nr, &page->_refcount); if (page_ref_tracepoint_active(page_ref_mod_and_test)) __page_ref_mod_and_test(page, -nr, ret); return ret; } static inline int folio_ref_sub_and_test(struct folio *folio, int nr) { return page_ref_sub_and_test(&folio->page, nr); } static inline int page_ref_inc_return(struct page *page) { int ret = atomic_inc_return(&page->_refcount); if (page_ref_tracepoint_active(page_ref_mod_and_return)) __page_ref_mod_and_return(page, 1, ret); return ret; } static inline int folio_ref_inc_return(struct folio *folio) { return page_ref_inc_return(&folio->page); } static inline int page_ref_dec_and_test(struct page *page) { int ret = atomic_dec_and_test(&page->_refcount); if (page_ref_tracepoint_active(page_ref_mod_and_test)) __page_ref_mod_and_test(page, -1, ret); return ret; } static inline int folio_ref_dec_and_test(struct folio *folio) { return page_ref_dec_and_test(&folio->page); } static inline int page_ref_dec_return(struct page *page) { int ret = atomic_dec_return(&page->_refcount); if (page_ref_tracepoint_active(page_ref_mod_and_return)) __page_ref_mod_and_return(page, -1, ret); return ret; } static inline int folio_ref_dec_return(struct folio *folio) { return page_ref_dec_return(&folio->page); } 
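/*
 * A minimal sketch (demo_put_page() is hypothetical) of the usual
 * *_dec_and_test pattern built on the helpers above: only the caller that
 * drops the final reference tears the page down. The real put_page() does
 * considerably more (compound pages, memcg accounting, and so on).
 */
static inline void demo_put_page(struct page *page)
{
	if (page_ref_dec_and_test(page))
		__free_pages(page, 0);	/* we held the last reference */
}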
static inline bool page_ref_add_unless(struct page *page, int nr, int u)
{
	bool ret = false;

	rcu_read_lock();
	/* avoid writing to the vmemmap area being remapped */
	if (!page_is_fake_head(page) && page_ref_count(page) != u)
		ret = atomic_add_unless(&page->_refcount, nr, u);
	rcu_read_unlock();

	if (page_ref_tracepoint_active(page_ref_mod_unless))
		__page_ref_mod_unless(page, nr, ret);
	return ret;
}

static inline bool folio_ref_add_unless(struct folio *folio, int nr, int u)
{
	return page_ref_add_unless(&folio->page, nr, u);
}

/**
 * folio_try_get - Attempt to increase the refcount on a folio.
 * @folio: The folio.
 *
 * If you do not already have a reference to a folio, you can attempt to
 * get one using this function. It may fail if, for example, the folio
 * has been freed since you found a pointer to it, or it is frozen for
 * the purposes of splitting or migration.
 *
 * Return: True if the reference count was successfully incremented.
 */
static inline bool folio_try_get(struct folio *folio)
{
	return folio_ref_add_unless(folio, 1, 0);
}

static inline bool folio_ref_try_add(struct folio *folio, int count)
{
	return folio_ref_add_unless(folio, count, 0);
}

static inline int page_ref_freeze(struct page *page, int count)
{
	int ret = likely(atomic_cmpxchg(&page->_refcount, count, 0) == count);

	if (page_ref_tracepoint_active(page_ref_freeze))
		__page_ref_freeze(page, count, ret);
	return ret;
}

static inline int folio_ref_freeze(struct folio *folio, int count)
{
	return page_ref_freeze(&folio->page, count);
}

static inline void page_ref_unfreeze(struct page *page, int count)
{
	VM_BUG_ON_PAGE(page_count(page) != 0, page);
	VM_BUG_ON(count == 0);

	atomic_set_release(&page->_refcount, count);
	if (page_ref_tracepoint_active(page_ref_unfreeze))
		__page_ref_unfreeze(page, count);
}

static inline void folio_ref_unfreeze(struct folio *folio, int count)
{
	page_ref_unfreeze(&folio->page, count);
}

#endif
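/*
 * A minimal sketch of the freeze/unfreeze protocol above, as used by
 * operations like splitting or migration; demo_freeze() and expected_refs
 * are hypothetical. Freezing succeeds only if the caller knows the exact
 * current reference count, and while frozen no folio_try_get() can succeed.
 */
static inline bool demo_freeze(struct folio *folio, int expected_refs)
{
	if (!folio_ref_freeze(folio, expected_refs))
		return false;	/* someone else holds an extra reference */

	/* refcount is now 0: the folio is exclusively ours to modify */

	folio_ref_unfreeze(folio, expected_refs);	/* publish again */
	return true;
}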
/* SPDX-License-Identifier: GPL-2.0 */
/*
 * Dynamic queue limits (dql) - Definitions
 *
 * Copyright (c) 2011, Tom Herbert <therbert@google.com>
 *
 * This header file contains the definitions for dynamic queue limits (dql).
 * dql would be used in conjunction with a producer/consumer type queue
 * (possibly a HW queue). Such a queue would have these general properties:
 *
 *   1) Objects are queued up to some limit specified as number of objects.
 *   2) Periodically a completion process executes which retires consumed
 *      objects.
 *   3) Starvation occurs when limit has been reached, all queued data has
 *      actually been consumed, but completion processing has not yet run
 *      so queuing new data is blocked.
 *   4) Minimizing the amount of queued data is desirable.
 *
 * The goal of dql is to calculate the limit as the minimum number of objects
 * needed to prevent starvation.
 *
 * The primary functions of dql are:
 *    dql_queued - called when objects are enqueued to record number of objects
 *    dql_avail - returns how many objects are available to be queued based
 *       on the object limit and how many objects are already enqueued
 *    dql_completed - called at completion time to indicate how many objects
 *       were retired from the queue
 *
 * The dql implementation does not implement any locking for the dql data
 * structures, the higher layer should provide this. dql_queued should
 * be serialized to prevent concurrent execution of the function; this
 * is also true for dql_completed. However, dql_queued and dql_completed can
 * be executed concurrently (i.e. they can be protected by different locks).
*/ #ifndef _LINUX_DQL_H #define _LINUX_DQL_H #ifdef __KERNEL__ #include <linux/bitops.h> #include <asm/bug.h> #define DQL_HIST_LEN 4 #define DQL_HIST_ENT(dql, idx) ((dql)->history[(idx) % DQL_HIST_LEN]) struct dql { /* Fields accessed in enqueue path (dql_queued) */ unsigned int num_queued; /* Total ever queued */ unsigned int adj_limit; /* limit + num_completed */ unsigned int last_obj_cnt; /* Count at last queuing */ /* Stall threshold (in jiffies), defined by user */ unsigned short stall_thrs; unsigned long history_head; /* top 58 bits of jiffies */ /* stall entries, a bit per entry */ unsigned long history[DQL_HIST_LEN]; /* Fields accessed only by completion path (dql_completed) */ unsigned int limit ____cacheline_aligned_in_smp; /* Current limit */ unsigned int num_completed; /* Total ever completed */ unsigned int prev_ovlimit; /* Previous over limit */ unsigned int prev_num_queued; /* Previous queue total */ unsigned int prev_last_obj_cnt; /* Previous queuing cnt */ unsigned int lowest_slack; /* Lowest slack found */ unsigned long slack_start_time; /* Time slacks seen */ /* Configuration */ unsigned int max_limit; /* Max limit */ unsigned int min_limit; /* Minimum limit */ unsigned int slack_hold_time; /* Time to measure slack */ /* Longest stall detected, reported to user */ unsigned short stall_max; unsigned long last_reap; /* Last reap (in jiffies) */ unsigned long stall_cnt; /* Number of stalls */ }; /* Set some static maximums */ #define DQL_MAX_OBJECT (UINT_MAX / 16) #define DQL_MAX_LIMIT ((UINT_MAX / 2) - DQL_MAX_OBJECT) /* Populate the bitmap to be processed later in dql_check_stall() */ static inline void dql_queue_stall(struct dql *dql) { unsigned long map, now, now_hi, i; now = jiffies; now_hi = now / BITS_PER_LONG; /* The following code sets a bit in the ring buffer, where each * bit tracks the time (in jiffies) at which a packet was queued. The * dql->history buffer covers DQL_HIST_LEN * BITS_PER_LONG such * (jiffies) slots. */ if (unlikely(now_hi != dql->history_head)) { /* About to reuse slots, clear them */ for (i = 0; i < DQL_HIST_LEN; i++) { /* Multiplication masks high bits */ if (now_hi * BITS_PER_LONG == (dql->history_head + i) * BITS_PER_LONG) break; DQL_HIST_ENT(dql, dql->history_head + i + 1) = 0; } /* pairs with smp_rmb() in dql_check_stall() */ smp_wmb(); WRITE_ONCE(dql->history_head, now_hi); } /* __set_bit() does not guarantee WRITE_ONCE() semantics */ map = DQL_HIST_ENT(dql, now_hi); /* Populate the history with an entry (bit) per queued packet */ if (!(map & BIT_MASK(now))) WRITE_ONCE(DQL_HIST_ENT(dql, now_hi), map | BIT_MASK(now)); } /* * Record number of objects queued. Assumes that caller has already checked * availability in the queue with dql_avail. */ static inline void dql_queued(struct dql *dql, unsigned int count) { if (WARN_ON_ONCE(count > DQL_MAX_OBJECT)) return; WRITE_ONCE(dql->last_obj_cnt, count); /* We want to force a write first, so that the cpu does not attempt * to get the cache line containing last_obj_cnt, num_queued and * adj_limit in Shared state, but instead directly issues a Request * For Ownership. This is only a hint; we use barrier() only. */ barrier(); dql->num_queued += count; /* Only populate stall information if the threshold is set */ if (READ_ONCE(dql->stall_thrs)) dql_queue_stall(dql); } /* Returns how many objects can be queued, < 0 indicates over limit. */ static inline int dql_avail(const struct dql *dql) { return READ_ONCE(dql->adj_limit) - READ_ONCE(dql->num_queued); } /* Record number of completed objects and recalculate the limit.
*/ void dql_completed(struct dql *dql, unsigned int count); /* Reset dql state */ void dql_reset(struct dql *dql); /* Initialize dql state */ void dql_init(struct dql *dql, unsigned int hold_time); #endif /* __KERNEL__ */ #endif /* _LINUX_DQL_H */
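/*
 * Illustrative example (not part of the kernel sources): a simplified
 * user-space model of the dql accounting above. The limit recalculation
 * done by the real dql_completed() and all memory barriers are omitted;
 * the toy_* names are illustrative. It shows the essential invariant:
 * dql_avail() = adj_limit - num_queued goes negative once the producer
 * has queued more than limit + num_completed, and completions re-open
 * the budget by advancing adj_limit.
 */
#include <stdio.h>

struct toy_dql {
	unsigned int num_queued;	/* total ever queued */
	unsigned int num_completed;	/* total ever completed */
	unsigned int limit;		/* current limit (fixed here) */
	unsigned int adj_limit;		/* limit + num_completed */
};

static int toy_avail(const struct toy_dql *d)
{
	/* unsigned subtraction read as signed: < 0 means over limit */
	return (int)(d->adj_limit - d->num_queued);
}

static void toy_queued(struct toy_dql *d, unsigned int count)
{
	d->num_queued += count;
}

static void toy_completed(struct toy_dql *d, unsigned int count)
{
	d->num_completed += count;
	d->adj_limit = d->limit + d->num_completed;	/* re-open the budget */
}

int main(void)
{
	struct toy_dql d = { .limit = 4, .adj_limit = 4 };

	toy_queued(&d, 3);
	printf("avail after queuing 3: %d\n", toy_avail(&d));	/* 1 */
	toy_queued(&d, 3);
	printf("avail when over limit: %d\n", toy_avail(&d));	/* -2: stop queuing */
	toy_completed(&d, 6);
	printf("avail after completion: %d\n", toy_avail(&d));	/* 4 */
	return 0;
}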
// SPDX-License-Identifier: GPL-2.0 /* * fs/signalfd.c * * Copyright (C) 2003 Linus Torvalds * * Mon Mar 5, 2007: Davide Libenzi <davidel@xmailserver.org> * Changed ->read() to return a siginfo structure instead of signal number. * Fixed locking in ->poll(). * Added sighand-detach notification. * Added fd re-use in sys_signalfd() syscall. * Now using anonymous inode source. * Thanks to Oleg Nesterov for useful code review and suggestions. * More comments and suggestions from Arnd Bergmann. * Sat May 19, 2007: Davi E. M. Arnaut <davi@haxent.com.br> * Retrieve multiple signals with one read() call * Sun Jul 15, 2007: Davide Libenzi <davidel@xmailserver.org> * Attach to the sighand only during read() and poll(). */ #include <linux/file.h> #include <linux/poll.h> #include <linux/init.h> #include <linux/fs.h> #include <linux/sched.h> #include <linux/slab.h> #include <linux/kernel.h> #include <linux/signal.h> #include <linux/list.h> #include <linux/anon_inodes.h> #include <linux/signalfd.h> #include <linux/syscalls.h> #include <linux/proc_fs.h> #include <linux/compat.h> void signalfd_cleanup(struct sighand_struct *sighand) { wake_up_pollfree(&sighand->signalfd_wqh); } struct signalfd_ctx { sigset_t sigmask; }; static int signalfd_release(struct inode *inode, struct file *file) { kfree(file->private_data); return 0; } static __poll_t signalfd_poll(struct file *file, poll_table *wait) { struct signalfd_ctx *ctx = file->private_data; __poll_t events = 0; poll_wait(file, &current->sighand->signalfd_wqh, wait); spin_lock_irq(&current->sighand->siglock); if (next_signal(&current->pending, &ctx->sigmask) || next_signal(&current->signal->shared_pending, &ctx->sigmask)) events |= EPOLLIN; spin_unlock_irq(&current->sighand->siglock); return events; } /* * Copied from copy_siginfo_to_user() in kernel/signal.c */ static int signalfd_copyinfo(struct iov_iter *to, kernel_siginfo_t const *kinfo) { struct signalfd_siginfo new; BUILD_BUG_ON(sizeof(struct signalfd_siginfo) != 128); /* * Unused members should be zero ...
*/ memset(&new, 0, sizeof(new)); /* * If you change siginfo_t structure, please be sure * this code is fixed accordingly. */ new.ssi_signo = kinfo->si_signo; new.ssi_errno = kinfo->si_errno; new.ssi_code = kinfo->si_code; switch (siginfo_layout(kinfo->si_signo, kinfo->si_code)) { case SIL_KILL: new.ssi_pid = kinfo->si_pid; new.ssi_uid = kinfo->si_uid; break; case SIL_TIMER: new.ssi_tid = kinfo->si_tid; new.ssi_overrun = kinfo->si_overrun; new.ssi_ptr = (long) kinfo->si_ptr; new.ssi_int = kinfo->si_int; break; case SIL_POLL: new.ssi_band = kinfo->si_band; new.ssi_fd = kinfo->si_fd; break; case SIL_FAULT_BNDERR: case SIL_FAULT_PKUERR: case SIL_FAULT_PERF_EVENT: /* * Fall through to the SIL_FAULT case. SIL_FAULT_BNDERR, * SIL_FAULT_PKUERR, and SIL_FAULT_PERF_EVENT are only * generated by faults that deliver them synchronously to * userspace. In case someone injects one of these signals * and signalfd catches it, treat it as SIL_FAULT. */ case SIL_FAULT: new.ssi_addr = (long) kinfo->si_addr; break; case SIL_FAULT_TRAPNO: new.ssi_addr = (long) kinfo->si_addr; new.ssi_trapno = kinfo->si_trapno; break; case SIL_FAULT_MCEERR: new.ssi_addr = (long) kinfo->si_addr; new.ssi_addr_lsb = (short) kinfo->si_addr_lsb; break; case SIL_CHLD: new.ssi_pid = kinfo->si_pid; new.ssi_uid = kinfo->si_uid; new.ssi_status = kinfo->si_status; new.ssi_utime = kinfo->si_utime; new.ssi_stime = kinfo->si_stime; break; case SIL_RT: /* * This case also catches the signals queued by sigqueue(). */ new.ssi_pid = kinfo->si_pid; new.ssi_uid = kinfo->si_uid; new.ssi_ptr = (long) kinfo->si_ptr; new.ssi_int = kinfo->si_int; break; case SIL_SYS: new.ssi_call_addr = (long) kinfo->si_call_addr; new.ssi_syscall = kinfo->si_syscall; new.ssi_arch = kinfo->si_arch; break; } if (!copy_to_iter_full(&new, sizeof(struct signalfd_siginfo), to)) return -EFAULT; return sizeof(struct signalfd_siginfo); } static ssize_t signalfd_dequeue(struct signalfd_ctx *ctx, kernel_siginfo_t *info, int nonblock) { enum pid_type type; ssize_t ret; DECLARE_WAITQUEUE(wait, current); spin_lock_irq(&current->sighand->siglock); ret = dequeue_signal(&ctx->sigmask, info, &type); switch (ret) { case 0: if (!nonblock) break; ret = -EAGAIN; fallthrough; default: spin_unlock_irq(&current->sighand->siglock); return ret; } add_wait_queue(&current->sighand->signalfd_wqh, &wait); for (;;) { set_current_state(TASK_INTERRUPTIBLE); ret = dequeue_signal(&ctx->sigmask, info, &type); if (ret != 0) break; if (signal_pending(current)) { ret = -ERESTARTSYS; break; } spin_unlock_irq(&current->sighand->siglock); schedule(); spin_lock_irq(&current->sighand->siglock); } spin_unlock_irq(&current->sighand->siglock); remove_wait_queue(&current->sighand->signalfd_wqh, &wait); __set_current_state(TASK_RUNNING); return ret; } /* * Returns a multiple of the size of a "struct signalfd_siginfo", or a negative * error code. The "count" parameter must be at least the size of a * "struct signalfd_siginfo". */ static ssize_t signalfd_read_iter(struct kiocb *iocb, struct iov_iter *to) { struct file *file = iocb->ki_filp; struct signalfd_ctx *ctx = file->private_data; size_t count = iov_iter_count(to); ssize_t ret, total = 0; kernel_siginfo_t info; bool nonblock; count /= sizeof(struct signalfd_siginfo); if (!count) return -EINVAL; nonblock = file->f_flags & O_NONBLOCK || iocb->ki_flags & IOCB_NOWAIT; do { ret = signalfd_dequeue(ctx, &info, nonblock); if (unlikely(ret <= 0)) break; ret = signalfd_copyinfo(to, &info); if (ret < 0) break; total += ret; nonblock = 1; } while (--count); return total ?
total: ret; } #ifdef CONFIG_PROC_FS static void signalfd_show_fdinfo(struct seq_file *m, struct file *f) { struct signalfd_ctx *ctx = f->private_data; sigset_t sigmask; sigmask = ctx->sigmask; signotset(&sigmask); render_sigset_t(m, "sigmask:\t", &sigmask); } #endif static const struct file_operations signalfd_fops = { #ifdef CONFIG_PROC_FS .show_fdinfo = signalfd_show_fdinfo, #endif .release = signalfd_release, .poll = signalfd_poll, .read_iter = signalfd_read_iter, .llseek = noop_llseek, }; static int do_signalfd4(int ufd, sigset_t *mask, int flags) { struct signalfd_ctx *ctx; /* Check the SFD_* constants for consistency. */ BUILD_BUG_ON(SFD_CLOEXEC != O_CLOEXEC); BUILD_BUG_ON(SFD_NONBLOCK != O_NONBLOCK); if (flags & ~(SFD_CLOEXEC | SFD_NONBLOCK)) return -EINVAL; sigdelsetmask(mask, sigmask(SIGKILL) | sigmask(SIGSTOP)); signotset(mask); if (ufd == -1) { struct file *file; ctx = kmalloc(sizeof(*ctx), GFP_KERNEL); if (!ctx) return -ENOMEM; ctx->sigmask = *mask; ufd = get_unused_fd_flags(flags & O_CLOEXEC); if (ufd < 0) { kfree(ctx); return ufd; } file = anon_inode_getfile("[signalfd]", &signalfd_fops, ctx, O_RDWR | (flags & O_NONBLOCK)); if (IS_ERR(file)) { put_unused_fd(ufd); kfree(ctx); return PTR_ERR(file); } file->f_mode |= FMODE_NOWAIT; fd_install(ufd, file); } else { CLASS(fd, f)(ufd); if (fd_empty(f)) return -EBADF; ctx = fd_file(f)->private_data; if (fd_file(f)->f_op != &signalfd_fops) return -EINVAL; spin_lock_irq(&current->sighand->siglock); ctx->sigmask = *mask; spin_unlock_irq(&current->sighand->siglock); wake_up(&current->sighand->signalfd_wqh); } return ufd; } SYSCALL_DEFINE4(signalfd4, int, ufd, sigset_t __user *, user_mask, size_t, sizemask, int, flags) { sigset_t mask; if (sizemask != sizeof(sigset_t)) return -EINVAL; if (copy_from_user(&mask, user_mask, sizeof(mask))) return -EFAULT; return do_signalfd4(ufd, &mask, flags); } SYSCALL_DEFINE3(signalfd, int, ufd, sigset_t __user *, user_mask, size_t, sizemask) { sigset_t mask; if (sizemask != sizeof(sigset_t)) return -EINVAL; if (copy_from_user(&mask, user_mask, sizeof(mask))) return -EFAULT; return do_signalfd4(ufd, &mask, 0); } #ifdef CONFIG_COMPAT static long do_compat_signalfd4(int ufd, const compat_sigset_t __user *user_mask, compat_size_t sigsetsize, int flags) { sigset_t mask; if (sigsetsize != sizeof(compat_sigset_t)) return -EINVAL; if (get_compat_sigset(&mask, user_mask)) return -EFAULT; return do_signalfd4(ufd, &mask, flags); } COMPAT_SYSCALL_DEFINE4(signalfd4, int, ufd, const compat_sigset_t __user *, user_mask, compat_size_t, sigsetsize, int, flags) { return do_compat_signalfd4(ufd, user_mask, sigsetsize, flags); } COMPAT_SYSCALL_DEFINE3(signalfd, int, ufd, const compat_sigset_t __user *, user_mask, compat_size_t, sigsetsize) { return do_compat_signalfd4(ufd, user_mask, sigsetsize, 0); } #endif
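/*
 * Illustrative example (not part of the kernel sources): a minimal
 * user-space consumer of the syscalls defined above. The signal must
 * first be blocked with sigprocmask() so it is not delivered the usual
 * way; it is then dequeued as a struct signalfd_siginfo via read() on
 * the signalfd file descriptor. Error handling is kept to a minimum.
 */
#include <sys/signalfd.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

int main(void)
{
	sigset_t mask;
	struct signalfd_siginfo ssi;
	int sfd;

	sigemptyset(&mask);
	sigaddset(&mask, SIGINT);
	/* block normal delivery so the fd can dequeue the signal */
	if (sigprocmask(SIG_BLOCK, &mask, NULL) == -1) {
		perror("sigprocmask");
		return EXIT_FAILURE;
	}
	sfd = signalfd(-1, &mask, SFD_CLOEXEC);
	if (sfd == -1) {
		perror("signalfd");
		return EXIT_FAILURE;
	}
	printf("press Ctrl-C...\n");
	if (read(sfd, &ssi, sizeof(ssi)) != (ssize_t)sizeof(ssi)) {
		perror("read");
		return EXIT_FAILURE;
	}
	printf("got signal %u from pid %u\n", ssi.ssi_signo, ssi.ssi_pid);
	close(sfd);
	return EXIT_SUCCESS;
}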
// SPDX-License-Identifier: GPL-2.0 /* * fs/f2fs/data.c * * Copyright (c) 2012 Samsung Electronics Co., Ltd. * http://www.samsung.com/ */ #include <linux/fs.h> #include <linux/f2fs_fs.h> #include <linux/sched/mm.h> #include <linux/mpage.h> #include <linux/writeback.h> #include <linux/pagevec.h> #include <linux/blkdev.h> #include <linux/bio.h> #include <linux/blk-crypto.h> #include <linux/swap.h> #include <linux/prefetch.h> #include <linux/uio.h> #include <linux/sched/signal.h> #include <linux/fiemap.h> #include <linux/iomap.h> #include "f2fs.h" #include "node.h" #include "segment.h" #include "iostat.h" #include <trace/events/f2fs.h> #define NUM_PREALLOC_POST_READ_CTXS 128 static struct kmem_cache *bio_post_read_ctx_cache; static struct kmem_cache *bio_entry_slab; static mempool_t *bio_post_read_ctx_pool; static struct bio_set f2fs_bioset; #define F2FS_BIO_POOL_SIZE NR_CURSEG_TYPE int __init f2fs_init_bioset(void) { return bioset_init(&f2fs_bioset, F2FS_BIO_POOL_SIZE, 0, BIOSET_NEED_BVECS); } void f2fs_destroy_bioset(void) { bioset_exit(&f2fs_bioset); } bool f2fs_is_cp_guaranteed(struct page *page) { struct address_space *mapping = page->mapping; struct inode *inode; struct f2fs_sb_info *sbi; if (!mapping) return false; inode = mapping->host; sbi = F2FS_I_SB(inode); if (inode->i_ino == F2FS_META_INO(sbi) || inode->i_ino == F2FS_NODE_INO(sbi) || S_ISDIR(inode->i_mode)) return true; if ((S_ISREG(inode->i_mode) && IS_NOQUOTA(inode)) || page_private_gcing(page)) return true; return false; } static enum count_type __read_io_type(struct page *page) { struct address_space *mapping = page_file_mapping(page); if (mapping) { struct inode *inode = mapping->host; struct f2fs_sb_info *sbi = F2FS_I_SB(inode); if (inode->i_ino == F2FS_META_INO(sbi)) return F2FS_RD_META; if (inode->i_ino == F2FS_NODE_INO(sbi)) return F2FS_RD_NODE; } return F2FS_RD_DATA; } /* postprocessing steps for read bios */ enum bio_post_read_step { #ifdef CONFIG_FS_ENCRYPTION STEP_DECRYPT = BIT(0), #else STEP_DECRYPT = 0, /* compile out the decryption-related code */ #endif #ifdef CONFIG_F2FS_FS_COMPRESSION STEP_DECOMPRESS = BIT(1), #else STEP_DECOMPRESS = 0, /* compile out the decompression-related code */ #endif #ifdef CONFIG_FS_VERITY STEP_VERITY = BIT(2), #else STEP_VERITY = 0, /* compile out the verity-related code */ #endif }; struct bio_post_read_ctx { struct bio *bio; struct f2fs_sb_info *sbi; struct work_struct work; unsigned int enabled_steps; /* * decompression_attempted keeps track of whether * f2fs_end_read_compressed_page() has been called on the pages in the * bio that belong to a compressed cluster yet. */ bool decompression_attempted; block_t fs_blkaddr; }; /* * Update and unlock a bio's pages, and free the bio. * * This marks pages up-to-date only if there was no error in the bio (I/O error, * decryption error, or verity error), as indicated by bio->bi_status.
* * "Compressed pages" (pagecache pages backed by a compressed cluster on-disk) * aren't marked up-to-date here, as decompression is done on a per-compression- * cluster basis rather than a per-bio basis. Instead, we only must do two * things for each compressed page here: call f2fs_end_read_compressed_page() * with failed=true if an error occurred before it would have normally gotten * called (i.e., I/O error or decryption error, but *not* verity error), and * release the bio's reference to the decompress_io_ctx of the page's cluster. */ static void f2fs_finish_read_bio(struct bio *bio, bool in_task) { struct bio_vec *bv; struct bvec_iter_all iter_all; struct bio_post_read_ctx *ctx = bio->bi_private; bio_for_each_segment_all(bv, bio, iter_all) { struct page *page = bv->bv_page; if (f2fs_is_compressed_page(page)) { if (ctx && !ctx->decompression_attempted) f2fs_end_read_compressed_page(page, true, 0, in_task); f2fs_put_page_dic(page, in_task); continue; } if (bio->bi_status) ClearPageUptodate(page); else SetPageUptodate(page); dec_page_count(F2FS_P_SB(page), __read_io_type(page)); unlock_page(page); } if (ctx) mempool_free(ctx, bio_post_read_ctx_pool); bio_put(bio); } static void f2fs_verify_bio(struct work_struct *work) { struct bio_post_read_ctx *ctx = container_of(work, struct bio_post_read_ctx, work); struct bio *bio = ctx->bio; bool may_have_compressed_pages = (ctx->enabled_steps & STEP_DECOMPRESS); /* * fsverity_verify_bio() may call readahead() again, and while verity * will be disabled for this, decryption and/or decompression may still * be needed, resulting in another bio_post_read_ctx being allocated. * So to prevent deadlocks we need to release the current ctx to the * mempool first. This assumes that verity is the last post-read step. */ mempool_free(ctx, bio_post_read_ctx_pool); bio->bi_private = NULL; /* * Verify the bio's pages with fs-verity. Exclude compressed pages, * as those were handled separately by f2fs_end_read_compressed_page(). */ if (may_have_compressed_pages) { struct bio_vec *bv; struct bvec_iter_all iter_all; bio_for_each_segment_all(bv, bio, iter_all) { struct page *page = bv->bv_page; if (!f2fs_is_compressed_page(page) && !fsverity_verify_page(page)) { bio->bi_status = BLK_STS_IOERR; break; } } } else { fsverity_verify_bio(bio); } f2fs_finish_read_bio(bio, true); } /* * If the bio's data needs to be verified with fs-verity, then enqueue the * verity work for the bio. Otherwise finish the bio now. * * Note that to avoid deadlocks, the verity work can't be done on the * decryption/decompression workqueue. This is because verifying the data pages * can involve reading verity metadata pages from the file, and these verity * metadata pages may be encrypted and/or compressed. */ static void f2fs_verify_and_finish_bio(struct bio *bio, bool in_task) { struct bio_post_read_ctx *ctx = bio->bi_private; if (ctx && (ctx->enabled_steps & STEP_VERITY)) { INIT_WORK(&ctx->work, f2fs_verify_bio); fsverity_enqueue_verify_work(&ctx->work); } else { f2fs_finish_read_bio(bio, in_task); } } /* * Handle STEP_DECOMPRESS by decompressing any compressed clusters whose last * remaining page was read by @ctx->bio. * * Note that a bio may span clusters (even a mix of compressed and uncompressed * clusters) or be for just part of a cluster. STEP_DECOMPRESS just indicates * that the bio includes at least one compressed page. The actual decompression * is done on a per-cluster basis, not a per-bio basis. 
*/ static void f2fs_handle_step_decompress(struct bio_post_read_ctx *ctx, bool in_task) { struct bio_vec *bv; struct bvec_iter_all iter_all; bool all_compressed = true; block_t blkaddr = ctx->fs_blkaddr; bio_for_each_segment_all(bv, ctx->bio, iter_all) { struct page *page = bv->bv_page; if (f2fs_is_compressed_page(page)) f2fs_end_read_compressed_page(page, false, blkaddr, in_task); else all_compressed = false; blkaddr++; } ctx->decompression_attempted = true; /* * Optimization: if all the bio's pages are compressed, then scheduling * the per-bio verity work is unnecessary, as verity will be fully * handled at the compression cluster level. */ if (all_compressed) ctx->enabled_steps &= ~STEP_VERITY; } static void f2fs_post_read_work(struct work_struct *work) { struct bio_post_read_ctx *ctx = container_of(work, struct bio_post_read_ctx, work); struct bio *bio = ctx->bio; if ((ctx->enabled_steps & STEP_DECRYPT) && !fscrypt_decrypt_bio(bio)) { f2fs_finish_read_bio(bio, true); return; } if (ctx->enabled_steps & STEP_DECOMPRESS) f2fs_handle_step_decompress(ctx, true); f2fs_verify_and_finish_bio(bio, true); } static void f2fs_read_end_io(struct bio *bio) { struct f2fs_sb_info *sbi = F2FS_P_SB(bio_first_page_all(bio)); struct bio_post_read_ctx *ctx; bool intask = in_task(); iostat_update_and_unbind_ctx(bio); ctx = bio->bi_private; if (time_to_inject(sbi, FAULT_READ_IO)) bio->bi_status = BLK_STS_IOERR; if (bio->bi_status) { f2fs_finish_read_bio(bio, intask); return; } if (ctx) { unsigned int enabled_steps = ctx->enabled_steps & (STEP_DECRYPT | STEP_DECOMPRESS); /* * If decompression is the only post-read step that is enabled * (i.e. no decryption is needed), it can be handled inline here * rather than being deferred to the post-read workqueue. */ if (enabled_steps == STEP_DECOMPRESS && !f2fs_low_mem_mode(sbi)) { f2fs_handle_step_decompress(ctx, intask); } else if (enabled_steps) { INIT_WORK(&ctx->work, f2fs_post_read_work); queue_work(ctx->sbi->post_read_wq, &ctx->work); return; } } f2fs_verify_and_finish_bio(bio, intask); } static void f2fs_write_end_io(struct bio *bio) { struct f2fs_sb_info *sbi; struct bio_vec *bvec; struct bvec_iter_all iter_all; iostat_update_and_unbind_ctx(bio); sbi = bio->bi_private; if (time_to_inject(sbi, FAULT_WRITE_IO)) bio->bi_status = BLK_STS_IOERR; bio_for_each_segment_all(bvec, bio, iter_all) { struct page *page = bvec->bv_page; enum count_type type = WB_DATA_TYPE(page, false); fscrypt_finalize_bounce_page(&page); #ifdef CONFIG_F2FS_FS_COMPRESSION if (f2fs_is_compressed_page(page)) { f2fs_compress_write_end_io(bio, page); continue; } #endif if (unlikely(bio->bi_status)) { mapping_set_error(page->mapping, -EIO); if (type == F2FS_WB_CP_DATA) f2fs_stop_checkpoint(sbi, true, STOP_CP_REASON_WRITE_FAIL); } f2fs_bug_on(sbi, page->mapping == NODE_MAPPING(sbi) && page_folio(page)->index != nid_of_node(page)); dec_page_count(sbi, type); if (f2fs_in_warm_node_list(sbi, page)) f2fs_del_fsync_node_entry(sbi, page); clear_page_private_gcing(page); end_page_writeback(page); } if (!get_pages(sbi, F2FS_WB_CP_DATA) && wq_has_sleeper(&sbi->cp_wait)) wake_up(&sbi->cp_wait); bio_put(bio); } #ifdef CONFIG_BLK_DEV_ZONED static void f2fs_zone_write_end_io(struct bio *bio) { struct f2fs_bio_info *io = (struct f2fs_bio_info *)bio->bi_private; bio->bi_private = io->bi_private; complete(&io->zone_wait); f2fs_write_end_io(bio); } #endif struct block_device *f2fs_target_device(struct f2fs_sb_info *sbi, block_t blk_addr, sector_t *sector) { struct block_device *bdev = sbi->sb->s_bdev; int i; if (f2fs_is_multi_device(sbi)) { for (i = 0; i < sbi->s_ndevs; i++) { if
(FDEV(i).start_blk <= blk_addr && FDEV(i).end_blk >= blk_addr) { blk_addr -= FDEV(i).start_blk; bdev = FDEV(i).bdev; break; } } } if (sector) *sector = SECTOR_FROM_BLOCK(blk_addr); return bdev; } int f2fs_target_device_index(struct f2fs_sb_info *sbi, block_t blkaddr) { int i; if (!f2fs_is_multi_device(sbi)) return 0; for (i = 0; i < sbi->s_ndevs; i++) if (FDEV(i).start_blk <= blkaddr && FDEV(i).end_blk >= blkaddr) return i; return 0; } static blk_opf_t f2fs_io_flags(struct f2fs_io_info *fio) { unsigned int temp_mask = GENMASK(NR_TEMP_TYPE - 1, 0); unsigned int fua_flag, meta_flag, io_flag; blk_opf_t op_flags = 0; if (fio->op != REQ_OP_WRITE) return 0; if (fio->type == DATA) io_flag = fio->sbi->data_io_flag; else if (fio->type == NODE) io_flag = fio->sbi->node_io_flag; else return 0; fua_flag = io_flag & temp_mask; meta_flag = (io_flag >> NR_TEMP_TYPE) & temp_mask; /* * data/node io flag bits per temp: * REQ_META | REQ_FUA | * 5 | 4 | 3 | 2 | 1 | 0 | * Cold | Warm | Hot | Cold | Warm | Hot | */ if (BIT(fio->temp) & meta_flag) op_flags |= REQ_META; if (BIT(fio->temp) & fua_flag) op_flags |= REQ_FUA; return op_flags; } static struct bio *__bio_alloc(struct f2fs_io_info *fio, int npages) { struct f2fs_sb_info *sbi = fio->sbi; struct block_device *bdev; sector_t sector; struct bio *bio; bdev = f2fs_target_device(sbi, fio->new_blkaddr, &sector); bio = bio_alloc_bioset(bdev, npages, fio->op | fio->op_flags | f2fs_io_flags(fio), GFP_NOIO, &f2fs_bioset); bio->bi_iter.bi_sector = sector; if (is_read_io(fio->op)) { bio->bi_end_io = f2fs_read_end_io; bio->bi_private = NULL; } else { bio->bi_end_io = f2fs_write_end_io; bio->bi_private = sbi; bio->bi_write_hint = f2fs_io_type_to_rw_hint(sbi, fio->type, fio->temp); } iostat_alloc_and_bind_ctx(sbi, bio, NULL); if (fio->io_wbc) wbc_init_bio(fio->io_wbc, bio); return bio; } static void f2fs_set_bio_crypt_ctx(struct bio *bio, const struct inode *inode, pgoff_t first_idx, const struct f2fs_io_info *fio, gfp_t gfp_mask) { /* * The f2fs garbage collector sets ->encrypted_page when it wants to * read/write raw data without encryption. */ if (!fio || !fio->encrypted_page) fscrypt_set_bio_crypt_ctx(bio, inode, first_idx, gfp_mask); } static bool f2fs_crypt_mergeable_bio(struct bio *bio, const struct inode *inode, pgoff_t next_idx, const struct f2fs_io_info *fio) { /* * The f2fs garbage collector sets ->encrypted_page when it wants to * read/write raw data without encryption.
*/ if (fio && fio->encrypted_page) return !bio_has_crypt_ctx(bio); return fscrypt_mergeable_bio(bio, inode, next_idx); } void f2fs_submit_read_bio(struct f2fs_sb_info *sbi, struct bio *bio, enum page_type type) { WARN_ON_ONCE(!is_read_io(bio_op(bio))); trace_f2fs_submit_read_bio(sbi->sb, type, bio); iostat_update_submit_ctx(bio, type); submit_bio(bio); } static void f2fs_submit_write_bio(struct f2fs_sb_info *sbi, struct bio *bio, enum page_type type) { WARN_ON_ONCE(is_read_io(bio_op(bio))); if (f2fs_lfs_mode(sbi) && current->plug && PAGE_TYPE_ON_MAIN(type)) blk_finish_plug(current->plug); trace_f2fs_submit_write_bio(sbi->sb, type, bio); iostat_update_submit_ctx(bio, type); submit_bio(bio); } static void __submit_merged_bio(struct f2fs_bio_info *io) { struct f2fs_io_info *fio = &io->fio; if (!io->bio) return; if (is_read_io(fio->op)) { trace_f2fs_prepare_read_bio(io->sbi->sb, fio->type, io->bio); f2fs_submit_read_bio(io->sbi, io->bio, fio->type); } else { trace_f2fs_prepare_write_bio(io->sbi->sb, fio->type, io->bio); f2fs_submit_write_bio(io->sbi, io->bio, fio->type); } io->bio = NULL; } static bool __has_merged_page(struct bio *bio, struct inode *inode, struct page *page, nid_t ino) { struct bio_vec *bvec; struct bvec_iter_all iter_all; if (!bio) return false; if (!inode && !page && !ino) return true; bio_for_each_segment_all(bvec, bio, iter_all) { struct page *target = bvec->bv_page; if (fscrypt_is_bounce_page(target)) { target = fscrypt_pagecache_page(target); if (IS_ERR(target)) continue; } if (f2fs_is_compressed_page(target)) { target = f2fs_compress_control_page(target); if (IS_ERR(target)) continue; } if (inode && inode == target->mapping->host) return true; if (page && page == target) return true; if (ino && ino == ino_of_node(target)) return true; } return false; } int f2fs_init_write_merge_io(struct f2fs_sb_info *sbi) { int i; for (i = 0; i < NR_PAGE_TYPE; i++) { int n = (i == META) ? 
1 : NR_TEMP_TYPE; int j; sbi->write_io[i] = f2fs_kmalloc(sbi, array_size(n, sizeof(struct f2fs_bio_info)), GFP_KERNEL); if (!sbi->write_io[i]) return -ENOMEM; for (j = HOT; j < n; j++) { struct f2fs_bio_info *io = &sbi->write_io[i][j]; init_f2fs_rwsem(&io->io_rwsem); io->sbi = sbi; io->bio = NULL; io->last_block_in_bio = 0; spin_lock_init(&io->io_lock); INIT_LIST_HEAD(&io->io_list); INIT_LIST_HEAD(&io->bio_list); init_f2fs_rwsem(&io->bio_list_lock); #ifdef CONFIG_BLK_DEV_ZONED init_completion(&io->zone_wait); io->zone_pending_bio = NULL; io->bi_private = NULL; #endif } } return 0; } static void __f2fs_submit_merged_write(struct f2fs_sb_info *sbi, enum page_type type, enum temp_type temp) { enum page_type btype = PAGE_TYPE_OF_BIO(type); struct f2fs_bio_info *io = sbi->write_io[btype] + temp; f2fs_down_write(&io->io_rwsem); if (!io->bio) goto unlock_out; /* change META to META_FLUSH in the checkpoint procedure */ if (type >= META_FLUSH) { io->fio.type = META_FLUSH; io->bio->bi_opf |= REQ_META | REQ_PRIO | REQ_SYNC; if (!test_opt(sbi, NOBARRIER)) io->bio->bi_opf |= REQ_PREFLUSH | REQ_FUA; } __submit_merged_bio(io); unlock_out: f2fs_up_write(&io->io_rwsem); } static void __submit_merged_write_cond(struct f2fs_sb_info *sbi, struct inode *inode, struct page *page, nid_t ino, enum page_type type, bool force) { enum temp_type temp; bool ret = true; for (temp = HOT; temp < NR_TEMP_TYPE; temp++) { if (!force) { enum page_type btype = PAGE_TYPE_OF_BIO(type); struct f2fs_bio_info *io = sbi->write_io[btype] + temp; f2fs_down_read(&io->io_rwsem); ret = __has_merged_page(io->bio, inode, page, ino); f2fs_up_read(&io->io_rwsem); } if (ret) __f2fs_submit_merged_write(sbi, type, temp); /* TODO: use HOT temp only for meta pages now. */ if (type >= META) break; } } void f2fs_submit_merged_write(struct f2fs_sb_info *sbi, enum page_type type) { __submit_merged_write_cond(sbi, NULL, NULL, 0, type, true); } void f2fs_submit_merged_write_cond(struct f2fs_sb_info *sbi, struct inode *inode, struct page *page, nid_t ino, enum page_type type) { __submit_merged_write_cond(sbi, inode, page, ino, type, false); } void f2fs_flush_merged_writes(struct f2fs_sb_info *sbi) { f2fs_submit_merged_write(sbi, DATA); f2fs_submit_merged_write(sbi, NODE); f2fs_submit_merged_write(sbi, META); } /* * Fill the locked page with data located in the block address. * A caller needs to unlock the page on failure. */ int f2fs_submit_page_bio(struct f2fs_io_info *fio) { struct bio *bio; struct page *page = fio->encrypted_page ? fio->encrypted_page : fio->page; if (!f2fs_is_valid_blkaddr(fio->sbi, fio->new_blkaddr, fio->is_por ? META_POR : (__is_meta_io(fio) ? META_GENERIC : DATA_GENERIC_ENHANCE))) return -EFSCORRUPTED; trace_f2fs_submit_page_bio(page, fio); /* Allocate a new bio */ bio = __bio_alloc(fio, 1); f2fs_set_bio_crypt_ctx(bio, fio->page->mapping->host, page_folio(fio->page)->index, fio, GFP_NOIO); if (bio_add_page(bio, page, PAGE_SIZE, 0) < PAGE_SIZE) { bio_put(bio); return -EFAULT; } if (fio->io_wbc && !is_read_io(fio->op)) wbc_account_cgroup_owner(fio->io_wbc, page_folio(fio->page), PAGE_SIZE); inc_page_count(fio->sbi, is_read_io(fio->op) ? 
__read_io_type(page) : WB_DATA_TYPE(fio->page, false)); if (is_read_io(bio_op(bio))) f2fs_submit_read_bio(fio->sbi, bio, fio->type); else f2fs_submit_write_bio(fio->sbi, bio, fio->type); return 0; } static bool page_is_mergeable(struct f2fs_sb_info *sbi, struct bio *bio, block_t last_blkaddr, block_t cur_blkaddr) { if (unlikely(sbi->max_io_bytes && bio->bi_iter.bi_size >= sbi->max_io_bytes)) return false; if (last_blkaddr + 1 != cur_blkaddr) return false; return bio->bi_bdev == f2fs_target_device(sbi, cur_blkaddr, NULL); } static bool io_type_is_mergeable(struct f2fs_bio_info *io, struct f2fs_io_info *fio) { if (io->fio.op != fio->op) return false; return io->fio.op_flags == fio->op_flags; } static bool io_is_mergeable(struct f2fs_sb_info *sbi, struct bio *bio, struct f2fs_bio_info *io, struct f2fs_io_info *fio, block_t last_blkaddr, block_t cur_blkaddr) { if (!page_is_mergeable(sbi, bio, last_blkaddr, cur_blkaddr)) return false; return io_type_is_mergeable(io, fio); } static void add_bio_entry(struct f2fs_sb_info *sbi, struct bio *bio, struct page *page, enum temp_type temp) { struct f2fs_bio_info *io = sbi->write_io[DATA] + temp; struct bio_entry *be; be = f2fs_kmem_cache_alloc(bio_entry_slab, GFP_NOFS, true, NULL); be->bio = bio; bio_get(bio); if (bio_add_page(bio, page, PAGE_SIZE, 0) != PAGE_SIZE) f2fs_bug_on(sbi, 1); f2fs_down_write(&io->bio_list_lock); list_add_tail(&be->list, &io->bio_list); f2fs_up_write(&io->bio_list_lock); } static void del_bio_entry(struct bio_entry *be) { list_del(&be->list); kmem_cache_free(bio_entry_slab, be); } static int add_ipu_page(struct f2fs_io_info *fio, struct bio **bio, struct page *page) { struct f2fs_sb_info *sbi = fio->sbi; enum temp_type temp; bool found = false; int ret = -EAGAIN; for (temp = HOT; temp < NR_TEMP_TYPE && !found; temp++) { struct f2fs_bio_info *io = sbi->write_io[DATA] + temp; struct list_head *head = &io->bio_list; struct bio_entry *be; f2fs_down_write(&io->bio_list_lock); list_for_each_entry(be, head, list) { if (be->bio != *bio) continue; found = true; f2fs_bug_on(sbi, !page_is_mergeable(sbi, *bio, *fio->last_block, fio->new_blkaddr)); if (f2fs_crypt_mergeable_bio(*bio, fio->page->mapping->host, page_folio(fio->page)->index, fio) && bio_add_page(*bio, page, PAGE_SIZE, 0) == PAGE_SIZE) { ret = 0; break; } /* page can't be merged into bio; submit the bio */ del_bio_entry(be); f2fs_submit_write_bio(sbi, *bio, DATA); break; } f2fs_up_write(&io->bio_list_lock); } if (ret) { bio_put(*bio); *bio = NULL; } return ret; } void f2fs_submit_merged_ipu_write(struct f2fs_sb_info *sbi, struct bio **bio, struct page *page) { enum temp_type temp; bool found = false; struct bio *target = bio ? 
*bio : NULL; f2fs_bug_on(sbi, !target && !page); for (temp = HOT; temp < NR_TEMP_TYPE && !found; temp++) { struct f2fs_bio_info *io = sbi->write_io[DATA] + temp; struct list_head *head = &io->bio_list; struct bio_entry *be; if (list_empty(head)) continue; f2fs_down_read(&io->bio_list_lock); list_for_each_entry(be, head, list) { if (target) found = (target == be->bio); else found = __has_merged_page(be->bio, NULL, page, 0); if (found) break; } f2fs_up_read(&io->bio_list_lock); if (!found) continue; found = false; f2fs_down_write(&io->bio_list_lock); list_for_each_entry(be, head, list) { if (target) found = (target == be->bio); else found = __has_merged_page(be->bio, NULL, page, 0); if (found) { target = be->bio; del_bio_entry(be); break; } } f2fs_up_write(&io->bio_list_lock); } if (found) f2fs_submit_write_bio(sbi, target, DATA); if (bio && *bio) { bio_put(*bio); *bio = NULL; } } int f2fs_merge_page_bio(struct f2fs_io_info *fio) { struct bio *bio = *fio->bio; struct page *page = fio->encrypted_page ? fio->encrypted_page : fio->page; if (!f2fs_is_valid_blkaddr(fio->sbi, fio->new_blkaddr, __is_meta_io(fio) ? META_GENERIC : DATA_GENERIC)) return -EFSCORRUPTED; trace_f2fs_submit_page_bio(page, fio); if (bio && !page_is_mergeable(fio->sbi, bio, *fio->last_block, fio->new_blkaddr)) f2fs_submit_merged_ipu_write(fio->sbi, &bio, NULL); alloc_new: if (!bio) { bio = __bio_alloc(fio, BIO_MAX_VECS); f2fs_set_bio_crypt_ctx(bio, fio->page->mapping->host, page_folio(fio->page)->index, fio, GFP_NOIO); add_bio_entry(fio->sbi, bio, page, fio->temp); } else { if (add_ipu_page(fio, &bio, page)) goto alloc_new; } if (fio->io_wbc) wbc_account_cgroup_owner(fio->io_wbc, page_folio(fio->page), PAGE_SIZE); inc_page_count(fio->sbi, WB_DATA_TYPE(page, false)); *fio->last_block = fio->new_blkaddr; *fio->bio = bio; return 0; } #ifdef CONFIG_BLK_DEV_ZONED static bool is_end_zone_blkaddr(struct f2fs_sb_info *sbi, block_t blkaddr) { struct block_device *bdev = sbi->sb->s_bdev; int devi = 0; if (f2fs_is_multi_device(sbi)) { devi = f2fs_target_device_index(sbi, blkaddr); if (blkaddr < FDEV(devi).start_blk || blkaddr > FDEV(devi).end_blk) { f2fs_err(sbi, "Invalid block %x", blkaddr); return false; } blkaddr -= FDEV(devi).start_blk; bdev = FDEV(devi).bdev; } return bdev_is_zoned(bdev) && f2fs_blkz_is_seq(sbi, devi, blkaddr) && (blkaddr % sbi->blocks_per_blkz == sbi->blocks_per_blkz - 1); } #endif void f2fs_submit_page_write(struct f2fs_io_info *fio) { struct f2fs_sb_info *sbi = fio->sbi; enum page_type btype = PAGE_TYPE_OF_BIO(fio->type); struct f2fs_bio_info *io = sbi->write_io[btype] + fio->temp; struct page *bio_page; enum count_type type; f2fs_bug_on(sbi, is_read_io(fio->op)); f2fs_down_write(&io->io_rwsem); next: #ifdef CONFIG_BLK_DEV_ZONED if (f2fs_sb_has_blkzoned(sbi) && btype < META && io->zone_pending_bio) { wait_for_completion_io(&io->zone_wait); bio_put(io->zone_pending_bio); io->zone_pending_bio = NULL; io->bi_private = NULL; } #endif if (fio->in_list) { spin_lock(&io->io_lock); if (list_empty(&io->io_list)) { spin_unlock(&io->io_lock); goto out; } fio = list_first_entry(&io->io_list, struct f2fs_io_info, list); list_del(&fio->list); spin_unlock(&io->io_lock); } verify_fio_blkaddr(fio); if (fio->encrypted_page) bio_page = fio->encrypted_page; else if (fio->compressed_page) bio_page = fio->compressed_page; else bio_page = fio->page; /* set submitted = true as a return value */ fio->submitted = 1; type = WB_DATA_TYPE(bio_page, fio->compressed_page); inc_page_count(sbi, type); if (io->bio && (!io_is_mergeable(sbi, 
io->bio, io, fio, io->last_block_in_bio, fio->new_blkaddr) || !f2fs_crypt_mergeable_bio(io->bio, fio->page->mapping->host, page_folio(bio_page)->index, fio))) __submit_merged_bio(io); alloc_new: if (io->bio == NULL) { io->bio = __bio_alloc(fio, BIO_MAX_VECS); f2fs_set_bio_crypt_ctx(io->bio, fio->page->mapping->host, page_folio(bio_page)->index, fio, GFP_NOIO); io->fio = *fio; } if (bio_add_page(io->bio, bio_page, PAGE_SIZE, 0) < PAGE_SIZE) { __submit_merged_bio(io); goto alloc_new; } if (fio->io_wbc) wbc_account_cgroup_owner(fio->io_wbc, page_folio(fio->page), PAGE_SIZE); io->last_block_in_bio = fio->new_blkaddr; trace_f2fs_submit_page_write(fio->page, fio); #ifdef CONFIG_BLK_DEV_ZONED if (f2fs_sb_has_blkzoned(sbi) && btype < META && is_end_zone_blkaddr(sbi, fio->new_blkaddr)) { bio_get(io->bio); reinit_completion(&io->zone_wait); io->bi_private = io->bio->bi_private; io->bio->bi_private = io; io->bio->bi_end_io = f2fs_zone_write_end_io; io->zone_pending_bio = io->bio; __submit_merged_bio(io); } #endif if (fio->in_list) goto next; out: if (is_sbi_flag_set(sbi, SBI_IS_SHUTDOWN) || !f2fs_is_checkpoint_ready(sbi)) __submit_merged_bio(io); f2fs_up_write(&io->io_rwsem); } static struct bio *f2fs_grab_read_bio(struct inode *inode, block_t blkaddr, unsigned nr_pages, blk_opf_t op_flag, pgoff_t first_idx, bool for_write) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); struct bio *bio; struct bio_post_read_ctx *ctx = NULL; unsigned int post_read_steps = 0; sector_t sector; struct block_device *bdev = f2fs_target_device(sbi, blkaddr, &sector); bio = bio_alloc_bioset(bdev, bio_max_segs(nr_pages), REQ_OP_READ | op_flag, for_write ? GFP_NOIO : GFP_KERNEL, &f2fs_bioset); if (!bio) return ERR_PTR(-ENOMEM); bio->bi_iter.bi_sector = sector; f2fs_set_bio_crypt_ctx(bio, inode, first_idx, NULL, GFP_NOFS); bio->bi_end_io = f2fs_read_end_io; if (fscrypt_inode_uses_fs_layer_crypto(inode)) post_read_steps |= STEP_DECRYPT; if (f2fs_need_verity(inode, first_idx)) post_read_steps |= STEP_VERITY; /* * STEP_DECOMPRESS is handled specially, since a compressed file might * contain both compressed and uncompressed clusters. We'll allocate a * bio_post_read_ctx if the file is compressed, but the caller is * responsible for enabling STEP_DECOMPRESS if it's actually needed. */ if (post_read_steps || f2fs_compressed_file(inode)) { /* Due to the mempool, this never fails.
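 *
 * As a rough sketch of the post-read pipeline this sets up (an
 * illustrative summary, not text from the original source), the steps
 * recorded in ctx->enabled_steps are applied in this order once the
 * read bio completes:
 *
 *	bio completes
 *	   -> STEP_DECRYPT    (fs-layer fscrypt only)
 *	   -> STEP_DECOMPRESS (enabled later by the caller, see
 *	                       f2fs_read_multi_pages())
 *	   -> STEP_VERITY     (fsverity verification)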
*/ ctx = mempool_alloc(bio_post_read_ctx_pool, GFP_NOFS); ctx->bio = bio; ctx->sbi = sbi; ctx->enabled_steps = post_read_steps; ctx->fs_blkaddr = blkaddr; ctx->decompression_attempted = false; bio->bi_private = ctx; } iostat_alloc_and_bind_ctx(sbi, bio, ctx); return bio; } /* This can handle encryption stuff */ static int f2fs_submit_page_read(struct inode *inode, struct folio *folio, block_t blkaddr, blk_opf_t op_flags, bool for_write) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); struct bio *bio; bio = f2fs_grab_read_bio(inode, blkaddr, 1, op_flags, folio->index, for_write); if (IS_ERR(bio)) return PTR_ERR(bio); /* wait for GCed page writeback via META_MAPPING */ f2fs_wait_on_block_writeback(inode, blkaddr); if (!bio_add_folio(bio, folio, PAGE_SIZE, 0)) { iostat_update_and_unbind_ctx(bio); if (bio->bi_private) mempool_free(bio->bi_private, bio_post_read_ctx_pool); bio_put(bio); return -EFAULT; } inc_page_count(sbi, F2FS_RD_DATA); f2fs_update_iostat(sbi, NULL, FS_DATA_READ_IO, F2FS_BLKSIZE); f2fs_submit_read_bio(sbi, bio, DATA); return 0; } static void __set_data_blkaddr(struct dnode_of_data *dn, block_t blkaddr) { __le32 *addr = get_dnode_addr(dn->inode, dn->node_page); dn->data_blkaddr = blkaddr; addr[dn->ofs_in_node] = cpu_to_le32(dn->data_blkaddr); } /* * Lock ordering for the change of data block address: * ->data_page * ->node_page * update block addresses in the node page */ void f2fs_set_data_blkaddr(struct dnode_of_data *dn, block_t blkaddr) { f2fs_wait_on_page_writeback(dn->node_page, NODE, true, true); __set_data_blkaddr(dn, blkaddr); if (set_page_dirty(dn->node_page)) dn->node_changed = true; } void f2fs_update_data_blkaddr(struct dnode_of_data *dn, block_t blkaddr) { f2fs_set_data_blkaddr(dn, blkaddr); f2fs_update_read_extent_cache(dn); } /* dn->ofs_in_node will be returned with up-to-date last block pointer */ int f2fs_reserve_new_blocks(struct dnode_of_data *dn, blkcnt_t count) { struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode); int err; if (!count) return 0; if (unlikely(is_inode_flag_set(dn->inode, FI_NO_ALLOC))) return -EPERM; err = inc_valid_block_count(sbi, dn->inode, &count, true); if (unlikely(err)) return err; trace_f2fs_reserve_new_blocks(dn->inode, dn->nid, dn->ofs_in_node, count); f2fs_wait_on_page_writeback(dn->node_page, NODE, true, true); for (; count > 0; dn->ofs_in_node++) { block_t blkaddr = f2fs_data_blkaddr(dn); if (blkaddr == NULL_ADDR) { __set_data_blkaddr(dn, NEW_ADDR); count--; } } if (set_page_dirty(dn->node_page)) dn->node_changed = true; return 0; } /* Should keep dn->ofs_in_node unchanged */ int f2fs_reserve_new_block(struct dnode_of_data *dn) { unsigned int ofs_in_node = dn->ofs_in_node; int ret; ret = f2fs_reserve_new_blocks(dn, 1); dn->ofs_in_node = ofs_in_node; return ret; } int f2fs_reserve_block(struct dnode_of_data *dn, pgoff_t index) { bool need_put = dn->inode_page ?
false : true; int err; err = f2fs_get_dnode_of_data(dn, index, ALLOC_NODE); if (err) return err; if (dn->data_blkaddr == NULL_ADDR) err = f2fs_reserve_new_block(dn); if (err || need_put) f2fs_put_dnode(dn); return err; } struct page *f2fs_get_read_data_page(struct inode *inode, pgoff_t index, blk_opf_t op_flags, bool for_write, pgoff_t *next_pgofs) { struct address_space *mapping = inode->i_mapping; struct dnode_of_data dn; struct page *page; int err; page = f2fs_grab_cache_page(mapping, index, for_write); if (!page) return ERR_PTR(-ENOMEM); if (f2fs_lookup_read_extent_cache_block(inode, index, &dn.data_blkaddr)) { if (!f2fs_is_valid_blkaddr(F2FS_I_SB(inode), dn.data_blkaddr, DATA_GENERIC_ENHANCE_READ)) { err = -EFSCORRUPTED; goto put_err; } goto got_it; } set_new_dnode(&dn, inode, NULL, NULL, 0); err = f2fs_get_dnode_of_data(&dn, index, LOOKUP_NODE); if (err) { if (err == -ENOENT && next_pgofs) *next_pgofs = f2fs_get_next_page_offset(&dn, index); goto put_err; } f2fs_put_dnode(&dn); if (unlikely(dn.data_blkaddr == NULL_ADDR)) { err = -ENOENT; if (next_pgofs) *next_pgofs = index + 1; goto put_err; } if (dn.data_blkaddr != NEW_ADDR && !f2fs_is_valid_blkaddr(F2FS_I_SB(inode), dn.data_blkaddr, DATA_GENERIC_ENHANCE)) { err = -EFSCORRUPTED; goto put_err; } got_it: if (PageUptodate(page)) { unlock_page(page); return page; } /* * A new dentry page is allocated but not able to be written, since its * new inode page couldn't be allocated due to -ENOSPC. * In such a case, its blkaddr can remain NEW_ADDR; * see f2fs_add_link -> f2fs_get_new_data_page -> * f2fs_init_inode_metadata. */ if (dn.data_blkaddr == NEW_ADDR) { zero_user_segment(page, 0, PAGE_SIZE); if (!PageUptodate(page)) SetPageUptodate(page); unlock_page(page); return page; } err = f2fs_submit_page_read(inode, page_folio(page), dn.data_blkaddr, op_flags, for_write); if (err) goto put_err; return page; put_err: f2fs_put_page(page, 1); return ERR_PTR(err); } struct page *f2fs_find_data_page(struct inode *inode, pgoff_t index, pgoff_t *next_pgofs) { struct address_space *mapping = inode->i_mapping; struct page *page; page = find_get_page(mapping, index); if (page && PageUptodate(page)) return page; f2fs_put_page(page, 0); page = f2fs_get_read_data_page(inode, index, 0, false, next_pgofs); if (IS_ERR(page)) return page; if (PageUptodate(page)) return page; wait_on_page_locked(page); if (unlikely(!PageUptodate(page))) { f2fs_put_page(page, 0); return ERR_PTR(-EIO); } return page; } /* * If it tries to access a hole, return an error, * because the callers, i.e. functions in dir.c and GC, should be able to know * whether this page exists or not. */ struct page *f2fs_get_lock_data_page(struct inode *inode, pgoff_t index, bool for_write) { struct address_space *mapping = inode->i_mapping; struct page *page; page = f2fs_get_read_data_page(inode, index, 0, for_write, NULL); if (IS_ERR(page)) return page; /* wait for read completion */ lock_page(page); if (unlikely(page->mapping != mapping || !PageUptodate(page))) { f2fs_put_page(page, 1); return ERR_PTR(-EIO); } return page; } /* * Caller ensures that this data page is never allocated. * A new zero-filled data page is allocated in the page cache. * * Also, caller should grab and release a rwsem by calling f2fs_lock_op() and * f2fs_unlock_op(). * Note that ipage is set only by make_empty_dir, and if any error occurs, * ipage should be released by this function.
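 *
 * A minimal caller sketch is inserted below (illustrative only, not
 * part of the original file; the helper name is hypothetical). It
 * shows the usual pattern around f2fs_get_lock_data_page(): the page
 * comes back locked and uptodate, and f2fs_put_page(page, 1) unlocks
 * it and drops the reference.
 */
static int __maybe_unused f2fs_example_read_block(struct inode *inode,
						pgoff_t index, void *buf)
{
	struct page *page;

	page = f2fs_get_lock_data_page(inode, index, false);
	if (IS_ERR(page))
		return PTR_ERR(page);	/* hole, corruption or I/O error */

	/* copy out one block; memcpy_from_page() handles highmem pages */
	memcpy_from_page(buf, page, 0, PAGE_SIZE);
	f2fs_put_page(page, 1);
	return 0;
}

/*
 * The allocating variant described in the comment above follows.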
*/ struct page *f2fs_get_new_data_page(struct inode *inode, struct page *ipage, pgoff_t index, bool new_i_size) { struct address_space *mapping = inode->i_mapping; struct page *page; struct dnode_of_data dn; int err; page = f2fs_grab_cache_page(mapping, index, true); if (!page) { /* * before exiting, we should make sure ipage will be released * if any error occurs. */ f2fs_put_page(ipage, 1); return ERR_PTR(-ENOMEM); } set_new_dnode(&dn, inode, ipage, NULL, 0); err = f2fs_reserve_block(&dn, index); if (err) { f2fs_put_page(page, 1); return ERR_PTR(err); } if (!ipage) f2fs_put_dnode(&dn); if (PageUptodate(page)) goto got_it; if (dn.data_blkaddr == NEW_ADDR) { zero_user_segment(page, 0, PAGE_SIZE); if (!PageUptodate(page)) SetPageUptodate(page); } else { f2fs_put_page(page, 1); /* if ipage exists, blkaddr should be NEW_ADDR */ f2fs_bug_on(F2FS_I_SB(inode), ipage); page = f2fs_get_lock_data_page(inode, index, true); if (IS_ERR(page)) return page; } got_it: if (new_i_size && i_size_read(inode) < ((loff_t)(index + 1) << PAGE_SHIFT)) f2fs_i_size_write(inode, ((loff_t)(index + 1) << PAGE_SHIFT)); return page; } static int __allocate_data_block(struct dnode_of_data *dn, int seg_type) { struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode); struct f2fs_summary sum; struct node_info ni; block_t old_blkaddr; blkcnt_t count = 1; int err; if (unlikely(is_inode_flag_set(dn->inode, FI_NO_ALLOC))) return -EPERM; err = f2fs_get_node_info(sbi, dn->nid, &ni, false); if (err) return err; dn->data_blkaddr = f2fs_data_blkaddr(dn); if (dn->data_blkaddr == NULL_ADDR) { err = inc_valid_block_count(sbi, dn->inode, &count, true); if (unlikely(err)) return err; } set_summary(&sum, dn->nid, dn->ofs_in_node, ni.version); old_blkaddr = dn->data_blkaddr; err = f2fs_allocate_data_block(sbi, NULL, old_blkaddr, &dn->data_blkaddr, &sum, seg_type, NULL); if (err) return err; if (GET_SEGNO(sbi, old_blkaddr) != NULL_SEGNO) f2fs_invalidate_internal_cache(sbi, old_blkaddr); f2fs_update_data_blkaddr(dn, dn->data_blkaddr); return 0; } static void f2fs_map_lock(struct f2fs_sb_info *sbi, int flag) { if (flag == F2FS_GET_BLOCK_PRE_AIO) f2fs_down_read(&sbi->node_change); else f2fs_lock_op(sbi); } static void f2fs_map_unlock(struct f2fs_sb_info *sbi, int flag) { if (flag == F2FS_GET_BLOCK_PRE_AIO) f2fs_up_read(&sbi->node_change); else f2fs_unlock_op(sbi); } int f2fs_get_block_locked(struct dnode_of_data *dn, pgoff_t index) { struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode); int err = 0; f2fs_map_lock(sbi, F2FS_GET_BLOCK_PRE_AIO); if (!f2fs_lookup_read_extent_cache_block(dn->inode, index, &dn->data_blkaddr)) err = f2fs_reserve_block(dn, index); f2fs_map_unlock(sbi, F2FS_GET_BLOCK_PRE_AIO); return err; } static int f2fs_map_no_dnode(struct inode *inode, struct f2fs_map_blocks *map, struct dnode_of_data *dn, pgoff_t pgoff) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); /* * There is one exceptional case in which read_node_page() may return * -ENOENT because the filesystem has been shut down or hit cp_error; return * -EIO in that case.
*/ if (map->m_may_create && (is_sbi_flag_set(sbi, SBI_IS_SHUTDOWN) || f2fs_cp_error(sbi))) return -EIO; if (map->m_next_pgofs) *map->m_next_pgofs = f2fs_get_next_page_offset(dn, pgoff); if (map->m_next_extent) *map->m_next_extent = f2fs_get_next_page_offset(dn, pgoff); return 0; } static bool f2fs_map_blocks_cached(struct inode *inode, struct f2fs_map_blocks *map, int flag) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); unsigned int maxblocks = map->m_len; pgoff_t pgoff = (pgoff_t)map->m_lblk; struct extent_info ei = {}; if (!f2fs_lookup_read_extent_cache(inode, pgoff, &ei)) return false; map->m_pblk = ei.blk + pgoff - ei.fofs; map->m_len = min((pgoff_t)maxblocks, ei.fofs + ei.len - pgoff); map->m_flags = F2FS_MAP_MAPPED; if (map->m_next_extent) *map->m_next_extent = pgoff + map->m_len; /* for hardware encryption, and to avoid potential issues in the future */ if (flag == F2FS_GET_BLOCK_DIO) f2fs_wait_on_block_writeback_range(inode, map->m_pblk, map->m_len); if (f2fs_allow_multi_device_dio(sbi, flag)) { int bidx = f2fs_target_device_index(sbi, map->m_pblk); struct f2fs_dev_info *dev = &sbi->devs[bidx]; map->m_bdev = dev->bdev; map->m_pblk -= dev->start_blk; map->m_len = min(map->m_len, dev->end_blk + 1 - map->m_pblk); } else { map->m_bdev = inode->i_sb->s_bdev; } return true; } static bool map_is_mergeable(struct f2fs_sb_info *sbi, struct f2fs_map_blocks *map, block_t blkaddr, int flag, int bidx, int ofs) { if (map->m_multidev_dio && map->m_bdev != FDEV(bidx).bdev) return false; if (map->m_pblk != NEW_ADDR && blkaddr == (map->m_pblk + ofs)) return true; if (map->m_pblk == NEW_ADDR && blkaddr == NEW_ADDR) return true; if (flag == F2FS_GET_BLOCK_PRE_DIO) return true; if (flag == F2FS_GET_BLOCK_DIO && map->m_pblk == NULL_ADDR && blkaddr == NULL_ADDR) return true; return false; } /* * f2fs_map_blocks() tries to find or build a mapping relationship which * maps contiguous logical blocks to physical blocks, and returns such * info via the f2fs_map_blocks structure. */ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map, int flag) { unsigned int maxblocks = map->m_len; struct dnode_of_data dn; struct f2fs_sb_info *sbi = F2FS_I_SB(inode); int mode = map->m_may_create ?
ALLOC_NODE : LOOKUP_NODE; pgoff_t pgofs, end_offset, end; int err = 0, ofs = 1; unsigned int ofs_in_node, last_ofs_in_node; blkcnt_t prealloc; block_t blkaddr; unsigned int start_pgofs; int bidx = 0; bool is_hole; if (!maxblocks) return 0; if (!map->m_may_create && f2fs_map_blocks_cached(inode, map, flag)) goto out; map->m_bdev = inode->i_sb->s_bdev; map->m_multidev_dio = f2fs_allow_multi_device_dio(F2FS_I_SB(inode), flag); map->m_len = 0; map->m_flags = 0; /* it only supports block size == page size */ pgofs = (pgoff_t)map->m_lblk; end = pgofs + maxblocks; next_dnode: if (map->m_may_create) f2fs_map_lock(sbi, flag); /* When reading holes, we need its node page */ set_new_dnode(&dn, inode, NULL, NULL, 0); err = f2fs_get_dnode_of_data(&dn, pgofs, mode); if (err) { if (flag == F2FS_GET_BLOCK_BMAP) map->m_pblk = 0; if (err == -ENOENT) err = f2fs_map_no_dnode(inode, map, &dn, pgofs); goto unlock_out; } start_pgofs = pgofs; prealloc = 0; last_ofs_in_node = ofs_in_node = dn.ofs_in_node; end_offset = ADDRS_PER_PAGE(dn.node_page, inode); next_block: blkaddr = f2fs_data_blkaddr(&dn); is_hole = !__is_valid_data_blkaddr(blkaddr); if (!is_hole && !f2fs_is_valid_blkaddr(sbi, blkaddr, DATA_GENERIC_ENHANCE)) { err = -EFSCORRUPTED; goto sync_out; } /* use out-place-update for direct IO under LFS mode */ if (map->m_may_create && (is_hole || (flag == F2FS_GET_BLOCK_DIO && f2fs_lfs_mode(sbi) && !f2fs_is_pinned_file(inode)))) { if (unlikely(f2fs_cp_error(sbi))) { err = -EIO; goto sync_out; } switch (flag) { case F2FS_GET_BLOCK_PRE_AIO: if (blkaddr == NULL_ADDR) { prealloc++; last_ofs_in_node = dn.ofs_in_node; } break; case F2FS_GET_BLOCK_PRE_DIO: case F2FS_GET_BLOCK_DIO: err = __allocate_data_block(&dn, map->m_seg_type); if (err) goto sync_out; if (flag == F2FS_GET_BLOCK_PRE_DIO) file_need_truncate(inode); set_inode_flag(inode, FI_APPEND_WRITE); break; default: WARN_ON_ONCE(1); err = -EIO; goto sync_out; } blkaddr = dn.data_blkaddr; if (is_hole) map->m_flags |= F2FS_MAP_NEW; } else if (is_hole) { if (f2fs_compressed_file(inode) && f2fs_sanity_check_cluster(&dn)) { err = -EFSCORRUPTED; f2fs_handle_error(sbi, ERROR_CORRUPTED_CLUSTER); goto sync_out; } switch (flag) { case F2FS_GET_BLOCK_PRECACHE: goto sync_out; case F2FS_GET_BLOCK_BMAP: map->m_pblk = 0; goto sync_out; case F2FS_GET_BLOCK_FIEMAP: if (blkaddr == NULL_ADDR) { if (map->m_next_pgofs) *map->m_next_pgofs = pgofs + 1; goto sync_out; } break; case F2FS_GET_BLOCK_DIO: if (map->m_next_pgofs) *map->m_next_pgofs = pgofs + 1; break; default: /* for defragment case */ if (map->m_next_pgofs) *map->m_next_pgofs = pgofs + 1; goto sync_out; } } if (flag == F2FS_GET_BLOCK_PRE_AIO) goto skip; if (map->m_multidev_dio) bidx = f2fs_target_device_index(sbi, blkaddr); if (map->m_len == 0) { /* reserved delalloc block should be mapped for fiemap. */ if (blkaddr == NEW_ADDR) map->m_flags |= F2FS_MAP_DELALLOC; /* DIO READ and hole case, should not map the blocks. 
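 * In other words (an illustrative restatement, not original text): a
 * direct-I/O read over a hole reports the next offset via m_next_pgofs
 * above, but leaves F2FS_MAP_MAPPED clear, so the caller sees a hole
 * rather than a mapping to stale blocks.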
*/ if (!(flag == F2FS_GET_BLOCK_DIO && is_hole && !map->m_may_create)) map->m_flags |= F2FS_MAP_MAPPED; map->m_pblk = blkaddr; map->m_len = 1; if (map->m_multidev_dio) map->m_bdev = FDEV(bidx).bdev; } else if (map_is_mergeable(sbi, map, blkaddr, flag, bidx, ofs)) { ofs++; map->m_len++; } else { goto sync_out; } skip: dn.ofs_in_node++; pgofs++; /* preallocate blocks in batch for one dnode page */ if (flag == F2FS_GET_BLOCK_PRE_AIO && (pgofs == end || dn.ofs_in_node == end_offset)) { dn.ofs_in_node = ofs_in_node; err = f2fs_reserve_new_blocks(&dn, prealloc); if (err) goto sync_out; map->m_len += dn.ofs_in_node - ofs_in_node; if (prealloc && dn.ofs_in_node != last_ofs_in_node + 1) { err = -ENOSPC; goto sync_out; } dn.ofs_in_node = end_offset; } if (flag == F2FS_GET_BLOCK_DIO && f2fs_lfs_mode(sbi) && map->m_may_create) { /* the next block to be allocated may not be contiguous. */ if (GET_SEGOFF_FROM_SEG0(sbi, blkaddr) % BLKS_PER_SEC(sbi) == CAP_BLKS_PER_SEC(sbi) - 1) goto sync_out; } if (pgofs >= end) goto sync_out; else if (dn.ofs_in_node < end_offset) goto next_block; if (flag == F2FS_GET_BLOCK_PRECACHE) { if (map->m_flags & F2FS_MAP_MAPPED) { unsigned int ofs = start_pgofs - map->m_lblk; f2fs_update_read_extent_cache_range(&dn, start_pgofs, map->m_pblk + ofs, map->m_len - ofs); } } f2fs_put_dnode(&dn); if (map->m_may_create) { f2fs_map_unlock(sbi, flag); f2fs_balance_fs(sbi, dn.node_changed); } goto next_dnode; sync_out: if (flag == F2FS_GET_BLOCK_DIO && map->m_flags & F2FS_MAP_MAPPED) { /* * for hardware encryption, but to avoid potential issue * in future */ f2fs_wait_on_block_writeback_range(inode, map->m_pblk, map->m_len); if (map->m_multidev_dio) { block_t blk_addr = map->m_pblk; bidx = f2fs_target_device_index(sbi, map->m_pblk); map->m_bdev = FDEV(bidx).bdev; map->m_pblk -= FDEV(bidx).start_blk; if (map->m_may_create) f2fs_update_device_state(sbi, inode->i_ino, blk_addr, map->m_len); f2fs_bug_on(sbi, blk_addr + map->m_len > FDEV(bidx).end_blk + 1); } } if (flag == F2FS_GET_BLOCK_PRECACHE) { if (map->m_flags & F2FS_MAP_MAPPED) { unsigned int ofs = start_pgofs - map->m_lblk; f2fs_update_read_extent_cache_range(&dn, start_pgofs, map->m_pblk + ofs, map->m_len - ofs); } if (map->m_next_extent) *map->m_next_extent = pgofs + 1; } f2fs_put_dnode(&dn); unlock_out: if (map->m_may_create) { f2fs_map_unlock(sbi, flag); f2fs_balance_fs(sbi, dn.node_changed); } out: trace_f2fs_map_blocks(inode, map, flag, err); return err; } bool f2fs_overwrite_io(struct inode *inode, loff_t pos, size_t len) { struct f2fs_map_blocks map; block_t last_lblk; int err; if (pos + len > i_size_read(inode)) return false; map.m_lblk = F2FS_BYTES_TO_BLK(pos); map.m_next_pgofs = NULL; map.m_next_extent = NULL; map.m_seg_type = NO_CHECK_TYPE; map.m_may_create = false; last_lblk = F2FS_BLK_ALIGN(pos + len); while (map.m_lblk < last_lblk) { map.m_len = last_lblk - map.m_lblk; err = f2fs_map_blocks(inode, &map, F2FS_GET_BLOCK_DEFAULT); if (err || map.m_len == 0) return false; map.m_lblk += map.m_len; } return true; } static int f2fs_xattr_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); struct page *page; struct node_info ni; __u64 phys = 0, len; __u32 flags; nid_t xnid = F2FS_I(inode)->i_xattr_nid; int err = 0; if (f2fs_has_inline_xattr(inode)) { int offset; page = f2fs_grab_cache_page(NODE_MAPPING(sbi), inode->i_ino, false); if (!page) return -ENOMEM; err = f2fs_get_node_info(sbi, inode->i_ino, &ni, false); if (err) { f2fs_put_page(page, 1); return err; } phys = 
F2FS_BLK_TO_BYTES(ni.blk_addr); offset = offsetof(struct f2fs_inode, i_addr) + sizeof(__le32) * (DEF_ADDRS_PER_INODE - get_inline_xattr_addrs(inode)); phys += offset; len = inline_xattr_size(inode); f2fs_put_page(page, 1); flags = FIEMAP_EXTENT_DATA_INLINE | FIEMAP_EXTENT_NOT_ALIGNED; if (!xnid) flags |= FIEMAP_EXTENT_LAST; err = fiemap_fill_next_extent(fieinfo, 0, phys, len, flags); trace_f2fs_fiemap(inode, 0, phys, len, flags, err); if (err) return err; } if (xnid) { page = f2fs_grab_cache_page(NODE_MAPPING(sbi), xnid, false); if (!page) return -ENOMEM; err = f2fs_get_node_info(sbi, xnid, &ni, false); if (err) { f2fs_put_page(page, 1); return err; } phys = F2FS_BLK_TO_BYTES(ni.blk_addr); len = inode->i_sb->s_blocksize; f2fs_put_page(page, 1); flags = FIEMAP_EXTENT_LAST; } if (phys) { err = fiemap_fill_next_extent(fieinfo, 0, phys, len, flags); trace_f2fs_fiemap(inode, 0, phys, len, flags, err); } return (err < 0 ? err : 0); } int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, u64 start, u64 len) { struct f2fs_map_blocks map; sector_t start_blk, last_blk, blk_len, max_len; pgoff_t next_pgofs; u64 logical = 0, phys = 0, size = 0; u32 flags = 0; int ret = 0; bool compr_cluster = false, compr_appended; unsigned int cluster_size = F2FS_I(inode)->i_cluster_size; unsigned int count_in_cluster = 0; loff_t maxbytes; if (fieinfo->fi_flags & FIEMAP_FLAG_CACHE) { ret = f2fs_precache_extents(inode); if (ret) return ret; } ret = fiemap_prep(inode, fieinfo, start, &len, FIEMAP_FLAG_XATTR); if (ret) return ret; inode_lock_shared(inode); maxbytes = F2FS_BLK_TO_BYTES(max_file_blocks(inode)); if (start > maxbytes) { ret = -EFBIG; goto out; } if (len > maxbytes || (maxbytes - len) < start) len = maxbytes - start; if (fieinfo->fi_flags & FIEMAP_FLAG_XATTR) { ret = f2fs_xattr_fiemap(inode, fieinfo); goto out; } if (f2fs_has_inline_data(inode) || f2fs_has_inline_dentry(inode)) { ret = f2fs_inline_data_fiemap(inode, fieinfo, start, len); if (ret != -EAGAIN) goto out; } start_blk = F2FS_BYTES_TO_BLK(start); last_blk = F2FS_BYTES_TO_BLK(start + len - 1); blk_len = last_blk - start_blk + 1; max_len = F2FS_BYTES_TO_BLK(maxbytes) - start_blk; next: memset(&map, 0, sizeof(map)); map.m_lblk = start_blk; map.m_len = blk_len; map.m_next_pgofs = &next_pgofs; map.m_seg_type = NO_CHECK_TYPE; if (compr_cluster) { map.m_lblk += 1; map.m_len = cluster_size - count_in_cluster; } ret = f2fs_map_blocks(inode, &map, F2FS_GET_BLOCK_FIEMAP); if (ret) goto out; /* HOLE */ if (!compr_cluster && !(map.m_flags & F2FS_MAP_FLAGS)) { start_blk = next_pgofs; if (F2FS_BLK_TO_BYTES(start_blk) < maxbytes) goto prep_next; flags |= FIEMAP_EXTENT_LAST; } /* * current extent may cross boundary of inquiry, increase len to * requery. 
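 *
 * Concrete example (illustrative, not from the original source): if the
 * caller asked for blocks [0, 8) but the on-disk extent actually spans
 * [0, 32), the first pass ends exactly at last_blk with
 * blk_len != max_len, so we retry with blk_len = max_len and can then
 * report the whole extent in a single fiemap entry.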
*/ if (!compr_cluster && (map.m_flags & F2FS_MAP_MAPPED) && map.m_lblk + map.m_len - 1 == last_blk && blk_len != max_len) { blk_len = max_len; goto next; } compr_appended = false; /* In a case of compressed cluster, append this to the last extent */ if (compr_cluster && ((map.m_flags & F2FS_MAP_DELALLOC) || !(map.m_flags & F2FS_MAP_FLAGS))) { compr_appended = true; goto skip_fill; } if (size) { flags |= FIEMAP_EXTENT_MERGED; if (IS_ENCRYPTED(inode)) flags |= FIEMAP_EXTENT_DATA_ENCRYPTED; ret = fiemap_fill_next_extent(fieinfo, logical, phys, size, flags); trace_f2fs_fiemap(inode, logical, phys, size, flags, ret); if (ret) goto out; size = 0; } if (start_blk > last_blk) goto out; skip_fill: if (map.m_pblk == COMPRESS_ADDR) { compr_cluster = true; count_in_cluster = 1; } else if (compr_appended) { unsigned int appended_blks = cluster_size - count_in_cluster + 1; size += F2FS_BLK_TO_BYTES(appended_blks); start_blk += appended_blks; compr_cluster = false; } else { logical = F2FS_BLK_TO_BYTES(start_blk); phys = __is_valid_data_blkaddr(map.m_pblk) ? F2FS_BLK_TO_BYTES(map.m_pblk) : 0; size = F2FS_BLK_TO_BYTES(map.m_len); flags = 0; if (compr_cluster) { flags = FIEMAP_EXTENT_ENCODED; count_in_cluster += map.m_len; if (count_in_cluster == cluster_size) { compr_cluster = false; size += F2FS_BLKSIZE; } } else if (map.m_flags & F2FS_MAP_DELALLOC) { flags = FIEMAP_EXTENT_UNWRITTEN; } start_blk += F2FS_BYTES_TO_BLK(size); } prep_next: cond_resched(); if (fatal_signal_pending(current)) ret = -EINTR; else goto next; out: if (ret == 1) ret = 0; inode_unlock_shared(inode); return ret; } static inline loff_t f2fs_readpage_limit(struct inode *inode) { if (IS_ENABLED(CONFIG_FS_VERITY) && IS_VERITY(inode)) return F2FS_BLK_TO_BYTES(max_file_blocks(inode)); return i_size_read(inode); } static inline blk_opf_t f2fs_ra_op_flags(struct readahead_control *rac) { return rac ? REQ_RAHEAD : 0; } static int f2fs_read_single_page(struct inode *inode, struct folio *folio, unsigned nr_pages, struct f2fs_map_blocks *map, struct bio **bio_ret, sector_t *last_block_in_bio, struct readahead_control *rac) { struct bio *bio = *bio_ret; const unsigned int blocksize = F2FS_BLKSIZE; sector_t block_in_file; sector_t last_block; sector_t last_block_in_file; sector_t block_nr; pgoff_t index = folio_index(folio); int ret = 0; block_in_file = (sector_t)index; last_block = block_in_file + nr_pages; last_block_in_file = F2FS_BYTES_TO_BLK(f2fs_readpage_limit(inode) + blocksize - 1); if (last_block > last_block_in_file) last_block = last_block_in_file; /* just zeroing out page which is beyond EOF */ if (block_in_file >= last_block) goto zero_out; /* * Map blocks using the previous result first. */ if ((map->m_flags & F2FS_MAP_MAPPED) && block_in_file > map->m_lblk && block_in_file < (map->m_lblk + map->m_len)) goto got_it; /* * Then do more f2fs_map_blocks() calls until we are * done with this page. 
*/ map->m_lblk = block_in_file; map->m_len = last_block - block_in_file; ret = f2fs_map_blocks(inode, map, F2FS_GET_BLOCK_DEFAULT); if (ret) goto out; got_it: if ((map->m_flags & F2FS_MAP_MAPPED)) { block_nr = map->m_pblk + block_in_file - map->m_lblk; folio_set_mappedtodisk(folio); if (!f2fs_is_valid_blkaddr(F2FS_I_SB(inode), block_nr, DATA_GENERIC_ENHANCE_READ)) { ret = -EFSCORRUPTED; goto out; } } else { zero_out: folio_zero_segment(folio, 0, folio_size(folio)); if (f2fs_need_verity(inode, index) && !fsverity_verify_folio(folio)) { ret = -EIO; goto out; } if (!folio_test_uptodate(folio)) folio_mark_uptodate(folio); folio_unlock(folio); goto out; } /* * This page will go to BIO. Do we need to send this * BIO off first? */ if (bio && (!page_is_mergeable(F2FS_I_SB(inode), bio, *last_block_in_bio, block_nr) || !f2fs_crypt_mergeable_bio(bio, inode, index, NULL))) { submit_and_realloc: f2fs_submit_read_bio(F2FS_I_SB(inode), bio, DATA); bio = NULL; } if (bio == NULL) { bio = f2fs_grab_read_bio(inode, block_nr, nr_pages, f2fs_ra_op_flags(rac), index, false); if (IS_ERR(bio)) { ret = PTR_ERR(bio); bio = NULL; goto out; } } /* * If the page is under writeback, we need to wait for * its completion to see the correct decrypted data. */ f2fs_wait_on_block_writeback(inode, block_nr); if (!bio_add_folio(bio, folio, blocksize, 0)) goto submit_and_realloc; inc_page_count(F2FS_I_SB(inode), F2FS_RD_DATA); f2fs_update_iostat(F2FS_I_SB(inode), NULL, FS_DATA_READ_IO, F2FS_BLKSIZE); *last_block_in_bio = block_nr; out: *bio_ret = bio; return ret; } #ifdef CONFIG_F2FS_FS_COMPRESSION int f2fs_read_multi_pages(struct compress_ctx *cc, struct bio **bio_ret, unsigned nr_pages, sector_t *last_block_in_bio, struct readahead_control *rac, bool for_write) { struct dnode_of_data dn; struct inode *inode = cc->inode; struct f2fs_sb_info *sbi = F2FS_I_SB(inode); struct bio *bio = *bio_ret; unsigned int start_idx = cc->cluster_idx << cc->log_cluster_size; sector_t last_block_in_file; const unsigned int blocksize = F2FS_BLKSIZE; struct decompress_io_ctx *dic = NULL; struct extent_info ei = {}; bool from_dnode = true; int i; int ret = 0; f2fs_bug_on(sbi, f2fs_cluster_is_empty(cc)); last_block_in_file = F2FS_BYTES_TO_BLK(f2fs_readpage_limit(inode) + blocksize - 1); /* get rid of pages beyond EOF */ for (i = 0; i < cc->cluster_size; i++) { struct page *page = cc->rpages[i]; struct folio *folio; if (!page) continue; folio = page_folio(page); if ((sector_t)folio->index >= last_block_in_file) { folio_zero_segment(folio, 0, folio_size(folio)); if (!folio_test_uptodate(folio)) folio_mark_uptodate(folio); } else if (!folio_test_uptodate(folio)) { continue; } folio_unlock(folio); if (for_write) folio_put(folio); cc->rpages[i] = NULL; cc->nr_rpages--; } /* we are done since all pages are beyond EOF */ if (f2fs_cluster_is_empty(cc)) goto out; if (f2fs_lookup_read_extent_cache(inode, start_idx, &ei)) from_dnode = false; if (!from_dnode) goto skip_reading_dnode; set_new_dnode(&dn, inode, NULL, NULL, 0); ret = f2fs_get_dnode_of_data(&dn, start_idx, LOOKUP_NODE); if (ret) goto out; if (unlikely(f2fs_cp_error(sbi))) { ret = -EIO; goto out_put_dnode; } f2fs_bug_on(sbi, dn.data_blkaddr != COMPRESS_ADDR); skip_reading_dnode: for (i = 1; i < cc->cluster_size; i++) { block_t blkaddr; blkaddr = from_dnode ? 
data_blkaddr(dn.inode, dn.node_page, dn.ofs_in_node + i) : ei.blk + i - 1; if (!__is_valid_data_blkaddr(blkaddr)) break; if (!f2fs_is_valid_blkaddr(sbi, blkaddr, DATA_GENERIC)) { ret = -EFAULT; goto out_put_dnode; } cc->nr_cpages++; if (!from_dnode && i >= ei.c_len) break; } /* nothing to decompress */ if (cc->nr_cpages == 0) { ret = 0; goto out_put_dnode; } dic = f2fs_alloc_dic(cc); if (IS_ERR(dic)) { ret = PTR_ERR(dic); goto out_put_dnode; } for (i = 0; i < cc->nr_cpages; i++) { struct folio *folio = page_folio(dic->cpages[i]); block_t blkaddr; struct bio_post_read_ctx *ctx; blkaddr = from_dnode ? data_blkaddr(dn.inode, dn.node_page, dn.ofs_in_node + i + 1) : ei.blk + i; f2fs_wait_on_block_writeback(inode, blkaddr); if (f2fs_load_compressed_page(sbi, folio_page(folio, 0), blkaddr)) { if (atomic_dec_and_test(&dic->remaining_pages)) { f2fs_decompress_cluster(dic, true); break; } continue; } if (bio && (!page_is_mergeable(sbi, bio, *last_block_in_bio, blkaddr) || !f2fs_crypt_mergeable_bio(bio, inode, folio->index, NULL))) { submit_and_realloc: f2fs_submit_read_bio(sbi, bio, DATA); bio = NULL; } if (!bio) { bio = f2fs_grab_read_bio(inode, blkaddr, nr_pages, f2fs_ra_op_flags(rac), folio->index, for_write); if (IS_ERR(bio)) { ret = PTR_ERR(bio); f2fs_decompress_end_io(dic, ret, true); f2fs_put_dnode(&dn); *bio_ret = NULL; return ret; } } if (!bio_add_folio(bio, folio, blocksize, 0)) goto submit_and_realloc; ctx = get_post_read_ctx(bio); ctx->enabled_steps |= STEP_DECOMPRESS; refcount_inc(&dic->refcnt); inc_page_count(sbi, F2FS_RD_DATA); f2fs_update_iostat(sbi, inode, FS_DATA_READ_IO, F2FS_BLKSIZE); *last_block_in_bio = blkaddr; } if (from_dnode) f2fs_put_dnode(&dn); *bio_ret = bio; return 0; out_put_dnode: if (from_dnode) f2fs_put_dnode(&dn); out: for (i = 0; i < cc->cluster_size; i++) { if (cc->rpages[i]) { ClearPageUptodate(cc->rpages[i]); unlock_page(cc->rpages[i]); } } *bio_ret = bio; return ret; } #endif /* * This function was originally taken from fs/mpage.c, and customized for f2fs. * Major change was from block_size == page_size in f2fs by default. */ static int f2fs_mpage_readpages(struct inode *inode, struct readahead_control *rac, struct folio *folio) { struct bio *bio = NULL; sector_t last_block_in_bio = 0; struct f2fs_map_blocks map; #ifdef CONFIG_F2FS_FS_COMPRESSION struct compress_ctx cc = { .inode = inode, .log_cluster_size = F2FS_I(inode)->i_log_cluster_size, .cluster_size = F2FS_I(inode)->i_cluster_size, .cluster_idx = NULL_CLUSTER, .rpages = NULL, .cpages = NULL, .nr_rpages = 0, .nr_cpages = 0, }; pgoff_t nc_cluster_idx = NULL_CLUSTER; pgoff_t index; #endif unsigned nr_pages = rac ? 
readahead_count(rac) : 1; unsigned max_nr_pages = nr_pages; int ret = 0; map.m_pblk = 0; map.m_lblk = 0; map.m_len = 0; map.m_flags = 0; map.m_next_pgofs = NULL; map.m_next_extent = NULL; map.m_seg_type = NO_CHECK_TYPE; map.m_may_create = false; for (; nr_pages; nr_pages--) { if (rac) { folio = readahead_folio(rac); prefetchw(&folio->flags); } #ifdef CONFIG_F2FS_FS_COMPRESSION index = folio_index(folio); if (!f2fs_compressed_file(inode)) goto read_single_page; /* there are remained compressed pages, submit them */ if (!f2fs_cluster_can_merge_page(&cc, index)) { ret = f2fs_read_multi_pages(&cc, &bio, max_nr_pages, &last_block_in_bio, rac, false); f2fs_destroy_compress_ctx(&cc, false); if (ret) goto set_error_page; } if (cc.cluster_idx == NULL_CLUSTER) { if (nc_cluster_idx == index >> cc.log_cluster_size) goto read_single_page; ret = f2fs_is_compressed_cluster(inode, index); if (ret < 0) goto set_error_page; else if (!ret) { nc_cluster_idx = index >> cc.log_cluster_size; goto read_single_page; } nc_cluster_idx = NULL_CLUSTER; } ret = f2fs_init_compress_ctx(&cc); if (ret) goto set_error_page; f2fs_compress_ctx_add_page(&cc, folio); goto next_page; read_single_page: #endif ret = f2fs_read_single_page(inode, folio, max_nr_pages, &map, &bio, &last_block_in_bio, rac); if (ret) { #ifdef CONFIG_F2FS_FS_COMPRESSION set_error_page: #endif folio_zero_segment(folio, 0, folio_size(folio)); folio_unlock(folio); } #ifdef CONFIG_F2FS_FS_COMPRESSION next_page: #endif #ifdef CONFIG_F2FS_FS_COMPRESSION if (f2fs_compressed_file(inode)) { /* last page */ if (nr_pages == 1 && !f2fs_cluster_is_empty(&cc)) { ret = f2fs_read_multi_pages(&cc, &bio, max_nr_pages, &last_block_in_bio, rac, false); f2fs_destroy_compress_ctx(&cc, false); } } #endif } if (bio) f2fs_submit_read_bio(F2FS_I_SB(inode), bio, DATA); return ret; } static int f2fs_read_data_folio(struct file *file, struct folio *folio) { struct inode *inode = folio_file_mapping(folio)->host; int ret = -EAGAIN; trace_f2fs_readpage(folio, DATA); if (!f2fs_is_compress_backend_ready(inode)) { folio_unlock(folio); return -EOPNOTSUPP; } /* If the file has inline data, try to read it directly */ if (f2fs_has_inline_data(inode)) ret = f2fs_read_inline_data(inode, folio); if (ret == -EAGAIN) ret = f2fs_mpage_readpages(inode, NULL, folio); return ret; } static void f2fs_readahead(struct readahead_control *rac) { struct inode *inode = rac->mapping->host; trace_f2fs_readpages(inode, readahead_index(rac), readahead_count(rac)); if (!f2fs_is_compress_backend_ready(inode)) return; /* If the file has inline data, skip readahead */ if (f2fs_has_inline_data(inode)) return; f2fs_mpage_readpages(inode, rac, NULL); } int f2fs_encrypt_one_page(struct f2fs_io_info *fio) { struct inode *inode = fio->page->mapping->host; struct page *mpage, *page; gfp_t gfp_flags = GFP_NOFS; if (!f2fs_encrypted_file(inode)) return 0; page = fio->compressed_page ? 
fio->compressed_page : fio->page; if (fscrypt_inode_uses_inline_crypto(inode)) return 0; retry_encrypt: fio->encrypted_page = fscrypt_encrypt_pagecache_blocks(page, PAGE_SIZE, 0, gfp_flags); if (IS_ERR(fio->encrypted_page)) { /* flush pending IOs and wait for a while in the ENOMEM case */ if (PTR_ERR(fio->encrypted_page) == -ENOMEM) { f2fs_flush_merged_writes(fio->sbi); memalloc_retry_wait(GFP_NOFS); gfp_flags |= __GFP_NOFAIL; goto retry_encrypt; } return PTR_ERR(fio->encrypted_page); } mpage = find_lock_page(META_MAPPING(fio->sbi), fio->old_blkaddr); if (mpage) { if (PageUptodate(mpage)) memcpy(page_address(mpage), page_address(fio->encrypted_page), PAGE_SIZE); f2fs_put_page(mpage, 1); } return 0; } static inline bool check_inplace_update_policy(struct inode *inode, struct f2fs_io_info *fio) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); if (IS_F2FS_IPU_HONOR_OPU_WRITE(sbi) && is_inode_flag_set(inode, FI_OPU_WRITE)) return false; if (IS_F2FS_IPU_FORCE(sbi)) return true; if (IS_F2FS_IPU_SSR(sbi) && f2fs_need_SSR(sbi)) return true; if (IS_F2FS_IPU_UTIL(sbi) && utilization(sbi) > SM_I(sbi)->min_ipu_util) return true; if (IS_F2FS_IPU_SSR_UTIL(sbi) && f2fs_need_SSR(sbi) && utilization(sbi) > SM_I(sbi)->min_ipu_util) return true; /* * IPU for rewrite async pages */ if (IS_F2FS_IPU_ASYNC(sbi) && fio && fio->op == REQ_OP_WRITE && !(fio->op_flags & REQ_SYNC) && !IS_ENCRYPTED(inode)) return true; /* this is only set during fdatasync */ if (IS_F2FS_IPU_FSYNC(sbi) && is_inode_flag_set(inode, FI_NEED_IPU)) return true; if (unlikely(fio && is_sbi_flag_set(sbi, SBI_CP_DISABLED) && !f2fs_is_checkpointed_data(sbi, fio->old_blkaddr))) return true; return false; } bool f2fs_should_update_inplace(struct inode *inode, struct f2fs_io_info *fio) { /* swap file is migrating in aligned write mode */ if (is_inode_flag_set(inode, FI_ALIGNED_WRITE)) return false; if (f2fs_is_pinned_file(inode)) return true; /* if this is cold file, we should overwrite to avoid fragmentation */ if (file_is_cold(inode) && !is_inode_flag_set(inode, FI_OPU_WRITE)) return true; return check_inplace_update_policy(inode, fio); } bool f2fs_should_update_outplace(struct inode *inode, struct f2fs_io_info *fio) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); /* The below cases were checked when setting it. 
*/ if (f2fs_is_pinned_file(inode)) return false; if (fio && is_sbi_flag_set(sbi, SBI_NEED_FSCK)) return true; if (f2fs_lfs_mode(sbi)) return true; if (S_ISDIR(inode->i_mode)) return true; if (IS_NOQUOTA(inode)) return true; if (f2fs_used_in_atomic_write(inode)) return true; /* rewrite low-ratio compressed data w/ OPU mode to avoid fragmentation */ if (f2fs_compressed_file(inode) && F2FS_OPTION(sbi).compress_mode == COMPR_MODE_USER && is_inode_flag_set(inode, FI_ENABLE_COMPRESS)) return true; /* swap file is migrating in aligned write mode */ if (is_inode_flag_set(inode, FI_ALIGNED_WRITE)) return true; if (is_inode_flag_set(inode, FI_OPU_WRITE)) return true; if (fio) { if (page_private_gcing(fio->page)) return true; if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED) && f2fs_is_checkpointed_data(sbi, fio->old_blkaddr))) return true; } return false; } static inline bool need_inplace_update(struct f2fs_io_info *fio) { struct inode *inode = fio->page->mapping->host; if (f2fs_should_update_outplace(inode, fio)) return false; return f2fs_should_update_inplace(inode, fio); } int f2fs_do_write_data_page(struct f2fs_io_info *fio) { struct folio *folio = page_folio(fio->page); struct inode *inode = folio->mapping->host; struct dnode_of_data dn; struct node_info ni; bool ipu_force = false; bool atomic_commit; int err = 0; /* Use COW inode to make dnode_of_data for atomic write */ atomic_commit = f2fs_is_atomic_file(inode) && page_private_atomic(folio_page(folio, 0)); if (atomic_commit) set_new_dnode(&dn, F2FS_I(inode)->cow_inode, NULL, NULL, 0); else set_new_dnode(&dn, inode, NULL, NULL, 0); if (need_inplace_update(fio) && f2fs_lookup_read_extent_cache_block(inode, folio->index, &fio->old_blkaddr)) { if (!f2fs_is_valid_blkaddr(fio->sbi, fio->old_blkaddr, DATA_GENERIC_ENHANCE)) return -EFSCORRUPTED; ipu_force = true; fio->need_lock = LOCK_DONE; goto got_it; } /* Avoid deadlock between page->lock and f2fs_lock_op */ if (fio->need_lock == LOCK_REQ && !f2fs_trylock_op(fio->sbi)) return -EAGAIN; err = f2fs_get_dnode_of_data(&dn, folio->index, LOOKUP_NODE); if (err) goto out; fio->old_blkaddr = dn.data_blkaddr; /* This page is already truncated */ if (fio->old_blkaddr == NULL_ADDR) { folio_clear_uptodate(folio); clear_page_private_gcing(folio_page(folio, 0)); goto out_writepage; } got_it: if (__is_valid_data_blkaddr(fio->old_blkaddr) && !f2fs_is_valid_blkaddr(fio->sbi, fio->old_blkaddr, DATA_GENERIC_ENHANCE)) { err = -EFSCORRUPTED; goto out_writepage; } /* wait for GCed page writeback via META_MAPPING */ if (fio->meta_gc) f2fs_wait_on_block_writeback(inode, fio->old_blkaddr); /* * If the current allocation needs SSR, * it had better use in-place writes for updated data.
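 * (Illustrative rationale, not original text: under SSR free segments
 * are already scarce, and an in-place write rewrites the existing block
 * address instead of consuming a new one, so it does not make the
 * segment shortage worse.)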
*/ if (ipu_force || (__is_valid_data_blkaddr(fio->old_blkaddr) && need_inplace_update(fio))) { err = f2fs_encrypt_one_page(fio); if (err) goto out_writepage; folio_start_writeback(folio); f2fs_put_dnode(&dn); if (fio->need_lock == LOCK_REQ) f2fs_unlock_op(fio->sbi); err = f2fs_inplace_write_data(fio); if (err) { if (fscrypt_inode_uses_fs_layer_crypto(inode)) fscrypt_finalize_bounce_page(&fio->encrypted_page); folio_end_writeback(folio); } else { set_inode_flag(inode, FI_UPDATE_WRITE); } trace_f2fs_do_write_data_page(folio, IPU); return err; } if (fio->need_lock == LOCK_RETRY) { if (!f2fs_trylock_op(fio->sbi)) { err = -EAGAIN; goto out_writepage; } fio->need_lock = LOCK_REQ; } err = f2fs_get_node_info(fio->sbi, dn.nid, &ni, false); if (err) goto out_writepage; fio->version = ni.version; err = f2fs_encrypt_one_page(fio); if (err) goto out_writepage; folio_start_writeback(folio); if (fio->compr_blocks && fio->old_blkaddr == COMPRESS_ADDR) f2fs_i_compr_blocks_update(inode, fio->compr_blocks - 1, false); /* LFS mode write path */ f2fs_outplace_write_data(&dn, fio); trace_f2fs_do_write_data_page(folio, OPU); set_inode_flag(inode, FI_APPEND_WRITE); if (atomic_commit) clear_page_private_atomic(folio_page(folio, 0)); out_writepage: f2fs_put_dnode(&dn); out: if (fio->need_lock == LOCK_REQ) f2fs_unlock_op(fio->sbi); return err; } int f2fs_write_single_data_page(struct folio *folio, int *submitted, struct bio **bio, sector_t *last_block, struct writeback_control *wbc, enum iostat_type io_type, int compr_blocks, bool allow_balance) { struct inode *inode = folio->mapping->host; struct page *page = folio_page(folio, 0); struct f2fs_sb_info *sbi = F2FS_I_SB(inode); loff_t i_size = i_size_read(inode); const pgoff_t end_index = ((unsigned long long)i_size) >> PAGE_SHIFT; loff_t psize = (loff_t)(folio->index + 1) << PAGE_SHIFT; unsigned offset = 0; bool need_balance_fs = false; bool quota_inode = IS_NOQUOTA(inode); int err = 0; struct f2fs_io_info fio = { .sbi = sbi, .ino = inode->i_ino, .type = DATA, .op = REQ_OP_WRITE, .op_flags = wbc_to_write_flags(wbc), .old_blkaddr = NULL_ADDR, .page = page, .encrypted_page = NULL, .submitted = 0, .compr_blocks = compr_blocks, .need_lock = compr_blocks ? LOCK_DONE : LOCK_RETRY, .meta_gc = f2fs_meta_inode_gc_required(inode) ? 1 : 0, .io_type = io_type, .io_wbc = wbc, .bio = bio, .last_block = last_block, }; trace_f2fs_writepage(folio, DATA); /* we should bypass data pages to let the kworker jobs proceed */ if (unlikely(f2fs_cp_error(sbi))) { mapping_set_error(folio->mapping, -EIO); /* * don't drop any dirty dentry pages, to keep the latest * directory structure. */ if (S_ISDIR(inode->i_mode) && !is_sbi_flag_set(sbi, SBI_IS_CLOSE)) goto redirty_out; /* keep data pages in remount-ro mode */ if (F2FS_OPTION(sbi).errors == MOUNT_ERRORS_READONLY) goto redirty_out; goto out; } if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING))) goto redirty_out; if (folio->index < end_index || f2fs_verity_in_progress(inode) || compr_blocks) goto write; /* * If the offset is out of range of the file size, * this page does not have to be written to disk. */ offset = i_size & (PAGE_SIZE - 1); if ((folio->index >= end_index + 1) || !offset) goto out; folio_zero_segment(folio, offset, folio_size(folio)); write: /* Dentry/quota blocks are controlled by checkpoint */ if (S_ISDIR(inode->i_mode) || quota_inode) { /* * We need to wait for node_write to avoid block allocation during * checkpoint. This can only happen to quota writes which can cause * the below discard race condition.
if (quota_inode) f2fs_down_read(&sbi->node_write); fio.need_lock = LOCK_DONE; err = f2fs_do_write_data_page(&fio); if (quota_inode) f2fs_up_read(&sbi->node_write); goto done; } if (!wbc->for_reclaim) need_balance_fs = true; else if (has_not_enough_free_secs(sbi, 0, 0)) goto redirty_out; else set_inode_flag(inode, FI_HOT_DATA); err = -EAGAIN; if (f2fs_has_inline_data(inode)) { err = f2fs_write_inline_data(inode, folio); if (!err) goto out; } if (err == -EAGAIN) { err = f2fs_do_write_data_page(&fio); if (err == -EAGAIN) { f2fs_bug_on(sbi, compr_blocks); fio.need_lock = LOCK_REQ; err = f2fs_do_write_data_page(&fio); } } if (err) { file_set_keep_isize(inode); } else { spin_lock(&F2FS_I(inode)->i_size_lock); if (F2FS_I(inode)->last_disk_size < psize) F2FS_I(inode)->last_disk_size = psize; spin_unlock(&F2FS_I(inode)->i_size_lock); } done: if (err && err != -ENOENT) goto redirty_out; out: inode_dec_dirty_pages(inode); if (err) { folio_clear_uptodate(folio); clear_page_private_gcing(page); } if (wbc->for_reclaim) { f2fs_submit_merged_write_cond(sbi, NULL, page, 0, DATA); clear_inode_flag(inode, FI_HOT_DATA); f2fs_remove_dirty_inode(inode); submitted = NULL; } folio_unlock(folio); if (!S_ISDIR(inode->i_mode) && !IS_NOQUOTA(inode) && !F2FS_I(inode)->wb_task && allow_balance) f2fs_balance_fs(sbi, need_balance_fs); if (unlikely(f2fs_cp_error(sbi))) { f2fs_submit_merged_write(sbi, DATA); if (bio && *bio) f2fs_submit_merged_ipu_write(sbi, bio, NULL); submitted = NULL; } if (submitted) *submitted = fio.submitted; return 0; redirty_out: folio_redirty_for_writepage(wbc, folio); /* * pageout() in MM translates EAGAIN, so it calls handle_write_error() * -> mapping_set_error() -> set_bit(AS_EIO, ...). * file_write_and_wait_range() will see the EIO error, which is critical * for fsync() to report an atomic_write failure to the user. */ if (!err || wbc->for_reclaim) return AOP_WRITEPAGE_ACTIVATE; folio_unlock(folio); return err; } static int f2fs_write_data_page(struct page *page, struct writeback_control *wbc) { struct folio *folio = page_folio(page); #ifdef CONFIG_F2FS_FS_COMPRESSION struct inode *inode = folio->mapping->host; if (unlikely(f2fs_cp_error(F2FS_I_SB(inode)))) goto out; if (f2fs_compressed_file(inode)) { if (f2fs_is_compressed_cluster(inode, folio->index)) { folio_redirty_for_writepage(wbc, folio); return AOP_WRITEPAGE_ACTIVATE; } } out: #endif return f2fs_write_single_data_page(folio, NULL, NULL, NULL, wbc, FS_DATA_IO, 0, true); } /* * This function was copied from write_cache_pages() in mm/page-writeback.c. * The major change is that it writes the cold data pages separately from * the warm/hot data pages.
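 *
 * Before the real implementation, a stripped-down sketch of the core
 * walk it is built around (illustrative only, not part of the original
 * file; the helper name is hypothetical, and all f2fs-specific
 * batching, compression and error handling is omitted):
 */
static void __maybe_unused f2fs_example_walk_dirty_folios(
					struct address_space *mapping,
					pgoff_t index, pgoff_t end)
{
	struct folio_batch fbatch;
	unsigned int i, nr;

	folio_batch_init(&fbatch);
	while ((nr = filemap_get_folios_tag(mapping, &index, end,
					PAGECACHE_TAG_DIRTY, &fbatch))) {
		for (i = 0; i < nr; i++) {
			struct folio *folio = fbatch.folios[i];

			folio_lock(folio);
			/* recheck under the folio lock: writeback races
			 * with truncation and concurrent cleaners
			 */
			if (folio->mapping == mapping &&
			    folio_clear_dirty_for_io(folio)) {
				/* a real implementation would write the
				 * folio here before unlocking it
				 */
			}
			folio_unlock(folio);
		}
		folio_batch_release(&fbatch);
		cond_resched();
	}
}

/*
 * The f2fs version: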
*/ static int f2fs_write_cache_pages(struct address_space *mapping, struct writeback_control *wbc, enum iostat_type io_type) { int ret = 0; int done = 0, retry = 0; struct page *pages_local[F2FS_ONSTACK_PAGES]; struct page **pages = pages_local; struct folio_batch fbatch; struct f2fs_sb_info *sbi = F2FS_M_SB(mapping); struct bio *bio = NULL; sector_t last_block; #ifdef CONFIG_F2FS_FS_COMPRESSION struct inode *inode = mapping->host; struct compress_ctx cc = { .inode = inode, .log_cluster_size = F2FS_I(inode)->i_log_cluster_size, .cluster_size = F2FS_I(inode)->i_cluster_size, .cluster_idx = NULL_CLUSTER, .rpages = NULL, .nr_rpages = 0, .cpages = NULL, .valid_nr_cpages = 0, .rbuf = NULL, .cbuf = NULL, .rlen = PAGE_SIZE * F2FS_I(inode)->i_cluster_size, .private = NULL, }; #endif int nr_folios, p, idx; int nr_pages; unsigned int max_pages = F2FS_ONSTACK_PAGES; pgoff_t index; pgoff_t end; /* Inclusive */ pgoff_t done_index; int range_whole = 0; xa_mark_t tag; int nwritten = 0; int submitted = 0; int i; #ifdef CONFIG_F2FS_FS_COMPRESSION if (f2fs_compressed_file(inode) && 1 << cc.log_cluster_size > F2FS_ONSTACK_PAGES) { pages = f2fs_kzalloc(sbi, sizeof(struct page *) << cc.log_cluster_size, GFP_NOFS | __GFP_NOFAIL); max_pages = 1 << cc.log_cluster_size; } #endif folio_batch_init(&fbatch); if (get_dirty_pages(mapping->host) <= SM_I(F2FS_M_SB(mapping))->min_hot_blocks) set_inode_flag(mapping->host, FI_HOT_DATA); else clear_inode_flag(mapping->host, FI_HOT_DATA); if (wbc->range_cyclic) { index = mapping->writeback_index; /* prev offset */ end = -1; } else { index = wbc->range_start >> PAGE_SHIFT; end = wbc->range_end >> PAGE_SHIFT; if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX) range_whole = 1; } if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages) tag = PAGECACHE_TAG_TOWRITE; else tag = PAGECACHE_TAG_DIRTY; retry: retry = 0; if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages) tag_pages_for_writeback(mapping, index, end); done_index = index; while (!done && !retry && (index <= end)) { nr_pages = 0; again: nr_folios = filemap_get_folios_tag(mapping, &index, end, tag, &fbatch); if (nr_folios == 0) { if (nr_pages) goto write; break; } for (i = 0; i < nr_folios; i++) { struct folio *folio = fbatch.folios[i]; idx = 0; p = folio_nr_pages(folio); add_more: pages[nr_pages] = folio_page(folio, idx); folio_get(folio); if (++nr_pages == max_pages) { index = folio->index + idx + 1; folio_batch_release(&fbatch); goto write; } if (++idx < p) goto add_more; } folio_batch_release(&fbatch); goto again; write: for (i = 0; i < nr_pages; i++) { struct page *page = pages[i]; struct folio *folio = page_folio(page); bool need_readd; readd: need_readd = false; #ifdef CONFIG_F2FS_FS_COMPRESSION if (f2fs_compressed_file(inode)) { void *fsdata = NULL; struct page *pagep; int ret2; ret = f2fs_init_compress_ctx(&cc); if (ret) { done = 1; break; } if (!f2fs_cluster_can_merge_page(&cc, folio->index)) { ret = f2fs_write_multi_pages(&cc, &submitted, wbc, io_type); if (!ret) need_readd = true; goto result; } if (unlikely(f2fs_cp_error(sbi))) goto lock_folio; if (!f2fs_cluster_is_empty(&cc)) goto lock_folio; if (f2fs_all_cluster_page_ready(&cc, pages, i, nr_pages, true)) goto lock_folio; ret2 = f2fs_prepare_compress_overwrite( inode, &pagep, folio->index, &fsdata); if (ret2 < 0) { ret = ret2; done = 1; break; } else if (ret2 && (!f2fs_compress_write_end(inode, fsdata, folio->index, 1) || !f2fs_all_cluster_page_ready(&cc, pages, i, nr_pages, false))) { retry = 1; break; } } #endif /* give a priority to 
WB_SYNC threads */ if (atomic_read(&sbi->wb_sync_req[DATA]) && wbc->sync_mode == WB_SYNC_NONE) { done = 1; break; } #ifdef CONFIG_F2FS_FS_COMPRESSION lock_folio: #endif done_index = folio->index; retry_write: folio_lock(folio); if (unlikely(folio->mapping != mapping)) { continue_unlock: folio_unlock(folio); continue; } if (!folio_test_dirty(folio)) { /* someone wrote it for us */ goto continue_unlock; } if (folio_test_writeback(folio)) { if (wbc->sync_mode == WB_SYNC_NONE) goto continue_unlock; f2fs_wait_on_page_writeback(&folio->page, DATA, true, true); } if (!folio_clear_dirty_for_io(folio)) goto continue_unlock; #ifdef CONFIG_F2FS_FS_COMPRESSION if (f2fs_compressed_file(inode)) { folio_get(folio); f2fs_compress_ctx_add_page(&cc, folio); continue; } #endif ret = f2fs_write_single_data_page(folio, &submitted, &bio, &last_block, wbc, io_type, 0, true); if (ret == AOP_WRITEPAGE_ACTIVATE) folio_unlock(folio); #ifdef CONFIG_F2FS_FS_COMPRESSION result: #endif nwritten += submitted; wbc->nr_to_write -= submitted; if (unlikely(ret)) { /* * keep nr_to_write, since vfs uses this to * get # of written pages. */ if (ret == AOP_WRITEPAGE_ACTIVATE) { ret = 0; goto next; } else if (ret == -EAGAIN) { ret = 0; if (wbc->sync_mode == WB_SYNC_ALL) { f2fs_io_schedule_timeout( DEFAULT_IO_TIMEOUT); goto retry_write; } goto next; } done_index = folio_next_index(folio); done = 1; break; } if (wbc->nr_to_write <= 0 && wbc->sync_mode == WB_SYNC_NONE) { done = 1; break; } next: if (need_readd) goto readd; } release_pages(pages, nr_pages); cond_resched(); } #ifdef CONFIG_F2FS_FS_COMPRESSION /* flush remained pages in compress cluster */ if (f2fs_compressed_file(inode) && !f2fs_cluster_is_empty(&cc)) { ret = f2fs_write_multi_pages(&cc, &submitted, wbc, io_type); nwritten += submitted; wbc->nr_to_write -= submitted; if (ret) { done = 1; retry = 0; } } if (f2fs_compressed_file(inode)) f2fs_destroy_compress_ctx(&cc, false); #endif if (retry) { index = 0; end = -1; goto retry; } if (wbc->range_cyclic && !done) done_index = 0; if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0)) mapping->writeback_index = done_index; if (nwritten) f2fs_submit_merged_write_cond(F2FS_M_SB(mapping), mapping->host, NULL, 0, DATA); /* submit cached bio of IPU write */ if (bio) f2fs_submit_merged_ipu_write(sbi, &bio, NULL); #ifdef CONFIG_F2FS_FS_COMPRESSION if (pages != pages_local) kfree(pages); #endif return ret; } static inline bool __should_serialize_io(struct inode *inode, struct writeback_control *wbc) { /* to avoid deadlock in path of data flush */ if (F2FS_I(inode)->wb_task) return false; if (!S_ISREG(inode->i_mode)) return false; if (IS_NOQUOTA(inode)) return false; if (f2fs_need_compress_data(inode)) return true; if (wbc->sync_mode != WB_SYNC_ALL) return true; if (get_dirty_pages(inode) >= SM_I(F2FS_I_SB(inode))->min_seq_blocks) return true; return false; } static int __f2fs_write_data_pages(struct address_space *mapping, struct writeback_control *wbc, enum iostat_type io_type) { struct inode *inode = mapping->host; struct f2fs_sb_info *sbi = F2FS_I_SB(inode); struct blk_plug plug; int ret; bool locked = false; /* deal with chardevs and other special file */ if (!mapping->a_ops->writepage) return 0; /* skip writing if there is no dirty page in this inode */ if (!get_dirty_pages(inode) && wbc->sync_mode == WB_SYNC_NONE) return 0; /* during POR, we don't need to trigger writepage at all. 
*/ if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING))) goto skip_write; if ((S_ISDIR(inode->i_mode) || IS_NOQUOTA(inode)) && wbc->sync_mode == WB_SYNC_NONE && get_dirty_pages(inode) < nr_pages_to_skip(sbi, DATA) && f2fs_available_free_memory(sbi, DIRTY_DENTS)) goto skip_write; /* skip writing during the file defragment preparation stage */ if (is_inode_flag_set(inode, FI_SKIP_WRITES)) goto skip_write; trace_f2fs_writepages(mapping->host, wbc, DATA); /* to avoid splitting IOs due to mixed WB_SYNC_ALL and WB_SYNC_NONE */ if (wbc->sync_mode == WB_SYNC_ALL) atomic_inc(&sbi->wb_sync_req[DATA]); else if (atomic_read(&sbi->wb_sync_req[DATA])) { /* to avoid potential deadlock */ if (current->plug) blk_finish_plug(current->plug); goto skip_write; } if (__should_serialize_io(inode, wbc)) { mutex_lock(&sbi->writepages); locked = true; } blk_start_plug(&plug); ret = f2fs_write_cache_pages(mapping, wbc, io_type); blk_finish_plug(&plug); if (locked) mutex_unlock(&sbi->writepages); if (wbc->sync_mode == WB_SYNC_ALL) atomic_dec(&sbi->wb_sync_req[DATA]); /* * if some pages were truncated, we cannot guarantee that mapping->host * can detect pending bios. */ f2fs_remove_dirty_inode(inode); return ret; skip_write: wbc->pages_skipped += get_dirty_pages(inode); trace_f2fs_writepages(mapping->host, wbc, DATA); return 0; } static int f2fs_write_data_pages(struct address_space *mapping, struct writeback_control *wbc) { struct inode *inode = mapping->host; return __f2fs_write_data_pages(mapping, wbc, F2FS_I(inode)->cp_task == current ? FS_CP_DATA_IO : FS_DATA_IO); } void f2fs_write_failed(struct inode *inode, loff_t to) { loff_t i_size = i_size_read(inode); if (IS_NOQUOTA(inode)) return; /* In the fs-verity case, f2fs_end_enable_verity() does the truncate */ if (to > i_size && !f2fs_verity_in_progress(inode)) { f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); filemap_invalidate_lock(inode->i_mapping); truncate_pagecache(inode, i_size); f2fs_truncate_blocks(inode, i_size, true); filemap_invalidate_unlock(inode->i_mapping); f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); } } static int prepare_write_begin(struct f2fs_sb_info *sbi, struct folio *folio, loff_t pos, unsigned int len, block_t *blk_addr, bool *node_changed) { struct inode *inode = folio->mapping->host; pgoff_t index = folio->index; struct dnode_of_data dn; struct page *ipage; bool locked = false; int flag = F2FS_GET_BLOCK_PRE_AIO; int err = 0; /* * If a whole page is being written and we already preallocated all the * blocks, then there is no need to get a block address now.
*/ if (len == PAGE_SIZE && is_inode_flag_set(inode, FI_PREALLOCATED_ALL)) return 0; /* f2fs_lock_op avoids race between write CP and convert_inline_page */ if (f2fs_has_inline_data(inode)) { if (pos + len > MAX_INLINE_DATA(inode)) flag = F2FS_GET_BLOCK_DEFAULT; f2fs_map_lock(sbi, flag); locked = true; } else if ((pos & PAGE_MASK) >= i_size_read(inode)) { f2fs_map_lock(sbi, flag); locked = true; } restart: /* check inline_data */ ipage = f2fs_get_node_page(sbi, inode->i_ino); if (IS_ERR(ipage)) { err = PTR_ERR(ipage); goto unlock_out; } set_new_dnode(&dn, inode, ipage, ipage, 0); if (f2fs_has_inline_data(inode)) { if (pos + len <= MAX_INLINE_DATA(inode)) { f2fs_do_read_inline_data(folio, ipage); set_inode_flag(inode, FI_DATA_EXIST); if (inode->i_nlink) set_page_private_inline(ipage); goto out; } err = f2fs_convert_inline_page(&dn, folio_page(folio, 0)); if (err || dn.data_blkaddr != NULL_ADDR) goto out; } if (!f2fs_lookup_read_extent_cache_block(inode, index, &dn.data_blkaddr)) { if (IS_DEVICE_ALIASING(inode)) { err = -ENODATA; goto out; } if (locked) { err = f2fs_reserve_block(&dn, index); goto out; } /* hole case */ err = f2fs_get_dnode_of_data(&dn, index, LOOKUP_NODE); if (!err && dn.data_blkaddr != NULL_ADDR) goto out; f2fs_put_dnode(&dn); f2fs_map_lock(sbi, F2FS_GET_BLOCK_PRE_AIO); WARN_ON(flag != F2FS_GET_BLOCK_PRE_AIO); locked = true; goto restart; } out: if (!err) { /* convert_inline_page can make node_changed */ *blk_addr = dn.data_blkaddr; *node_changed = dn.node_changed; } f2fs_put_dnode(&dn); unlock_out: if (locked) f2fs_map_unlock(sbi, flag); return err; } static int __find_data_block(struct inode *inode, pgoff_t index, block_t *blk_addr) { struct dnode_of_data dn; struct page *ipage; int err = 0; ipage = f2fs_get_node_page(F2FS_I_SB(inode), inode->i_ino); if (IS_ERR(ipage)) return PTR_ERR(ipage); set_new_dnode(&dn, inode, ipage, ipage, 0); if (!f2fs_lookup_read_extent_cache_block(inode, index, &dn.data_blkaddr)) { /* hole case */ err = f2fs_get_dnode_of_data(&dn, index, LOOKUP_NODE); if (err) { dn.data_blkaddr = NULL_ADDR; err = 0; } } *blk_addr = dn.data_blkaddr; f2fs_put_dnode(&dn); return err; } static int __reserve_data_block(struct inode *inode, pgoff_t index, block_t *blk_addr, bool *node_changed) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); struct dnode_of_data dn; struct page *ipage; int err = 0; f2fs_map_lock(sbi, F2FS_GET_BLOCK_PRE_AIO); ipage = f2fs_get_node_page(sbi, inode->i_ino); if (IS_ERR(ipage)) { err = PTR_ERR(ipage); goto unlock_out; } set_new_dnode(&dn, inode, ipage, ipage, 0); if (!f2fs_lookup_read_extent_cache_block(dn.inode, index, &dn.data_blkaddr)) err = f2fs_reserve_block(&dn, index); *blk_addr = dn.data_blkaddr; *node_changed = dn.node_changed; f2fs_put_dnode(&dn); unlock_out: f2fs_map_unlock(sbi, F2FS_GET_BLOCK_PRE_AIO); return err; } static int prepare_atomic_write_begin(struct f2fs_sb_info *sbi, struct folio *folio, loff_t pos, unsigned int len, block_t *blk_addr, bool *node_changed, bool *use_cow) { struct inode *inode = folio->mapping->host; struct inode *cow_inode = F2FS_I(inode)->cow_inode; pgoff_t index = folio->index; int err = 0; block_t ori_blk_addr = NULL_ADDR; /* If pos is beyond the end of file, reserve a new block in COW inode */ if ((pos & PAGE_MASK) >= i_size_read(inode)) goto reserve_block; /* Look for the block in COW inode first */ err = __find_data_block(cow_inode, index, blk_addr); if (err) { return err; } else if (*blk_addr != NULL_ADDR) { *use_cow = true; return 0; } if (is_inode_flag_set(inode, FI_ATOMIC_REPLACE)) goto 
reserve_block; /* Look for the block in the original inode */ err = __find_data_block(inode, index, &ori_blk_addr); if (err) return err; reserve_block: /* Finally, we should reserve a new block in COW inode for the update */ err = __reserve_data_block(cow_inode, index, blk_addr, node_changed); if (err) return err; inc_atomic_write_cnt(inode); if (ori_blk_addr != NULL_ADDR) *blk_addr = ori_blk_addr; return 0; } static int f2fs_write_begin(struct file *file, struct address_space *mapping, loff_t pos, unsigned len, struct folio **foliop, void **fsdata) { struct inode *inode = mapping->host; struct f2fs_sb_info *sbi = F2FS_I_SB(inode); struct folio *folio; pgoff_t index = pos >> PAGE_SHIFT; bool need_balance = false; bool use_cow = false; block_t blkaddr = NULL_ADDR; int err = 0; trace_f2fs_write_begin(inode, pos, len); if (!f2fs_is_checkpoint_ready(sbi)) { err = -ENOSPC; goto fail; } /* * We must check this here to avoid a deadlock between the inode page * and page #0. The locking rule for inline_data conversion is: * folio_lock(folio #0) -> folio_lock(inode_page) */ if (index != 0) { err = f2fs_convert_inline_inode(inode); if (err) goto fail; } #ifdef CONFIG_F2FS_FS_COMPRESSION if (f2fs_compressed_file(inode)) { int ret; struct page *page; *fsdata = NULL; if (len == PAGE_SIZE && !(f2fs_is_atomic_file(inode))) goto repeat; ret = f2fs_prepare_compress_overwrite(inode, &page, index, fsdata); if (ret < 0) { err = ret; goto fail; } else if (ret) { *foliop = page_folio(page); return 0; } } #endif repeat: /* * Do not use FGP_STABLE, to avoid deadlock. * Writeback is waited for below, under our own IO control. */ folio = __filemap_get_folio(mapping, index, FGP_LOCK | FGP_WRITE | FGP_CREAT, GFP_NOFS); if (IS_ERR(folio)) { err = PTR_ERR(folio); goto fail; } /* TODO: cluster can be compressed due to race with .writepage */ *foliop = folio; if (f2fs_is_atomic_file(inode)) err = prepare_atomic_write_begin(sbi, folio, pos, len, &blkaddr, &need_balance, &use_cow); else err = prepare_write_begin(sbi, folio, pos, len, &blkaddr, &need_balance); if (err) goto put_folio; if (need_balance && !IS_NOQUOTA(inode) && has_not_enough_free_secs(sbi, 0, 0)) { folio_unlock(folio); f2fs_balance_fs(sbi, true); folio_lock(folio); if (folio->mapping != mapping) { /* The folio got truncated from under us */ folio_unlock(folio); folio_put(folio); goto repeat; } } f2fs_wait_on_page_writeback(&folio->page, DATA, false, true); if (len == folio_size(folio) || folio_test_uptodate(folio)) return 0; if (!(pos & (PAGE_SIZE - 1)) && (pos + len) >= i_size_read(inode) && !f2fs_verity_in_progress(inode)) { folio_zero_segment(folio, len, folio_size(folio)); return 0; } if (blkaddr == NEW_ADDR) { folio_zero_segment(folio, 0, folio_size(folio)); folio_mark_uptodate(folio); } else { if (!f2fs_is_valid_blkaddr(sbi, blkaddr, DATA_GENERIC_ENHANCE_READ)) { err = -EFSCORRUPTED; goto put_folio; } err = f2fs_submit_page_read(use_cow ?
F2FS_I(inode)->cow_inode : inode, folio, blkaddr, 0, true); if (err) goto put_folio; folio_lock(folio); if (unlikely(folio->mapping != mapping)) { folio_unlock(folio); folio_put(folio); goto repeat; } if (unlikely(!folio_test_uptodate(folio))) { err = -EIO; goto put_folio; } } return 0; put_folio: folio_unlock(folio); folio_put(folio); fail: f2fs_write_failed(inode, pos + len); return err; } static int f2fs_write_end(struct file *file, struct address_space *mapping, loff_t pos, unsigned len, unsigned copied, struct folio *folio, void *fsdata) { struct inode *inode = folio->mapping->host; trace_f2fs_write_end(inode, pos, len, copied); /* * This should come from len == PAGE_SIZE, in which case we expect * copied to be PAGE_SIZE as well. Otherwise, treat it as zero bytes * copied and let generic_perform_write() retry the copy via copied=0. */ if (!folio_test_uptodate(folio)) { if (unlikely(copied != len)) copied = 0; else folio_mark_uptodate(folio); } #ifdef CONFIG_F2FS_FS_COMPRESSION /* overwrite compressed file */ if (f2fs_compressed_file(inode) && fsdata) { f2fs_compress_write_end(inode, fsdata, folio->index, copied); f2fs_update_time(F2FS_I_SB(inode), REQ_TIME); if (pos + copied > i_size_read(inode) && !f2fs_verity_in_progress(inode)) f2fs_i_size_write(inode, pos + copied); return copied; } #endif if (!copied) goto unlock_out; folio_mark_dirty(folio); if (f2fs_is_atomic_file(inode)) set_page_private_atomic(folio_page(folio, 0)); if (pos + copied > i_size_read(inode) && !f2fs_verity_in_progress(inode)) { f2fs_i_size_write(inode, pos + copied); if (f2fs_is_atomic_file(inode)) f2fs_i_size_write(F2FS_I(inode)->cow_inode, pos + copied); } unlock_out: folio_unlock(folio); folio_put(folio); f2fs_update_time(F2FS_I_SB(inode), REQ_TIME); return copied; } void f2fs_invalidate_folio(struct folio *folio, size_t offset, size_t length) { struct inode *inode = folio->mapping->host; struct f2fs_sb_info *sbi = F2FS_I_SB(inode); if (inode->i_ino >= F2FS_ROOT_INO(sbi) && (offset || length != folio_size(folio))) return; if (folio_test_dirty(folio)) { if (inode->i_ino == F2FS_META_INO(sbi)) { dec_page_count(sbi, F2FS_DIRTY_META); } else if (inode->i_ino == F2FS_NODE_INO(sbi)) { dec_page_count(sbi, F2FS_DIRTY_NODES); } else { inode_dec_dirty_pages(inode); f2fs_remove_dirty_inode(inode); } } clear_page_private_all(&folio->page); } bool f2fs_release_folio(struct folio *folio, gfp_t wait) { /* If this is a dirty folio, keep its private data */ if (folio_test_dirty(folio)) return false; clear_page_private_all(&folio->page); return true; } static bool f2fs_dirty_data_folio(struct address_space *mapping, struct folio *folio) { struct inode *inode = mapping->host; trace_f2fs_set_page_dirty(folio, DATA); if (!folio_test_uptodate(folio)) folio_mark_uptodate(folio); BUG_ON(folio_test_swapcache(folio)); if (filemap_dirty_folio(mapping, folio)) { f2fs_update_dirty_folio(inode, folio); return true; } return false; } static sector_t f2fs_bmap_compress(struct inode *inode, sector_t block) { #ifdef CONFIG_F2FS_FS_COMPRESSION struct dnode_of_data dn; sector_t start_idx, blknr = 0; int ret; start_idx = round_down(block, F2FS_I(inode)->i_cluster_size); set_new_dnode(&dn, inode, NULL, NULL, 0); ret = f2fs_get_dnode_of_data(&dn, start_idx, LOOKUP_NODE); if (ret) return 0; if (dn.data_blkaddr != COMPRESS_ADDR) { dn.ofs_in_node += block - start_idx; blknr = f2fs_data_blkaddr(&dn); if (!__is_valid_data_blkaddr(blknr)) blknr = 0; } f2fs_put_dnode(&dn); return blknr; #else return 0; #endif } static sector_t f2fs_bmap(struct address_space
*mapping, sector_t block) { struct inode *inode = mapping->host; sector_t blknr = 0; if (f2fs_has_inline_data(inode)) goto out; /* make sure allocating whole blocks */ if (mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) filemap_write_and_wait(mapping); /* Block number less than F2FS MAX BLOCKS */ if (unlikely(block >= max_file_blocks(inode))) goto out; if (f2fs_compressed_file(inode)) { blknr = f2fs_bmap_compress(inode, block); } else { struct f2fs_map_blocks map; memset(&map, 0, sizeof(map)); map.m_lblk = block; map.m_len = 1; map.m_next_pgofs = NULL; map.m_seg_type = NO_CHECK_TYPE; if (!f2fs_map_blocks(inode, &map, F2FS_GET_BLOCK_BMAP)) blknr = map.m_pblk; } out: trace_f2fs_bmap(inode, block, blknr); return blknr; } #ifdef CONFIG_SWAP static int f2fs_migrate_blocks(struct inode *inode, block_t start_blk, unsigned int blkcnt) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); unsigned int blkofs; unsigned int blk_per_sec = BLKS_PER_SEC(sbi); unsigned int end_blk = start_blk + blkcnt - 1; unsigned int secidx = start_blk / blk_per_sec; unsigned int end_sec; int ret = 0; if (!blkcnt) return 0; end_sec = end_blk / blk_per_sec; f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); filemap_invalidate_lock(inode->i_mapping); set_inode_flag(inode, FI_ALIGNED_WRITE); set_inode_flag(inode, FI_OPU_WRITE); for (; secidx <= end_sec; secidx++) { unsigned int blkofs_end = secidx == end_sec ? end_blk % blk_per_sec : blk_per_sec - 1; f2fs_down_write(&sbi->pin_sem); ret = f2fs_allocate_pinning_section(sbi); if (ret) { f2fs_up_write(&sbi->pin_sem); break; } set_inode_flag(inode, FI_SKIP_WRITES); for (blkofs = 0; blkofs <= blkofs_end; blkofs++) { struct page *page; unsigned int blkidx = secidx * blk_per_sec + blkofs; page = f2fs_get_lock_data_page(inode, blkidx, true); if (IS_ERR(page)) { f2fs_up_write(&sbi->pin_sem); ret = PTR_ERR(page); goto done; } set_page_dirty(page); f2fs_put_page(page, 1); } clear_inode_flag(inode, FI_SKIP_WRITES); ret = filemap_fdatawrite(inode->i_mapping); f2fs_up_write(&sbi->pin_sem); if (ret) break; } done: clear_inode_flag(inode, FI_SKIP_WRITES); clear_inode_flag(inode, FI_OPU_WRITE); clear_inode_flag(inode, FI_ALIGNED_WRITE); filemap_invalidate_unlock(inode->i_mapping); f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); return ret; } static int check_swap_activate(struct swap_info_struct *sis, struct file *swap_file, sector_t *span) { struct address_space *mapping = swap_file->f_mapping; struct inode *inode = mapping->host; struct f2fs_sb_info *sbi = F2FS_I_SB(inode); block_t cur_lblock; block_t last_lblock; block_t pblock; block_t lowest_pblock = -1; block_t highest_pblock = 0; int nr_extents = 0; unsigned int nr_pblocks; unsigned int blks_per_sec = BLKS_PER_SEC(sbi); unsigned int not_aligned = 0; int ret = 0; /* * Map all the blocks into the extent list. This code doesn't try * to be very smart. 
*/ cur_lblock = 0; last_lblock = F2FS_BYTES_TO_BLK(i_size_read(inode)); while (cur_lblock < last_lblock && cur_lblock < sis->max) { struct f2fs_map_blocks map; retry: cond_resched(); memset(&map, 0, sizeof(map)); map.m_lblk = cur_lblock; map.m_len = last_lblock - cur_lblock; map.m_next_pgofs = NULL; map.m_next_extent = NULL; map.m_seg_type = NO_CHECK_TYPE; map.m_may_create = false; ret = f2fs_map_blocks(inode, &map, F2FS_GET_BLOCK_FIEMAP); if (ret) goto out; /* hole */ if (!(map.m_flags & F2FS_MAP_FLAGS)) { f2fs_err(sbi, "Swapfile has holes"); ret = -EINVAL; goto out; } pblock = map.m_pblk; nr_pblocks = map.m_len; if ((pblock - SM_I(sbi)->main_blkaddr) % blks_per_sec || nr_pblocks % blks_per_sec || !f2fs_valid_pinned_area(sbi, pblock)) { bool last_extent = false; not_aligned++; nr_pblocks = roundup(nr_pblocks, blks_per_sec); if (cur_lblock + nr_pblocks > sis->max) nr_pblocks -= blks_per_sec; /* this extent is the last one */ if (!nr_pblocks) { nr_pblocks = last_lblock - cur_lblock; last_extent = true; } ret = f2fs_migrate_blocks(inode, cur_lblock, nr_pblocks); if (ret) { if (ret == -ENOENT) ret = -EINVAL; goto out; } if (!last_extent) goto retry; } if (cur_lblock + nr_pblocks >= sis->max) nr_pblocks = sis->max - cur_lblock; if (cur_lblock) { /* exclude the header page */ if (pblock < lowest_pblock) lowest_pblock = pblock; if (pblock + nr_pblocks - 1 > highest_pblock) highest_pblock = pblock + nr_pblocks - 1; } /* * We found a PAGE_SIZE-length, PAGE_SIZE-aligned run of blocks */ ret = add_swap_extent(sis, cur_lblock, nr_pblocks, pblock); if (ret < 0) goto out; nr_extents += ret; cur_lblock += nr_pblocks; } ret = nr_extents; *span = 1 + highest_pblock - lowest_pblock; if (cur_lblock == 0) cur_lblock = 1; /* force Empty message */ sis->max = cur_lblock; sis->pages = cur_lblock - 1; sis->highest_bit = cur_lblock - 1; out: if (not_aligned) f2fs_warn(sbi, "Swapfile (%u) is not aligned to section: 1) creat(), 2) ioctl(F2FS_IOC_SET_PIN_FILE), 3) fallocate(%lu * N)", not_aligned, blks_per_sec * F2FS_BLKSIZE); return ret; } static int f2fs_swap_activate(struct swap_info_struct *sis, struct file *file, sector_t *span) { struct inode *inode = file_inode(file); struct f2fs_sb_info *sbi = F2FS_I_SB(inode); int ret; if (!S_ISREG(inode->i_mode)) return -EINVAL; if (f2fs_readonly(sbi->sb)) return -EROFS; if (f2fs_lfs_mode(sbi) && !f2fs_sb_has_blkzoned(sbi)) { f2fs_err(sbi, "Swapfile not supported in LFS mode"); return -EINVAL; } ret = f2fs_convert_inline_inode(inode); if (ret) return ret; if (!f2fs_disable_compressed_file(inode)) return -EINVAL; ret = filemap_fdatawrite(inode->i_mapping); if (ret < 0) return ret; f2fs_precache_extents(inode); ret = check_swap_activate(sis, file, span); if (ret < 0) return ret; stat_inc_swapfile_inode(inode); set_inode_flag(inode, FI_PIN_FILE); f2fs_update_time(sbi, REQ_TIME); return ret; } static void f2fs_swap_deactivate(struct file *file) { struct inode *inode = file_inode(file); stat_dec_swapfile_inode(inode); clear_inode_flag(inode, FI_PIN_FILE); } #else static int f2fs_swap_activate(struct swap_info_struct *sis, struct file *file, sector_t *span) { return -EOPNOTSUPP; } static void f2fs_swap_deactivate(struct file *file) { } #endif const struct address_space_operations f2fs_dblock_aops = { .read_folio = f2fs_read_data_folio, .readahead = f2fs_readahead, .writepage = f2fs_write_data_page, .writepages = f2fs_write_data_pages, .write_begin = f2fs_write_begin, .write_end = f2fs_write_end, .dirty_folio = f2fs_dirty_data_folio, .migrate_folio = filemap_migrate_folio,
.invalidate_folio = f2fs_invalidate_folio, .release_folio = f2fs_release_folio, .bmap = f2fs_bmap, .swap_activate = f2fs_swap_activate, .swap_deactivate = f2fs_swap_deactivate, }; void f2fs_clear_page_cache_dirty_tag(struct folio *folio) { struct address_space *mapping = folio->mapping; unsigned long flags; xa_lock_irqsave(&mapping->i_pages, flags); __xa_clear_mark(&mapping->i_pages, folio->index, PAGECACHE_TAG_DIRTY); xa_unlock_irqrestore(&mapping->i_pages, flags); } int __init f2fs_init_post_read_processing(void) { bio_post_read_ctx_cache = kmem_cache_create("f2fs_bio_post_read_ctx", sizeof(struct bio_post_read_ctx), 0, 0, NULL); if (!bio_post_read_ctx_cache) goto fail; bio_post_read_ctx_pool = mempool_create_slab_pool(NUM_PREALLOC_POST_READ_CTXS, bio_post_read_ctx_cache); if (!bio_post_read_ctx_pool) goto fail_free_cache; return 0; fail_free_cache: kmem_cache_destroy(bio_post_read_ctx_cache); fail: return -ENOMEM; } void f2fs_destroy_post_read_processing(void) { mempool_destroy(bio_post_read_ctx_pool); kmem_cache_destroy(bio_post_read_ctx_cache); } int f2fs_init_post_read_wq(struct f2fs_sb_info *sbi) { if (!f2fs_sb_has_encrypt(sbi) && !f2fs_sb_has_verity(sbi) && !f2fs_sb_has_compression(sbi)) return 0; sbi->post_read_wq = alloc_workqueue("f2fs_post_read_wq", WQ_UNBOUND | WQ_HIGHPRI, num_online_cpus()); return sbi->post_read_wq ? 0 : -ENOMEM; } void f2fs_destroy_post_read_wq(struct f2fs_sb_info *sbi) { if (sbi->post_read_wq) destroy_workqueue(sbi->post_read_wq); } int __init f2fs_init_bio_entry_cache(void) { bio_entry_slab = f2fs_kmem_cache_create("f2fs_bio_entry_slab", sizeof(struct bio_entry)); return bio_entry_slab ? 0 : -ENOMEM; } void f2fs_destroy_bio_entry_cache(void) { kmem_cache_destroy(bio_entry_slab); } static int f2fs_iomap_begin(struct inode *inode, loff_t offset, loff_t length, unsigned int flags, struct iomap *iomap, struct iomap *srcmap) { struct f2fs_map_blocks map = {}; pgoff_t next_pgofs = 0; int err; map.m_lblk = F2FS_BYTES_TO_BLK(offset); map.m_len = F2FS_BYTES_TO_BLK(offset + length - 1) - map.m_lblk + 1; map.m_next_pgofs = &next_pgofs; map.m_seg_type = f2fs_rw_hint_to_seg_type(F2FS_I_SB(inode), inode->i_write_hint); if (flags & IOMAP_WRITE) map.m_may_create = true; err = f2fs_map_blocks(inode, &map, F2FS_GET_BLOCK_DIO); if (err) return err; iomap->offset = F2FS_BLK_TO_BYTES(map.m_lblk); /* * When inline encryption is enabled, sometimes I/O to an encrypted file * has to be broken up to guarantee DUN contiguity. Handle this by * limiting the length of the mapping returned. */ map.m_len = fscrypt_limit_io_blocks(inode, map.m_lblk, map.m_len); /* * We should never see delalloc or compressed extents here based on * prior flushing and checks. 
*/ if (WARN_ON_ONCE(map.m_pblk == COMPRESS_ADDR)) return -EINVAL; if (map.m_flags & F2FS_MAP_MAPPED) { if (WARN_ON_ONCE(map.m_pblk == NEW_ADDR)) return -EINVAL; iomap->length = F2FS_BLK_TO_BYTES(map.m_len); iomap->type = IOMAP_MAPPED; iomap->flags |= IOMAP_F_MERGED; iomap->bdev = map.m_bdev; iomap->addr = F2FS_BLK_TO_BYTES(map.m_pblk); } else { if (flags & IOMAP_WRITE) return -ENOTBLK; if (map.m_pblk == NULL_ADDR) { iomap->length = F2FS_BLK_TO_BYTES(next_pgofs) - iomap->offset; iomap->type = IOMAP_HOLE; } else if (map.m_pblk == NEW_ADDR) { iomap->length = F2FS_BLK_TO_BYTES(map.m_len); iomap->type = IOMAP_UNWRITTEN; } else { f2fs_bug_on(F2FS_I_SB(inode), 1); } iomap->addr = IOMAP_NULL_ADDR; } if (map.m_flags & F2FS_MAP_NEW) iomap->flags |= IOMAP_F_NEW; if ((inode->i_state & I_DIRTY_DATASYNC) || offset + length > i_size_read(inode)) iomap->flags |= IOMAP_F_DIRTY; return 0; } const struct iomap_ops f2fs_iomap_ops = { .iomap_begin = f2fs_iomap_begin, };
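/*
 * Editor's aside (illustrative, not part of fs/f2fs/data.c): f2fs_bmap()
 * above implements address_space_operations.bmap, which is what the legacy
 * FIBMAP ioctl consults to translate a file's logical block into a physical
 * block number. A minimal userspace sketch follows; it assumes root
 * privileges (FIBMAP requires CAP_SYS_RAWIO) and takes the file to query as
 * argv[1]. Units are filesystem blocks of st_blksize bytes; 0 denotes a hole.
 */
#include <stdio.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <sys/stat.h>
#include <linux/fs.h>		/* FIBMAP */

int main(int argc, char **argv)
{
	struct stat st;
	int fd, blk = 0;	/* logical block 0; FIBMAP rewrites it in place */

	if (argc != 2) {
		fprintf(stderr, "usage: %s <file>\n", argv[0]);
		return 1;
	}
	fd = open(argv[1], O_RDONLY);
	if (fd < 0 || fstat(fd, &st) < 0) {
		perror(argv[1]);
		return 1;
	}
	if (ioctl(fd, FIBMAP, &blk) < 0) {	/* ends up in aops->bmap */
		perror("FIBMAP");
		close(fd);
		return 1;
	}
	printf("logical block 0 -> physical block %d (%ld-byte blocks)\n",
	       blk, (long)st.st_blksize);
	close(fd);
	return 0;
}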
// SPDX-License-Identifier: GPL-2.0-only /* * Copyright (C) 2010-2014 Michael Krufky (mkrufky@linuxtv.org) * * see Documentation/driver-api/media/drivers/dvb-usb.rst for more information */ #include <linux/vmalloc.h> #include <linux/i2c.h> #include <media/tuner.h> #include "mxl111sf.h" #include "mxl111sf-reg.h" #include "mxl111sf-phy.h" #include "mxl111sf-i2c.h" #include "mxl111sf-gpio.h" #include "mxl111sf-demod.h" #include "mxl111sf-tuner.h" #include "lgdt3305.h" #include "lg2160.h" int dvb_usb_mxl111sf_debug; module_param_named(debug, dvb_usb_mxl111sf_debug, int, 0644); MODULE_PARM_DESC(debug, "set debugging level (1=info, 2=xfer, 4=i2c, 8=reg, 16=adv (or-able))."); static int dvb_usb_mxl111sf_isoc; module_param_named(isoc, dvb_usb_mxl111sf_isoc, int, 0644); MODULE_PARM_DESC(isoc, "enable usb isoc xfer (0=bulk,
1=isoc)."); static int dvb_usb_mxl111sf_spi; module_param_named(spi, dvb_usb_mxl111sf_spi, int, 0644); MODULE_PARM_DESC(spi, "use spi rather than tp for data xfer (0=tp, 1=spi)."); #define ANT_PATH_AUTO 0 #define ANT_PATH_EXTERNAL 1 #define ANT_PATH_INTERNAL 2 static int dvb_usb_mxl111sf_rfswitch = #if 0 ANT_PATH_AUTO; #else ANT_PATH_EXTERNAL; #endif module_param_named(rfswitch, dvb_usb_mxl111sf_rfswitch, int, 0644); MODULE_PARM_DESC(rfswitch, "force rf switch position (0=auto, 1=ext, 2=int)."); DVB_DEFINE_MOD_OPT_ADAPTER_NR(adapter_nr); int mxl111sf_ctrl_msg(struct mxl111sf_state *state, u8 cmd, u8 *wbuf, int wlen, u8 *rbuf, int rlen) { struct dvb_usb_device *d = state->d; int wo = (rbuf == NULL || rlen == 0); /* write-only */ int ret; if (1 + wlen > MXL_MAX_XFER_SIZE) { pr_warn("%s: len=%d is too big!\n", __func__, wlen); return -EOPNOTSUPP; } pr_debug("%s(wlen = %d, rlen = %d)\n", __func__, wlen, rlen); mutex_lock(&state->msg_lock); memset(state->sndbuf, 0, 1+wlen); memset(state->rcvbuf, 0, rlen); state->sndbuf[0] = cmd; memcpy(&state->sndbuf[1], wbuf, wlen); ret = (wo) ? dvb_usbv2_generic_write(d, state->sndbuf, 1+wlen) : dvb_usbv2_generic_rw(d, state->sndbuf, 1+wlen, state->rcvbuf, rlen); if (rbuf) memcpy(rbuf, state->rcvbuf, rlen); mutex_unlock(&state->msg_lock); mxl_fail(ret); return ret; } /* ------------------------------------------------------------------------ */ #define MXL_CMD_REG_READ 0xaa #define MXL_CMD_REG_WRITE 0x55 int mxl111sf_read_reg(struct mxl111sf_state *state, u8 addr, u8 *data) { u8 buf[2]; int ret; ret = mxl111sf_ctrl_msg(state, MXL_CMD_REG_READ, &addr, 1, buf, 2); if (mxl_fail(ret)) { mxl_debug("error reading reg: 0x%02x", addr); goto fail; } if (buf[0] == addr) *data = buf[1]; else { pr_err("invalid response reading reg: 0x%02x != 0x%02x, 0x%02x", addr, buf[0], buf[1]); ret = -EINVAL; } pr_debug("R: (0x%02x, 0x%02x)\n", addr, buf[1]); fail: return ret; } int mxl111sf_write_reg(struct mxl111sf_state *state, u8 addr, u8 data) { u8 buf[] = { addr, data }; int ret; pr_debug("W: (0x%02x, 0x%02x)\n", addr, data); ret = mxl111sf_ctrl_msg(state, MXL_CMD_REG_WRITE, buf, 2, NULL, 0); if (mxl_fail(ret)) pr_err("error writing reg: 0x%02x, val: 0x%02x", addr, data); return ret; } /* ------------------------------------------------------------------------ */ int mxl111sf_write_reg_mask(struct mxl111sf_state *state, u8 addr, u8 mask, u8 data) { int ret; u8 val = 0; if (mask != 0xff) { ret = mxl111sf_read_reg(state, addr, &val); #if 1 /* don't know why this usually errors out on the first try */ if (mxl_fail(ret)) pr_err("error writing addr: 0x%02x, mask: 0x%02x, data: 0x%02x, retrying...", addr, mask, data); ret = mxl111sf_read_reg(state, addr, &val); #endif if (mxl_fail(ret)) goto fail; } val &= ~mask; val |= data; ret = mxl111sf_write_reg(state, addr, val); mxl_fail(ret); fail: return ret; } /* ------------------------------------------------------------------------ */ int mxl111sf_ctrl_program_regs(struct mxl111sf_state *state, struct mxl111sf_reg_ctrl_info *ctrl_reg_info) { int i, ret = 0; for (i = 0; ctrl_reg_info[i].addr | ctrl_reg_info[i].mask | ctrl_reg_info[i].data; i++) { ret = mxl111sf_write_reg_mask(state, ctrl_reg_info[i].addr, ctrl_reg_info[i].mask, ctrl_reg_info[i].data); if (mxl_fail(ret)) { pr_err("failed on reg #%d (0x%02x)", i, ctrl_reg_info[i].addr); break; } } return ret; } /* ------------------------------------------------------------------------ */ static int mxl1x1sf_get_chip_info(struct mxl111sf_state *state) { int ret; u8 id, ver; char *mxl_chip, 
*mxl_rev; if ((state->chip_id) && (state->chip_ver)) return 0; ret = mxl111sf_read_reg(state, CHIP_ID_REG, &id); if (mxl_fail(ret)) goto fail; state->chip_id = id; ret = mxl111sf_read_reg(state, TOP_CHIP_REV_ID_REG, &ver); if (mxl_fail(ret)) goto fail; state->chip_ver = ver; switch (id) { case 0x61: mxl_chip = "MxL101SF"; break; case 0x63: mxl_chip = "MxL111SF"; break; default: mxl_chip = "UNKNOWN MxL1X1"; break; } switch (ver) { case 0x36: state->chip_rev = MXL111SF_V6; mxl_rev = "v6"; break; case 0x08: state->chip_rev = MXL111SF_V8_100; mxl_rev = "v8_100"; break; case 0x18: state->chip_rev = MXL111SF_V8_200; mxl_rev = "v8_200"; break; default: state->chip_rev = 0; mxl_rev = "UNKNOWN REVISION"; break; } pr_info("%s detected, %s (0x%x)", mxl_chip, mxl_rev, ver); fail: return ret; } #define get_chip_info(state) \ ({ \ int ___ret; \ ___ret = mxl1x1sf_get_chip_info(state); \ if (mxl_fail(___ret)) { \ mxl_debug("failed to get chip info" \ " on first probe attempt"); \ ___ret = mxl1x1sf_get_chip_info(state); \ if (mxl_fail(___ret)) \ pr_err("failed to get chip info during probe"); \ else \ mxl_debug("probe needed a retry " \ "in order to succeed."); \ } \ ___ret; \ }) /* ------------------------------------------------------------------------ */ #if 0 static int mxl111sf_power_ctrl(struct dvb_usb_device *d, int onoff) { /* power control depends on which adapter is being woken: * save this for init, instead, via mxl111sf_adap_fe_init */ return 0; } #endif static int mxl111sf_adap_fe_init(struct dvb_frontend *fe) { struct dvb_usb_device *d = fe_to_d(fe); struct mxl111sf_state *state = fe_to_priv(fe); struct mxl111sf_adap_state *adap_state = &state->adap_state[fe->id]; int err; /* exit if we didn't initialize the driver yet */ if (!state->chip_id) { mxl_debug("driver not yet initialized, exit."); goto fail; } pr_debug("%s()\n", __func__); mutex_lock(&state->fe_lock); state->alt_mode = adap_state->alt_mode; if (usb_set_interface(d->udev, 0, state->alt_mode) < 0) pr_err("set interface failed"); err = mxl1x1sf_soft_reset(state); mxl_fail(err); err = mxl111sf_init_tuner_demod(state); mxl_fail(err); err = mxl1x1sf_set_device_mode(state, adap_state->device_mode); mxl_fail(err); err = mxl111sf_enable_usb_output(state); mxl_fail(err); err = mxl1x1sf_top_master_ctrl(state, 1); mxl_fail(err); if ((MXL111SF_GPIO_MOD_DVBT != adap_state->gpio_mode) && (state->chip_rev > MXL111SF_V6)) { mxl111sf_config_pin_mux_modes(state, PIN_MUX_TS_SPI_IN_MODE_1); mxl_fail(err); } err = mxl111sf_init_port_expander(state); if (!mxl_fail(err)) { state->gpio_mode = adap_state->gpio_mode; err = mxl111sf_gpio_mode_switch(state, state->gpio_mode); mxl_fail(err); #if 0 err = fe->ops.init(fe); #endif msleep(100); /* add short delay after enabling * the demod before touching it */ } return (adap_state->fe_init) ? adap_state->fe_init(fe) : 0; fail: return -ENODEV; } static int mxl111sf_adap_fe_sleep(struct dvb_frontend *fe) { struct mxl111sf_state *state = fe_to_priv(fe); struct mxl111sf_adap_state *adap_state = &state->adap_state[fe->id]; int err; /* exit if we didn't initialize the driver yet */ if (!state->chip_id) { mxl_debug("driver not yet initialized, exit."); goto fail; } pr_debug("%s()\n", __func__); err = (adap_state->fe_sleep) ? 
adap_state->fe_sleep(fe) : 0; mutex_unlock(&state->fe_lock); return err; fail: return -ENODEV; } static int mxl111sf_ep6_streaming_ctrl(struct dvb_frontend *fe, int onoff) { struct mxl111sf_state *state = fe_to_priv(fe); struct mxl111sf_adap_state *adap_state = &state->adap_state[fe->id]; int ret = 0; pr_debug("%s(%d)\n", __func__, onoff); if (onoff) { ret = mxl111sf_enable_usb_output(state); mxl_fail(ret); ret = mxl111sf_config_mpeg_in(state, 1, 1, adap_state->ep6_clockphase, 0, 0); mxl_fail(ret); #if 0 } else { ret = mxl111sf_disable_656_port(state); mxl_fail(ret); #endif } return ret; } static int mxl111sf_ep5_streaming_ctrl(struct dvb_frontend *fe, int onoff) { struct mxl111sf_state *state = fe_to_priv(fe); int ret = 0; pr_debug("%s(%d)\n", __func__, onoff); if (onoff) { ret = mxl111sf_enable_usb_output(state); mxl_fail(ret); ret = mxl111sf_init_i2s_port(state, 200); mxl_fail(ret); ret = mxl111sf_config_i2s(state, 0, 15); mxl_fail(ret); } else { ret = mxl111sf_disable_i2s_port(state); mxl_fail(ret); } if (state->chip_rev > MXL111SF_V6) ret = mxl111sf_config_spi(state, onoff); mxl_fail(ret); return ret; } static int mxl111sf_ep4_streaming_ctrl(struct dvb_frontend *fe, int onoff) { struct mxl111sf_state *state = fe_to_priv(fe); int ret = 0; pr_debug("%s(%d)\n", __func__, onoff); if (onoff) { ret = mxl111sf_enable_usb_output(state); mxl_fail(ret); } return ret; } /* ------------------------------------------------------------------------ */ static struct lgdt3305_config hauppauge_lgdt3305_config = { .i2c_addr = 0xb2 >> 1, .mpeg_mode = LGDT3305_MPEG_SERIAL, .tpclk_edge = LGDT3305_TPCLK_RISING_EDGE, .tpvalid_polarity = LGDT3305_TP_VALID_HIGH, .deny_i2c_rptr = 1, .spectral_inversion = 0, .qam_if_khz = 6000, .vsb_if_khz = 6000, }; static int mxl111sf_lgdt3305_frontend_attach(struct dvb_usb_adapter *adap, u8 fe_id) { struct dvb_usb_device *d = adap_to_d(adap); struct mxl111sf_state *state = d_to_priv(d); struct mxl111sf_adap_state *adap_state = &state->adap_state[fe_id]; int ret; pr_debug("%s()\n", __func__); /* save a pointer to the dvb_usb_device in device state */ state->d = d; adap_state->alt_mode = (dvb_usb_mxl111sf_isoc) ? 
2 : 1; state->alt_mode = adap_state->alt_mode; if (usb_set_interface(d->udev, 0, state->alt_mode) < 0) pr_err("set interface failed"); state->gpio_mode = MXL111SF_GPIO_MOD_ATSC; adap_state->gpio_mode = state->gpio_mode; adap_state->device_mode = MXL_TUNER_MODE; adap_state->ep6_clockphase = 1; ret = mxl1x1sf_soft_reset(state); if (mxl_fail(ret)) goto fail; ret = mxl111sf_init_tuner_demod(state); if (mxl_fail(ret)) goto fail; ret = mxl1x1sf_set_device_mode(state, adap_state->device_mode); if (mxl_fail(ret)) goto fail; ret = mxl111sf_enable_usb_output(state); if (mxl_fail(ret)) goto fail; ret = mxl1x1sf_top_master_ctrl(state, 1); if (mxl_fail(ret)) goto fail; ret = mxl111sf_init_port_expander(state); if (mxl_fail(ret)) goto fail; ret = mxl111sf_gpio_mode_switch(state, state->gpio_mode); if (mxl_fail(ret)) goto fail; adap->fe[fe_id] = dvb_attach(lgdt3305_attach, &hauppauge_lgdt3305_config, &d->i2c_adap); if (adap->fe[fe_id]) { state->num_frontends++; adap_state->fe_init = adap->fe[fe_id]->ops.init; adap->fe[fe_id]->ops.init = mxl111sf_adap_fe_init; adap_state->fe_sleep = adap->fe[fe_id]->ops.sleep; adap->fe[fe_id]->ops.sleep = mxl111sf_adap_fe_sleep; return 0; } ret = -EIO; fail: return ret; } static struct lg2160_config hauppauge_lg2160_config = { .lg_chip = LG2160, .i2c_addr = 0x1c >> 1, .deny_i2c_rptr = 1, .spectral_inversion = 0, .if_khz = 6000, }; static int mxl111sf_lg2160_frontend_attach(struct dvb_usb_adapter *adap, u8 fe_id) { struct dvb_usb_device *d = adap_to_d(adap); struct mxl111sf_state *state = d_to_priv(d); struct mxl111sf_adap_state *adap_state = &state->adap_state[fe_id]; int ret; pr_debug("%s()\n", __func__); /* save a pointer to the dvb_usb_device in device state */ state->d = d; adap_state->alt_mode = (dvb_usb_mxl111sf_isoc) ? 2 : 1; state->alt_mode = adap_state->alt_mode; if (usb_set_interface(d->udev, 0, state->alt_mode) < 0) pr_err("set interface failed"); state->gpio_mode = MXL111SF_GPIO_MOD_MH; adap_state->gpio_mode = state->gpio_mode; adap_state->device_mode = MXL_TUNER_MODE; adap_state->ep6_clockphase = 1; ret = mxl1x1sf_soft_reset(state); if (mxl_fail(ret)) goto fail; ret = mxl111sf_init_tuner_demod(state); if (mxl_fail(ret)) goto fail; ret = mxl1x1sf_set_device_mode(state, adap_state->device_mode); if (mxl_fail(ret)) goto fail; ret = mxl111sf_enable_usb_output(state); if (mxl_fail(ret)) goto fail; ret = mxl1x1sf_top_master_ctrl(state, 1); if (mxl_fail(ret)) goto fail; ret = mxl111sf_init_port_expander(state); if (mxl_fail(ret)) goto fail; ret = mxl111sf_gpio_mode_switch(state, state->gpio_mode); if (mxl_fail(ret)) goto fail; ret = get_chip_info(state); if (mxl_fail(ret)) goto fail; adap->fe[fe_id] = dvb_attach(lg2160_attach, &hauppauge_lg2160_config, &d->i2c_adap); if (adap->fe[fe_id]) { state->num_frontends++; adap_state->fe_init = adap->fe[fe_id]->ops.init; adap->fe[fe_id]->ops.init = mxl111sf_adap_fe_init; adap_state->fe_sleep = adap->fe[fe_id]->ops.sleep; adap->fe[fe_id]->ops.sleep = mxl111sf_adap_fe_sleep; return 0; } ret = -EIO; fail: return ret; } static struct lg2160_config hauppauge_lg2161_1019_config = { .lg_chip = LG2161_1019, .i2c_addr = 0x1c >> 1, .deny_i2c_rptr = 1, .spectral_inversion = 0, .if_khz = 6000, .output_if = 2, /* LG2161_OIF_SPI_MAS */ }; static struct lg2160_config hauppauge_lg2161_1040_config = { .lg_chip = LG2161_1040, .i2c_addr = 0x1c >> 1, .deny_i2c_rptr = 1, .spectral_inversion = 0, .if_khz = 6000, .output_if = 4, /* LG2161_OIF_SPI_MAS */ }; static int mxl111sf_lg2161_frontend_attach(struct dvb_usb_adapter *adap, u8 fe_id) { struct 
dvb_usb_device *d = adap_to_d(adap); struct mxl111sf_state *state = d_to_priv(d); struct mxl111sf_adap_state *adap_state = &state->adap_state[fe_id]; int ret; pr_debug("%s()\n", __func__); /* save a pointer to the dvb_usb_device in device state */ state->d = d; adap_state->alt_mode = (dvb_usb_mxl111sf_isoc) ? 2 : 1; state->alt_mode = adap_state->alt_mode; if (usb_set_interface(d->udev, 0, state->alt_mode) < 0) pr_err("set interface failed"); state->gpio_mode = MXL111SF_GPIO_MOD_MH; adap_state->gpio_mode = state->gpio_mode; adap_state->device_mode = MXL_TUNER_MODE; adap_state->ep6_clockphase = 1; ret = mxl1x1sf_soft_reset(state); if (mxl_fail(ret)) goto fail; ret = mxl111sf_init_tuner_demod(state); if (mxl_fail(ret)) goto fail; ret = mxl1x1sf_set_device_mode(state, adap_state->device_mode); if (mxl_fail(ret)) goto fail; ret = mxl111sf_enable_usb_output(state); if (mxl_fail(ret)) goto fail; ret = mxl1x1sf_top_master_ctrl(state, 1); if (mxl_fail(ret)) goto fail; ret = mxl111sf_init_port_expander(state); if (mxl_fail(ret)) goto fail; ret = mxl111sf_gpio_mode_switch(state, state->gpio_mode); if (mxl_fail(ret)) goto fail; ret = get_chip_info(state); if (mxl_fail(ret)) goto fail; adap->fe[fe_id] = dvb_attach(lg2160_attach, (MXL111SF_V8_200 == state->chip_rev) ? &hauppauge_lg2161_1040_config : &hauppauge_lg2161_1019_config, &d->i2c_adap); if (adap->fe[fe_id]) { state->num_frontends++; adap_state->fe_init = adap->fe[fe_id]->ops.init; adap->fe[fe_id]->ops.init = mxl111sf_adap_fe_init; adap_state->fe_sleep = adap->fe[fe_id]->ops.sleep; adap->fe[fe_id]->ops.sleep = mxl111sf_adap_fe_sleep; return 0; } ret = -EIO; fail: return ret; } static struct lg2160_config hauppauge_lg2161_1019_ep6_config = { .lg_chip = LG2161_1019, .i2c_addr = 0x1c >> 1, .deny_i2c_rptr = 1, .spectral_inversion = 0, .if_khz = 6000, .output_if = 1, /* LG2161_OIF_SERIAL_TS */ }; static struct lg2160_config hauppauge_lg2161_1040_ep6_config = { .lg_chip = LG2161_1040, .i2c_addr = 0x1c >> 1, .deny_i2c_rptr = 1, .spectral_inversion = 0, .if_khz = 6000, .output_if = 7, /* LG2161_OIF_SERIAL_TS */ }; static int mxl111sf_lg2161_ep6_frontend_attach(struct dvb_usb_adapter *adap, u8 fe_id) { struct dvb_usb_device *d = adap_to_d(adap); struct mxl111sf_state *state = d_to_priv(d); struct mxl111sf_adap_state *adap_state = &state->adap_state[fe_id]; int ret; pr_debug("%s()\n", __func__); /* save a pointer to the dvb_usb_device in device state */ state->d = d; adap_state->alt_mode = (dvb_usb_mxl111sf_isoc) ? 2 : 1; state->alt_mode = adap_state->alt_mode; if (usb_set_interface(d->udev, 0, state->alt_mode) < 0) pr_err("set interface failed"); state->gpio_mode = MXL111SF_GPIO_MOD_MH; adap_state->gpio_mode = state->gpio_mode; adap_state->device_mode = MXL_TUNER_MODE; adap_state->ep6_clockphase = 0; ret = mxl1x1sf_soft_reset(state); if (mxl_fail(ret)) goto fail; ret = mxl111sf_init_tuner_demod(state); if (mxl_fail(ret)) goto fail; ret = mxl1x1sf_set_device_mode(state, adap_state->device_mode); if (mxl_fail(ret)) goto fail; ret = mxl111sf_enable_usb_output(state); if (mxl_fail(ret)) goto fail; ret = mxl1x1sf_top_master_ctrl(state, 1); if (mxl_fail(ret)) goto fail; ret = mxl111sf_init_port_expander(state); if (mxl_fail(ret)) goto fail; ret = mxl111sf_gpio_mode_switch(state, state->gpio_mode); if (mxl_fail(ret)) goto fail; ret = get_chip_info(state); if (mxl_fail(ret)) goto fail; adap->fe[fe_id] = dvb_attach(lg2160_attach, (MXL111SF_V8_200 == state->chip_rev) ? 
&hauppauge_lg2161_1040_ep6_config : &hauppauge_lg2161_1019_ep6_config, &d->i2c_adap); if (adap->fe[fe_id]) { state->num_frontends++; adap_state->fe_init = adap->fe[fe_id]->ops.init; adap->fe[fe_id]->ops.init = mxl111sf_adap_fe_init; adap_state->fe_sleep = adap->fe[fe_id]->ops.sleep; adap->fe[fe_id]->ops.sleep = mxl111sf_adap_fe_sleep; return 0; } ret = -EIO; fail: return ret; } static const struct mxl111sf_demod_config mxl_demod_config = { .read_reg = mxl111sf_read_reg, .write_reg = mxl111sf_write_reg, .program_regs = mxl111sf_ctrl_program_regs, }; static int mxl111sf_attach_demod(struct dvb_usb_adapter *adap, u8 fe_id) { struct dvb_usb_device *d = adap_to_d(adap); struct mxl111sf_state *state = d_to_priv(d); struct mxl111sf_adap_state *adap_state = &state->adap_state[fe_id]; int ret; pr_debug("%s()\n", __func__); /* save a pointer to the dvb_usb_device in device state */ state->d = d; adap_state->alt_mode = (dvb_usb_mxl111sf_isoc) ? 1 : 2; state->alt_mode = adap_state->alt_mode; if (usb_set_interface(d->udev, 0, state->alt_mode) < 0) pr_err("set interface failed"); state->gpio_mode = MXL111SF_GPIO_MOD_DVBT; adap_state->gpio_mode = state->gpio_mode; adap_state->device_mode = MXL_SOC_MODE; adap_state->ep6_clockphase = 1; ret = mxl1x1sf_soft_reset(state); if (mxl_fail(ret)) goto fail; ret = mxl111sf_init_tuner_demod(state); if (mxl_fail(ret)) goto fail; ret = mxl1x1sf_set_device_mode(state, adap_state->device_mode); if (mxl_fail(ret)) goto fail; ret = mxl111sf_enable_usb_output(state); if (mxl_fail(ret)) goto fail; ret = mxl1x1sf_top_master_ctrl(state, 1); if (mxl_fail(ret)) goto fail; /* don't care if this fails */ mxl111sf_init_port_expander(state); adap->fe[fe_id] = dvb_attach(mxl111sf_demod_attach, state, &mxl_demod_config); if (adap->fe[fe_id]) { state->num_frontends++; adap_state->fe_init = adap->fe[fe_id]->ops.init; adap->fe[fe_id]->ops.init = mxl111sf_adap_fe_init; adap_state->fe_sleep = adap->fe[fe_id]->ops.sleep; adap->fe[fe_id]->ops.sleep = mxl111sf_adap_fe_sleep; return 0; } ret = -EIO; fail: return ret; } static inline int mxl111sf_set_ant_path(struct mxl111sf_state *state, int antpath) { return mxl111sf_idac_config(state, 1, 1, (antpath == ANT_PATH_INTERNAL) ? 0x3f : 0x00, 0); } #define DbgAntHunt(x, pwr0, pwr1, pwr2, pwr3) \ pr_err("%s(%d) FINAL input set to %s rxPwr:%d|%d|%d|%d\n", \ __func__, __LINE__, \ (ANT_PATH_EXTERNAL == x) ? "EXTERNAL" : "INTERNAL", \ pwr0, pwr1, pwr2, pwr3) #define ANT_HUNT_SLEEP 90 #define ANT_EXT_TWEAK 0 static int mxl111sf_ant_hunt(struct dvb_frontend *fe) { struct mxl111sf_state *state = fe_to_priv(fe); int antctrl = dvb_usb_mxl111sf_rfswitch; u16 rxPwrA, rxPwr0, rxPwr1, rxPwr2; /* FIXME: must force EXTERNAL for QAM - done elsewhere */ mxl111sf_set_ant_path(state, antctrl == ANT_PATH_AUTO ? 
ANT_PATH_EXTERNAL : antctrl); if (antctrl == ANT_PATH_AUTO) { #if 0 msleep(ANT_HUNT_SLEEP); #endif fe->ops.tuner_ops.get_rf_strength(fe, &rxPwrA); mxl111sf_set_ant_path(state, ANT_PATH_EXTERNAL); msleep(ANT_HUNT_SLEEP); fe->ops.tuner_ops.get_rf_strength(fe, &rxPwr0); mxl111sf_set_ant_path(state, ANT_PATH_EXTERNAL); msleep(ANT_HUNT_SLEEP); fe->ops.tuner_ops.get_rf_strength(fe, &rxPwr1); mxl111sf_set_ant_path(state, ANT_PATH_INTERNAL); msleep(ANT_HUNT_SLEEP); fe->ops.tuner_ops.get_rf_strength(fe, &rxPwr2); if (rxPwr1+ANT_EXT_TWEAK >= rxPwr2) { /* return with EXTERNAL enabled */ mxl111sf_set_ant_path(state, ANT_PATH_EXTERNAL); DbgAntHunt(ANT_PATH_EXTERNAL, rxPwrA, rxPwr0, rxPwr1, rxPwr2); } else { /* return with INTERNAL enabled */ DbgAntHunt(ANT_PATH_INTERNAL, rxPwrA, rxPwr0, rxPwr1, rxPwr2); } } return 0; } static const struct mxl111sf_tuner_config mxl_tuner_config = { .if_freq = MXL_IF_6_0, /* applies to external IF output, only */ .invert_spectrum = 0, .read_reg = mxl111sf_read_reg, .write_reg = mxl111sf_write_reg, .program_regs = mxl111sf_ctrl_program_regs, .top_master_ctrl = mxl1x1sf_top_master_ctrl, .ant_hunt = mxl111sf_ant_hunt, }; static int mxl111sf_attach_tuner(struct dvb_usb_adapter *adap) { struct mxl111sf_state *state = adap_to_priv(adap); #ifdef CONFIG_MEDIA_CONTROLLER_DVB struct media_device *mdev = dvb_get_media_controller(&adap->dvb_adap); int ret; #endif int i; pr_debug("%s()\n", __func__); for (i = 0; i < state->num_frontends; i++) { if (dvb_attach(mxl111sf_tuner_attach, adap->fe[i], state, &mxl_tuner_config) == NULL) return -EIO; adap->fe[i]->ops.read_signal_strength = adap->fe[i]->ops.tuner_ops.get_rf_strength; } #ifdef CONFIG_MEDIA_CONTROLLER_DVB state->tuner.function = MEDIA_ENT_F_TUNER; state->tuner.name = "mxl111sf tuner"; state->tuner_pads[MXL111SF_PAD_RF_INPUT].flags = MEDIA_PAD_FL_SINK; state->tuner_pads[MXL111SF_PAD_RF_INPUT].sig_type = PAD_SIGNAL_ANALOG; state->tuner_pads[MXL111SF_PAD_OUTPUT].flags = MEDIA_PAD_FL_SOURCE; state->tuner_pads[MXL111SF_PAD_OUTPUT].sig_type = PAD_SIGNAL_ANALOG; ret = media_entity_pads_init(&state->tuner, MXL111SF_NUM_PADS, state->tuner_pads); if (ret) return ret; ret = media_device_register_entity(mdev, &state->tuner); if (ret) return ret; #endif return 0; } static u32 mxl111sf_i2c_func(struct i2c_adapter *adapter) { return I2C_FUNC_I2C; } static struct i2c_algorithm mxl111sf_i2c_algo = { .master_xfer = mxl111sf_i2c_xfer, .functionality = mxl111sf_i2c_func, #ifdef NEED_ALGO_CONTROL .algo_control = dummy_algo_control, #endif }; static int mxl111sf_init(struct dvb_usb_device *d) { struct mxl111sf_state *state = d_to_priv(d); int ret; static u8 eeprom[256]; u8 reg = 0; struct i2c_msg msg[2] = { { .addr = 0xa0 >> 1, .len = 1, .buf = &reg }, { .addr = 0xa0 >> 1, .flags = I2C_M_RD, .len = sizeof(eeprom), .buf = eeprom }, }; ret = get_chip_info(state); if (mxl_fail(ret)) pr_err("failed to get chip info during probe"); mutex_init(&state->fe_lock); if (state->chip_rev > MXL111SF_V6) mxl111sf_config_pin_mux_modes(state, PIN_MUX_TS_SPI_IN_MODE_1); ret = i2c_transfer(&d->i2c_adap, msg, 2); if (mxl_fail(ret)) return 0; tveeprom_hauppauge_analog(&state->tv, (0x84 == eeprom[0xa0]) ?
eeprom + 0xa0 : eeprom + 0x80); #if 0 switch (state->tv.model) { case 117001: case 126001: case 138001: break; default: printk(KERN_WARNING "%s: warning: unknown hauppauge model #%d\n", __func__, state->tv.model); } #endif return 0; } static int mxl111sf_frontend_attach_dvbt(struct dvb_usb_adapter *adap) { return mxl111sf_attach_demod(adap, 0); } static int mxl111sf_frontend_attach_atsc(struct dvb_usb_adapter *adap) { return mxl111sf_lgdt3305_frontend_attach(adap, 0); } static int mxl111sf_frontend_attach_mh(struct dvb_usb_adapter *adap) { return mxl111sf_lg2160_frontend_attach(adap, 0); } static int mxl111sf_frontend_attach_atsc_mh(struct dvb_usb_adapter *adap) { int ret; pr_debug("%s\n", __func__); ret = mxl111sf_lgdt3305_frontend_attach(adap, 0); if (ret < 0) return ret; ret = mxl111sf_attach_demod(adap, 1); if (ret < 0) return ret; ret = mxl111sf_lg2160_frontend_attach(adap, 2); if (ret < 0) return ret; return ret; } static int mxl111sf_frontend_attach_mercury(struct dvb_usb_adapter *adap) { int ret; pr_debug("%s\n", __func__); ret = mxl111sf_lgdt3305_frontend_attach(adap, 0); if (ret < 0) return ret; ret = mxl111sf_attach_demod(adap, 1); if (ret < 0) return ret; ret = mxl111sf_lg2161_ep6_frontend_attach(adap, 2); if (ret < 0) return ret; return ret; } static int mxl111sf_frontend_attach_mercury_mh(struct dvb_usb_adapter *adap) { int ret; pr_debug("%s\n", __func__); ret = mxl111sf_attach_demod(adap, 0); if (ret < 0) return ret; if (dvb_usb_mxl111sf_spi) ret = mxl111sf_lg2161_frontend_attach(adap, 1); else ret = mxl111sf_lg2161_ep6_frontend_attach(adap, 1); return ret; } static void mxl111sf_stream_config_bulk(struct usb_data_stream_properties *stream, u8 endpoint) { pr_debug("%s: endpoint=%d size=8192\n", __func__, endpoint); stream->type = USB_BULK; stream->count = 5; stream->endpoint = endpoint; stream->u.bulk.buffersize = 8192; } static void mxl111sf_stream_config_isoc(struct usb_data_stream_properties *stream, u8 endpoint, int framesperurb, int framesize) { pr_debug("%s: endpoint=%d size=%d\n", __func__, endpoint, framesperurb * framesize); stream->type = USB_ISOC; stream->count = 5; stream->endpoint = endpoint; stream->u.isoc.framesperurb = framesperurb; stream->u.isoc.framesize = framesize; stream->u.isoc.interval = 1; } /* DVB USB Driver stuff */ /* dvbt mxl111sf * bulk EP4/BULK/5/8192 * isoc EP4/ISOC/5/96/564 */ static int mxl111sf_get_stream_config_dvbt(struct dvb_frontend *fe, u8 *ts_type, struct usb_data_stream_properties *stream) { pr_debug("%s: fe=%d\n", __func__, fe->id); *ts_type = DVB_USB_FE_TS_TYPE_188; if (dvb_usb_mxl111sf_isoc) mxl111sf_stream_config_isoc(stream, 4, 96, 564); else mxl111sf_stream_config_bulk(stream, 4); return 0; } static int mxl111sf_probe(struct dvb_usb_device *dev) { struct mxl111sf_state *state = d_to_priv(dev); mutex_init(&state->msg_lock); return 0; } static struct dvb_usb_device_properties mxl111sf_props_dvbt = { .driver_name = KBUILD_MODNAME, .owner = THIS_MODULE, .adapter_nr = adapter_nr, .size_of_priv = sizeof(struct mxl111sf_state), .generic_bulk_ctrl_endpoint = 0x02, .generic_bulk_ctrl_endpoint_response = 0x81, .probe = mxl111sf_probe, .i2c_algo = &mxl111sf_i2c_algo, .frontend_attach = mxl111sf_frontend_attach_dvbt, .tuner_attach = mxl111sf_attach_tuner, .init = mxl111sf_init, .streaming_ctrl = mxl111sf_ep4_streaming_ctrl, .get_stream_config = mxl111sf_get_stream_config_dvbt, .num_adapters = 1, .adapter = { { .stream = DVB_USB_STREAM_ISOC(6, 5, 24, 3072, 1), } } }; /* atsc lgdt3305 * bulk EP6/BULK/5/8192 * isoc EP6/ISOC/5/24/3072 */ 
static int mxl111sf_get_stream_config_atsc(struct dvb_frontend *fe, u8 *ts_type, struct usb_data_stream_properties *stream) { pr_debug("%s: fe=%d\n", __func__, fe->id); *ts_type = DVB_USB_FE_TS_TYPE_188; if (dvb_usb_mxl111sf_isoc) mxl111sf_stream_config_isoc(stream, 6, 24, 3072); else mxl111sf_stream_config_bulk(stream, 6); return 0; } static struct dvb_usb_device_properties mxl111sf_props_atsc = { .driver_name = KBUILD_MODNAME, .owner = THIS_MODULE, .adapter_nr = adapter_nr, .size_of_priv = sizeof(struct mxl111sf_state), .generic_bulk_ctrl_endpoint = 0x02, .generic_bulk_ctrl_endpoint_response = 0x81, .probe = mxl111sf_probe, .i2c_algo = &mxl111sf_i2c_algo, .frontend_attach = mxl111sf_frontend_attach_atsc, .tuner_attach = mxl111sf_attach_tuner, .init = mxl111sf_init, .streaming_ctrl = mxl111sf_ep6_streaming_ctrl, .get_stream_config = mxl111sf_get_stream_config_atsc, .num_adapters = 1, .adapter = { { .stream = DVB_USB_STREAM_ISOC(6, 5, 24, 3072, 1), } } }; /* mh lg2160 * bulk EP5/BULK/5/8192/RAW * isoc EP5/ISOC/5/96/200/RAW */ static int mxl111sf_get_stream_config_mh(struct dvb_frontend *fe, u8 *ts_type, struct usb_data_stream_properties *stream) { pr_debug("%s: fe=%d\n", __func__, fe->id); *ts_type = DVB_USB_FE_TS_TYPE_RAW; if (dvb_usb_mxl111sf_isoc) mxl111sf_stream_config_isoc(stream, 5, 96, 200); else mxl111sf_stream_config_bulk(stream, 5); return 0; } static struct dvb_usb_device_properties mxl111sf_props_mh = { .driver_name = KBUILD_MODNAME, .owner = THIS_MODULE, .adapter_nr = adapter_nr, .size_of_priv = sizeof(struct mxl111sf_state), .generic_bulk_ctrl_endpoint = 0x02, .generic_bulk_ctrl_endpoint_response = 0x81, .probe = mxl111sf_probe, .i2c_algo = &mxl111sf_i2c_algo, .frontend_attach = mxl111sf_frontend_attach_mh, .tuner_attach = mxl111sf_attach_tuner, .init = mxl111sf_init, .streaming_ctrl = mxl111sf_ep5_streaming_ctrl, .get_stream_config = mxl111sf_get_stream_config_mh, .num_adapters = 1, .adapter = { { .stream = DVB_USB_STREAM_ISOC(6, 5, 24, 3072, 1), } } }; /* atsc mh lgdt3305 mxl111sf lg2160 * bulk EP6/BULK/5/8192 EP4/BULK/5/8192 EP5/BULK/5/8192/RAW * isoc EP6/ISOC/5/24/3072 EP4/ISOC/5/96/564 EP5/ISOC/5/96/200/RAW */ static int mxl111sf_get_stream_config_atsc_mh(struct dvb_frontend *fe, u8 *ts_type, struct usb_data_stream_properties *stream) { pr_debug("%s: fe=%d\n", __func__, fe->id); if (fe->id == 0) { *ts_type = DVB_USB_FE_TS_TYPE_188; if (dvb_usb_mxl111sf_isoc) mxl111sf_stream_config_isoc(stream, 6, 24, 3072); else mxl111sf_stream_config_bulk(stream, 6); } else if (fe->id == 1) { *ts_type = DVB_USB_FE_TS_TYPE_188; if (dvb_usb_mxl111sf_isoc) mxl111sf_stream_config_isoc(stream, 4, 96, 564); else mxl111sf_stream_config_bulk(stream, 4); } else if (fe->id == 2) { *ts_type = DVB_USB_FE_TS_TYPE_RAW; if (dvb_usb_mxl111sf_isoc) mxl111sf_stream_config_isoc(stream, 5, 96, 200); else mxl111sf_stream_config_bulk(stream, 5); } return 0; } static int mxl111sf_streaming_ctrl_atsc_mh(struct dvb_frontend *fe, int onoff) { pr_debug("%s: fe=%d onoff=%d\n", __func__, fe->id, onoff); if (fe->id == 0) return mxl111sf_ep6_streaming_ctrl(fe, onoff); else if (fe->id == 1) return mxl111sf_ep4_streaming_ctrl(fe, onoff); else if (fe->id == 2) return mxl111sf_ep5_streaming_ctrl(fe, onoff); return 0; } static struct dvb_usb_device_properties mxl111sf_props_atsc_mh = { .driver_name = KBUILD_MODNAME, .owner = THIS_MODULE, .adapter_nr = adapter_nr, .size_of_priv = sizeof(struct mxl111sf_state), .generic_bulk_ctrl_endpoint = 0x02, .generic_bulk_ctrl_endpoint_response = 0x81, .probe = mxl111sf_probe, 
.i2c_algo = &mxl111sf_i2c_algo, .frontend_attach = mxl111sf_frontend_attach_atsc_mh, .tuner_attach = mxl111sf_attach_tuner, .init = mxl111sf_init, .streaming_ctrl = mxl111sf_streaming_ctrl_atsc_mh, .get_stream_config = mxl111sf_get_stream_config_atsc_mh, .num_adapters = 1, .adapter = { { .stream = DVB_USB_STREAM_ISOC(6, 5, 24, 3072, 1), } } }; /* mercury lgdt3305 mxl111sf lg2161 * tp bulk EP6/BULK/5/8192 EP4/BULK/5/8192 EP6/BULK/5/8192/RAW * tp isoc EP6/ISOC/5/24/3072 EP4/ISOC/5/96/564 EP6/ISOC/5/24/3072/RAW * spi bulk EP6/BULK/5/8192 EP4/BULK/5/8192 EP5/BULK/5/8192/RAW * spi isoc EP6/ISOC/5/24/3072 EP4/ISOC/5/96/564 EP5/ISOC/5/96/200/RAW */ static int mxl111sf_get_stream_config_mercury(struct dvb_frontend *fe, u8 *ts_type, struct usb_data_stream_properties *stream) { pr_debug("%s: fe=%d\n", __func__, fe->id); if (fe->id == 0) { *ts_type = DVB_USB_FE_TS_TYPE_188; if (dvb_usb_mxl111sf_isoc) mxl111sf_stream_config_isoc(stream, 6, 24, 3072); else mxl111sf_stream_config_bulk(stream, 6); } else if (fe->id == 1) { *ts_type = DVB_USB_FE_TS_TYPE_188; if (dvb_usb_mxl111sf_isoc) mxl111sf_stream_config_isoc(stream, 4, 96, 564); else mxl111sf_stream_config_bulk(stream, 4); } else if (fe->id == 2 && dvb_usb_mxl111sf_spi) { *ts_type = DVB_USB_FE_TS_TYPE_RAW; if (dvb_usb_mxl111sf_isoc) mxl111sf_stream_config_isoc(stream, 5, 96, 200); else mxl111sf_stream_config_bulk(stream, 5); } else if (fe->id == 2 && !dvb_usb_mxl111sf_spi) { *ts_type = DVB_USB_FE_TS_TYPE_RAW; if (dvb_usb_mxl111sf_isoc) mxl111sf_stream_config_isoc(stream, 6, 24, 3072); else mxl111sf_stream_config_bulk(stream, 6); } return 0; } static int mxl111sf_streaming_ctrl_mercury(struct dvb_frontend *fe, int onoff) { pr_debug("%s: fe=%d onoff=%d\n", __func__, fe->id, onoff); if (fe->id == 0) return mxl111sf_ep6_streaming_ctrl(fe, onoff); else if (fe->id == 1) return mxl111sf_ep4_streaming_ctrl(fe, onoff); else if (fe->id == 2 && dvb_usb_mxl111sf_spi) return mxl111sf_ep5_streaming_ctrl(fe, onoff); else if (fe->id == 2 && !dvb_usb_mxl111sf_spi) return mxl111sf_ep6_streaming_ctrl(fe, onoff); return 0; } static struct dvb_usb_device_properties mxl111sf_props_mercury = { .driver_name = KBUILD_MODNAME, .owner = THIS_MODULE, .adapter_nr = adapter_nr, .size_of_priv = sizeof(struct mxl111sf_state), .generic_bulk_ctrl_endpoint = 0x02, .generic_bulk_ctrl_endpoint_response = 0x81, .probe = mxl111sf_probe, .i2c_algo = &mxl111sf_i2c_algo, .frontend_attach = mxl111sf_frontend_attach_mercury, .tuner_attach = mxl111sf_attach_tuner, .init = mxl111sf_init, .streaming_ctrl = mxl111sf_streaming_ctrl_mercury, .get_stream_config = mxl111sf_get_stream_config_mercury, .num_adapters = 1, .adapter = { { .stream = DVB_USB_STREAM_ISOC(6, 5, 24, 3072, 1), } } }; /* mercury mh mxl111sf lg2161 * tp bulk EP4/BULK/5/8192 EP6/BULK/5/8192/RAW * tp isoc EP4/ISOC/5/96/564 EP6/ISOC/5/24/3072/RAW * spi bulk EP4/BULK/5/8192 EP5/BULK/5/8192/RAW * spi isoc EP4/ISOC/5/96/564 EP5/ISOC/5/96/200/RAW */ static int mxl111sf_get_stream_config_mercury_mh(struct dvb_frontend *fe, u8 *ts_type, struct usb_data_stream_properties *stream) { pr_debug("%s: fe=%d\n", __func__, fe->id); if (fe->id == 0) { *ts_type = DVB_USB_FE_TS_TYPE_188; if (dvb_usb_mxl111sf_isoc) mxl111sf_stream_config_isoc(stream, 4, 96, 564); else mxl111sf_stream_config_bulk(stream, 4); } else if (fe->id == 1 && dvb_usb_mxl111sf_spi) { *ts_type = DVB_USB_FE_TS_TYPE_RAW; if (dvb_usb_mxl111sf_isoc) mxl111sf_stream_config_isoc(stream, 5, 96, 200); else mxl111sf_stream_config_bulk(stream, 5); } else if (fe->id == 1 && 
!dvb_usb_mxl111sf_spi) { *ts_type = DVB_USB_FE_TS_TYPE_RAW; if (dvb_usb_mxl111sf_isoc) mxl111sf_stream_config_isoc(stream, 6, 24, 3072); else mxl111sf_stream_config_bulk(stream, 6); } return 0; } static int mxl111sf_streaming_ctrl_mercury_mh(struct dvb_frontend *fe, int onoff) { pr_debug("%s: fe=%d onoff=%d\n", __func__, fe->id, onoff); if (fe->id == 0) return mxl111sf_ep4_streaming_ctrl(fe, onoff); else if (fe->id == 1 && dvb_usb_mxl111sf_spi) return mxl111sf_ep5_streaming_ctrl(fe, onoff); else if (fe->id == 1 && !dvb_usb_mxl111sf_spi) return mxl111sf_ep6_streaming_ctrl(fe, onoff); return 0; } static struct dvb_usb_device_properties mxl111sf_props_mercury_mh = { .driver_name = KBUILD_MODNAME, .owner = THIS_MODULE, .adapter_nr = adapter_nr, .size_of_priv = sizeof(struct mxl111sf_state), .generic_bulk_ctrl_endpoint = 0x02, .generic_bulk_ctrl_endpoint_response = 0x81, .probe = mxl111sf_probe, .i2c_algo = &mxl111sf_i2c_algo, .frontend_attach = mxl111sf_frontend_attach_mercury_mh, .tuner_attach = mxl111sf_attach_tuner, .init = mxl111sf_init, .streaming_ctrl = mxl111sf_streaming_ctrl_mercury_mh, .get_stream_config = mxl111sf_get_stream_config_mercury_mh, .num_adapters = 1, .adapter = { { .stream = DVB_USB_STREAM_ISOC(6, 5, 24, 3072, 1), } } }; static const struct usb_device_id mxl111sf_id_table[] = { { DVB_USB_DEVICE(USB_VID_HAUPPAUGE, 0xc600, &mxl111sf_props_atsc_mh, "Hauppauge 126xxx ATSC+", NULL) }, { DVB_USB_DEVICE(USB_VID_HAUPPAUGE, 0xc601, &mxl111sf_props_atsc, "Hauppauge 126xxx ATSC", NULL) }, { DVB_USB_DEVICE(USB_VID_HAUPPAUGE, 0xc602, &mxl111sf_props_mh, "HCW 126xxx", NULL) }, { DVB_USB_DEVICE(USB_VID_HAUPPAUGE, 0xc603, &mxl111sf_props_atsc_mh, "Hauppauge 126xxx ATSC+", NULL) }, { DVB_USB_DEVICE(USB_VID_HAUPPAUGE, 0xc604, &mxl111sf_props_dvbt, "Hauppauge 126xxx DVBT", NULL) }, { DVB_USB_DEVICE(USB_VID_HAUPPAUGE, 0xc609, &mxl111sf_props_atsc, "Hauppauge 126xxx ATSC", NULL) }, { DVB_USB_DEVICE(USB_VID_HAUPPAUGE, 0xc60a, &mxl111sf_props_mh, "HCW 126xxx", NULL) }, { DVB_USB_DEVICE(USB_VID_HAUPPAUGE, 0xc60b, &mxl111sf_props_atsc_mh, "Hauppauge 126xxx ATSC+", NULL) }, { DVB_USB_DEVICE(USB_VID_HAUPPAUGE, 0xc60c, &mxl111sf_props_dvbt, "Hauppauge 126xxx DVBT", NULL) }, { DVB_USB_DEVICE(USB_VID_HAUPPAUGE, 0xc653, &mxl111sf_props_atsc_mh, "Hauppauge 126xxx ATSC+", NULL) }, { DVB_USB_DEVICE(USB_VID_HAUPPAUGE, 0xc65b, &mxl111sf_props_atsc_mh, "Hauppauge 126xxx ATSC+", NULL) }, { DVB_USB_DEVICE(USB_VID_HAUPPAUGE, 0xb700, &mxl111sf_props_atsc_mh, "Hauppauge 117xxx ATSC+", NULL) }, { DVB_USB_DEVICE(USB_VID_HAUPPAUGE, 0xb701, &mxl111sf_props_atsc, "Hauppauge 126xxx ATSC", NULL) }, { DVB_USB_DEVICE(USB_VID_HAUPPAUGE, 0xb702, &mxl111sf_props_mh, "HCW 117xxx", NULL) }, { DVB_USB_DEVICE(USB_VID_HAUPPAUGE, 0xb703, &mxl111sf_props_atsc_mh, "Hauppauge 117xxx ATSC+", NULL) }, { DVB_USB_DEVICE(USB_VID_HAUPPAUGE, 0xb704, &mxl111sf_props_dvbt, "Hauppauge 117xxx DVBT", NULL) }, { DVB_USB_DEVICE(USB_VID_HAUPPAUGE, 0xb753, &mxl111sf_props_atsc_mh, "Hauppauge 117xxx ATSC+", NULL) }, { DVB_USB_DEVICE(USB_VID_HAUPPAUGE, 0xb763, &mxl111sf_props_atsc_mh, "Hauppauge 117xxx ATSC+", NULL) }, { DVB_USB_DEVICE(USB_VID_HAUPPAUGE, 0xb764, &mxl111sf_props_dvbt, "Hauppauge 117xxx DVBT", NULL) }, { DVB_USB_DEVICE(USB_VID_HAUPPAUGE, 0xd853, &mxl111sf_props_mercury, "Hauppauge Mercury", NULL) }, { DVB_USB_DEVICE(USB_VID_HAUPPAUGE, 0xd854, &mxl111sf_props_dvbt, "Hauppauge 138xxx DVBT", NULL) }, { DVB_USB_DEVICE(USB_VID_HAUPPAUGE, 0xd863, &mxl111sf_props_mercury, "Hauppauge Mercury", NULL) }, { DVB_USB_DEVICE(USB_VID_HAUPPAUGE, 
0xd864, &mxl111sf_props_dvbt, "Hauppauge 138xxx DVBT", NULL) }, { DVB_USB_DEVICE(USB_VID_HAUPPAUGE, 0xd8d3, &mxl111sf_props_mercury, "Hauppauge Mercury", NULL) }, { DVB_USB_DEVICE(USB_VID_HAUPPAUGE, 0xd8d4, &mxl111sf_props_dvbt, "Hauppauge 138xxx DVBT", NULL) }, { DVB_USB_DEVICE(USB_VID_HAUPPAUGE, 0xd8e3, &mxl111sf_props_mercury, "Hauppauge Mercury", NULL) }, { DVB_USB_DEVICE(USB_VID_HAUPPAUGE, 0xd8e4, &mxl111sf_props_dvbt, "Hauppauge 138xxx DVBT", NULL) }, { DVB_USB_DEVICE(USB_VID_HAUPPAUGE, 0xd8ff, &mxl111sf_props_mercury, "Hauppauge Mercury", NULL) }, { DVB_USB_DEVICE(USB_VID_HAUPPAUGE, 0xc612, &mxl111sf_props_mercury_mh, "Hauppauge 126xxx", NULL) }, { DVB_USB_DEVICE(USB_VID_HAUPPAUGE, 0xc613, &mxl111sf_props_mercury, "Hauppauge WinTV-Aero-M", NULL) }, { DVB_USB_DEVICE(USB_VID_HAUPPAUGE, 0xc61a, &mxl111sf_props_mercury_mh, "Hauppauge 126xxx", NULL) }, { DVB_USB_DEVICE(USB_VID_HAUPPAUGE, 0xc61b, &mxl111sf_props_mercury, "Hauppauge WinTV-Aero-M", NULL) }, { DVB_USB_DEVICE(USB_VID_HAUPPAUGE, 0xb757, &mxl111sf_props_atsc_mh, "Hauppauge 117xxx ATSC+", NULL) }, { DVB_USB_DEVICE(USB_VID_HAUPPAUGE, 0xb767, &mxl111sf_props_atsc_mh, "Hauppauge 117xxx ATSC+", NULL) }, { } }; MODULE_DEVICE_TABLE(usb, mxl111sf_id_table); static struct usb_driver mxl111sf_usb_driver = { .name = KBUILD_MODNAME, .id_table = mxl111sf_id_table, .probe = dvb_usbv2_probe, .disconnect = dvb_usbv2_disconnect, .suspend = dvb_usbv2_suspend, .resume = dvb_usbv2_resume, .no_dynamic_id = 1, .soft_unbind = 1, }; module_usb_driver(mxl111sf_usb_driver); MODULE_AUTHOR("Michael Krufky <mkrufky@linuxtv.org>"); MODULE_DESCRIPTION("Driver for MaxLinear MxL111SF"); MODULE_VERSION("1.0"); MODULE_LICENSE("GPL"); |
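The mercury endpoint selection above implements the comment table via cascaded if/else chains keyed on fe->id and the dvb_usb_mxl111sf_isoc/spi module options. As a hedged sketch only, the same mapping in table form; mercury_ep and mercury_stream_config are hypothetical names, not part of the in-tree driver, while the mxl111sf_stream_config_isoc/_bulk helpers, the module options, and the DVB_USB_FE_TS_TYPE_* constants are the ones used above:

/* Hypothetical table-driven equivalent of
 * mxl111sf_get_stream_config_mercury(): one row per frontend id,
 * mirroring the "mercury" endpoint table in the comment block above.
 * Assumes the kernel types and helpers from the driver above.
 */
struct mercury_ep {
	u8 ep;			/* USB endpoint number */
	int framesperurb;	/* isoc frames per URB */
	int framesize;		/* isoc frame size */
	u8 ts_type;		/* DVB_USB_FE_TS_TYPE_* */
};

static const struct mercury_ep mercury_ep_map[] = {
	[0] = { 6, 24, 3072, DVB_USB_FE_TS_TYPE_188 }, /* lgdt3305 */
	[1] = { 4, 96,  564, DVB_USB_FE_TS_TYPE_188 }, /* mxl111sf demod */
	[2] = { 5, 96,  200, DVB_USB_FE_TS_TYPE_RAW }, /* lg2161 over SPI */
};

static int mercury_stream_config(struct dvb_frontend *fe, u8 *ts_type,
				 struct usb_data_stream_properties *stream)
{
	struct mercury_ep e;

	if (fe->id > 2)
		return 0;
	e = mercury_ep_map[fe->id];

	/* fe->id 2 without SPI falls back to EP6, as in the driver above */
	if (fe->id == 2 && !dvb_usb_mxl111sf_spi)
		e = (struct mercury_ep){ 6, 24, 3072, DVB_USB_FE_TS_TYPE_RAW };

	*ts_type = e.ts_type;
	if (dvb_usb_mxl111sf_isoc)
		mxl111sf_stream_config_isoc(stream, e.ep, e.framesperurb,
					    e.framesize);
	else
		mxl111sf_stream_config_bulk(stream, e.ep);
	return 0;
}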
/* SPDX-License-Identifier: GPL-2.0 */ #ifndef _BCACHEFS_FORMAT_H #define _BCACHEFS_FORMAT_H /* * bcachefs on disk data structures * * OVERVIEW: * * There are three main types of on disk data structures in bcachefs (this is * reduced from 5 in bcache) * * - superblock * - journal * - btree * * The btree is the primary structure; most metadata exists as keys in the * various btrees. There are only a small number of btrees, they're not * sharded - we have one btree for extents, another for inodes, et cetera.
* * SUPERBLOCK: * * The superblock contains the location of the journal, the list of devices in * the filesystem, and in general any metadata we need in order to decide * whether we can start a filesystem, or that we need prior to reading the * journal/btree roots. * * The superblock is extensible, and most of the contents of the superblock are * in variable length, type tagged fields; see struct bch_sb_field. * * Backup superblocks do not reside in a fixed location; also, superblocks do * not have a fixed size. To locate backup superblocks we have struct * bch_sb_layout; we store a copy of this inside every superblock, and also * before the first superblock. * * JOURNAL: * * The journal primarily records btree updates in the order they occurred; * journal replay consists of just iterating over all the keys in the open * journal entries and re-inserting them into the btrees. * * The journal also contains entry types for the btree roots, and blacklisted * journal sequence numbers (see journal_seq_blacklist.c). * * BTREE: * * bcachefs btrees are copy on write b+ trees, where nodes are big (typically * 128k-256k) and log structured. We use struct btree_node for writing the first * entry in a given node (offset 0), and struct btree_node_entry for all * subsequent writes. * * After the header, btree node entries contain a list of keys in sorted order. * Values are stored inline with the keys; since values are variable length (and * keys effectively are variable length too, due to packing) we can't do random * access without building up additional in memory tables in the btree node read * path. * * BTREE KEYS (struct bkey): * * The various btrees share a common format for the key - so as to avoid * switching in fastpath lookup/comparison code - but define their own * structures for the key values. * * The size of a key/value pair is stored as a u8 in units of u64s, so the max * size is just under 2k. The common part also contains a type tag for the * value, and a format field indicating whether the key is packed or not (and * also meant to allow adding new key fields in the future, if desired). * * bkeys, when stored within a btree node, may also be packed. In that case, the * bkey_format in that node is used to unpack it. Packed bkeys mean that we can * be generous with field sizes in the common part of the key format (64 bit * inode number, 64 bit offset, 96 bit version field, etc.) for negligible cost.
*/ #include <asm/types.h> #include <asm/byteorder.h> #include <linux/kernel.h> #include <linux/uuid.h> #include <uapi/linux/magic.h> #include "vstructs.h" #ifdef __KERNEL__ typedef uuid_t __uuid_t; #endif #define BITMASK(name, type, field, offset, end) \ static const __maybe_unused unsigned name##_OFFSET = offset; \ static const __maybe_unused unsigned name##_BITS = (end - offset); \ \ static inline __u64 name(const type *k) \ { \ return (k->field >> offset) & ~(~0ULL << (end - offset)); \ } \ \ static inline void SET_##name(type *k, __u64 v) \ { \ k->field &= ~(~(~0ULL << (end - offset)) << offset); \ k->field |= (v & ~(~0ULL << (end - offset))) << offset; \ } #define LE_BITMASK(_bits, name, type, field, offset, end) \ static const __maybe_unused unsigned name##_OFFSET = offset; \ static const __maybe_unused unsigned name##_BITS = (end - offset); \ static const __maybe_unused __u##_bits name##_MAX = (1ULL << (end - offset)) - 1;\ \ static inline __u64 name(const type *k) \ { \ return (__le##_bits##_to_cpu(k->field) >> offset) & \ ~(~0ULL << (end - offset)); \ } \ \ static inline void SET_##name(type *k, __u64 v) \ { \ __u##_bits new = __le##_bits##_to_cpu(k->field); \ \ new &= ~(~(~0ULL << (end - offset)) << offset); \ new |= (v & ~(~0ULL << (end - offset))) << offset; \ k->field = __cpu_to_le##_bits(new); \ } #define LE16_BITMASK(n, t, f, o, e) LE_BITMASK(16, n, t, f, o, e) #define LE32_BITMASK(n, t, f, o, e) LE_BITMASK(32, n, t, f, o, e) #define LE64_BITMASK(n, t, f, o, e) LE_BITMASK(64, n, t, f, o, e) struct bkey_format { __u8 key_u64s; __u8 nr_fields; /* One unused slot for now: */ __u8 bits_per_field[6]; __le64 field_offset[6]; }; /* Btree keys - all units are in sectors */ struct bpos { /* * Word order matches machine byte order - btree code treats a bpos as a * single large integer, for search/comparison purposes * * Note that wherever a bpos is embedded in another on disk data * structure, it has to be byte swabbed when reading in metadata that * wasn't written in native endian order: */ #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ __u32 snapshot; __u64 offset; __u64 inode; #elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ __u64 inode; __u64 offset; /* Points to end of extent - sectors */ __u32 snapshot; #else #error edit for your odd byteorder. #endif } __packed #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ __aligned(4) #endif ; #define KEY_INODE_MAX ((__u64)~0ULL) #define KEY_OFFSET_MAX ((__u64)~0ULL) #define KEY_SNAPSHOT_MAX ((__u32)~0U) #define KEY_SIZE_MAX ((__u32)~0U) static inline struct bpos SPOS(__u64 inode, __u64 offset, __u32 snapshot) { return (struct bpos) { .inode = inode, .offset = offset, .snapshot = snapshot, }; } #define POS_MIN SPOS(0, 0, 0) #define POS_MAX SPOS(KEY_INODE_MAX, KEY_OFFSET_MAX, 0) #define SPOS_MAX SPOS(KEY_INODE_MAX, KEY_OFFSET_MAX, KEY_SNAPSHOT_MAX) #define POS(_inode, _offset) SPOS(_inode, _offset, 0) /* Empty placeholder struct, for container_of() */ struct bch_val { __u64 __nothing[0]; }; struct bversion { #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ __u64 lo; __u32 hi; #elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ __u32 hi; __u64 lo; #endif } __packed #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ __aligned(4) #endif ; struct bkey { /* Size of combined key and value, in u64s */ __u8 u64s; /* Format of key (0 for format local to btree node) */ #if defined(__LITTLE_ENDIAN_BITFIELD) __u8 format:7, needs_whiteout:1; #elif defined (__BIG_ENDIAN_BITFIELD) __u8 needs_whiteout:1, format:7; #else #error edit for your odd byteorder. 
#endif /* Type of the value */ __u8 type; #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ __u8 pad[1]; struct bversion bversion; __u32 size; /* extent size, in sectors */ struct bpos p; #elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ struct bpos p; __u32 size; /* extent size, in sectors */ struct bversion bversion; __u8 pad[1]; #endif } __packed #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ /* * The big-endian version of bkey can't be compiled by rustc with the "aligned" * attr since it doesn't allow types to have both "packed" and "aligned" attrs. * So for Rust compatibility, don't include this. It can be included in the LE * version because the "packed" attr is redundant in that case. * * History: (quoting Kent) * * Specifically, when I was designing bkey, I wanted the header to be no * bigger than necessary so that bkey_packed could use the rest. That means that * decently often extent keys will fit into only 8 bytes, instead of spilling over * to 16. * * But bkey_packed treats the part after the header - the packed section - * as a single multi word, variable length integer. And bkey, the unpacked * version, is just a special case version of a bkey_packed; all the packed * bkey code will work on keys in any packed format, the in-memory * representation of an unpacked key also is just one type of packed key... * * So that constrains the key part of a big endian bkey to start right * after the header. * * If we ever do a bkey_v2 and need to expand the header by another byte for * some reason - that will clean up this wart. */ __aligned(8) #endif ; struct bkey_packed { __u64 _data[0]; /* Size of combined key and value, in u64s */ __u8 u64s; /* Format of key (0 for format local to btree node) */ /* * XXX: next incompat on disk format change, switch format and * needs_whiteout - bkey_packed() will be cheaper if format is the high * bits of the bitfield */ #if defined(__LITTLE_ENDIAN_BITFIELD) __u8 format:7, needs_whiteout:1; #elif defined (__BIG_ENDIAN_BITFIELD) __u8 needs_whiteout:1, format:7; #endif /* Type of the value */ __u8 type; __u8 key_start[0]; /* * We copy bkeys with struct assignment in various places, and while * that shouldn't be done with packed bkeys we can't disallow it in C, * and it's legal to cast a bkey to a bkey_packed - so padding it out * to the same size as struct bkey should hopefully be safest.
*/ __u8 pad[sizeof(struct bkey) - 3]; } __packed __aligned(8); typedef struct { __le64 lo; __le64 hi; } bch_le128; #define BKEY_U64s (sizeof(struct bkey) / sizeof(__u64)) #define BKEY_U64s_MAX U8_MAX #define BKEY_VAL_U64s_MAX (BKEY_U64s_MAX - BKEY_U64s) #define KEY_PACKED_BITS_START 24 #define KEY_FORMAT_LOCAL_BTREE 0 #define KEY_FORMAT_CURRENT 1 enum bch_bkey_fields { BKEY_FIELD_INODE, BKEY_FIELD_OFFSET, BKEY_FIELD_SNAPSHOT, BKEY_FIELD_SIZE, BKEY_FIELD_VERSION_HI, BKEY_FIELD_VERSION_LO, BKEY_NR_FIELDS, }; #define bkey_format_field(name, field) \ [BKEY_FIELD_##name] = (sizeof(((struct bkey *) NULL)->field) * 8) #define BKEY_FORMAT_CURRENT \ ((struct bkey_format) { \ .key_u64s = BKEY_U64s, \ .nr_fields = BKEY_NR_FIELDS, \ .bits_per_field = { \ bkey_format_field(INODE, p.inode), \ bkey_format_field(OFFSET, p.offset), \ bkey_format_field(SNAPSHOT, p.snapshot), \ bkey_format_field(SIZE, size), \ bkey_format_field(VERSION_HI, bversion.hi), \ bkey_format_field(VERSION_LO, bversion.lo), \ }, \ }) /* bkey with inline value */ struct bkey_i { __u64 _data[0]; struct bkey k; struct bch_val v; }; #define POS_KEY(_pos) \ ((struct bkey) { \ .u64s = BKEY_U64s, \ .format = KEY_FORMAT_CURRENT, \ .p = _pos, \ }) #define KEY(_inode, _offset, _size) \ ((struct bkey) { \ .u64s = BKEY_U64s, \ .format = KEY_FORMAT_CURRENT, \ .p = POS(_inode, _offset), \ .size = _size, \ }) static inline void bkey_init(struct bkey *k) { *k = KEY(0, 0, 0); } #define bkey_bytes(_k) ((_k)->u64s * sizeof(__u64)) #define __BKEY_PADDED(key, pad) \ struct bkey_i key; __u64 key ## _pad[pad] /* * - DELETED keys are used internally to mark keys that should be ignored but * override keys in composition order. Their version number is ignored. * * - DISCARDED keys indicate that the data is all 0s because it has been * discarded. DISCARDs may have a version; if the version is nonzero the key * will be persistent, otherwise the key will be dropped whenever the btree * node is rewritten (like DELETED keys). * * - ERROR: any read of the data returns a read error, as the data was lost due * to a failing device. Like DISCARDED keys, they can be removed (overridden) * by new writes or cluster-wide GC. Node repair can also overwrite them with * the same or a more recent version number, but not with an older version * number. 
* * - WHITEOUT: for hash table btrees */ #define BCH_BKEY_TYPES() \ x(deleted, 0) \ x(whiteout, 1) \ x(error, 2) \ x(cookie, 3) \ x(hash_whiteout, 4) \ x(btree_ptr, 5) \ x(extent, 6) \ x(reservation, 7) \ x(inode, 8) \ x(inode_generation, 9) \ x(dirent, 10) \ x(xattr, 11) \ x(alloc, 12) \ x(quota, 13) \ x(stripe, 14) \ x(reflink_p, 15) \ x(reflink_v, 16) \ x(inline_data, 17) \ x(btree_ptr_v2, 18) \ x(indirect_inline_data, 19) \ x(alloc_v2, 20) \ x(subvolume, 21) \ x(snapshot, 22) \ x(inode_v2, 23) \ x(alloc_v3, 24) \ x(set, 25) \ x(lru, 26) \ x(alloc_v4, 27) \ x(backpointer, 28) \ x(inode_v3, 29) \ x(bucket_gens, 30) \ x(snapshot_tree, 31) \ x(logged_op_truncate, 32) \ x(logged_op_finsert, 33) \ x(accounting, 34) \ x(inode_alloc_cursor, 35) enum bch_bkey_type { #define x(name, nr) KEY_TYPE_##name = nr, BCH_BKEY_TYPES() #undef x KEY_TYPE_MAX, }; struct bch_deleted { struct bch_val v; }; struct bch_whiteout { struct bch_val v; }; struct bch_error { struct bch_val v; }; struct bch_cookie { struct bch_val v; __le64 cookie; }; struct bch_hash_whiteout { struct bch_val v; }; struct bch_set { struct bch_val v; }; /* 128 bits, sufficient for cryptographic MACs: */ struct bch_csum { __le64 lo; __le64 hi; } __packed __aligned(8); struct bch_backpointer { struct bch_val v; __u8 btree_id; __u8 level; __u8 data_type; __u8 bucket_gen; __u32 pad; __u32 bucket_len; struct bpos pos; } __packed __aligned(8); /* Optional/variable size superblock sections: */ struct bch_sb_field { __u64 _data[0]; __le32 u64s; __le32 type; }; #define BCH_SB_FIELDS() \ x(journal, 0) \ x(members_v1, 1) \ x(crypt, 2) \ x(replicas_v0, 3) \ x(quota, 4) \ x(disk_groups, 5) \ x(clean, 6) \ x(replicas, 7) \ x(journal_seq_blacklist, 8) \ x(journal_v2, 9) \ x(counters, 10) \ x(members_v2, 11) \ x(errors, 12) \ x(ext, 13) \ x(downgrade, 14) #include "alloc_background_format.h" #include "dirent_format.h" #include "disk_accounting_format.h" #include "disk_groups_format.h" #include "extents_format.h" #include "ec_format.h" #include "inode_format.h" #include "journal_seq_blacklist_format.h" #include "logged_ops_format.h" #include "lru_format.h" #include "quota_format.h" #include "reflink_format.h" #include "replicas_format.h" #include "snapshot_format.h" #include "subvolume_format.h" #include "sb-counters_format.h" #include "sb-downgrade_format.h" #include "sb-errors_format.h" #include "sb-members_format.h" #include "xattr_format.h" enum bch_sb_field_type { #define x(f, nr) BCH_SB_FIELD_##f = nr, BCH_SB_FIELDS() #undef x BCH_SB_FIELD_NR }; /* * Most superblock fields are replicated in all devices' superblocks - a few are * not: */ #define BCH_SINGLE_DEVICE_SB_FIELDS \ ((1U << BCH_SB_FIELD_journal)| \ (1U << BCH_SB_FIELD_journal_v2)) /* BCH_SB_FIELD_journal: */ struct bch_sb_field_journal { struct bch_sb_field field; __le64 buckets[]; }; struct bch_sb_field_journal_v2 { struct bch_sb_field field; struct bch_sb_field_journal_v2_entry { __le64 start; __le64 nr; } d[]; }; /* BCH_SB_FIELD_crypt: */ struct nonce { __le32 d[4]; }; struct bch_key { __le64 key[4]; }; #define BCH_KEY_MAGIC \ (((__u64) 'b' << 0)|((__u64) 'c' << 8)| \ ((__u64) 'h' << 16)|((__u64) '*' << 24)| \ ((__u64) '*' << 32)|((__u64) 'k' << 40)| \ ((__u64) 'e' << 48)|((__u64) 'y' << 56)) struct bch_encrypted_key { __le64 magic; struct bch_key key; }; /* * If this field is present in the superblock, it stores an encryption key which * is used to encrypt all other data/metadata.
The key will normally be encrypted * with the key userspace provides, but if encryption has been turned off we'll * just store the master key unencrypted in the superblock so we can access the * previously encrypted data. */ struct bch_sb_field_crypt { struct bch_sb_field field; __le64 flags; __le64 kdf_flags; struct bch_encrypted_key key; }; LE64_BITMASK(BCH_CRYPT_KDF_TYPE, struct bch_sb_field_crypt, flags, 0, 4); enum bch_kdf_types { BCH_KDF_SCRYPT = 0, BCH_KDF_NR = 1, }; /* stored as base 2 log of scrypt params: */ LE64_BITMASK(BCH_KDF_SCRYPT_N, struct bch_sb_field_crypt, kdf_flags, 0, 16); LE64_BITMASK(BCH_KDF_SCRYPT_R, struct bch_sb_field_crypt, kdf_flags, 16, 32); LE64_BITMASK(BCH_KDF_SCRYPT_P, struct bch_sb_field_crypt, kdf_flags, 32, 48); /* * On clean shutdown, store btree roots and current journal sequence number in * the superblock: */ struct jset_entry { __le16 u64s; __u8 btree_id; __u8 level; __u8 type; /* designates what this jset holds */ __u8 pad[3]; struct bkey_i start[0]; __u64 _data[]; }; struct bch_sb_field_clean { struct bch_sb_field field; __le32 flags; __le16 _read_clock; /* no longer used */ __le16 _write_clock; __le64 journal_seq; struct jset_entry start[0]; __u64 _data[]; }; struct bch_sb_field_ext { struct bch_sb_field field; __le64 recovery_passes_required[2]; __le64 errors_silent[8]; __le64 btrees_lost_data; }; /* Superblock: */ /* * New versioning scheme: * One common version number for all on disk data structures - superblock, btree * nodes, journal entries */ #define BCH_VERSION_MAJOR(_v) ((__u16) ((_v) >> 10)) #define BCH_VERSION_MINOR(_v) ((__u16) ((_v) & ~(~0U << 10))) #define BCH_VERSION(_major, _minor) (((_major) << 10)|(_minor) << 0) /* * field 1: version name * field 2: BCH_VERSION(major, minor) * field 3: recovery passes required on upgrade */ #define BCH_METADATA_VERSIONS() \ x(bkey_renumber, BCH_VERSION(0, 10)) \ x(inode_btree_change, BCH_VERSION(0, 11)) \ x(snapshot, BCH_VERSION(0, 12)) \ x(inode_backpointers, BCH_VERSION(0, 13)) \ x(btree_ptr_sectors_written, BCH_VERSION(0, 14)) \ x(snapshot_2, BCH_VERSION(0, 15)) \ x(reflink_p_fix, BCH_VERSION(0, 16)) \ x(subvol_dirent, BCH_VERSION(0, 17)) \ x(inode_v2, BCH_VERSION(0, 18)) \ x(freespace, BCH_VERSION(0, 19)) \ x(alloc_v4, BCH_VERSION(0, 20)) \ x(new_data_types, BCH_VERSION(0, 21)) \ x(backpointers, BCH_VERSION(0, 22)) \ x(inode_v3, BCH_VERSION(0, 23)) \ x(unwritten_extents, BCH_VERSION(0, 24)) \ x(bucket_gens, BCH_VERSION(0, 25)) \ x(lru_v2, BCH_VERSION(0, 26)) \ x(fragmentation_lru, BCH_VERSION(0, 27)) \ x(no_bps_in_alloc_keys, BCH_VERSION(0, 28)) \ x(snapshot_trees, BCH_VERSION(0, 29)) \ x(major_minor, BCH_VERSION(1, 0)) \ x(snapshot_skiplists, BCH_VERSION(1, 1)) \ x(deleted_inodes, BCH_VERSION(1, 2)) \ x(rebalance_work, BCH_VERSION(1, 3)) \ x(member_seq, BCH_VERSION(1, 4)) \ x(subvolume_fs_parent, BCH_VERSION(1, 5)) \ x(btree_subvolume_children, BCH_VERSION(1, 6)) \ x(mi_btree_bitmap, BCH_VERSION(1, 7)) \ x(bucket_stripe_sectors, BCH_VERSION(1, 8)) \ x(disk_accounting_v2, BCH_VERSION(1, 9)) \ x(disk_accounting_v3, BCH_VERSION(1, 10)) \ x(disk_accounting_inum, BCH_VERSION(1, 11)) \ x(rebalance_work_acct_fix, BCH_VERSION(1, 12)) \ x(inode_has_child_snapshots, BCH_VERSION(1, 13)) \ x(backpointer_bucket_gen, BCH_VERSION(1, 14)) \ x(disk_accounting_big_endian, BCH_VERSION(1, 15)) \ x(reflink_p_may_update_opts, BCH_VERSION(1, 16)) \ x(inode_depth, BCH_VERSION(1, 17)) \ x(persistent_inode_cursors, BCH_VERSION(1, 18)) \ x(autofix_errors, BCH_VERSION(1, 19)) \ x(directory_size, BCH_VERSION(1, 20))
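The enum below is generated from BCH_METADATA_VERSIONS() via the x() macro. As a standalone sketch (plain C with assert(), not part of this header) of the BCH_VERSION() packing those entries rely on; the concrete value 1044 is derived from the macro definition above, not a constant from the source:

/* Standalone illustration of the BCH_VERSION bit layout defined above:
 * the major number occupies bits 10 and up, the minor the low 10 bits.
 */
#include <assert.h>

int main(void)
{
	unsigned v = (1u << 10) | 20;	/* BCH_VERSION(1, 20), e.g. directory_size */

	assert(v == 1044);
	assert((v >> 10) == 1);			/* BCH_VERSION_MAJOR(v) */
	assert((v & ~(~0u << 10)) == 20);	/* BCH_VERSION_MINOR(v) */
	return 0;
}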
enum bcachefs_metadata_version { bcachefs_metadata_version_min = 9, #define x(t, n) bcachefs_metadata_version_##t = n, BCH_METADATA_VERSIONS() #undef x bcachefs_metadata_version_max }; static const __maybe_unused unsigned bcachefs_metadata_required_upgrade_below = bcachefs_metadata_version_rebalance_work; #define bcachefs_metadata_version_current (bcachefs_metadata_version_max - 1) #define BCH_SB_SECTOR 8 #define BCH_SB_LAYOUT_SIZE_BITS_MAX 16 /* 32 MB */ struct bch_sb_layout { __uuid_t magic; /* bcachefs superblock UUID */ __u8 layout_type; __u8 sb_max_size_bits; /* base 2 of 512 byte sectors */ __u8 nr_superblocks; __u8 pad[5]; __le64 sb_offset[61]; } __packed __aligned(8); #define BCH_SB_LAYOUT_SECTOR 7 /* * @offset - sector where this sb was written * @version - on disk format version * @version_min - Oldest metadata version this filesystem contains; so we can * safely drop compatibility code and refuse to mount filesystems * we'd need it for * @magic - identifies as a bcachefs superblock (BCHFS_MAGIC) * @uuid - used for generating various magic numbers and identifying * member devices, never changes * @user_uuid - user visible UUID, may be changed * @label - filesystem label * @seq - identifies most recent superblock, incremented each time * superblock is written * @features - enabled incompatible features */ struct bch_sb { struct bch_csum csum; __le16 version; __le16 version_min; __le16 pad[2]; __uuid_t magic; __uuid_t uuid; __uuid_t user_uuid; __u8 label[BCH_SB_LABEL_SIZE]; __le64 offset; __le64 seq; __le16 block_size; __u8 dev_idx; __u8 nr_devices; __le32 u64s; __le64 time_base_lo; __le32 time_base_hi; __le32 time_precision; __le64 flags[7]; __le64 write_time; __le64 features[2]; __le64 compat[2]; struct bch_sb_layout layout; struct bch_sb_field start[0]; __le64 _data[]; } __packed __aligned(8); /* * Flags: * BCH_SB_INITIALIZED - set on first mount * BCH_SB_CLEAN - did we shut down cleanly? Just a hint, doesn't affect * behaviour of mount/recovery path: * BCH_SB_INODE_32BIT - limit inode numbers to 32 bits * BCH_SB_128_BIT_MACS - 128 bit macs instead of 80 * BCH_SB_ENCRYPTION_TYPE - if nonzero encryption is enabled; overrides * DATA/META_CSUM_TYPE.
Also indicates encryption * algorithm in use, if/when we get more than one */ LE16_BITMASK(BCH_SB_BLOCK_SIZE, struct bch_sb, block_size, 0, 16); LE64_BITMASK(BCH_SB_INITIALIZED, struct bch_sb, flags[0], 0, 1); LE64_BITMASK(BCH_SB_CLEAN, struct bch_sb, flags[0], 1, 2); LE64_BITMASK(BCH_SB_CSUM_TYPE, struct bch_sb, flags[0], 2, 8); LE64_BITMASK(BCH_SB_ERROR_ACTION, struct bch_sb, flags[0], 8, 12); LE64_BITMASK(BCH_SB_BTREE_NODE_SIZE, struct bch_sb, flags[0], 12, 28); LE64_BITMASK(BCH_SB_GC_RESERVE, struct bch_sb, flags[0], 28, 33); LE64_BITMASK(BCH_SB_ROOT_RESERVE, struct bch_sb, flags[0], 33, 40); LE64_BITMASK(BCH_SB_META_CSUM_TYPE, struct bch_sb, flags[0], 40, 44); LE64_BITMASK(BCH_SB_DATA_CSUM_TYPE, struct bch_sb, flags[0], 44, 48); LE64_BITMASK(BCH_SB_META_REPLICAS_WANT, struct bch_sb, flags[0], 48, 52); LE64_BITMASK(BCH_SB_DATA_REPLICAS_WANT, struct bch_sb, flags[0], 52, 56); LE64_BITMASK(BCH_SB_POSIX_ACL, struct bch_sb, flags[0], 56, 57); LE64_BITMASK(BCH_SB_USRQUOTA, struct bch_sb, flags[0], 57, 58); LE64_BITMASK(BCH_SB_GRPQUOTA, struct bch_sb, flags[0], 58, 59); LE64_BITMASK(BCH_SB_PRJQUOTA, struct bch_sb, flags[0], 59, 60); LE64_BITMASK(BCH_SB_HAS_ERRORS, struct bch_sb, flags[0], 60, 61); LE64_BITMASK(BCH_SB_HAS_TOPOLOGY_ERRORS,struct bch_sb, flags[0], 61, 62); LE64_BITMASK(BCH_SB_BIG_ENDIAN, struct bch_sb, flags[0], 62, 63); LE64_BITMASK(BCH_SB_PROMOTE_WHOLE_EXTENTS, struct bch_sb, flags[0], 63, 64); LE64_BITMASK(BCH_SB_STR_HASH_TYPE, struct bch_sb, flags[1], 0, 4); LE64_BITMASK(BCH_SB_COMPRESSION_TYPE_LO,struct bch_sb, flags[1], 4, 8); LE64_BITMASK(BCH_SB_INODE_32BIT, struct bch_sb, flags[1], 8, 9); LE64_BITMASK(BCH_SB_128_BIT_MACS, struct bch_sb, flags[1], 9, 10); LE64_BITMASK(BCH_SB_ENCRYPTION_TYPE, struct bch_sb, flags[1], 10, 14); /* * Max size of an extent that may require bouncing to read or write * (checksummed, compressed): 64k */ LE64_BITMASK(BCH_SB_ENCODED_EXTENT_MAX_BITS, struct bch_sb, flags[1], 14, 20); LE64_BITMASK(BCH_SB_META_REPLICAS_REQ, struct bch_sb, flags[1], 20, 24); LE64_BITMASK(BCH_SB_DATA_REPLICAS_REQ, struct bch_sb, flags[1], 24, 28); LE64_BITMASK(BCH_SB_PROMOTE_TARGET, struct bch_sb, flags[1], 28, 40); LE64_BITMASK(BCH_SB_FOREGROUND_TARGET, struct bch_sb, flags[1], 40, 52); LE64_BITMASK(BCH_SB_BACKGROUND_TARGET, struct bch_sb, flags[1], 52, 64); LE64_BITMASK(BCH_SB_BACKGROUND_COMPRESSION_TYPE_LO, struct bch_sb, flags[2], 0, 4); LE64_BITMASK(BCH_SB_GC_RESERVE_BYTES, struct bch_sb, flags[2], 4, 64); LE64_BITMASK(BCH_SB_ERASURE_CODE, struct bch_sb, flags[3], 0, 16); LE64_BITMASK(BCH_SB_METADATA_TARGET, struct bch_sb, flags[3], 16, 28); LE64_BITMASK(BCH_SB_SHARD_INUMS, struct bch_sb, flags[3], 28, 29); LE64_BITMASK(BCH_SB_INODES_USE_KEY_CACHE,struct bch_sb, flags[3], 29, 30); LE64_BITMASK(BCH_SB_JOURNAL_FLUSH_DELAY,struct bch_sb, flags[3], 30, 62); LE64_BITMASK(BCH_SB_JOURNAL_FLUSH_DISABLED,struct bch_sb, flags[3], 62, 63); LE64_BITMASK(BCH_SB_JOURNAL_RECLAIM_DELAY,struct bch_sb, flags[4], 0, 32); LE64_BITMASK(BCH_SB_JOURNAL_TRANSACTION_NAMES,struct bch_sb, flags[4], 32, 33); LE64_BITMASK(BCH_SB_NOCOW, struct bch_sb, flags[4], 33, 34); LE64_BITMASK(BCH_SB_WRITE_BUFFER_SIZE, struct bch_sb, flags[4], 34, 54); LE64_BITMASK(BCH_SB_VERSION_UPGRADE, struct bch_sb, flags[4], 54, 56); LE64_BITMASK(BCH_SB_COMPRESSION_TYPE_HI,struct bch_sb, flags[4], 56, 60); LE64_BITMASK(BCH_SB_BACKGROUND_COMPRESSION_TYPE_HI, struct bch_sb, flags[4], 60, 64); LE64_BITMASK(BCH_SB_VERSION_UPGRADE_COMPLETE, struct bch_sb, flags[5], 0, 16); LE64_BITMASK(BCH_SB_ALLOCATOR_STUCK_TIMEOUT, 
struct bch_sb, flags[5], 16, 32); LE64_BITMASK(BCH_SB_VERSION_INCOMPAT, struct bch_sb, flags[5], 32, 48); LE64_BITMASK(BCH_SB_VERSION_INCOMPAT_ALLOWED, struct bch_sb, flags[5], 48, 64); LE64_BITMASK(BCH_SB_SHARD_INUMS_NBITS, struct bch_sb, flags[6], 0, 4); static inline __u64 BCH_SB_COMPRESSION_TYPE(const struct bch_sb *sb) { return BCH_SB_COMPRESSION_TYPE_LO(sb) | (BCH_SB_COMPRESSION_TYPE_HI(sb) << 4); } static inline void SET_BCH_SB_COMPRESSION_TYPE(struct bch_sb *sb, __u64 v) { SET_BCH_SB_COMPRESSION_TYPE_LO(sb, v); SET_BCH_SB_COMPRESSION_TYPE_HI(sb, v >> 4); } static inline __u64 BCH_SB_BACKGROUND_COMPRESSION_TYPE(const struct bch_sb *sb) { return BCH_SB_BACKGROUND_COMPRESSION_TYPE_LO(sb) | (BCH_SB_BACKGROUND_COMPRESSION_TYPE_HI(sb) << 4); } static inline void SET_BCH_SB_BACKGROUND_COMPRESSION_TYPE(struct bch_sb *sb, __u64 v) { SET_BCH_SB_BACKGROUND_COMPRESSION_TYPE_LO(sb, v); SET_BCH_SB_BACKGROUND_COMPRESSION_TYPE_HI(sb, v >> 4); } /* * Features: * * journal_seq_blacklist_v3: gates BCH_SB_FIELD_journal_seq_blacklist * reflink: gates KEY_TYPE_reflink * inline_data: gates KEY_TYPE_inline_data * new_siphash: gates BCH_STR_HASH_siphash * new_extent_overwrite: gates BTREE_NODE_NEW_EXTENT_OVERWRITE */ #define BCH_SB_FEATURES() \ x(lz4, 0) \ x(gzip, 1) \ x(zstd, 2) \ x(atomic_nlink, 3) \ x(ec, 4) \ x(journal_seq_blacklist_v3, 5) \ x(reflink, 6) \ x(new_siphash, 7) \ x(inline_data, 8) \ x(new_extent_overwrite, 9) \ x(incompressible, 10) \ x(btree_ptr_v2, 11) \ x(extents_above_btree_updates, 12) \ x(btree_updates_journalled, 13) \ x(reflink_inline_data, 14) \ x(new_varint, 15) \ x(journal_no_flush, 16) \ x(alloc_v2, 17) \ x(extents_across_btree_nodes, 18) \ x(incompat_version_field, 19) #define BCH_SB_FEATURES_ALWAYS \ (BIT_ULL(BCH_FEATURE_new_extent_overwrite)| \ BIT_ULL(BCH_FEATURE_extents_above_btree_updates)|\ BIT_ULL(BCH_FEATURE_btree_updates_journalled)|\ BIT_ULL(BCH_FEATURE_alloc_v2)|\ BIT_ULL(BCH_FEATURE_extents_across_btree_nodes)) #define BCH_SB_FEATURES_ALL \ (BCH_SB_FEATURES_ALWAYS| \ BIT_ULL(BCH_FEATURE_new_siphash)| \ BIT_ULL(BCH_FEATURE_btree_ptr_v2)| \ BIT_ULL(BCH_FEATURE_new_varint)| \ BIT_ULL(BCH_FEATURE_journal_no_flush)) enum bch_sb_feature { #define x(f, n) BCH_FEATURE_##f, BCH_SB_FEATURES() #undef x BCH_FEATURE_NR, }; #define BCH_SB_COMPAT() \ x(alloc_info, 0) \ x(alloc_metadata, 1) \ x(extents_above_btree_updates_done, 2) \ x(bformat_overflow_done, 3) enum bch_sb_compat { #define x(f, n) BCH_COMPAT_##f, BCH_SB_COMPAT() #undef x BCH_COMPAT_NR, }; /* options: */ #define BCH_VERSION_UPGRADE_OPTS() \ x(compatible, 0) \ x(incompatible, 1) \ x(none, 2) enum bch_version_upgrade_opts { #define x(t, n) BCH_VERSION_UPGRADE_##t = n, BCH_VERSION_UPGRADE_OPTS() #undef x }; #define BCH_REPLICAS_MAX 4U #define BCH_BKEY_PTRS_MAX 16U #define BCH_ERROR_ACTIONS() \ x(continue, 0) \ x(fix_safe, 1) \ x(panic, 2) \ x(ro, 3) enum bch_error_actions { #define x(t, n) BCH_ON_ERROR_##t = n, BCH_ERROR_ACTIONS() #undef x BCH_ON_ERROR_NR }; #define BCH_STR_HASH_TYPES() \ x(crc32c, 0) \ x(crc64, 1) \ x(siphash_old, 2) \ x(siphash, 3) enum bch_str_hash_type { #define x(t, n) BCH_STR_HASH_##t = n, BCH_STR_HASH_TYPES() #undef x BCH_STR_HASH_NR }; #define BCH_STR_HASH_OPTS() \ x(crc32c, 0) \ x(crc64, 1) \ x(siphash, 2) enum bch_str_hash_opts { #define x(t, n) BCH_STR_HASH_OPT_##t = n, BCH_STR_HASH_OPTS() #undef x BCH_STR_HASH_OPT_NR }; #define BCH_CSUM_TYPES() \ x(none, 0) \ x(crc32c_nonzero, 1) \ x(crc64_nonzero, 2) \ x(chacha20_poly1305_80, 3) \ x(chacha20_poly1305_128, 4) \ x(crc32c, 5) \ x(crc64, 6) \ 
x(xxhash, 7) enum bch_csum_type { #define x(t, n) BCH_CSUM_##t = n, BCH_CSUM_TYPES() #undef x BCH_CSUM_NR }; static const __maybe_unused unsigned bch_crc_bytes[] = { [BCH_CSUM_none] = 0, [BCH_CSUM_crc32c_nonzero] = 4, [BCH_CSUM_crc32c] = 4, [BCH_CSUM_crc64_nonzero] = 8, [BCH_CSUM_crc64] = 8, [BCH_CSUM_xxhash] = 8, [BCH_CSUM_chacha20_poly1305_80] = 10, [BCH_CSUM_chacha20_poly1305_128] = 16, }; static inline _Bool bch2_csum_type_is_encryption(enum bch_csum_type type) { switch (type) { case BCH_CSUM_chacha20_poly1305_80: case BCH_CSUM_chacha20_poly1305_128: return true; default: return false; } } #define BCH_CSUM_OPTS() \ x(none, 0) \ x(crc32c, 1) \ x(crc64, 2) \ x(xxhash, 3) enum bch_csum_opt { #define x(t, n) BCH_CSUM_OPT_##t = n, BCH_CSUM_OPTS() #undef x BCH_CSUM_OPT_NR }; #define BCH_COMPRESSION_TYPES() \ x(none, 0) \ x(lz4_old, 1) \ x(gzip, 2) \ x(lz4, 3) \ x(zstd, 4) \ x(incompressible, 5) enum bch_compression_type { #define x(t, n) BCH_COMPRESSION_TYPE_##t = n, BCH_COMPRESSION_TYPES() #undef x BCH_COMPRESSION_TYPE_NR }; #define BCH_COMPRESSION_OPTS() \ x(none, 0) \ x(lz4, 1) \ x(gzip, 2) \ x(zstd, 3) enum bch_compression_opts { #define x(t, n) BCH_COMPRESSION_OPT_##t = n, BCH_COMPRESSION_OPTS() #undef x BCH_COMPRESSION_OPT_NR }; /* * Magic numbers * * The various other data structures have their own magic numbers, which are * xored with the first part of the cache set's UUID */ #define BCACHE_MAGIC \ UUID_INIT(0xc68573f6, 0x4e1a, 0x45ca, \ 0x82, 0x65, 0xf5, 0x7f, 0x48, 0xba, 0x6d, 0x81) #define BCHFS_MAGIC \ UUID_INIT(0xc68573f6, 0x66ce, 0x90a9, \ 0xd9, 0x6a, 0x60, 0xcf, 0x80, 0x3d, 0xf7, 0xef) #define BCACHEFS_STATFS_MAGIC BCACHEFS_SUPER_MAGIC #define JSET_MAGIC __cpu_to_le64(0x245235c1a3625032ULL) #define BSET_MAGIC __cpu_to_le64(0x90135c78b99e07f5ULL) static inline __le64 __bch2_sb_magic(struct bch_sb *sb) { __le64 ret; memcpy(&ret, &sb->uuid, sizeof(ret)); return ret; } static inline __u64 __jset_magic(struct bch_sb *sb) { return __le64_to_cpu(__bch2_sb_magic(sb) ^ JSET_MAGIC); } static inline __u64 __bset_magic(struct bch_sb *sb) { return __le64_to_cpu(__bch2_sb_magic(sb) ^ BSET_MAGIC); } /* Journal */ #define JSET_KEYS_U64s (sizeof(struct jset_entry) / sizeof(__u64)) #define BCH_JSET_ENTRY_TYPES() \ x(btree_keys, 0) \ x(btree_root, 1) \ x(prio_ptrs, 2) \ x(blacklist, 3) \ x(blacklist_v2, 4) \ x(usage, 5) \ x(data_usage, 6) \ x(clock, 7) \ x(dev_usage, 8) \ x(log, 9) \ x(overwrite, 10) \ x(write_buffer_keys, 11) \ x(datetime, 12) enum bch_jset_entry_type { #define x(f, nr) BCH_JSET_ENTRY_##f = nr, BCH_JSET_ENTRY_TYPES() #undef x BCH_JSET_ENTRY_NR }; static inline bool jset_entry_is_key(struct jset_entry *e) { switch (e->type) { case BCH_JSET_ENTRY_btree_keys: case BCH_JSET_ENTRY_btree_root: case BCH_JSET_ENTRY_write_buffer_keys: return true; } return false; } /* * Journal sequence numbers can be blacklisted: bsets record the max sequence * number of all the journal entries they contain updates for, so that on * recovery we can ignore those bsets that contain index updates newer than what * made it into the journal. * * This means that we can't reuse that journal_seq - we have to skip it, and * then record that we skipped it so that the next time we crash and recover we * don't think there was a missing journal entry.
*/ struct jset_entry_blacklist { struct jset_entry entry; __le64 seq; }; struct jset_entry_blacklist_v2 { struct jset_entry entry; __le64 start; __le64 end; }; #define BCH_FS_USAGE_TYPES() \ x(reserved, 0) \ x(inodes, 1) \ x(key_version, 2) enum bch_fs_usage_type { #define x(f, nr) BCH_FS_USAGE_##f = nr, BCH_FS_USAGE_TYPES() #undef x BCH_FS_USAGE_NR }; struct jset_entry_usage { struct jset_entry entry; __le64 v; } __packed; struct jset_entry_data_usage { struct jset_entry entry; __le64 v; struct bch_replicas_entry_v1 r; } __packed; struct jset_entry_clock { struct jset_entry entry; __u8 rw; __u8 pad[7]; __le64 time; } __packed; struct jset_entry_dev_usage_type { __le64 buckets; __le64 sectors; __le64 fragmented; } __packed; struct jset_entry_dev_usage { struct jset_entry entry; __le32 dev; __u32 pad; __le64 _buckets_ec; /* No longer used */ __le64 _buckets_unavailable; /* No longer used */ struct jset_entry_dev_usage_type d[]; }; static inline unsigned jset_entry_dev_usage_nr_types(struct jset_entry_dev_usage *u) { return (vstruct_bytes(&u->entry) - sizeof(struct jset_entry_dev_usage)) / sizeof(struct jset_entry_dev_usage_type); } struct jset_entry_log { struct jset_entry entry; u8 d[]; } __packed __aligned(8); static inline unsigned jset_entry_log_msg_bytes(struct jset_entry_log *l) { unsigned b = vstruct_bytes(&l->entry) - offsetof(struct jset_entry_log, d); while (b && !l->d[b - 1]) --b; return b; } struct jset_entry_datetime { struct jset_entry entry; __le64 seconds; } __packed __aligned(8); /* * On disk format for a journal entry: * seq is monotonically increasing; every journal entry has its own unique * sequence number. * * last_seq is the oldest journal entry that still has keys the btree hasn't * flushed to disk yet. * * version is for on disk format changes. 
*/ struct jset { struct bch_csum csum; __le64 magic; __le64 seq; __le32 version; __le32 flags; __le32 u64s; /* size of d[] in u64s */ __u8 encrypted_start[0]; __le16 _read_clock; /* no longer used */ __le16 _write_clock; /* Sequence number of oldest dirty journal entry */ __le64 last_seq; struct jset_entry start[0]; __u64 _data[]; } __packed __aligned(8); LE32_BITMASK(JSET_CSUM_TYPE, struct jset, flags, 0, 4); LE32_BITMASK(JSET_BIG_ENDIAN, struct jset, flags, 4, 5); LE32_BITMASK(JSET_NO_FLUSH, struct jset, flags, 5, 6); #define BCH_JOURNAL_BUCKETS_MIN 8 /* Btree: */ enum btree_id_flags { BTREE_IS_extents = BIT(0), BTREE_IS_snapshots = BIT(1), BTREE_IS_snapshot_field = BIT(2), BTREE_IS_data = BIT(3), BTREE_IS_write_buffer = BIT(4), }; #define BCH_BTREE_IDS() \ x(extents, 0, \ BTREE_IS_extents| \ BTREE_IS_snapshots| \ BTREE_IS_data, \ BIT_ULL(KEY_TYPE_whiteout)| \ BIT_ULL(KEY_TYPE_error)| \ BIT_ULL(KEY_TYPE_cookie)| \ BIT_ULL(KEY_TYPE_extent)| \ BIT_ULL(KEY_TYPE_reservation)| \ BIT_ULL(KEY_TYPE_reflink_p)| \ BIT_ULL(KEY_TYPE_inline_data)) \ x(inodes, 1, \ BTREE_IS_snapshots, \ BIT_ULL(KEY_TYPE_whiteout)| \ BIT_ULL(KEY_TYPE_inode)| \ BIT_ULL(KEY_TYPE_inode_v2)| \ BIT_ULL(KEY_TYPE_inode_v3)| \ BIT_ULL(KEY_TYPE_inode_generation)) \ x(dirents, 2, \ BTREE_IS_snapshots, \ BIT_ULL(KEY_TYPE_whiteout)| \ BIT_ULL(KEY_TYPE_hash_whiteout)| \ BIT_ULL(KEY_TYPE_dirent)) \ x(xattrs, 3, \ BTREE_IS_snapshots, \ BIT_ULL(KEY_TYPE_whiteout)| \ BIT_ULL(KEY_TYPE_cookie)| \ BIT_ULL(KEY_TYPE_hash_whiteout)| \ BIT_ULL(KEY_TYPE_xattr)) \ x(alloc, 4, 0, \ BIT_ULL(KEY_TYPE_alloc)| \ BIT_ULL(KEY_TYPE_alloc_v2)| \ BIT_ULL(KEY_TYPE_alloc_v3)| \ BIT_ULL(KEY_TYPE_alloc_v4)) \ x(quotas, 5, 0, \ BIT_ULL(KEY_TYPE_quota)) \ x(stripes, 6, 0, \ BIT_ULL(KEY_TYPE_stripe)) \ x(reflink, 7, \ BTREE_IS_extents| \ BTREE_IS_data, \ BIT_ULL(KEY_TYPE_reflink_v)| \ BIT_ULL(KEY_TYPE_indirect_inline_data)| \ BIT_ULL(KEY_TYPE_error)) \ x(subvolumes, 8, 0, \ BIT_ULL(KEY_TYPE_subvolume)) \ x(snapshots, 9, 0, \ BIT_ULL(KEY_TYPE_snapshot)) \ x(lru, 10, \ BTREE_IS_write_buffer, \ BIT_ULL(KEY_TYPE_set)) \ x(freespace, 11, \ BTREE_IS_extents, \ BIT_ULL(KEY_TYPE_set)) \ x(need_discard, 12, 0, \ BIT_ULL(KEY_TYPE_set)) \ x(backpointers, 13, \ BTREE_IS_write_buffer, \ BIT_ULL(KEY_TYPE_backpointer)) \ x(bucket_gens, 14, 0, \ BIT_ULL(KEY_TYPE_bucket_gens)) \ x(snapshot_trees, 15, 0, \ BIT_ULL(KEY_TYPE_snapshot_tree)) \ x(deleted_inodes, 16, \ BTREE_IS_snapshot_field| \ BTREE_IS_write_buffer, \ BIT_ULL(KEY_TYPE_set)) \ x(logged_ops, 17, 0, \ BIT_ULL(KEY_TYPE_logged_op_truncate)| \ BIT_ULL(KEY_TYPE_logged_op_finsert)| \ BIT_ULL(KEY_TYPE_inode_alloc_cursor)) \ x(rebalance_work, 18, \ BTREE_IS_snapshot_field| \ BTREE_IS_write_buffer, \ BIT_ULL(KEY_TYPE_set)|BIT_ULL(KEY_TYPE_cookie)) \ x(subvolume_children, 19, 0, \ BIT_ULL(KEY_TYPE_set)) \ x(accounting, 20, \ BTREE_IS_snapshot_field| \ BTREE_IS_write_buffer, \ BIT_ULL(KEY_TYPE_accounting)) \ enum btree_id { #define x(name, nr, ...) 
BTREE_ID_##name = nr, BCH_BTREE_IDS() #undef x BTREE_ID_NR }; /* * Maximum number of btrees that we will _ever_ have under the current scheme, * where we refer to them with 64 bit bitfields - and we also need a bit for * the interior btree node type: */ #define BTREE_ID_NR_MAX 63 static inline bool btree_id_is_alloc(enum btree_id id) { switch (id) { case BTREE_ID_alloc: case BTREE_ID_backpointers: case BTREE_ID_need_discard: case BTREE_ID_freespace: case BTREE_ID_bucket_gens: case BTREE_ID_lru: case BTREE_ID_accounting: return true; default: return false; } } #define BTREE_MAX_DEPTH 4U /* Btree nodes */ /* * Btree nodes * * On disk a btree node is a list/log of these; within each set the keys are * sorted */ struct bset { __le64 seq; /* * Highest journal entry this bset contains keys for. * If on recovery we don't see that journal entry, this bset is ignored: * this allows us to preserve the order of all index updates after a * crash, since the journal records a total order of all index updates * and anything that didn't make it to the journal doesn't get used. */ __le64 journal_seq; __le32 flags; __le16 version; __le16 u64s; /* count of d[] in u64s */ struct bkey_packed start[0]; __u64 _data[]; } __packed __aligned(8); LE32_BITMASK(BSET_CSUM_TYPE, struct bset, flags, 0, 4); LE32_BITMASK(BSET_BIG_ENDIAN, struct bset, flags, 4, 5); LE32_BITMASK(BSET_SEPARATE_WHITEOUTS, struct bset, flags, 5, 6); /* Sector offset within the btree node: */ LE32_BITMASK(BSET_OFFSET, struct bset, flags, 16, 32); struct btree_node { struct bch_csum csum; __le64 magic; /* this flags field is encrypted, unlike bset->flags: */ __le64 flags; /* Closed interval: */ struct bpos min_key; struct bpos max_key; struct bch_extent_ptr _ptr; /* not used anymore */ struct bkey_format format; union { struct bset keys; struct { __u8 pad[22]; __le16 u64s; __u64 _data[0]; }; }; } __packed __aligned(8); LE64_BITMASK(BTREE_NODE_ID_LO, struct btree_node, flags, 0, 4); LE64_BITMASK(BTREE_NODE_LEVEL, struct btree_node, flags, 4, 8); LE64_BITMASK(BTREE_NODE_NEW_EXTENT_OVERWRITE, struct btree_node, flags, 8, 9); LE64_BITMASK(BTREE_NODE_ID_HI, struct btree_node, flags, 9, 25); /* 25-32 unused */ LE64_BITMASK(BTREE_NODE_SEQ, struct btree_node, flags, 32, 64); static inline __u64 BTREE_NODE_ID(struct btree_node *n) { return BTREE_NODE_ID_LO(n) | (BTREE_NODE_ID_HI(n) << 4); } static inline void SET_BTREE_NODE_ID(struct btree_node *n, __u64 v) { SET_BTREE_NODE_ID_LO(n, v); SET_BTREE_NODE_ID_HI(n, v >> 4); } struct btree_node_entry { struct bch_csum csum; union { struct bset keys; struct { __u8 pad[22]; __le16 u64s; __u64 _data[0]; }; }; } __packed __aligned(8); #endif /* _BCACHEFS_FORMAT_H */ |
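A brief usage sketch for the accessors generated by the LE_BITMASK() macros throughout the header above; mark_jset_no_flush() is a hypothetical helper, while JSET_NO_FLUSH()/SET_JSET_NO_FLUSH() are exactly the pair produced by the LE32_BITMASK(JSET_NO_FLUSH, struct jset, flags, 5, 6) line earlier:

/* Each LE_BITMASK() invocation expands into a get/set pair that
 * byte-swaps the little-endian field, extracts bits [offset, end),
 * and on set writes back without disturbing neighbouring bitfields.
 */
static void mark_jset_no_flush(struct jset *j)
{
	if (!JSET_NO_FLUSH(j))		/* reads bit 5 of the le32 flags word */
		SET_JSET_NO_FLUSH(j, 1);	/* sets it, preserving other flags */
}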
// SPDX-License-Identifier: GPL-2.0-or-later /* * HID driver for Waltop devices not fully compliant with HID standard * * Copyright (c) 2010 Nikolai Kondrashov */ #include <linux/device.h> #include <linux/hid.h> #include <linux/module.h> #include "hid-ids.h" /* * There exists an official driver on the manufacturer's website, which * wasn't submitted to the kernel, for some reason. The official driver * doesn't seem to support extra features of some tablets, like wheels. * * It shows that the feature report ID 2 could be used to control any waltop * tablet input mode, switching it between "default", "tablet" and "ink".
* * This driver only uses "default" mode for all the supported tablets. This * mode tries to be HID-compatible (not very successfully), but cripples the * resolution of some tablets. * * The "tablet" mode uses some proprietary, yet decipherable protocol, which * represents the correct resolution, but is possibly HID-incompatible (i.e. * indescribable by a report descriptor). * * The purpose of the "ink" mode is unknown. * * The feature reports needed for switching to each mode are these: * * 02 16 00 default * 02 16 01 tablet * 02 16 02 ink */ /* Size of the original report descriptor of Slim Tablet 5.8 inch */ #define SLIM_TABLET_5_8_INCH_RDESC_ORIG_SIZE 222 /* Fixed Slim Tablet 5.8 inch descriptor */ static const __u8 slim_tablet_5_8_inch_rdesc_fixed[] = { 0x05, 0x0D, /* Usage Page (Digitizer), */ 0x09, 0x02, /* Usage (Pen), */ 0xA1, 0x01, /* Collection (Application), */ 0x85, 0x10, /* Report ID (16), */ 0x09, 0x20, /* Usage (Stylus), */ 0xA0, /* Collection (Physical), */ 0x09, 0x42, /* Usage (Tip Switch), */ 0x09, 0x44, /* Usage (Barrel Switch), */ 0x09, 0x46, /* Usage (Tablet Pick), */ 0x15, 0x01, /* Logical Minimum (1), */ 0x25, 0x03, /* Logical Maximum (3), */ 0x75, 0x04, /* Report Size (4), */ 0x95, 0x01, /* Report Count (1), */ 0x80, /* Input, */ 0x09, 0x32, /* Usage (In Range), */ 0x14, /* Logical Minimum (0), */ 0x25, 0x01, /* Logical Maximum (1), */ 0x75, 0x01, /* Report Size (1), */ 0x95, 0x01, /* Report Count (1), */ 0x81, 0x02, /* Input (Variable), */ 0x95, 0x03, /* Report Count (3), */ 0x81, 0x03, /* Input (Constant, Variable), */ 0x75, 0x10, /* Report Size (16), */ 0x95, 0x01, /* Report Count (1), */ 0x14, /* Logical Minimum (0), */ 0xA4, /* Push, */ 0x05, 0x01, /* Usage Page (Desktop), */ 0x65, 0x13, /* Unit (Inch), */ 0x55, 0xFD, /* Unit Exponent (-3), */ 0x34, /* Physical Minimum (0), */ 0x09, 0x30, /* Usage (X), */ 0x46, 0x88, 0x13, /* Physical Maximum (5000), */ 0x26, 0x10, 0x27, /* Logical Maximum (10000), */ 0x81, 0x02, /* Input (Variable), */ 0x09, 0x31, /* Usage (Y), */ 0x46, 0xB8, 0x0B, /* Physical Maximum (3000), */ 0x26, 0x70, 0x17, /* Logical Maximum (6000), */ 0x81, 0x02, /* Input (Variable), */ 0xB4, /* Pop, */ 0x09, 0x30, /* Usage (Tip Pressure), */ 0x26, 0xFF, 0x03, /* Logical Maximum (1023), */ 0x81, 0x02, /* Input (Variable), */ 0xC0, /* End Collection, */ 0xC0 /* End Collection */ }; /* Size of the original report descriptor of Slim Tablet 12.1 inch */ #define SLIM_TABLET_12_1_INCH_RDESC_ORIG_SIZE 269 /* Fixed Slim Tablet 12.1 inch descriptor */ static const __u8 slim_tablet_12_1_inch_rdesc_fixed[] = { 0x05, 0x0D, /* Usage Page (Digitizer), */ 0x09, 0x02, /* Usage (Pen), */ 0xA1, 0x01, /* Collection (Application), */ 0x85, 0x10, /* Report ID (16), */ 0x09, 0x20, /* Usage (Stylus), */ 0xA0, /* Collection (Physical), */ 0x09, 0x42, /* Usage (Tip Switch), */ 0x09, 0x44, /* Usage (Barrel Switch), */ 0x09, 0x46, /* Usage (Tablet Pick), */ 0x15, 0x01, /* Logical Minimum (1), */ 0x25, 0x03, /* Logical Maximum (3), */ 0x75, 0x04, /* Report Size (4), */ 0x95, 0x01, /* Report Count (1), */ 0x80, /* Input, */ 0x09, 0x32, /* Usage (In Range), */ 0x14, /* Logical Minimum (0), */ 0x25, 0x01, /* Logical Maximum (1), */ 0x75, 0x01, /* Report Size (1), */ 0x95, 0x01, /* Report Count (1), */ 0x81, 0x02, /* Input (Variable), */ 0x95, 0x03, /* Report Count (3), */ 0x81, 0x03, /* Input (Constant, Variable), */ 0x75, 0x10, /* Report Size (16), */ 0x95, 0x01, /* Report Count (1), */ 0x14, /* Logical Minimum (0), */ 0xA4, /* Push, */ 0x05, 0x01, /* Usage Page (Desktop), */ 
0x65, 0x13, /* Unit (Inch), */ 0x55, 0xFD, /* Unit Exponent (-3), */ 0x34, /* Physical Minimum (0), */ 0x09, 0x30, /* Usage (X), */ 0x46, 0x10, 0x27, /* Physical Maximum (10000), */ 0x26, 0x20, 0x4E, /* Logical Maximum (20000), */ 0x81, 0x02, /* Input (Variable), */ 0x09, 0x31, /* Usage (Y), */ 0x46, 0x6A, 0x18, /* Physical Maximum (6250), */ 0x26, 0xD4, 0x30, /* Logical Maximum (12500), */ 0x81, 0x02, /* Input (Variable), */ 0xB4, /* Pop, */ 0x09, 0x30, /* Usage (Tip Pressure), */ 0x26, 0xFF, 0x03, /* Logical Maximum (1023), */ 0x81, 0x02, /* Input (Variable), */ 0xC0, /* End Collection, */ 0xC0 /* End Collection */ }; /* Size of the original report descriptor of Q Pad */ #define Q_PAD_RDESC_ORIG_SIZE 241 /* Fixed Q Pad descriptor */ static const __u8 q_pad_rdesc_fixed[] = { 0x05, 0x0D, /* Usage Page (Digitizer), */ 0x09, 0x02, /* Usage (Pen), */ 0xA1, 0x01, /* Collection (Application), */ 0x85, 0x10, /* Report ID (16), */ 0x09, 0x20, /* Usage (Stylus), */ 0xA0, /* Collection (Physical), */ 0x09, 0x42, /* Usage (Tip Switch), */ 0x09, 0x44, /* Usage (Barrel Switch), */ 0x09, 0x46, /* Usage (Tablet Pick), */ 0x15, 0x01, /* Logical Minimum (1), */ 0x25, 0x03, /* Logical Maximum (3), */ 0x75, 0x04, /* Report Size (4), */ 0x95, 0x01, /* Report Count (1), */ 0x80, /* Input, */ 0x09, 0x32, /* Usage (In Range), */ 0x14, /* Logical Minimum (0), */ 0x25, 0x01, /* Logical Maximum (1), */ 0x75, 0x01, /* Report Size (1), */ 0x95, 0x01, /* Report Count (1), */ 0x81, 0x02, /* Input (Variable), */ 0x95, 0x03, /* Report Count (3), */ 0x81, 0x03, /* Input (Constant, Variable), */ 0x75, 0x10, /* Report Size (16), */ 0x95, 0x01, /* Report Count (1), */ 0x14, /* Logical Minimum (0), */ 0xA4, /* Push, */ 0x05, 0x01, /* Usage Page (Desktop), */ 0x65, 0x13, /* Unit (Inch), */ 0x55, 0xFD, /* Unit Exponent (-3), */ 0x34, /* Physical Minimum (0), */ 0x09, 0x30, /* Usage (X), */ 0x46, 0x70, 0x17, /* Physical Maximum (6000), */ 0x26, 0x00, 0x30, /* Logical Maximum (12288), */ 0x81, 0x02, /* Input (Variable), */ 0x09, 0x31, /* Usage (Y), */ 0x46, 0x94, 0x11, /* Physical Maximum (4500), */ 0x26, 0x00, 0x24, /* Logical Maximum (9216), */ 0x81, 0x02, /* Input (Variable), */ 0xB4, /* Pop, */ 0x09, 0x30, /* Usage (Tip Pressure), */ 0x26, 0xFF, 0x03, /* Logical Maximum (1023), */ 0x81, 0x02, /* Input (Variable), */ 0xC0, /* End Collection, */ 0xC0 /* End Collection */ }; /* Size of the original report descriptor of tablet with PID 0038 */ #define PID_0038_RDESC_ORIG_SIZE 241 /* * Fixed report descriptor for tablet with PID 0038. 
*/ static const __u8 pid_0038_rdesc_fixed[] = { 0x05, 0x0D, /* Usage Page (Digitizer), */ 0x09, 0x02, /* Usage (Pen), */ 0xA1, 0x01, /* Collection (Application), */ 0x85, 0x10, /* Report ID (16), */ 0x09, 0x20, /* Usage (Stylus), */ 0xA0, /* Collection (Physical), */ 0x09, 0x42, /* Usage (Tip Switch), */ 0x09, 0x44, /* Usage (Barrel Switch), */ 0x09, 0x46, /* Usage (Tablet Pick), */ 0x15, 0x01, /* Logical Minimum (1), */ 0x25, 0x03, /* Logical Maximum (3), */ 0x75, 0x04, /* Report Size (4), */ 0x95, 0x01, /* Report Count (1), */ 0x80, /* Input, */ 0x09, 0x32, /* Usage (In Range), */ 0x14, /* Logical Minimum (0), */ 0x25, 0x01, /* Logical Maximum (1), */ 0x75, 0x01, /* Report Size (1), */ 0x95, 0x01, /* Report Count (1), */ 0x81, 0x02, /* Input (Variable), */ 0x95, 0x03, /* Report Count (3), */ 0x81, 0x03, /* Input (Constant, Variable), */ 0x75, 0x10, /* Report Size (16), */ 0x95, 0x01, /* Report Count (1), */ 0x14, /* Logical Minimum (0), */ 0xA4, /* Push, */ 0x05, 0x01, /* Usage Page (Desktop), */ 0x65, 0x13, /* Unit (Inch), */ 0x55, 0xFD, /* Unit Exponent (-3), */ 0x34, /* Physical Minimum (0), */ 0x09, 0x30, /* Usage (X), */ 0x46, 0x2E, 0x22, /* Physical Maximum (8750), */ 0x26, 0x00, 0x46, /* Logical Maximum (17920), */ 0x81, 0x02, /* Input (Variable), */ 0x09, 0x31, /* Usage (Y), */ 0x46, 0x82, 0x14, /* Physical Maximum (5250), */ 0x26, 0x00, 0x2A, /* Logical Maximum (10752), */ 0x81, 0x02, /* Input (Variable), */ 0xB4, /* Pop, */ 0x09, 0x30, /* Usage (Tip Pressure), */ 0x26, 0xFF, 0x03, /* Logical Maximum (1023), */ 0x81, 0x02, /* Input (Variable), */ 0xC0, /* End Collection, */ 0xC0 /* End Collection */ }; /* Size of the original report descriptor of Media Tablet 10.6 inch */ #define MEDIA_TABLET_10_6_INCH_RDESC_ORIG_SIZE 300 /* Fixed Media Tablet 10.6 inch descriptor */ static const __u8 media_tablet_10_6_inch_rdesc_fixed[] = { 0x05, 0x0D, /* Usage Page (Digitizer), */ 0x09, 0x02, /* Usage (Pen), */ 0xA1, 0x01, /* Collection (Application), */ 0x85, 0x10, /* Report ID (16), */ 0x09, 0x20, /* Usage (Stylus), */ 0xA0, /* Collection (Physical), */ 0x09, 0x42, /* Usage (Tip Switch), */ 0x09, 0x44, /* Usage (Barrel Switch), */ 0x09, 0x46, /* Usage (Tablet Pick), */ 0x15, 0x01, /* Logical Minimum (1), */ 0x25, 0x03, /* Logical Maximum (3), */ 0x75, 0x04, /* Report Size (4), */ 0x95, 0x01, /* Report Count (1), */ 0x80, /* Input, */ 0x75, 0x01, /* Report Size (1), */ 0x09, 0x32, /* Usage (In Range), */ 0x14, /* Logical Minimum (0), */ 0x25, 0x01, /* Logical Maximum (1), */ 0x95, 0x01, /* Report Count (1), */ 0x81, 0x02, /* Input (Variable), */ 0x95, 0x03, /* Report Count (3), */ 0x81, 0x03, /* Input (Constant, Variable), */ 0x75, 0x10, /* Report Size (16), */ 0x95, 0x01, /* Report Count (1), */ 0x14, /* Logical Minimum (0), */ 0xA4, /* Push, */ 0x05, 0x01, /* Usage Page (Desktop), */ 0x65, 0x13, /* Unit (Inch), */ 0x55, 0xFD, /* Unit Exponent (-3), */ 0x34, /* Physical Minimum (0), */ 0x09, 0x30, /* Usage (X), */ 0x46, 0x28, 0x23, /* Physical Maximum (9000), */ 0x26, 0x50, 0x46, /* Logical Maximum (18000), */ 0x81, 0x02, /* Input (Variable), */ 0x09, 0x31, /* Usage (Y), */ 0x46, 0x7C, 0x15, /* Physical Maximum (5500), */ 0x26, 0xF8, 0x2A, /* Logical Maximum (11000), */ 0x81, 0x02, /* Input (Variable), */ 0xB4, /* Pop, */ 0x09, 0x30, /* Usage (Tip Pressure), */ 0x26, 0xFF, 0x03, /* Logical Maximum (1023), */ 0x81, 0x02, /* Input (Variable), */ 0xC0, /* End Collection, */ 0xC0, /* End Collection, */ 0x05, 0x01, /* Usage Page (Desktop), */ 0x09, 0x02, /* Usage (Mouse), */ 0xA1, 0x01, /* 
Collection (Application), */ 0x85, 0x01, /* Report ID (1), */ 0x09, 0x01, /* Usage (Pointer), */ 0xA0, /* Collection (Physical), */ 0x75, 0x08, /* Report Size (8), */ 0x95, 0x03, /* Report Count (3), */ 0x81, 0x03, /* Input (Constant, Variable), */ 0x95, 0x02, /* Report Count (2), */ 0x15, 0xFF, /* Logical Minimum (-1), */ 0x25, 0x01, /* Logical Maximum (1), */ 0x09, 0x38, /* Usage (Wheel), */ 0x0B, 0x38, 0x02, /* Usage (Consumer AC Pan), */ 0x0C, 0x00, 0x81, 0x06, /* Input (Variable, Relative), */ 0x95, 0x02, /* Report Count (2), */ 0x81, 0x03, /* Input (Constant, Variable), */ 0xC0, /* End Collection, */ 0xC0, /* End Collection, */ 0x05, 0x0C, /* Usage Page (Consumer), */ 0x09, 0x01, /* Usage (Consumer Control), */ 0xA1, 0x01, /* Collection (Application), */ 0x85, 0x0D, /* Report ID (13), */ 0x95, 0x01, /* Report Count (1), */ 0x75, 0x10, /* Report Size (16), */ 0x81, 0x03, /* Input (Constant, Variable), */ 0x0A, 0x2F, 0x02, /* Usage (AC Zoom), */ 0x0A, 0x2E, 0x02, /* Usage (AC Zoom Out), */ 0x0A, 0x2D, 0x02, /* Usage (AC Zoom In), */ 0x09, 0xB6, /* Usage (Scan Previous Track), */ 0x09, 0xB5, /* Usage (Scan Next Track), */ 0x08, /* Usage (00h), */ 0x08, /* Usage (00h), */ 0x08, /* Usage (00h), */ 0x08, /* Usage (00h), */ 0x08, /* Usage (00h), */ 0x0A, 0x2E, 0x02, /* Usage (AC Zoom Out), */ 0x0A, 0x2D, 0x02, /* Usage (AC Zoom In), */ 0x15, 0x0C, /* Logical Minimum (12), */ 0x25, 0x17, /* Logical Maximum (23), */ 0x75, 0x05, /* Report Size (5), */ 0x80, /* Input, */ 0x75, 0x03, /* Report Size (3), */ 0x81, 0x03, /* Input (Constant, Variable), */ 0x75, 0x20, /* Report Size (32), */ 0x81, 0x03, /* Input (Constant, Variable), */ 0xC0, /* End Collection, */ 0x09, 0x01, /* Usage (Consumer Control), */ 0xA1, 0x01, /* Collection (Application), */ 0x85, 0x0C, /* Report ID (12), */ 0x75, 0x01, /* Report Size (1), */ 0x09, 0xE9, /* Usage (Volume Inc), */ 0x09, 0xEA, /* Usage (Volume Dec), */ 0x09, 0xE2, /* Usage (Mute), */ 0x14, /* Logical Minimum (0), */ 0x25, 0x01, /* Logical Maximum (1), */ 0x95, 0x03, /* Report Count (3), */ 0x81, 0x06, /* Input (Variable, Relative), */ 0x95, 0x35, /* Report Count (53), */ 0x81, 0x03, /* Input (Constant, Variable), */ 0xC0 /* End Collection */ }; /* Size of the original report descriptor of Media Tablet 14.1 inch */ #define MEDIA_TABLET_14_1_INCH_RDESC_ORIG_SIZE 309 /* Fixed Media Tablet 14.1 inch descriptor */ static const __u8 media_tablet_14_1_inch_rdesc_fixed[] = { 0x05, 0x0D, /* Usage Page (Digitizer), */ 0x09, 0x02, /* Usage (Pen), */ 0xA1, 0x01, /* Collection (Application), */ 0x85, 0x10, /* Report ID (16), */ 0x09, 0x20, /* Usage (Stylus), */ 0xA0, /* Collection (Physical), */ 0x09, 0x42, /* Usage (Tip Switch), */ 0x09, 0x44, /* Usage (Barrel Switch), */ 0x09, 0x46, /* Usage (Tablet Pick), */ 0x15, 0x01, /* Logical Minimum (1), */ 0x25, 0x03, /* Logical Maximum (3), */ 0x75, 0x04, /* Report Size (4), */ 0x95, 0x01, /* Report Count (1), */ 0x80, /* Input, */ 0x75, 0x01, /* Report Size (1), */ 0x09, 0x32, /* Usage (In Range), */ 0x14, /* Logical Minimum (0), */ 0x25, 0x01, /* Logical Maximum (1), */ 0x95, 0x01, /* Report Count (1), */ 0x81, 0x02, /* Input (Variable), */ 0x95, 0x03, /* Report Count (3), */ 0x81, 0x03, /* Input (Constant, Variable), */ 0x75, 0x10, /* Report Size (16), */ 0x95, 0x01, /* Report Count (1), */ 0x14, /* Logical Minimum (0), */ 0xA4, /* Push, */ 0x05, 0x01, /* Usage Page (Desktop), */ 0x65, 0x13, /* Unit (Inch), */ 0x55, 0xFD, /* Unit Exponent (-3), */ 0x34, /* Physical Minimum (0), */ 0x09, 0x30, /* Usage (X), */ 0x46, 0xE0, 
0x2E, /* Physical Maximum (12000), */ 0x26, 0xFF, 0x3F, /* Logical Maximum (16383), */ 0x81, 0x02, /* Input (Variable), */ 0x09, 0x31, /* Usage (Y), */ 0x46, 0x52, 0x1C, /* Physical Maximum (7250), */ 0x26, 0xFF, 0x3F, /* Logical Maximum (16383), */ 0x81, 0x02, /* Input (Variable), */ 0xB4, /* Pop, */ 0x09, 0x30, /* Usage (Tip Pressure), */ 0x26, 0xFF, 0x03, /* Logical Maximum (1023), */ 0x81, 0x02, /* Input (Variable), */ 0xC0, /* End Collection, */ 0xC0, /* End Collection, */ 0x05, 0x01, /* Usage Page (Desktop), */ 0x09, 0x02, /* Usage (Mouse), */ 0xA1, 0x01, /* Collection (Application), */ 0x85, 0x01, /* Report ID (1), */ 0x09, 0x01, /* Usage (Pointer), */ 0xA0, /* Collection (Physical), */ 0x75, 0x08, /* Report Size (8), */ 0x95, 0x03, /* Report Count (3), */ 0x81, 0x03, /* Input (Constant, Variable), */ 0x95, 0x02, /* Report Count (2), */ 0x15, 0xFF, /* Logical Minimum (-1), */ 0x25, 0x01, /* Logical Maximum (1), */ 0x09, 0x38, /* Usage (Wheel), */ 0x0B, 0x38, 0x02, /* Usage (Consumer AC Pan), */ 0x0C, 0x00, 0x81, 0x06, /* Input (Variable, Relative), */ 0xC0, /* End Collection, */ 0xC0, /* End Collection, */ 0x05, 0x0C, /* Usage Page (Consumer), */ 0x09, 0x01, /* Usage (Consumer Control), */ 0xA1, 0x01, /* Collection (Application), */ 0x85, 0x0D, /* Report ID (13), */ 0x95, 0x01, /* Report Count (1), */ 0x75, 0x10, /* Report Size (16), */ 0x81, 0x03, /* Input (Constant, Variable), */ 0x0A, 0x2F, 0x02, /* Usage (AC Zoom), */ 0x0A, 0x2E, 0x02, /* Usage (AC Zoom Out), */ 0x0A, 0x2D, 0x02, /* Usage (AC Zoom In), */ 0x09, 0xB6, /* Usage (Scan Previous Track), */ 0x09, 0xB5, /* Usage (Scan Next Track), */ 0x08, /* Usage (00h), */ 0x08, /* Usage (00h), */ 0x08, /* Usage (00h), */ 0x08, /* Usage (00h), */ 0x08, /* Usage (00h), */ 0x0A, 0x2E, 0x02, /* Usage (AC Zoom Out), */ 0x0A, 0x2D, 0x02, /* Usage (AC Zoom In), */ 0x15, 0x0C, /* Logical Minimum (12), */ 0x25, 0x17, /* Logical Maximum (23), */ 0x75, 0x05, /* Report Size (5), */ 0x80, /* Input, */ 0x75, 0x03, /* Report Size (3), */ 0x81, 0x03, /* Input (Constant, Variable), */ 0x75, 0x20, /* Report Size (32), */ 0x81, 0x03, /* Input (Constant, Variable), */ 0xC0, /* End Collection, */ 0x09, 0x01, /* Usage (Consumer Control), */ 0xA1, 0x01, /* Collection (Application), */ 0x85, 0x0C, /* Report ID (12), */ 0x75, 0x01, /* Report Size (1), */ 0x09, 0xE9, /* Usage (Volume Inc), */ 0x09, 0xEA, /* Usage (Volume Dec), */ 0x09, 0xE2, /* Usage (Mute), */ 0x14, /* Logical Minimum (0), */ 0x25, 0x01, /* Logical Maximum (1), */ 0x95, 0x03, /* Report Count (3), */ 0x81, 0x06, /* Input (Variable, Relative), */ 0x75, 0x05, /* Report Size (5), */ 0x81, 0x03, /* Input (Constant, Variable), */ 0xC0 /* End Collection */ }; /* Size of the original report descriptor of Sirius Battery Free Tablet */ #define SIRIUS_BATTERY_FREE_TABLET_RDESC_ORIG_SIZE 335 /* Fixed Sirius Battery Free Tablet descriptor */ static const __u8 sirius_battery_free_tablet_rdesc_fixed[] = { 0x05, 0x0D, /* Usage Page (Digitizer), */ 0x09, 0x02, /* Usage (Pen), */ 0xA1, 0x01, /* Collection (Application), */ 0x85, 0x10, /* Report ID (16), */ 0x09, 0x20, /* Usage (Stylus), */ 0xA0, /* Collection (Physical), */ 0x95, 0x01, /* Report Count (1), */ 0x15, 0x01, /* Logical Minimum (1), */ 0x25, 0x03, /* Logical Maximum (3), */ 0x75, 0x02, /* Report Size (2), */ 0x09, 0x42, /* Usage (Tip Switch), */ 0x09, 0x44, /* Usage (Barrel Switch), */ 0x09, 0x46, /* Usage (Tablet Pick), */ 0x80, /* Input, */ 0x14, /* Logical Minimum (0), */ 0x25, 0x01, /* Logical Maximum (1), */ 0x75, 0x01, /* Report Size (1), 
*/ 0x09, 0x3C, /* Usage (Invert), */ 0x81, 0x02, /* Input (Variable), */ 0x81, 0x03, /* Input (Constant, Variable), */ 0x09, 0x32, /* Usage (In Range), */ 0x81, 0x02, /* Input (Variable), */ 0x95, 0x03, /* Report Count (3), */ 0x81, 0x03, /* Input (Constant, Variable), */ 0xA4, /* Push, */ 0x05, 0x01, /* Usage Page (Desktop), */ 0x55, 0xFD, /* Unit Exponent (-3), */ 0x65, 0x13, /* Unit (Inch), */ 0x34, /* Physical Minimum (0), */ 0x14, /* Logical Minimum (0), */ 0x75, 0x10, /* Report Size (16), */ 0x95, 0x01, /* Report Count (1), */ 0x46, 0x10, 0x27, /* Physical Maximum (10000), */ 0x26, 0x20, 0x4E, /* Logical Maximum (20000), */ 0x09, 0x30, /* Usage (X), */ 0x81, 0x02, /* Input (Variable), */ 0x46, 0x70, 0x17, /* Physical Maximum (6000), */ 0x26, 0xE0, 0x2E, /* Logical Maximum (12000), */ 0x09, 0x31, /* Usage (Y), */ 0x81, 0x02, /* Input (Variable), */ 0xB4, /* Pop, */ 0x75, 0x10, /* Report Size (16), */ 0x95, 0x01, /* Report Count (1), */ 0x14, /* Logical Minimum (0), */ 0x26, 0xFF, 0x03, /* Logical Maximum (1023), */ 0x09, 0x30, /* Usage (Tip Pressure), */ 0x81, 0x02, /* Input (Variable), */ 0xA4, /* Push, */ 0x55, 0xFE, /* Unit Exponent (-2), */ 0x65, 0x12, /* Unit (Radians), */ 0x35, 0x97, /* Physical Minimum (-105), */ 0x45, 0x69, /* Physical Maximum (105), */ 0x15, 0x97, /* Logical Minimum (-105), */ 0x25, 0x69, /* Logical Maximum (105), */ 0x75, 0x08, /* Report Size (8), */ 0x95, 0x02, /* Report Count (2), */ 0x09, 0x3D, /* Usage (X Tilt), */ 0x09, 0x3E, /* Usage (Y Tilt), */ 0x81, 0x02, /* Input (Variable), */ 0xB4, /* Pop, */ 0xC0, /* End Collection, */ 0xC0, /* End Collection, */ 0x05, 0x01, /* Usage Page (Desktop), */ 0x09, 0x02, /* Usage (Mouse), */ 0xA1, 0x01, /* Collection (Application), */ 0x85, 0x01, /* Report ID (1), */ 0x09, 0x01, /* Usage (Pointer), */ 0xA0, /* Collection (Physical), */ 0x75, 0x08, /* Report Size (8), */ 0x95, 0x03, /* Report Count (3), */ 0x81, 0x03, /* Input (Constant, Variable), */ 0x09, 0x38, /* Usage (Wheel), */ 0x15, 0xFF, /* Logical Minimum (-1), */ 0x25, 0x01, /* Logical Maximum (1), */ 0x75, 0x08, /* Report Size (8), */ 0x95, 0x01, /* Report Count (1), */ 0x81, 0x06, /* Input (Variable, Relative), */ 0x75, 0x08, /* Report Size (8), */ 0x95, 0x03, /* Report Count (3), */ 0x81, 0x03, /* Input (Constant, Variable), */ 0xC0, /* End Collection, */ 0xC0, /* End Collection, */ 0x05, 0x01, /* Usage Page (Desktop), */ 0x09, 0x06, /* Usage (Keyboard), */ 0xA1, 0x01, /* Collection (Application), */ 0x85, 0x0D, /* Report ID (13), */ 0x05, 0x07, /* Usage Page (Keyboard), */ 0x19, 0xE0, /* Usage Minimum (KB Leftcontrol), */ 0x29, 0xE7, /* Usage Maximum (KB Right GUI), */ 0x14, /* Logical Minimum (0), */ 0x25, 0x01, /* Logical Maximum (1), */ 0x75, 0x01, /* Report Size (1), */ 0x95, 0x08, /* Report Count (8), */ 0x81, 0x02, /* Input (Variable), */ 0x75, 0x08, /* Report Size (8), */ 0x95, 0x01, /* Report Count (1), */ 0x81, 0x01, /* Input (Constant), */ 0x18, /* Usage Minimum (None), */ 0x29, 0x65, /* Usage Maximum (KB Application), */ 0x14, /* Logical Minimum (0), */ 0x25, 0x65, /* Logical Maximum (101), */ 0x75, 0x08, /* Report Size (8), */ 0x95, 0x05, /* Report Count (5), */ 0x80, /* Input, */ 0xC0, /* End Collection, */ 0x05, 0x0C, /* Usage Page (Consumer), */ 0x09, 0x01, /* Usage (Consumer Control), */ 0xA1, 0x01, /* Collection (Application), */ 0x85, 0x0C, /* Report ID (12), */ 0x09, 0xE9, /* Usage (Volume Inc), */ 0x09, 0xEA, /* Usage (Volume Dec), */ 0x14, /* Logical Minimum (0), */ 0x25, 0x01, /* Logical Maximum (1), */ 0x75, 0x01, /* Report Size (1), 
*/ 0x95, 0x02, /* Report Count (2), */ 0x81, 0x02, /* Input (Variable), */ 0x75, 0x06, /* Report Size (6), */ 0x95, 0x01, /* Report Count (1), */ 0x81, 0x03, /* Input (Constant, Variable), */ 0x75, 0x10, /* Report Size (16), */ 0x95, 0x03, /* Report Count (3), */ 0x81, 0x03, /* Input (Constant, Variable), */ 0xC0 /* End Collection */ }; static const __u8 *waltop_report_fixup(struct hid_device *hdev, __u8 *rdesc, unsigned int *rsize) { switch (hdev->product) { case USB_DEVICE_ID_WALTOP_SLIM_TABLET_5_8_INCH: if (*rsize == SLIM_TABLET_5_8_INCH_RDESC_ORIG_SIZE) { *rsize = sizeof(slim_tablet_5_8_inch_rdesc_fixed); return slim_tablet_5_8_inch_rdesc_fixed; } break; case USB_DEVICE_ID_WALTOP_SLIM_TABLET_12_1_INCH: if (*rsize == SLIM_TABLET_12_1_INCH_RDESC_ORIG_SIZE) { *rsize = sizeof(slim_tablet_12_1_inch_rdesc_fixed); return slim_tablet_12_1_inch_rdesc_fixed; } break; case USB_DEVICE_ID_WALTOP_Q_PAD: if (*rsize == Q_PAD_RDESC_ORIG_SIZE) { *rsize = sizeof(q_pad_rdesc_fixed); return q_pad_rdesc_fixed; } break; case USB_DEVICE_ID_WALTOP_PID_0038: if (*rsize == PID_0038_RDESC_ORIG_SIZE) { *rsize = sizeof(pid_0038_rdesc_fixed); return pid_0038_rdesc_fixed; } break; case USB_DEVICE_ID_WALTOP_MEDIA_TABLET_10_6_INCH: if (*rsize == MEDIA_TABLET_10_6_INCH_RDESC_ORIG_SIZE) { *rsize = sizeof(media_tablet_10_6_inch_rdesc_fixed); return media_tablet_10_6_inch_rdesc_fixed; } break; case USB_DEVICE_ID_WALTOP_MEDIA_TABLET_14_1_INCH: if (*rsize == MEDIA_TABLET_14_1_INCH_RDESC_ORIG_SIZE) { *rsize = sizeof(media_tablet_14_1_inch_rdesc_fixed); return media_tablet_14_1_inch_rdesc_fixed; } break; case USB_DEVICE_ID_WALTOP_SIRIUS_BATTERY_FREE_TABLET: if (*rsize == SIRIUS_BATTERY_FREE_TABLET_RDESC_ORIG_SIZE) { *rsize = sizeof(sirius_battery_free_tablet_rdesc_fixed); return sirius_battery_free_tablet_rdesc_fixed; } break; } return rdesc; } static int waltop_raw_event(struct hid_device *hdev, struct hid_report *report, u8 *data, int size) { /* If this is a pen input report */ if (report->type == HID_INPUT_REPORT && report->id == 16 && size >= 8) { /* * Ignore reported pressure when a barrel button is pressed, * because it is rarely correct. */ /* If a barrel button is pressed */ if ((data[1] & 0xF) > 1) { /* Report zero pressure */ data[6] = 0; data[7] = 0; } } /* If this is a pen input report of Sirius Battery Free Tablet */ if (hdev->product == USB_DEVICE_ID_WALTOP_SIRIUS_BATTERY_FREE_TABLET && report->type == HID_INPUT_REPORT && report->id == 16 && size == 10) { /* * The tablet reports tilt as roughly sin(a)*21 (18 means 60 * degrees). * * This array stores angles as radians * 100, corresponding to * reported values up to 60 degrees, as expected by userspace. */ static const s8 tilt_to_radians[] = { 0, 5, 10, 14, 19, 24, 29, 34, 40, 45, 50, 56, 62, 68, 74, 81, 88, 96, 105 }; s8 tilt_x = (s8)data[8]; s8 tilt_y = (s8)data[9]; s8 sign_x = tilt_x >= 0 ? 1 : -1; s8 sign_y = tilt_y >= 0 ? 1 : -1; tilt_x *= sign_x; tilt_y *= sign_y; /* * Reverse the Y Tilt direction to match the HID standard and * userspace expectations. See HID Usage Tables v1.12 16.3.2 * Tilt Orientation. 
*/ sign_y *= -1; /* * This effectively clamps reported tilt to 60 degrees - the * range expected by userspace */ if (tilt_x > ARRAY_SIZE(tilt_to_radians) - 1) tilt_x = ARRAY_SIZE(tilt_to_radians) - 1; if (tilt_y > ARRAY_SIZE(tilt_to_radians) - 1) tilt_y = ARRAY_SIZE(tilt_to_radians) - 1; data[8] = tilt_to_radians[tilt_x] * sign_x; data[9] = tilt_to_radians[tilt_y] * sign_y; } return 0; } static const struct hid_device_id waltop_devices[] = { { HID_USB_DEVICE(USB_VENDOR_ID_WALTOP, USB_DEVICE_ID_WALTOP_SLIM_TABLET_5_8_INCH) }, { HID_USB_DEVICE(USB_VENDOR_ID_WALTOP, USB_DEVICE_ID_WALTOP_SLIM_TABLET_12_1_INCH) }, { HID_USB_DEVICE(USB_VENDOR_ID_WALTOP, USB_DEVICE_ID_WALTOP_Q_PAD) }, { HID_USB_DEVICE(USB_VENDOR_ID_WALTOP, USB_DEVICE_ID_WALTOP_PID_0038) }, { HID_USB_DEVICE(USB_VENDOR_ID_WALTOP, USB_DEVICE_ID_WALTOP_MEDIA_TABLET_10_6_INCH) }, { HID_USB_DEVICE(USB_VENDOR_ID_WALTOP, USB_DEVICE_ID_WALTOP_MEDIA_TABLET_14_1_INCH) }, { HID_USB_DEVICE(USB_VENDOR_ID_WALTOP, USB_DEVICE_ID_WALTOP_SIRIUS_BATTERY_FREE_TABLET) }, { } }; MODULE_DEVICE_TABLE(hid, waltop_devices); static struct hid_driver waltop_driver = { .name = "waltop", .id_table = waltop_devices, .report_fixup = waltop_report_fixup, .raw_event = waltop_raw_event, }; module_hid_driver(waltop_driver); MODULE_DESCRIPTION("HID driver for Waltop devices not fully compliant with HID standard"); MODULE_LICENSE("GPL");
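/*
 * A minimal userspace sketch (not part of the driver) of where the
 * tilt_to_radians[] values above come from: the tablet reports roughly
 * sin(angle) * 21, so entry v is approximately asin(v / 21.0) * 100,
 * i.e. the tilt angle in radians * 100.  Because the hardware scale is
 * only approximate ("roughly", per the driver comment), the computed
 * sequence tracks the driver's table to within a unit or two.
 * Build with: cc tilt.c -lm (file name illustrative).
 */
#include <math.h>
#include <stdio.h>

int main(void)
{
	int v;

	/* 18 is the largest raw value the driver maps (~60 degrees) */
	for (v = 0; v <= 18; v++)
		printf("%ld, ", lround(asin(v / 21.0) * 100.0));
	printf("\n");
	return 0;
}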
// SPDX-License-Identifier: GPL-2.0 /* * Copyright (c) 2008, Christoph Hellwig * All Rights Reserved. */ #include "xfs.h" #include "xfs_shared.h" #include "xfs_format.h" #include "xfs_log_format.h" #include "xfs_trans_resv.h" #include "xfs_mount.h" #include "xfs_inode.h" #include "xfs_quota.h" #include "xfs_trans.h" #include "xfs_icache.h" #include "xfs_qm.h" static int xfs_qm_fill_state( struct qc_type_state *tstate, struct xfs_mount *mp, xfs_dqtype_t type) { struct xfs_inode *ip; struct xfs_def_quota *defq; int error; error = xfs_qm_qino_load(mp, type, &ip); if (error) { tstate->ino = NULLFSINO; return error != -ENOENT ? error : 0; } defq = xfs_get_defquota(mp->m_quotainfo, type); tstate->ino = ip->i_ino; tstate->flags |= QCI_SYSFILE; tstate->blocks = ip->i_nblocks; tstate->nextents = ip->i_df.if_nextents; tstate->spc_timelimit = (u32)defq->blk.time; tstate->ino_timelimit = (u32)defq->ino.time; tstate->rt_spc_timelimit = (u32)defq->rtb.time; tstate->spc_warnlimit = 0; tstate->ino_warnlimit = 0; tstate->rt_spc_warnlimit = 0; xfs_irele(ip); return 0; } /* * Return quota status information, such as enforcements, quota file inode * numbers etc.
*/ static int xfs_fs_get_quota_state( struct super_block *sb, struct qc_state *state) { struct xfs_mount *mp = XFS_M(sb); struct xfs_quotainfo *q = mp->m_quotainfo; int error; memset(state, 0, sizeof(*state)); if (!XFS_IS_QUOTA_ON(mp)) return 0; state->s_incoredqs = q->qi_dquots; if (XFS_IS_UQUOTA_ON(mp)) state->s_state[USRQUOTA].flags |= QCI_ACCT_ENABLED; if (XFS_IS_UQUOTA_ENFORCED(mp)) state->s_state[USRQUOTA].flags |= QCI_LIMITS_ENFORCED; if (XFS_IS_GQUOTA_ON(mp)) state->s_state[GRPQUOTA].flags |= QCI_ACCT_ENABLED; if (XFS_IS_GQUOTA_ENFORCED(mp)) state->s_state[GRPQUOTA].flags |= QCI_LIMITS_ENFORCED; if (XFS_IS_PQUOTA_ON(mp)) state->s_state[PRJQUOTA].flags |= QCI_ACCT_ENABLED; if (XFS_IS_PQUOTA_ENFORCED(mp)) state->s_state[PRJQUOTA].flags |= QCI_LIMITS_ENFORCED; error = xfs_qm_fill_state(&state->s_state[USRQUOTA], mp, XFS_DQTYPE_USER); if (error) return error; error = xfs_qm_fill_state(&state->s_state[GRPQUOTA], mp, XFS_DQTYPE_GROUP); if (error) return error; error = xfs_qm_fill_state(&state->s_state[PRJQUOTA], mp, XFS_DQTYPE_PROJ); if (error) return error; return 0; } STATIC xfs_dqtype_t xfs_quota_type(int type) { switch (type) { case USRQUOTA: return XFS_DQTYPE_USER; case GRPQUOTA: return XFS_DQTYPE_GROUP; default: return XFS_DQTYPE_PROJ; } } #define XFS_QC_SETINFO_MASK (QC_TIMER_MASK) /* * Adjust quota timers & warnings */ static int xfs_fs_set_info( struct super_block *sb, int type, struct qc_info *info) { struct xfs_mount *mp = XFS_M(sb); struct qc_dqblk newlim; if (sb_rdonly(sb)) return -EROFS; if (!XFS_IS_QUOTA_ON(mp)) return -ENOSYS; if (info->i_fieldmask & ~XFS_QC_SETINFO_MASK) return -EINVAL; if ((info->i_fieldmask & XFS_QC_SETINFO_MASK) == 0) return 0; newlim.d_fieldmask = info->i_fieldmask; newlim.d_spc_timer = info->i_spc_timelimit; newlim.d_ino_timer = info->i_ino_timelimit; newlim.d_rt_spc_timer = info->i_rt_spc_timelimit; newlim.d_ino_warns = info->i_ino_warnlimit; newlim.d_spc_warns = info->i_spc_warnlimit; newlim.d_rt_spc_warns = info->i_rt_spc_warnlimit; return xfs_qm_scall_setqlim(mp, 0, xfs_quota_type(type), &newlim); } static unsigned int xfs_quota_flags(unsigned int uflags) { unsigned int flags = 0; if (uflags & FS_QUOTA_UDQ_ACCT) flags |= XFS_UQUOTA_ACCT; if (uflags & FS_QUOTA_PDQ_ACCT) flags |= XFS_PQUOTA_ACCT; if (uflags & FS_QUOTA_GDQ_ACCT) flags |= XFS_GQUOTA_ACCT; if (uflags & FS_QUOTA_UDQ_ENFD) flags |= XFS_UQUOTA_ENFD; if (uflags & FS_QUOTA_GDQ_ENFD) flags |= XFS_GQUOTA_ENFD; if (uflags & FS_QUOTA_PDQ_ENFD) flags |= XFS_PQUOTA_ENFD; return flags; } STATIC int xfs_quota_enable( struct super_block *sb, unsigned int uflags) { struct xfs_mount *mp = XFS_M(sb); if (sb_rdonly(sb)) return -EROFS; if (!XFS_IS_QUOTA_ON(mp)) return -ENOSYS; return xfs_qm_scall_quotaon(mp, xfs_quota_flags(uflags)); } STATIC int xfs_quota_disable( struct super_block *sb, unsigned int uflags) { struct xfs_mount *mp = XFS_M(sb); if (sb_rdonly(sb)) return -EROFS; if (!XFS_IS_QUOTA_ON(mp)) return -ENOSYS; return xfs_qm_scall_quotaoff(mp, xfs_quota_flags(uflags)); } STATIC int xfs_fs_rm_xquota( struct super_block *sb, unsigned int uflags) { struct xfs_mount *mp = XFS_M(sb); unsigned int flags = 0; if (sb_rdonly(sb)) return -EROFS; if (XFS_IS_QUOTA_ON(mp)) return -EINVAL; if (uflags & ~(FS_USER_QUOTA | FS_GROUP_QUOTA | FS_PROJ_QUOTA)) return -EINVAL; if (uflags & FS_USER_QUOTA) flags |= XFS_QMOPT_UQUOTA; if (uflags & FS_GROUP_QUOTA) flags |= XFS_QMOPT_GQUOTA; if (uflags & FS_PROJ_QUOTA) flags |= XFS_QMOPT_PQUOTA; return xfs_qm_scall_trunc_qfiles(mp, flags); } STATIC int xfs_fs_get_dqblk( 
struct super_block *sb, struct kqid qid, struct qc_dqblk *qdq) { struct xfs_mount *mp = XFS_M(sb); xfs_dqid_t id; if (!XFS_IS_QUOTA_ON(mp)) return -ENOSYS; id = from_kqid(&init_user_ns, qid); return xfs_qm_scall_getquota(mp, id, xfs_quota_type(qid.type), qdq); } /* Return quota info for active quota >= this qid */ STATIC int xfs_fs_get_nextdqblk( struct super_block *sb, struct kqid *qid, struct qc_dqblk *qdq) { int ret; struct xfs_mount *mp = XFS_M(sb); xfs_dqid_t id; if (!XFS_IS_QUOTA_ON(mp)) return -ENOSYS; id = from_kqid(&init_user_ns, *qid); ret = xfs_qm_scall_getquota_next(mp, &id, xfs_quota_type(qid->type), qdq); if (ret) return ret; /* ID may be different, so convert back what we got */ *qid = make_kqid(current_user_ns(), qid->type, id); return 0; } STATIC int xfs_fs_set_dqblk( struct super_block *sb, struct kqid qid, struct qc_dqblk *qdq) { struct xfs_mount *mp = XFS_M(sb); if (sb_rdonly(sb)) return -EROFS; if (!XFS_IS_QUOTA_ON(mp)) return -ENOSYS; return xfs_qm_scall_setqlim(mp, from_kqid(&init_user_ns, qid), xfs_quota_type(qid.type), qdq); } const struct quotactl_ops xfs_quotactl_operations = { .get_state = xfs_fs_get_quota_state, .set_info = xfs_fs_set_info, .quota_enable = xfs_quota_enable, .quota_disable = xfs_quota_disable, .rm_xquota = xfs_fs_rm_xquota, .get_dqblk = xfs_fs_get_dqblk, .get_nextdqblk = xfs_fs_get_nextdqblk, .set_dqblk = xfs_fs_set_dqblk, };
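/*
 * A minimal userspace sketch (not part of the kernel) of how the
 * operations above are reached: a generic quotactl(2) Q_GETQUOTA call
 * on a mounted XFS filesystem is routed through the VFS quota layer to
 * xfs_fs_get_dqblk().  The device path and uid below are illustrative
 * assumptions, not taken from this file.
 */
#include <sys/types.h>
#include <sys/quota.h>
#include <stdio.h>
#include <string.h>

int main(void)
{
	struct if_dqblk dq;

	memset(&dq, 0, sizeof(dq));
	/* fails with ENOSYS if quota accounting is off, as coded above */
	if (quotactl(QCMD(Q_GETQUOTA, USRQUOTA), "/dev/sda1", 1000,
		     (caddr_t)&dq) != 0) {
		perror("quotactl");
		return 1;
	}
	printf("uid 1000: %llu bytes in use, block hard limit %llu\n",
	       (unsigned long long)dq.dqb_curspace,
	       (unsigned long long)dq.dqb_bhardlimit);
	return 0;
}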
// SPDX-License-Identifier: GPL-2.0-or-later /* Kernel cryptographic api. * cast5.c - Cast5 cipher algorithm (rfc2144). * * Derived from GnuPG implementation of cast5. * * Major Changes. * Complete conformance to rfc2144. * Supports key size from 40 to 128 bits. * * Copyright (C) 1998, 1999, 2000, 2001 Free Software Foundation, Inc. * Copyright (C) 2003 Kartikey Mahendra Bhatt <kartik_me@hotmail.com>.
*/ #include <linux/unaligned.h> #include <crypto/algapi.h> #include <linux/init.h> #include <linux/module.h> #include <linux/errno.h> #include <linux/string.h> #include <linux/types.h> #include <crypto/cast5.h> static const u32 s5[256] = { 0x7ec90c04, 0x2c6e74b9, 0x9b0e66df, 0xa6337911, 0xb86a7fff, 0x1dd358f5, 0x44dd9d44, 0x1731167f, 0x08fbf1fa, 0xe7f511cc, 0xd2051b00, 0x735aba00, 0x2ab722d8, 0x386381cb, 0xacf6243a, 0x69befd7a, 0xe6a2e77f, 0xf0c720cd, 0xc4494816, 0xccf5c180, 0x38851640, 0x15b0a848, 0xe68b18cb, 0x4caadeff, 0x5f480a01, 0x0412b2aa, 0x259814fc, 0x41d0efe2, 0x4e40b48d, 0x248eb6fb, 0x8dba1cfe, 0x41a99b02, 0x1a550a04, 0xba8f65cb, 0x7251f4e7, 0x95a51725, 0xc106ecd7, 0x97a5980a, 0xc539b9aa, 0x4d79fe6a, 0xf2f3f763, 0x68af8040, 0xed0c9e56, 0x11b4958b, 0xe1eb5a88, 0x8709e6b0, 0xd7e07156, 0x4e29fea7, 0x6366e52d, 0x02d1c000, 0xc4ac8e05, 0x9377f571, 0x0c05372a, 0x578535f2, 0x2261be02, 0xd642a0c9, 0xdf13a280, 0x74b55bd2, 0x682199c0, 0xd421e5ec, 0x53fb3ce8, 0xc8adedb3, 0x28a87fc9, 0x3d959981, 0x5c1ff900, 0xfe38d399, 0x0c4eff0b, 0x062407ea, 0xaa2f4fb1, 0x4fb96976, 0x90c79505, 0xb0a8a774, 0xef55a1ff, 0xe59ca2c2, 0xa6b62d27, 0xe66a4263, 0xdf65001f, 0x0ec50966, 0xdfdd55bc, 0x29de0655, 0x911e739a, 0x17af8975, 0x32c7911c, 0x89f89468, 0x0d01e980, 0x524755f4, 0x03b63cc9, 0x0cc844b2, 0xbcf3f0aa, 0x87ac36e9, 0xe53a7426, 0x01b3d82b, 0x1a9e7449, 0x64ee2d7e, 0xcddbb1da, 0x01c94910, 0xb868bf80, 0x0d26f3fd, 0x9342ede7, 0x04a5c284, 0x636737b6, 0x50f5b616, 0xf24766e3, 0x8eca36c1, 0x136e05db, 0xfef18391, 0xfb887a37, 0xd6e7f7d4, 0xc7fb7dc9, 0x3063fcdf, 0xb6f589de, 0xec2941da, 0x26e46695, 0xb7566419, 0xf654efc5, 0xd08d58b7, 0x48925401, 0xc1bacb7f, 0xe5ff550f, 0xb6083049, 0x5bb5d0e8, 0x87d72e5a, 0xab6a6ee1, 0x223a66ce, 0xc62bf3cd, 0x9e0885f9, 0x68cb3e47, 0x086c010f, 0xa21de820, 0xd18b69de, 0xf3f65777, 0xfa02c3f6, 0x407edac3, 0xcbb3d550, 0x1793084d, 0xb0d70eba, 0x0ab378d5, 0xd951fb0c, 0xded7da56, 0x4124bbe4, 0x94ca0b56, 0x0f5755d1, 0xe0e1e56e, 0x6184b5be, 0x580a249f, 0x94f74bc0, 0xe327888e, 0x9f7b5561, 0xc3dc0280, 0x05687715, 0x646c6bd7, 0x44904db3, 0x66b4f0a3, 0xc0f1648a, 0x697ed5af, 0x49e92ff6, 0x309e374f, 0x2cb6356a, 0x85808573, 0x4991f840, 0x76f0ae02, 0x083be84d, 0x28421c9a, 0x44489406, 0x736e4cb8, 0xc1092910, 0x8bc95fc6, 0x7d869cf4, 0x134f616f, 0x2e77118d, 0xb31b2be1, 0xaa90b472, 0x3ca5d717, 0x7d161bba, 0x9cad9010, 0xaf462ba2, 0x9fe459d2, 0x45d34559, 0xd9f2da13, 0xdbc65487, 0xf3e4f94e, 0x176d486f, 0x097c13ea, 0x631da5c7, 0x445f7382, 0x175683f4, 0xcdc66a97, 0x70be0288, 0xb3cdcf72, 0x6e5dd2f3, 0x20936079, 0x459b80a5, 0xbe60e2db, 0xa9c23101, 0xeba5315c, 0x224e42f2, 0x1c5c1572, 0xf6721b2c, 0x1ad2fff3, 0x8c25404e, 0x324ed72f, 0x4067b7fd, 0x0523138e, 0x5ca3bc78, 0xdc0fd66e, 0x75922283, 0x784d6b17, 0x58ebb16e, 0x44094f85, 0x3f481d87, 0xfcfeae7b, 0x77b5ff76, 0x8c2302bf, 0xaaf47556, 0x5f46b02a, 0x2b092801, 0x3d38f5f7, 0x0ca81f36, 0x52af4a8a, 0x66d5e7c0, 0xdf3b0874, 0x95055110, 0x1b5ad7a8, 0xf61ed5ad, 0x6cf6e479, 0x20758184, 0xd0cefa65, 0x88f7be58, 0x4a046826, 0x0ff6f8f3, 0xa09c7f70, 0x5346aba0, 0x5ce96c28, 0xe176eda3, 0x6bac307f, 0x376829d2, 0x85360fa9, 0x17e3fe2a, 0x24b79767, 0xf5a96b20, 0xd6cd2595, 0x68ff1ebf, 0x7555442c, 0xf19f06be, 0xf9e0659a, 0xeeb9491d, 0x34010718, 0xbb30cab8, 0xe822fe15, 0x88570983, 0x750e6249, 0xda627e55, 0x5e76ffa8, 0xb1534546, 0x6d47de08, 0xefe9e7d4 }; static const u32 s6[256] = { 0xf6fa8f9d, 0x2cac6ce1, 0x4ca34867, 0xe2337f7c, 0x95db08e7, 0x016843b4, 0xeced5cbc, 0x325553ac, 0xbf9f0960, 0xdfa1e2ed, 0x83f0579d, 0x63ed86b9, 0x1ab6a6b8, 0xde5ebe39, 0xf38ff732, 0x8989b138, 0x33f14961, 
0xc01937bd, 0xf506c6da, 0xe4625e7e, 0xa308ea99, 0x4e23e33c, 0x79cbd7cc, 0x48a14367, 0xa3149619, 0xfec94bd5, 0xa114174a, 0xeaa01866, 0xa084db2d, 0x09a8486f, 0xa888614a, 0x2900af98, 0x01665991, 0xe1992863, 0xc8f30c60, 0x2e78ef3c, 0xd0d51932, 0xcf0fec14, 0xf7ca07d2, 0xd0a82072, 0xfd41197e, 0x9305a6b0, 0xe86be3da, 0x74bed3cd, 0x372da53c, 0x4c7f4448, 0xdab5d440, 0x6dba0ec3, 0x083919a7, 0x9fbaeed9, 0x49dbcfb0, 0x4e670c53, 0x5c3d9c01, 0x64bdb941, 0x2c0e636a, 0xba7dd9cd, 0xea6f7388, 0xe70bc762, 0x35f29adb, 0x5c4cdd8d, 0xf0d48d8c, 0xb88153e2, 0x08a19866, 0x1ae2eac8, 0x284caf89, 0xaa928223, 0x9334be53, 0x3b3a21bf, 0x16434be3, 0x9aea3906, 0xefe8c36e, 0xf890cdd9, 0x80226dae, 0xc340a4a3, 0xdf7e9c09, 0xa694a807, 0x5b7c5ecc, 0x221db3a6, 0x9a69a02f, 0x68818a54, 0xceb2296f, 0x53c0843a, 0xfe893655, 0x25bfe68a, 0xb4628abc, 0xcf222ebf, 0x25ac6f48, 0xa9a99387, 0x53bddb65, 0xe76ffbe7, 0xe967fd78, 0x0ba93563, 0x8e342bc1, 0xe8a11be9, 0x4980740d, 0xc8087dfc, 0x8de4bf99, 0xa11101a0, 0x7fd37975, 0xda5a26c0, 0xe81f994f, 0x9528cd89, 0xfd339fed, 0xb87834bf, 0x5f04456d, 0x22258698, 0xc9c4c83b, 0x2dc156be, 0x4f628daa, 0x57f55ec5, 0xe2220abe, 0xd2916ebf, 0x4ec75b95, 0x24f2c3c0, 0x42d15d99, 0xcd0d7fa0, 0x7b6e27ff, 0xa8dc8af0, 0x7345c106, 0xf41e232f, 0x35162386, 0xe6ea8926, 0x3333b094, 0x157ec6f2, 0x372b74af, 0x692573e4, 0xe9a9d848, 0xf3160289, 0x3a62ef1d, 0xa787e238, 0xf3a5f676, 0x74364853, 0x20951063, 0x4576698d, 0xb6fad407, 0x592af950, 0x36f73523, 0x4cfb6e87, 0x7da4cec0, 0x6c152daa, 0xcb0396a8, 0xc50dfe5d, 0xfcd707ab, 0x0921c42f, 0x89dff0bb, 0x5fe2be78, 0x448f4f33, 0x754613c9, 0x2b05d08d, 0x48b9d585, 0xdc049441, 0xc8098f9b, 0x7dede786, 0xc39a3373, 0x42410005, 0x6a091751, 0x0ef3c8a6, 0x890072d6, 0x28207682, 0xa9a9f7be, 0xbf32679d, 0xd45b5b75, 0xb353fd00, 0xcbb0e358, 0x830f220a, 0x1f8fb214, 0xd372cf08, 0xcc3c4a13, 0x8cf63166, 0x061c87be, 0x88c98f88, 0x6062e397, 0x47cf8e7a, 0xb6c85283, 0x3cc2acfb, 0x3fc06976, 0x4e8f0252, 0x64d8314d, 0xda3870e3, 0x1e665459, 0xc10908f0, 0x513021a5, 0x6c5b68b7, 0x822f8aa0, 0x3007cd3e, 0x74719eef, 0xdc872681, 0x073340d4, 0x7e432fd9, 0x0c5ec241, 0x8809286c, 0xf592d891, 0x08a930f6, 0x957ef305, 0xb7fbffbd, 0xc266e96f, 0x6fe4ac98, 0xb173ecc0, 0xbc60b42a, 0x953498da, 0xfba1ae12, 0x2d4bd736, 0x0f25faab, 0xa4f3fceb, 0xe2969123, 0x257f0c3d, 0x9348af49, 0x361400bc, 0xe8816f4a, 0x3814f200, 0xa3f94043, 0x9c7a54c2, 0xbc704f57, 0xda41e7f9, 0xc25ad33a, 0x54f4a084, 0xb17f5505, 0x59357cbe, 0xedbd15c8, 0x7f97c5ab, 0xba5ac7b5, 0xb6f6deaf, 0x3a479c3a, 0x5302da25, 0x653d7e6a, 0x54268d49, 0x51a477ea, 0x5017d55b, 0xd7d25d88, 0x44136c76, 0x0404a8c8, 0xb8e5a121, 0xb81a928a, 0x60ed5869, 0x97c55b96, 0xeaec991b, 0x29935913, 0x01fdb7f1, 0x088e8dfa, 0x9ab6f6f5, 0x3b4cbf9f, 0x4a5de3ab, 0xe6051d35, 0xa0e1d855, 0xd36b4cf1, 0xf544edeb, 0xb0e93524, 0xbebb8fbd, 0xa2d762cf, 0x49c92f54, 0x38b5f331, 0x7128a454, 0x48392905, 0xa65b1db8, 0x851c97bd, 0xd675cf2f }; static const u32 s7[256] = { 0x85e04019, 0x332bf567, 0x662dbfff, 0xcfc65693, 0x2a8d7f6f, 0xab9bc912, 0xde6008a1, 0x2028da1f, 0x0227bce7, 0x4d642916, 0x18fac300, 0x50f18b82, 0x2cb2cb11, 0xb232e75c, 0x4b3695f2, 0xb28707de, 0xa05fbcf6, 0xcd4181e9, 0xe150210c, 0xe24ef1bd, 0xb168c381, 0xfde4e789, 0x5c79b0d8, 0x1e8bfd43, 0x4d495001, 0x38be4341, 0x913cee1d, 0x92a79c3f, 0x089766be, 0xbaeeadf4, 0x1286becf, 0xb6eacb19, 0x2660c200, 0x7565bde4, 0x64241f7a, 0x8248dca9, 0xc3b3ad66, 0x28136086, 0x0bd8dfa8, 0x356d1cf2, 0x107789be, 0xb3b2e9ce, 0x0502aa8f, 0x0bc0351e, 0x166bf52a, 0xeb12ff82, 0xe3486911, 0xd34d7516, 0x4e7b3aff, 0x5f43671b, 0x9cf6e037, 0x4981ac83, 0x334266ce, 0x8c9341b7, 
0xd0d854c0, 0xcb3a6c88, 0x47bc2829, 0x4725ba37, 0xa66ad22b, 0x7ad61f1e, 0x0c5cbafa, 0x4437f107, 0xb6e79962, 0x42d2d816, 0x0a961288, 0xe1a5c06e, 0x13749e67, 0x72fc081a, 0xb1d139f7, 0xf9583745, 0xcf19df58, 0xbec3f756, 0xc06eba30, 0x07211b24, 0x45c28829, 0xc95e317f, 0xbc8ec511, 0x38bc46e9, 0xc6e6fa14, 0xbae8584a, 0xad4ebc46, 0x468f508b, 0x7829435f, 0xf124183b, 0x821dba9f, 0xaff60ff4, 0xea2c4e6d, 0x16e39264, 0x92544a8b, 0x009b4fc3, 0xaba68ced, 0x9ac96f78, 0x06a5b79a, 0xb2856e6e, 0x1aec3ca9, 0xbe838688, 0x0e0804e9, 0x55f1be56, 0xe7e5363b, 0xb3a1f25d, 0xf7debb85, 0x61fe033c, 0x16746233, 0x3c034c28, 0xda6d0c74, 0x79aac56c, 0x3ce4e1ad, 0x51f0c802, 0x98f8f35a, 0x1626a49f, 0xeed82b29, 0x1d382fe3, 0x0c4fb99a, 0xbb325778, 0x3ec6d97b, 0x6e77a6a9, 0xcb658b5c, 0xd45230c7, 0x2bd1408b, 0x60c03eb7, 0xb9068d78, 0xa33754f4, 0xf430c87d, 0xc8a71302, 0xb96d8c32, 0xebd4e7be, 0xbe8b9d2d, 0x7979fb06, 0xe7225308, 0x8b75cf77, 0x11ef8da4, 0xe083c858, 0x8d6b786f, 0x5a6317a6, 0xfa5cf7a0, 0x5dda0033, 0xf28ebfb0, 0xf5b9c310, 0xa0eac280, 0x08b9767a, 0xa3d9d2b0, 0x79d34217, 0x021a718d, 0x9ac6336a, 0x2711fd60, 0x438050e3, 0x069908a8, 0x3d7fedc4, 0x826d2bef, 0x4eeb8476, 0x488dcf25, 0x36c9d566, 0x28e74e41, 0xc2610aca, 0x3d49a9cf, 0xbae3b9df, 0xb65f8de6, 0x92aeaf64, 0x3ac7d5e6, 0x9ea80509, 0xf22b017d, 0xa4173f70, 0xdd1e16c3, 0x15e0d7f9, 0x50b1b887, 0x2b9f4fd5, 0x625aba82, 0x6a017962, 0x2ec01b9c, 0x15488aa9, 0xd716e740, 0x40055a2c, 0x93d29a22, 0xe32dbf9a, 0x058745b9, 0x3453dc1e, 0xd699296e, 0x496cff6f, 0x1c9f4986, 0xdfe2ed07, 0xb87242d1, 0x19de7eae, 0x053e561a, 0x15ad6f8c, 0x66626c1c, 0x7154c24c, 0xea082b2a, 0x93eb2939, 0x17dcb0f0, 0x58d4f2ae, 0x9ea294fb, 0x52cf564c, 0x9883fe66, 0x2ec40581, 0x763953c3, 0x01d6692e, 0xd3a0c108, 0xa1e7160e, 0xe4f2dfa6, 0x693ed285, 0x74904698, 0x4c2b0edd, 0x4f757656, 0x5d393378, 0xa132234f, 0x3d321c5d, 0xc3f5e194, 0x4b269301, 0xc79f022f, 0x3c997e7e, 0x5e4f9504, 0x3ffafbbd, 0x76f7ad0e, 0x296693f4, 0x3d1fce6f, 0xc61e45be, 0xd3b5ab34, 0xf72bf9b7, 0x1b0434c0, 0x4e72b567, 0x5592a33d, 0xb5229301, 0xcfd2a87f, 0x60aeb767, 0x1814386b, 0x30bcc33d, 0x38a0c07d, 0xfd1606f2, 0xc363519b, 0x589dd390, 0x5479f8e6, 0x1cb8d647, 0x97fd61a9, 0xea7759f4, 0x2d57539d, 0x569a58cf, 0xe84e63ad, 0x462e1b78, 0x6580f87e, 0xf3817914, 0x91da55f4, 0x40a230f3, 0xd1988f35, 0xb6e318d2, 0x3ffa50bc, 0x3d40f021, 0xc3c0bdae, 0x4958c24c, 0x518f36b2, 0x84b1d370, 0x0fedce83, 0x878ddada, 0xf2a279c7, 0x94e01be8, 0x90716f4b, 0x954b8aa3 }; static const u32 sb8[256] = { 0xe216300d, 0xbbddfffc, 0xa7ebdabd, 0x35648095, 0x7789f8b7, 0xe6c1121b, 0x0e241600, 0x052ce8b5, 0x11a9cfb0, 0xe5952f11, 0xece7990a, 0x9386d174, 0x2a42931c, 0x76e38111, 0xb12def3a, 0x37ddddfc, 0xde9adeb1, 0x0a0cc32c, 0xbe197029, 0x84a00940, 0xbb243a0f, 0xb4d137cf, 0xb44e79f0, 0x049eedfd, 0x0b15a15d, 0x480d3168, 0x8bbbde5a, 0x669ded42, 0xc7ece831, 0x3f8f95e7, 0x72df191b, 0x7580330d, 0x94074251, 0x5c7dcdfa, 0xabbe6d63, 0xaa402164, 0xb301d40a, 0x02e7d1ca, 0x53571dae, 0x7a3182a2, 0x12a8ddec, 0xfdaa335d, 0x176f43e8, 0x71fb46d4, 0x38129022, 0xce949ad4, 0xb84769ad, 0x965bd862, 0x82f3d055, 0x66fb9767, 0x15b80b4e, 0x1d5b47a0, 0x4cfde06f, 0xc28ec4b8, 0x57e8726e, 0x647a78fc, 0x99865d44, 0x608bd593, 0x6c200e03, 0x39dc5ff6, 0x5d0b00a3, 0xae63aff2, 0x7e8bd632, 0x70108c0c, 0xbbd35049, 0x2998df04, 0x980cf42a, 0x9b6df491, 0x9e7edd53, 0x06918548, 0x58cb7e07, 0x3b74ef2e, 0x522fffb1, 0xd24708cc, 0x1c7e27cd, 0xa4eb215b, 0x3cf1d2e2, 0x19b47a38, 0x424f7618, 0x35856039, 0x9d17dee7, 0x27eb35e6, 0xc9aff67b, 0x36baf5b8, 0x09c467cd, 0xc18910b1, 0xe11dbf7b, 0x06cd1af8, 0x7170c608, 0x2d5e3354, 0xd4de495a, 
0x64c6d006, 0xbcc0c62c, 0x3dd00db3, 0x708f8f34, 0x77d51b42, 0x264f620f, 0x24b8d2bf, 0x15c1b79e, 0x46a52564, 0xf8d7e54e, 0x3e378160, 0x7895cda5, 0x859c15a5, 0xe6459788, 0xc37bc75f, 0xdb07ba0c, 0x0676a3ab, 0x7f229b1e, 0x31842e7b, 0x24259fd7, 0xf8bef472, 0x835ffcb8, 0x6df4c1f2, 0x96f5b195, 0xfd0af0fc, 0xb0fe134c, 0xe2506d3d, 0x4f9b12ea, 0xf215f225, 0xa223736f, 0x9fb4c428, 0x25d04979, 0x34c713f8, 0xc4618187, 0xea7a6e98, 0x7cd16efc, 0x1436876c, 0xf1544107, 0xbedeee14, 0x56e9af27, 0xa04aa441, 0x3cf7c899, 0x92ecbae6, 0xdd67016d, 0x151682eb, 0xa842eedf, 0xfdba60b4, 0xf1907b75, 0x20e3030f, 0x24d8c29e, 0xe139673b, 0xefa63fb8, 0x71873054, 0xb6f2cf3b, 0x9f326442, 0xcb15a4cc, 0xb01a4504, 0xf1e47d8d, 0x844a1be5, 0xbae7dfdc, 0x42cbda70, 0xcd7dae0a, 0x57e85b7a, 0xd53f5af6, 0x20cf4d8c, 0xcea4d428, 0x79d130a4, 0x3486ebfb, 0x33d3cddc, 0x77853b53, 0x37effcb5, 0xc5068778, 0xe580b3e6, 0x4e68b8f4, 0xc5c8b37e, 0x0d809ea2, 0x398feb7c, 0x132a4f94, 0x43b7950e, 0x2fee7d1c, 0x223613bd, 0xdd06caa2, 0x37df932b, 0xc4248289, 0xacf3ebc3, 0x5715f6b7, 0xef3478dd, 0xf267616f, 0xc148cbe4, 0x9052815e, 0x5e410fab, 0xb48a2465, 0x2eda7fa4, 0xe87b40e4, 0xe98ea084, 0x5889e9e1, 0xefd390fc, 0xdd07d35b, 0xdb485694, 0x38d7e5b2, 0x57720101, 0x730edebc, 0x5b643113, 0x94917e4f, 0x503c2fba, 0x646f1282, 0x7523d24a, 0xe0779695, 0xf9c17a8f, 0x7a5b2121, 0xd187b896, 0x29263a4d, 0xba510cdf, 0x81f47c9f, 0xad1163ed, 0xea7b5965, 0x1a00726e, 0x11403092, 0x00da6d77, 0x4a0cdd61, 0xad1f4603, 0x605bdfb0, 0x9eedc364, 0x22ebe6a8, 0xcee7d28a, 0xa0e736a0, 0x5564a6b9, 0x10853209, 0xc7eb8f37, 0x2de705ca, 0x8951570f, 0xdf09822b, 0xbd691a6c, 0xaa12e4f2, 0x87451c0f, 0xe0f6a27a, 0x3ada4819, 0x4cf1764f, 0x0d771c2b, 0x67cdb156, 0x350d8384, 0x5938fa0f, 0x42399ef3, 0x36997b07, 0x0e84093d, 0x4aa93e61, 0x8360d87b, 0x1fa98b0c, 0x1149382c, 0xe97625a5, 0x0614d1b7, 0x0e25244b, 0x0c768347, 0x589e8d82, 0x0d2059d1, 0xa466bb1e, 0xf8da0a82, 0x04f19130, 0xba6e4ec0, 0x99265164, 0x1ee7230d, 0x50b2ad80, 0xeaee6801, 0x8db2a283, 0xea8bf59e }; #define s1 cast_s1 #define s2 cast_s2 #define s3 cast_s3 #define s4 cast_s4 #define F1(D, m, r) ((I = ((m) + (D))), (I = rol32(I, (r))), \ (((s1[I >> 24] ^ s2[(I>>16)&0xff]) - s3[(I>>8)&0xff]) + s4[I&0xff])) #define F2(D, m, r) ((I = ((m) ^ (D))), (I = rol32(I, (r))), \ (((s1[I >> 24] - s2[(I>>16)&0xff]) + s3[(I>>8)&0xff]) ^ s4[I&0xff])) #define F3(D, m, r) ((I = ((m) - (D))), (I = rol32(I, (r))), \ (((s1[I >> 24] + s2[(I>>16)&0xff]) ^ s3[(I>>8)&0xff]) - s4[I&0xff])) void __cast5_encrypt(struct cast5_ctx *c, u8 *outbuf, const u8 *inbuf) { u32 l, r, t; u32 I; /* used by the Fx macros */ u32 *Km; u8 *Kr; Km = c->Km; Kr = c->Kr; /* (L0,R0) <-- (m1...m64). (Split the plaintext into left and * right 32-bit halves L0 = m1...m32 and R0 = m33...m64.) */ l = get_unaligned_be32(inbuf); r = get_unaligned_be32(inbuf + 4); /* (16 rounds) for i from 1 to 16, compute Li and Ri as follows: * Li = Ri-1; * Ri = Li-1 ^ f(Ri-1,Kmi,Kri), where f is defined in Section 2.2 * Rounds 1, 4, 7, 10, 13, and 16 use f function Type 1. * Rounds 2, 5, 8, 11, and 14 use f function Type 2. * Rounds 3, 6, 9, 12, and 15 use f function Type 3. 
*/ t = l; l = r; r = t ^ F1(r, Km[0], Kr[0]); t = l; l = r; r = t ^ F2(r, Km[1], Kr[1]); t = l; l = r; r = t ^ F3(r, Km[2], Kr[2]); t = l; l = r; r = t ^ F1(r, Km[3], Kr[3]); t = l; l = r; r = t ^ F2(r, Km[4], Kr[4]); t = l; l = r; r = t ^ F3(r, Km[5], Kr[5]); t = l; l = r; r = t ^ F1(r, Km[6], Kr[6]); t = l; l = r; r = t ^ F2(r, Km[7], Kr[7]); t = l; l = r; r = t ^ F3(r, Km[8], Kr[8]); t = l; l = r; r = t ^ F1(r, Km[9], Kr[9]); t = l; l = r; r = t ^ F2(r, Km[10], Kr[10]); t = l; l = r; r = t ^ F3(r, Km[11], Kr[11]); if (!(c->rr)) { t = l; l = r; r = t ^ F1(r, Km[12], Kr[12]); t = l; l = r; r = t ^ F2(r, Km[13], Kr[13]); t = l; l = r; r = t ^ F3(r, Km[14], Kr[14]); t = l; l = r; r = t ^ F1(r, Km[15], Kr[15]); } /* c1...c64 <-- (R16,L16). (Exchange final blocks L16, R16 and * concatenate to form the ciphertext.) */ put_unaligned_be32(r, outbuf); put_unaligned_be32(l, outbuf + 4); } EXPORT_SYMBOL_GPL(__cast5_encrypt); static void cast5_encrypt(struct crypto_tfm *tfm, u8 *outbuf, const u8 *inbuf) { __cast5_encrypt(crypto_tfm_ctx(tfm), outbuf, inbuf); } void __cast5_decrypt(struct cast5_ctx *c, u8 *outbuf, const u8 *inbuf) { u32 l, r, t; u32 I; u32 *Km; u8 *Kr; Km = c->Km; Kr = c->Kr; l = get_unaligned_be32(inbuf); r = get_unaligned_be32(inbuf + 4); if (!(c->rr)) { t = l; l = r; r = t ^ F1(r, Km[15], Kr[15]); t = l; l = r; r = t ^ F3(r, Km[14], Kr[14]); t = l; l = r; r = t ^ F2(r, Km[13], Kr[13]); t = l; l = r; r = t ^ F1(r, Km[12], Kr[12]); } t = l; l = r; r = t ^ F3(r, Km[11], Kr[11]); t = l; l = r; r = t ^ F2(r, Km[10], Kr[10]); t = l; l = r; r = t ^ F1(r, Km[9], Kr[9]); t = l; l = r; r = t ^ F3(r, Km[8], Kr[8]); t = l; l = r; r = t ^ F2(r, Km[7], Kr[7]); t = l; l = r; r = t ^ F1(r, Km[6], Kr[6]); t = l; l = r; r = t ^ F3(r, Km[5], Kr[5]); t = l; l = r; r = t ^ F2(r, Km[4], Kr[4]); t = l; l = r; r = t ^ F1(r, Km[3], Kr[3]); t = l; l = r; r = t ^ F3(r, Km[2], Kr[2]); t = l; l = r; r = t ^ F2(r, Km[1], Kr[1]); t = l; l = r; r = t ^ F1(r, Km[0], Kr[0]); put_unaligned_be32(r, outbuf); put_unaligned_be32(l, outbuf + 4); } EXPORT_SYMBOL_GPL(__cast5_decrypt); static void cast5_decrypt(struct crypto_tfm *tfm, u8 *outbuf, const u8 *inbuf) { __cast5_decrypt(crypto_tfm_ctx(tfm), outbuf, inbuf); } static void key_schedule(u32 *x, u32 *z, u32 *k) { #define xi(i) ((x[(i)/4] >> (8*(3-((i)%4)))) & 0xff) #define zi(i) ((z[(i)/4] >> (8*(3-((i)%4)))) & 0xff) z[0] = x[0] ^ s5[xi(13)] ^ s6[xi(15)] ^ s7[xi(12)] ^ sb8[xi(14)] ^ s7[xi(8)]; z[1] = x[2] ^ s5[zi(0)] ^ s6[zi(2)] ^ s7[zi(1)] ^ sb8[zi(3)] ^ sb8[xi(10)]; z[2] = x[3] ^ s5[zi(7)] ^ s6[zi(6)] ^ s7[zi(5)] ^ sb8[zi(4)] ^ s5[xi(9)]; z[3] = x[1] ^ s5[zi(10)] ^ s6[zi(9)] ^ s7[zi(11)] ^ sb8[zi(8)] ^ s6[xi(11)]; k[0] = s5[zi(8)] ^ s6[zi(9)] ^ s7[zi(7)] ^ sb8[zi(6)] ^ s5[zi(2)]; k[1] = s5[zi(10)] ^ s6[zi(11)] ^ s7[zi(5)] ^ sb8[zi(4)] ^ s6[zi(6)]; k[2] = s5[zi(12)] ^ s6[zi(13)] ^ s7[zi(3)] ^ sb8[zi(2)] ^ s7[zi(9)]; k[3] = s5[zi(14)] ^ s6[zi(15)] ^ s7[zi(1)] ^ sb8[zi(0)] ^ sb8[zi(12)]; x[0] = z[2] ^ s5[zi(5)] ^ s6[zi(7)] ^ s7[zi(4)] ^ sb8[zi(6)] ^ s7[zi(0)]; x[1] = z[0] ^ s5[xi(0)] ^ s6[xi(2)] ^ s7[xi(1)] ^ sb8[xi(3)] ^ sb8[zi(2)]; x[2] = z[1] ^ s5[xi(7)] ^ s6[xi(6)] ^ s7[xi(5)] ^ sb8[xi(4)] ^ s5[zi(1)]; x[3] = z[3] ^ s5[xi(10)] ^ s6[xi(9)] ^ s7[xi(11)] ^ sb8[xi(8)] ^ s6[zi(3)]; k[4] = s5[xi(3)] ^ s6[xi(2)] ^ s7[xi(12)] ^ sb8[xi(13)] ^ s5[xi(8)]; k[5] = s5[xi(1)] ^ s6[xi(0)] ^ s7[xi(14)] ^ sb8[xi(15)] ^ s6[xi(13)]; k[6] = s5[xi(7)] ^ s6[xi(6)] ^ s7[xi(8)] ^ sb8[xi(9)] ^ s7[xi(3)]; k[7] = s5[xi(5)] ^ s6[xi(4)] ^ s7[xi(10)] ^ sb8[xi(11)] ^ sb8[xi(7)]; z[0] = x[0] ^ 
s5[xi(13)] ^ s6[xi(15)] ^ s7[xi(12)] ^ sb8[xi(14)] ^ s7[xi(8)]; z[1] = x[2] ^ s5[zi(0)] ^ s6[zi(2)] ^ s7[zi(1)] ^ sb8[zi(3)] ^ sb8[xi(10)]; z[2] = x[3] ^ s5[zi(7)] ^ s6[zi(6)] ^ s7[zi(5)] ^ sb8[zi(4)] ^ s5[xi(9)]; z[3] = x[1] ^ s5[zi(10)] ^ s6[zi(9)] ^ s7[zi(11)] ^ sb8[zi(8)] ^ s6[xi(11)]; k[8] = s5[zi(3)] ^ s6[zi(2)] ^ s7[zi(12)] ^ sb8[zi(13)] ^ s5[zi(9)]; k[9] = s5[zi(1)] ^ s6[zi(0)] ^ s7[zi(14)] ^ sb8[zi(15)] ^ s6[zi(12)]; k[10] = s5[zi(7)] ^ s6[zi(6)] ^ s7[zi(8)] ^ sb8[zi(9)] ^ s7[zi(2)]; k[11] = s5[zi(5)] ^ s6[zi(4)] ^ s7[zi(10)] ^ sb8[zi(11)] ^ sb8[zi(6)]; x[0] = z[2] ^ s5[zi(5)] ^ s6[zi(7)] ^ s7[zi(4)] ^ sb8[zi(6)] ^ s7[zi(0)]; x[1] = z[0] ^ s5[xi(0)] ^ s6[xi(2)] ^ s7[xi(1)] ^ sb8[xi(3)] ^ sb8[zi(2)]; x[2] = z[1] ^ s5[xi(7)] ^ s6[xi(6)] ^ s7[xi(5)] ^ sb8[xi(4)] ^ s5[zi(1)]; x[3] = z[3] ^ s5[xi(10)] ^ s6[xi(9)] ^ s7[xi(11)] ^ sb8[xi(8)] ^ s6[zi(3)]; k[12] = s5[xi(8)] ^ s6[xi(9)] ^ s7[xi(7)] ^ sb8[xi(6)] ^ s5[xi(3)]; k[13] = s5[xi(10)] ^ s6[xi(11)] ^ s7[xi(5)] ^ sb8[xi(4)] ^ s6[xi(7)]; k[14] = s5[xi(12)] ^ s6[xi(13)] ^ s7[xi(3)] ^ sb8[xi(2)] ^ s7[xi(8)]; k[15] = s5[xi(14)] ^ s6[xi(15)] ^ s7[xi(1)] ^ sb8[xi(0)] ^ sb8[xi(13)]; #undef xi #undef zi } int cast5_setkey(struct crypto_tfm *tfm, const u8 *key, unsigned int key_len) { struct cast5_ctx *c = crypto_tfm_ctx(tfm); int i; u32 x[4]; u32 z[4]; u32 k[16]; __be32 p_key[4]; c->rr = key_len <= 10 ? 1 : 0; memset(p_key, 0, 16); memcpy(p_key, key, key_len); x[0] = be32_to_cpu(p_key[0]); x[1] = be32_to_cpu(p_key[1]); x[2] = be32_to_cpu(p_key[2]); x[3] = be32_to_cpu(p_key[3]); key_schedule(x, z, k); for (i = 0; i < 16; i++) c->Km[i] = k[i]; key_schedule(x, z, k); for (i = 0; i < 16; i++) c->Kr[i] = k[i] & 0x1f; return 0; } EXPORT_SYMBOL_GPL(cast5_setkey); static struct crypto_alg alg = { .cra_name = "cast5", .cra_driver_name = "cast5-generic", .cra_priority = 100, .cra_flags = CRYPTO_ALG_TYPE_CIPHER, .cra_blocksize = CAST5_BLOCK_SIZE, .cra_ctxsize = sizeof(struct cast5_ctx), .cra_module = THIS_MODULE, .cra_u = { .cipher = { .cia_min_keysize = CAST5_MIN_KEY_SIZE, .cia_max_keysize = CAST5_MAX_KEY_SIZE, .cia_setkey = cast5_setkey, .cia_encrypt = cast5_encrypt, .cia_decrypt = cast5_decrypt } } }; static int __init cast5_mod_init(void) { return crypto_register_alg(&alg); } static void __exit cast5_mod_fini(void) { crypto_unregister_alg(&alg); } subsys_initcall(cast5_mod_init); module_exit(cast5_mod_fini); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("Cast5 Cipher Algorithm"); MODULE_ALIAS_CRYPTO("cast5"); MODULE_ALIAS_CRYPTO("cast5-generic");
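/*
 * A minimal in-kernel sketch (not part of this file) of driving the
 * cipher registered above through the symmetric-cipher API, using the
 * single-block 128-bit-key test vector from RFC 2144 Appendix B.  The
 * function name is illustrative and error handling is trimmed to the
 * essentials; real users would typically request "ecb(cast5)" or
 * another mode as appropriate.
 */
#include <crypto/cast5.h>
#include <crypto/skcipher.h>
#include <linux/scatterlist.h>
#include <linux/slab.h>
#include <linux/string.h>

static int cast5_smoke_test(void)
{
	static const u8 key[16] = {
		0x01, 0x23, 0x45, 0x67, 0x12, 0x34, 0x56, 0x78,
		0x23, 0x45, 0x67, 0x89, 0x34, 0x56, 0x78, 0x9a
	};
	static const u8 pt[CAST5_BLOCK_SIZE] = {
		0x01, 0x23, 0x45, 0x67, 0x89, 0xab, 0xcd, 0xef
	};
	struct crypto_sync_skcipher *tfm;
	struct scatterlist sg;
	u8 *buf;
	int err;

	tfm = crypto_alloc_sync_skcipher("ecb(cast5)", 0, 0);
	if (IS_ERR(tfm))
		return PTR_ERR(tfm);

	/* scatterlists must not point at the stack, hence kmemdup() */
	buf = kmemdup(pt, sizeof(pt), GFP_KERNEL);
	if (!buf) {
		err = -ENOMEM;
		goto out_free_tfm;
	}

	err = crypto_sync_skcipher_setkey(tfm, key, sizeof(key));
	if (!err) {
		SYNC_SKCIPHER_REQUEST_ON_STACK(req, tfm);

		sg_init_one(&sg, buf, CAST5_BLOCK_SIZE);
		skcipher_request_set_sync_tfm(req, tfm);
		skcipher_request_set_callback(req, 0, NULL, NULL);
		skcipher_request_set_crypt(req, &sg, &sg,
					   CAST5_BLOCK_SIZE, NULL);
		err = crypto_skcipher_encrypt(req);
		/* on success buf holds 23 8b 4f e5 84 7e 44 b2 */
	}

	kfree(buf);
out_free_tfm:
	crypto_free_sync_skcipher(tfm);
	return err;
}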
// SPDX-License-Identifier: GPL-2.0-only /* Kernel module to match Hop-by-Hop and Destination parameters. */ /* (C) 2001-2002 Andras Kis-Szabo <kisza@sch.bme.hu> */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/module.h> #include <linux/skbuff.h> #include <linux/ipv6.h> #include <linux/types.h> #include <net/checksum.h> #include <net/ipv6.h> #include <asm/byteorder.h> #include <linux/netfilter/x_tables.h> #include <linux/netfilter_ipv6/ip6_tables.h> #include <linux/netfilter_ipv6/ip6t_opts.h> MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("Xtables: IPv6 Hop-By-Hop and Destination Header match"); MODULE_AUTHOR("Andras Kis-Szabo <kisza@sch.bme.hu>"); MODULE_ALIAS("ip6t_dst"); /* * (Type & 0xC0) >> 6 * 0 -> ignorable * 1 -> must drop the packet * 2 -> send ICMP PARM PROB regardless and drop packet * 3 -> Send ICMP if not a multicast address and drop packet * (Type & 0x20) >> 5 * 0 -> invariant * 1 -> can change the routing * (Type & 0x1F) Type * 0 -> Pad1 (only 1 byte!) * 1 -> PadN LENGTH info (total length = length + 2) * C0 | 2 -> JUMBO 4 x x x x ( xxxx > 64k ) * 5 -> RTALERT 2 x x */ static struct xt_match hbh_mt6_reg[] __read_mostly; static bool hbh_mt6(const struct sk_buff *skb, struct xt_action_param *par) { struct ipv6_opt_hdr _optsh; const struct ipv6_opt_hdr *oh; const struct ip6t_opts *optinfo = par->matchinfo; unsigned int temp; unsigned int ptr = 0; unsigned int hdrlen = 0; bool ret = false; u8 _opttype; u8 _optlen; const u_int8_t *tp = NULL; const u_int8_t *lp = NULL; unsigned int optlen; int err; err = ipv6_find_hdr(skb, &ptr, (par->match == &hbh_mt6_reg[0]) ? NEXTHDR_HOP : NEXTHDR_DEST, NULL, NULL); if (err < 0) { if (err != -ENOENT) par->hotdrop = true; return false; } oh = skb_header_pointer(skb, ptr, sizeof(_optsh), &_optsh); if (oh == NULL) { par->hotdrop = true; return false; } hdrlen = ipv6_optlen(oh); if (skb->len - ptr < hdrlen) { /* Packet smaller than its length field */ return false; } pr_debug("IPv6 OPTS LEN %u %u ", hdrlen, oh->hdrlen); pr_debug("len %02X %04X %02X ", optinfo->hdrlen, hdrlen, (!(optinfo->flags & IP6T_OPTS_LEN) || ((optinfo->hdrlen == hdrlen) ^ !!(optinfo->invflags & IP6T_OPTS_INV_LEN)))); ret = (!(optinfo->flags & IP6T_OPTS_LEN) || ((optinfo->hdrlen == hdrlen) ^ !!(optinfo->invflags & IP6T_OPTS_INV_LEN))); ptr += 2; hdrlen -= 2; if (!(optinfo->flags & IP6T_OPTS_OPTS)) { return ret; } else { pr_debug("Strict "); pr_debug("#%d ", optinfo->optsnr); for (temp = 0; temp < optinfo->optsnr; temp++) { /* type field exists ?
*/ if (hdrlen < 1) break; tp = skb_header_pointer(skb, ptr, sizeof(_opttype), &_opttype); if (tp == NULL) break; /* Type check */ if (*tp != (optinfo->opts[temp] & 0xFF00) >> 8) { pr_debug("Tbad %02X %02X\n", *tp, (optinfo->opts[temp] & 0xFF00) >> 8); return false; } else { pr_debug("Tok "); } /* Length check */ if (*tp) { u16 spec_len; /* length field exists ? */ if (hdrlen < 2) break; lp = skb_header_pointer(skb, ptr + 1, sizeof(_optlen), &_optlen); if (lp == NULL) break; spec_len = optinfo->opts[temp] & 0x00FF; if (spec_len != 0x00FF && spec_len != *lp) { pr_debug("Lbad %02X %04X\n", *lp, spec_len); return false; } pr_debug("Lok "); optlen = *lp + 2; } else { pr_debug("Pad1\n"); optlen = 1; } /* Step to the next */ pr_debug("len%04X\n", optlen); if ((ptr > skb->len - optlen || hdrlen < optlen) && temp < optinfo->optsnr - 1) { pr_debug("new pointer is too large!\n"); break; } ptr += optlen; hdrlen -= optlen; } if (temp == optinfo->optsnr) return ret; else return false; } return false; } static int hbh_mt6_check(const struct xt_mtchk_param *par) { const struct ip6t_opts *optsinfo = par->matchinfo; if (optsinfo->invflags & ~IP6T_OPTS_INV_MASK) { pr_debug("unknown flags %X\n", optsinfo->invflags); return -EINVAL; } if (optsinfo->flags & IP6T_OPTS_NSTRICT) { pr_debug("Not strict - not implemented"); return -EINVAL; } return 0; } static struct xt_match hbh_mt6_reg[] __read_mostly = { { /* Note, hbh_mt6 relies on the order of hbh_mt6_reg */ .name = "hbh", .family = NFPROTO_IPV6, .match = hbh_mt6, .matchsize = sizeof(struct ip6t_opts), .checkentry = hbh_mt6_check, .me = THIS_MODULE, }, { .name = "dst", .family = NFPROTO_IPV6, .match = hbh_mt6, .matchsize = sizeof(struct ip6t_opts), .checkentry = hbh_mt6_check, .me = THIS_MODULE, }, }; static int __init hbh_mt6_init(void) { return xt_register_matches(hbh_mt6_reg, ARRAY_SIZE(hbh_mt6_reg)); } static void __exit hbh_mt6_exit(void) { xt_unregister_matches(hbh_mt6_reg, ARRAY_SIZE(hbh_mt6_reg)); } module_init(hbh_mt6_init); module_exit(hbh_mt6_exit);
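/*
 * A minimal sketch (not part of the module) of how a ruleset encodes
 * the opts[] array that hbh_mt6() walks above: each u16 carries the
 * option type in the high byte and the expected length in the low
 * byte, with a low byte of 0xFF meaning "any length".  Userspace
 * normally builds this through the ip6tables hbh/dst extensions (the
 * --hbh-opts/--dst-opts style options); the values below, router
 * alert (type 5, length 2) and PadN (type 1), are illustrative.
 */
#include <linux/netfilter_ipv6/ip6t_opts.h>

static void example_fill_opts(struct ip6t_opts *info)
{
	info->opts[0] = (0x05 << 8) | 0x02;	/* RTALERT, length 2 */
	info->opts[1] = (0x01 << 8) | 0xFF;	/* PadN, any length */
	info->optsnr = 2;
	info->flags = IP6T_OPTS_OPTS;		/* enable option matching */
}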
// SPDX-License-Identifier: GPL-2.0-or-later /* * * Bluetooth virtual HCI driver * * Copyright (C) 2000-2001 Qualcomm Incorporated * Copyright (C) 2002-2003 Maxim Krasnyansky <maxk@qualcomm.com> * Copyright (C) 2004-2006 Marcel Holtmann <marcel@holtmann.org> */ #include <linux/module.h> #include <linux/unaligned.h> #include <linux/atomic.h> #include <linux/kernel.h> #include <linux/init.h> #include <linux/slab.h> #include <linux/types.h> #include <linux/errno.h> #include <linux/sched.h> #include <linux/poll.h> #include <linux/skbuff.h> #include <linux/miscdevice.h> #include <linux/debugfs.h> #include <net/bluetooth/bluetooth.h> #include <net/bluetooth/hci_core.h> #define VERSION "1.5" static bool amp; struct vhci_data { struct hci_dev *hdev; wait_queue_head_t read_wait; struct sk_buff_head readq;
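	/*
	 * open_mutex serializes device creation against the open_timeout
	 * worker, which auto-creates the device if userspace has not sent
	 * a vendor packet within a second of opening /dev/vhci (see
	 * vhci_open() and vhci_get_user() below).
	 */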
struct mutex open_mutex; struct delayed_work open_timeout; struct work_struct suspend_work; bool suspended; bool wakeup; __u16 msft_opcode; bool aosp_capable; atomic_t initialized; }; static int vhci_open_dev(struct hci_dev *hdev) { return 0; } static int vhci_close_dev(struct hci_dev *hdev) { struct vhci_data *data = hci_get_drvdata(hdev); skb_queue_purge(&data->readq); return 0; } static int vhci_flush(struct hci_dev *hdev) { struct vhci_data *data = hci_get_drvdata(hdev); skb_queue_purge(&data->readq); return 0; } static int vhci_send_frame(struct hci_dev *hdev, struct sk_buff *skb) { struct vhci_data *data = hci_get_drvdata(hdev); memcpy(skb_push(skb, 1), &hci_skb_pkt_type(skb), 1); skb_queue_tail(&data->readq, skb); if (atomic_read(&data->initialized)) wake_up_interruptible(&data->read_wait); return 0; } static int vhci_get_data_path_id(struct hci_dev *hdev, u8 *data_path_id) { *data_path_id = 0; return 0; } static int vhci_get_codec_config_data(struct hci_dev *hdev, __u8 type, struct bt_codec *codec, __u8 *vnd_len, __u8 **vnd_data) { if (type != ESCO_LINK) return -EINVAL; *vnd_len = 0; *vnd_data = NULL; return 0; } static bool vhci_wakeup(struct hci_dev *hdev) { struct vhci_data *data = hci_get_drvdata(hdev); return data->wakeup; } static ssize_t force_suspend_read(struct file *file, char __user *user_buf, size_t count, loff_t *ppos) { struct vhci_data *data = file->private_data; char buf[3]; buf[0] = data->suspended ? 'Y' : 'N'; buf[1] = '\n'; buf[2] = '\0'; return simple_read_from_buffer(user_buf, count, ppos, buf, 2); } static void vhci_suspend_work(struct work_struct *work) { struct vhci_data *data = container_of(work, struct vhci_data, suspend_work); if (data->suspended) hci_suspend_dev(data->hdev); else hci_resume_dev(data->hdev); } static ssize_t force_suspend_write(struct file *file, const char __user *user_buf, size_t count, loff_t *ppos) { struct vhci_data *data = file->private_data; bool enable; int err; err = kstrtobool_from_user(user_buf, count, &enable); if (err) return err; if (data->suspended == enable) return -EALREADY; data->suspended = enable; schedule_work(&data->suspend_work); return count; } static const struct file_operations force_suspend_fops = { .open = simple_open, .read = force_suspend_read, .write = force_suspend_write, .llseek = default_llseek, }; static ssize_t force_wakeup_read(struct file *file, char __user *user_buf, size_t count, loff_t *ppos) { struct vhci_data *data = file->private_data; char buf[3]; buf[0] = data->wakeup ? 
'Y' : 'N'; buf[1] = '\n'; buf[2] = '\0'; return simple_read_from_buffer(user_buf, count, ppos, buf, 2); } static ssize_t force_wakeup_write(struct file *file, const char __user *user_buf, size_t count, loff_t *ppos) { struct vhci_data *data = file->private_data; bool enable; int err; err = kstrtobool_from_user(user_buf, count, &enable); if (err) return err; if (data->wakeup == enable) return -EALREADY; data->wakeup = enable; return count; } static const struct file_operations force_wakeup_fops = { .open = simple_open, .read = force_wakeup_read, .write = force_wakeup_write, .llseek = default_llseek, }; static int msft_opcode_set(void *data, u64 val) { struct vhci_data *vhci = data; if (val > 0xffff || hci_opcode_ogf(val) != 0x3f) return -EINVAL; if (vhci->msft_opcode) return -EALREADY; vhci->msft_opcode = val; return 0; } static int msft_opcode_get(void *data, u64 *val) { struct vhci_data *vhci = data; *val = vhci->msft_opcode; return 0; } DEFINE_DEBUGFS_ATTRIBUTE(msft_opcode_fops, msft_opcode_get, msft_opcode_set, "%llu\n"); static ssize_t aosp_capable_read(struct file *file, char __user *user_buf, size_t count, loff_t *ppos) { struct vhci_data *vhci = file->private_data; char buf[3]; buf[0] = vhci->aosp_capable ? 'Y' : 'N'; buf[1] = '\n'; buf[2] = '\0'; return simple_read_from_buffer(user_buf, count, ppos, buf, 2); } static ssize_t aosp_capable_write(struct file *file, const char __user *user_buf, size_t count, loff_t *ppos) { struct vhci_data *vhci = file->private_data; bool enable; int err; err = kstrtobool_from_user(user_buf, count, &enable); if (err) return err; if (!enable) return -EINVAL; if (vhci->aosp_capable) return -EALREADY; vhci->aosp_capable = enable; return count; } static const struct file_operations aosp_capable_fops = { .open = simple_open, .read = aosp_capable_read, .write = aosp_capable_write, .llseek = default_llseek, }; static int vhci_setup(struct hci_dev *hdev) { struct vhci_data *vhci = hci_get_drvdata(hdev); if (vhci->msft_opcode) hci_set_msft_opcode(hdev, vhci->msft_opcode); if (vhci->aosp_capable) hci_set_aosp_capable(hdev); return 0; } static void vhci_coredump(struct hci_dev *hdev) { /* No need to do anything */ } static void vhci_coredump_hdr(struct hci_dev *hdev, struct sk_buff *skb) { char buf[80]; snprintf(buf, sizeof(buf), "Controller Name: vhci_ctrl\n"); skb_put_data(skb, buf, strlen(buf)); snprintf(buf, sizeof(buf), "Firmware Version: vhci_fw\n"); skb_put_data(skb, buf, strlen(buf)); snprintf(buf, sizeof(buf), "Driver: vhci_drv\n"); skb_put_data(skb, buf, strlen(buf)); snprintf(buf, sizeof(buf), "Vendor: vhci\n"); skb_put_data(skb, buf, strlen(buf)); } #define MAX_COREDUMP_LINE_LEN 40 struct devcoredump_test_data { enum devcoredump_state state; unsigned int timeout; char data[MAX_COREDUMP_LINE_LEN]; }; static inline void force_devcd_timeout(struct hci_dev *hdev, unsigned int timeout) { #ifdef CONFIG_DEV_COREDUMP hdev->dump.timeout = msecs_to_jiffies(timeout * 1000); #endif } static ssize_t force_devcd_write(struct file *file, const char __user *user_buf, size_t count, loff_t *ppos) { struct vhci_data *data = file->private_data; struct hci_dev *hdev = data->hdev; struct sk_buff *skb = NULL; struct devcoredump_test_data dump_data; size_t data_size; int ret; if (count < offsetof(struct devcoredump_test_data, data) || count > sizeof(dump_data)) return -EINVAL; if (copy_from_user(&dump_data, user_buf, count)) return -EFAULT; data_size = count - offsetof(struct devcoredump_test_data, data); skb = alloc_skb(data_size, GFP_ATOMIC); if (!skb) return -ENOMEM; 
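	/*
	 * Copy the user-supplied dump payload into the skb and drive the
	 * devcoredump core through the state the test requested.
	 */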
skb_put_data(skb, &dump_data.data, data_size); hci_devcd_register(hdev, vhci_coredump, vhci_coredump_hdr, NULL); /* Force the devcoredump timeout */ if (dump_data.timeout) force_devcd_timeout(hdev, dump_data.timeout); ret = hci_devcd_init(hdev, skb->len); if (ret) { BT_ERR("Failed to generate devcoredump"); kfree_skb(skb); return ret; } hci_devcd_append(hdev, skb); switch (dump_data.state) { case HCI_DEVCOREDUMP_DONE: hci_devcd_complete(hdev); break; case HCI_DEVCOREDUMP_ABORT: hci_devcd_abort(hdev); break; case HCI_DEVCOREDUMP_TIMEOUT: /* Do nothing */ break; default: return -EINVAL; } return count; } static const struct file_operations force_devcoredump_fops = { .open = simple_open, .write = force_devcd_write, }; static int __vhci_create_device(struct vhci_data *data, __u8 opcode) { struct hci_dev *hdev; struct sk_buff *skb; if (data->hdev) return -EBADFD; /* bits 2-5 are reserved (must be zero) */ if (opcode & 0x3c) return -EINVAL; skb = bt_skb_alloc(4, GFP_KERNEL); if (!skb) return -ENOMEM; hdev = hci_alloc_dev(); if (!hdev) { kfree_skb(skb); return -ENOMEM; } data->hdev = hdev; hdev->bus = HCI_VIRTUAL; hci_set_drvdata(hdev, data); hdev->open = vhci_open_dev; hdev->close = vhci_close_dev; hdev->flush = vhci_flush; hdev->send = vhci_send_frame; hdev->get_data_path_id = vhci_get_data_path_id; hdev->get_codec_config_data = vhci_get_codec_config_data; hdev->wakeup = vhci_wakeup; hdev->setup = vhci_setup; set_bit(HCI_QUIRK_NON_PERSISTENT_SETUP, &hdev->quirks); /* bit 6 is for external configuration */ if (opcode & 0x40) set_bit(HCI_QUIRK_EXTERNAL_CONFIG, &hdev->quirks); /* bit 7 is for raw device */ if (opcode & 0x80) set_bit(HCI_QUIRK_RAW_DEVICE, &hdev->quirks); if (hci_register_dev(hdev) < 0) { BT_ERR("Can't register HCI device"); hci_free_dev(hdev); data->hdev = NULL; kfree_skb(skb); return -EBUSY; } debugfs_create_file("force_suspend", 0644, hdev->debugfs, data, &force_suspend_fops); debugfs_create_file("force_wakeup", 0644, hdev->debugfs, data, &force_wakeup_fops); if (IS_ENABLED(CONFIG_BT_MSFTEXT)) debugfs_create_file("msft_opcode", 0644, hdev->debugfs, data, &msft_opcode_fops); if (IS_ENABLED(CONFIG_BT_AOSPEXT)) debugfs_create_file("aosp_capable", 0644, hdev->debugfs, data, &aosp_capable_fops); debugfs_create_file("force_devcoredump", 0644, hdev->debugfs, data, &force_devcoredump_fops); hci_skb_pkt_type(skb) = HCI_VENDOR_PKT; skb_put_u8(skb, 0xff); skb_put_u8(skb, opcode); put_unaligned_le16(hdev->id, skb_put(skb, 2)); skb_queue_head(&data->readq, skb); atomic_inc(&data->initialized); wake_up_interruptible(&data->read_wait); return 0; } static int vhci_create_device(struct vhci_data *data, __u8 opcode) { int err; mutex_lock(&data->open_mutex); err = __vhci_create_device(data, opcode); mutex_unlock(&data->open_mutex); return err; } static inline ssize_t vhci_get_user(struct vhci_data *data, struct iov_iter *from) { size_t len = iov_iter_count(from); struct sk_buff *skb; __u8 pkt_type, opcode; int ret; if (len < 2 || len > HCI_MAX_FRAME_SIZE) return -EINVAL; skb = bt_skb_alloc(len, GFP_KERNEL); if (!skb) return -ENOMEM; if (!copy_from_iter_full(skb_put(skb, len), len, from)) { kfree_skb(skb); return -EFAULT; } pkt_type = *((__u8 *) skb->data); skb_pull(skb, 1); switch (pkt_type) { case HCI_EVENT_PKT: case HCI_ACLDATA_PKT: case HCI_SCODATA_PKT: case HCI_ISODATA_PKT: if (!data->hdev) { kfree_skb(skb); return -ENODEV; } hci_skb_pkt_type(skb) = pkt_type; ret = hci_recv_frame(data->hdev, skb); break; case HCI_VENDOR_PKT: cancel_delayed_work_sync(&data->open_timeout); opcode = *((__u8 *) 
skb->data); skb_pull(skb, 1); if (skb->len > 0) { kfree_skb(skb); return -EINVAL; } kfree_skb(skb); ret = vhci_create_device(data, opcode); break; default: kfree_skb(skb); return -EINVAL; } return (ret < 0) ? ret : len; } static inline ssize_t vhci_put_user(struct vhci_data *data, struct sk_buff *skb, char __user *buf, int count) { char __user *ptr = buf; int len; len = min_t(unsigned int, skb->len, count); if (copy_to_user(ptr, skb->data, len)) return -EFAULT; if (!data->hdev) return len; data->hdev->stat.byte_tx += len; switch (hci_skb_pkt_type(skb)) { case HCI_COMMAND_PKT: data->hdev->stat.cmd_tx++; break; case HCI_ACLDATA_PKT: data->hdev->stat.acl_tx++; break; case HCI_SCODATA_PKT: data->hdev->stat.sco_tx++; break; } return len; } static ssize_t vhci_read(struct file *file, char __user *buf, size_t count, loff_t *pos) { struct vhci_data *data = file->private_data; struct sk_buff *skb; ssize_t ret = 0; while (count) { skb = skb_dequeue(&data->readq); if (skb) { ret = vhci_put_user(data, skb, buf, count); if (ret < 0) skb_queue_head(&data->readq, skb); else kfree_skb(skb); break; } if (file->f_flags & O_NONBLOCK) { ret = -EAGAIN; break; } ret = wait_event_interruptible(data->read_wait, !skb_queue_empty(&data->readq)); if (ret < 0) break; } return ret; } static ssize_t vhci_write(struct kiocb *iocb, struct iov_iter *from) { struct file *file = iocb->ki_filp; struct vhci_data *data = file->private_data; return vhci_get_user(data, from); } static __poll_t vhci_poll(struct file *file, poll_table *wait) { struct vhci_data *data = file->private_data; poll_wait(file, &data->read_wait, wait); if (!skb_queue_empty(&data->readq)) return EPOLLIN | EPOLLRDNORM; return EPOLLOUT | EPOLLWRNORM; } static void vhci_open_timeout(struct work_struct *work) { struct vhci_data *data = container_of(work, struct vhci_data, open_timeout.work); vhci_create_device(data, 0x00); } static int vhci_open(struct inode *inode, struct file *file) { struct vhci_data *data; data = kzalloc(sizeof(*data), GFP_KERNEL); if (!data) return -ENOMEM; skb_queue_head_init(&data->readq); init_waitqueue_head(&data->read_wait); mutex_init(&data->open_mutex); INIT_DELAYED_WORK(&data->open_timeout, vhci_open_timeout); INIT_WORK(&data->suspend_work, vhci_suspend_work); file->private_data = data; nonseekable_open(inode, file); schedule_delayed_work(&data->open_timeout, msecs_to_jiffies(1000)); return 0; } static int vhci_release(struct inode *inode, struct file *file) { struct vhci_data *data = file->private_data; struct hci_dev *hdev; cancel_delayed_work_sync(&data->open_timeout); flush_work(&data->suspend_work); hdev = data->hdev; if (hdev) { hci_unregister_dev(hdev); hci_free_dev(hdev); } skb_queue_purge(&data->readq); file->private_data = NULL; kfree(data); return 0; } static const struct file_operations vhci_fops = { .owner = THIS_MODULE, .read = vhci_read, .write_iter = vhci_write, .poll = vhci_poll, .open = vhci_open, .release = vhci_release, }; static struct miscdevice vhci_miscdev = { .name = "vhci", .fops = &vhci_fops, .minor = VHCI_MINOR, }; module_misc_device(vhci_miscdev); module_param(amp, bool, 0644); MODULE_PARM_DESC(amp, "Create AMP controller device"); MODULE_AUTHOR("Marcel Holtmann <marcel@holtmann.org>"); MODULE_DESCRIPTION("Bluetooth virtual HCI driver ver " VERSION); MODULE_VERSION(VERSION); MODULE_LICENSE("GPL"); MODULE_ALIAS("devname:vhci"); MODULE_ALIAS_MISCDEV(VHCI_MINOR); |
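/*
 * Example (not part of the driver above): a minimal userspace sketch of
 * the vendor-packet handshake implemented by vhci_get_user() and
 * __vhci_create_device(). Writing a two-byte frame tagged with the 0xff
 * HCI_VENDOR_PKT type and a controller opcode creates the device; the
 * driver queues a reply of { 0xff, echoed opcode, hci index as
 * little-endian u16 }. The /dev/vhci node and the byte layout are taken
 * from the code above; everything else here is illustrative only.
 */
#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	uint8_t req[2] = { 0xff, 0x00 };	/* HCI_VENDOR_PKT, opcode 0 */
	uint8_t rsp[4];
	int fd;

	fd = open("/dev/vhci", O_RDWR);
	if (fd < 0)
		return 1;

	/*
	 * Create the controller explicitly; otherwise vhci_open_timeout()
	 * does it with opcode 0x00 after a one second delay.
	 */
	if (write(fd, req, sizeof(req)) != sizeof(req) ||
	    read(fd, rsp, sizeof(rsp)) != sizeof(rsp)) {
		close(fd);
		return 1;
	}

	printf("created hci%d\n", rsp[2] | (rsp[3] << 8));
	close(fd);
	return 0;
}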
// SPDX-License-Identifier: GPL-2.0-or-later /* * OSS compatible sequencer driver * * Copyright (C) 1998,99 Takashi Iwai <tiwai@suse.de> */ #include "seq_oss_device.h" #include "seq_oss_synth.h" #include "seq_oss_midi.h" #include "seq_oss_event.h" #include "seq_oss_timer.h" #include <sound/seq_oss_legacy.h> #include "seq_oss_readq.h" #include "seq_oss_writeq.h" #include <linux/nospec.h> /* * prototypes */ static int extended_event(struct seq_oss_devinfo *dp, union evrec *q, struct snd_seq_event *ev); static int chn_voice_event(struct seq_oss_devinfo *dp, union evrec *event_rec, struct snd_seq_event *ev); static int chn_common_event(struct seq_oss_devinfo *dp, union evrec *event_rec, struct snd_seq_event *ev); static int timing_event(struct seq_oss_devinfo *dp, union evrec *event_rec, struct snd_seq_event *ev); static int local_event(struct seq_oss_devinfo *dp, union evrec *event_rec, struct snd_seq_event *ev); static int old_event(struct seq_oss_devinfo *dp, union evrec *q, struct snd_seq_event *ev); static int note_on_event(struct seq_oss_devinfo *dp, int dev, int ch, int note, int vel, struct snd_seq_event *ev); static int note_off_event(struct seq_oss_devinfo *dp, int dev, int ch, int note, int vel, struct snd_seq_event *ev); static int set_note_event(struct seq_oss_devinfo *dp, int dev, int type, int ch, int note, int vel, struct snd_seq_event *ev); static int set_control_event(struct seq_oss_devinfo *dp, int dev, int type, int ch, int param, int val, struct snd_seq_event *ev); static int set_echo_event(struct seq_oss_devinfo *dp, union evrec *rec, struct snd_seq_event *ev); /* * convert an OSS event to ALSA event * return 0 : enqueued * non-zero : invalid - 
ignored */ int snd_seq_oss_process_event(struct seq_oss_devinfo *dp, union evrec *q, struct snd_seq_event *ev) { switch (q->s.code) { case SEQ_EXTENDED: return extended_event(dp, q, ev); case EV_CHN_VOICE: return chn_voice_event(dp, q, ev); case EV_CHN_COMMON: return chn_common_event(dp, q, ev); case EV_TIMING: return timing_event(dp, q, ev); case EV_SEQ_LOCAL: return local_event(dp, q, ev); case EV_SYSEX: return snd_seq_oss_synth_sysex(dp, q->x.dev, q->x.buf, ev); case SEQ_MIDIPUTC: if (dp->seq_mode == SNDRV_SEQ_OSS_MODE_MUSIC) return -EINVAL; /* put a midi byte */ if (! is_write_mode(dp->file_mode)) break; if (snd_seq_oss_midi_open(dp, q->s.dev, SNDRV_SEQ_OSS_FILE_WRITE)) break; if (snd_seq_oss_midi_filemode(dp, q->s.dev) & SNDRV_SEQ_OSS_FILE_WRITE) return snd_seq_oss_midi_putc(dp, q->s.dev, q->s.parm1, ev); break; case SEQ_ECHO: if (dp->seq_mode == SNDRV_SEQ_OSS_MODE_MUSIC) return -EINVAL; return set_echo_event(dp, q, ev); case SEQ_PRIVATE: if (dp->seq_mode == SNDRV_SEQ_OSS_MODE_MUSIC) return -EINVAL; return snd_seq_oss_synth_raw_event(dp, q->c[1], q->c, ev); default: if (dp->seq_mode == SNDRV_SEQ_OSS_MODE_MUSIC) return -EINVAL; return old_event(dp, q, ev); } return -EINVAL; } /* old type events: mode1 only */ static int old_event(struct seq_oss_devinfo *dp, union evrec *q, struct snd_seq_event *ev) { switch (q->s.code) { case SEQ_NOTEOFF: return note_off_event(dp, 0, q->n.chn, q->n.note, q->n.vel, ev); case SEQ_NOTEON: return note_on_event(dp, 0, q->n.chn, q->n.note, q->n.vel, ev); case SEQ_WAIT: /* skip */ break; case SEQ_PGMCHANGE: return set_control_event(dp, 0, SNDRV_SEQ_EVENT_PGMCHANGE, q->n.chn, 0, q->n.note, ev); case SEQ_SYNCTIMER: return snd_seq_oss_timer_reset(dp->timer); } return -EINVAL; } /* 8bytes extended event: mode1 only */ static int extended_event(struct seq_oss_devinfo *dp, union evrec *q, struct snd_seq_event *ev) { int val; switch (q->e.cmd) { case SEQ_NOTEOFF: return note_off_event(dp, q->e.dev, q->e.chn, q->e.p1, q->e.p2, ev); case SEQ_NOTEON: return note_on_event(dp, q->e.dev, q->e.chn, q->e.p1, q->e.p2, ev); case SEQ_PGMCHANGE: return set_control_event(dp, q->e.dev, SNDRV_SEQ_EVENT_PGMCHANGE, q->e.chn, 0, q->e.p1, ev); case SEQ_AFTERTOUCH: return set_control_event(dp, q->e.dev, SNDRV_SEQ_EVENT_CHANPRESS, q->e.chn, 0, q->e.p1, ev); case SEQ_BALANCE: /* convert -128:127 to 0:127 */ val = (char)q->e.p1; val = (val + 128) / 2; return set_control_event(dp, q->e.dev, SNDRV_SEQ_EVENT_CONTROLLER, q->e.chn, CTL_PAN, val, ev); case SEQ_CONTROLLER: val = ((short)q->e.p3 << 8) | (short)q->e.p2; switch (q->e.p1) { case CTRL_PITCH_BENDER: /* SEQ1 V2 control */ /* -0x2000:0x1fff */ return set_control_event(dp, q->e.dev, SNDRV_SEQ_EVENT_PITCHBEND, q->e.chn, 0, val, ev); case CTRL_PITCH_BENDER_RANGE: /* conversion: 100/semitone -> 128/semitone */ return set_control_event(dp, q->e.dev, SNDRV_SEQ_EVENT_REGPARAM, q->e.chn, 0, val*128/100, ev); default: return set_control_event(dp, q->e.dev, SNDRV_SEQ_EVENT_CONTROL14, q->e.chn, q->e.p1, val, ev); } case SEQ_VOLMODE: return snd_seq_oss_synth_raw_event(dp, q->e.dev, q->c, ev); } return -EINVAL; } /* channel voice events: mode1 and 2 */ static int chn_voice_event(struct seq_oss_devinfo *dp, union evrec *q, struct snd_seq_event *ev) { if (q->v.chn >= 32) return -EINVAL; switch (q->v.cmd) { case MIDI_NOTEON: return note_on_event(dp, q->v.dev, q->v.chn, q->v.note, q->v.parm, ev); case MIDI_NOTEOFF: return note_off_event(dp, q->v.dev, q->v.chn, q->v.note, q->v.parm, ev); case MIDI_KEY_PRESSURE: return set_note_event(dp, q->v.dev, 
SNDRV_SEQ_EVENT_KEYPRESS, q->v.chn, q->v.note, q->v.parm, ev); } return -EINVAL; } /* channel common events: mode1 and 2 */ static int chn_common_event(struct seq_oss_devinfo *dp, union evrec *q, struct snd_seq_event *ev) { if (q->l.chn >= 32) return -EINVAL; switch (q->l.cmd) { case MIDI_PGM_CHANGE: return set_control_event(dp, q->l.dev, SNDRV_SEQ_EVENT_PGMCHANGE, q->l.chn, 0, q->l.p1, ev); case MIDI_CTL_CHANGE: return set_control_event(dp, q->l.dev, SNDRV_SEQ_EVENT_CONTROLLER, q->l.chn, q->l.p1, q->l.val, ev); case MIDI_PITCH_BEND: /* conversion: 0:0x3fff -> -0x2000:0x1fff */ return set_control_event(dp, q->l.dev, SNDRV_SEQ_EVENT_PITCHBEND, q->l.chn, 0, q->l.val - 8192, ev); case MIDI_CHN_PRESSURE: return set_control_event(dp, q->l.dev, SNDRV_SEQ_EVENT_CHANPRESS, q->l.chn, 0, q->l.val, ev); } return -EINVAL; } /* timer events: mode1 and mode2 */ static int timing_event(struct seq_oss_devinfo *dp, union evrec *q, struct snd_seq_event *ev) { switch (q->t.cmd) { case TMR_ECHO: if (dp->seq_mode == SNDRV_SEQ_OSS_MODE_MUSIC) return set_echo_event(dp, q, ev); else { union evrec tmp; memset(&tmp, 0, sizeof(tmp)); /* XXX: only for little-endian! */ tmp.echo = (q->t.time << 8) | SEQ_ECHO; return set_echo_event(dp, &tmp, ev); } case TMR_STOP: if (dp->seq_mode) return snd_seq_oss_timer_stop(dp->timer); return 0; case TMR_CONTINUE: if (dp->seq_mode) return snd_seq_oss_timer_continue(dp->timer); return 0; case TMR_TEMPO: if (dp->seq_mode) return snd_seq_oss_timer_tempo(dp->timer, q->t.time); return 0; } return -EINVAL; } /* local events: mode1 and 2 */ static int local_event(struct seq_oss_devinfo *dp, union evrec *q, struct snd_seq_event *ev) { return -EINVAL; } /* * process note-on event for OSS synth * three different modes are available: * - SNDRV_SEQ_OSS_PROCESS_EVENTS (for one-voice per channel mode) * Accept note 255 as volume change. * - SNDRV_SEQ_OSS_PASS_EVENTS * Pass all events to lowlevel driver anyway * - SNDRV_SEQ_OSS_PROCESS_KEYPRESS (mostly for Emu8000) * Use key-pressure if note >= 128 */ static int note_on_event(struct seq_oss_devinfo *dp, int dev, int ch, int note, int vel, struct snd_seq_event *ev) { struct seq_oss_synthinfo *info; info = snd_seq_oss_synth_info(dp, dev); if (!info) return -ENXIO; switch (info->arg.event_passing) { case SNDRV_SEQ_OSS_PROCESS_EVENTS: if (! info->ch || ch < 0 || ch >= info->nr_voices) { /* pass directly */ return set_note_event(dp, dev, SNDRV_SEQ_EVENT_NOTEON, ch, note, vel, ev); } ch = array_index_nospec(ch, info->nr_voices); if (note == 255 && info->ch[ch].note >= 0) { /* volume control */ int type; //if (! 
vel) /* set volume to zero -- note off */ // type = SNDRV_SEQ_EVENT_NOTEOFF; //else if (info->ch[ch].vel) /* sample already started -- volume change */ type = SNDRV_SEQ_EVENT_KEYPRESS; else /* sample not started -- start now */ type = SNDRV_SEQ_EVENT_NOTEON; info->ch[ch].vel = vel; return set_note_event(dp, dev, type, ch, info->ch[ch].note, vel, ev); } else if (note >= 128) return -EINVAL; /* invalid */ if (note != info->ch[ch].note && info->ch[ch].note >= 0) /* note changed - note off at beginning */ set_note_event(dp, dev, SNDRV_SEQ_EVENT_NOTEOFF, ch, info->ch[ch].note, 0, ev); /* set current status */ info->ch[ch].note = note; info->ch[ch].vel = vel; if (vel) /* non-zero velocity - start the note now */ return set_note_event(dp, dev, SNDRV_SEQ_EVENT_NOTEON, ch, note, vel, ev); return -EINVAL; case SNDRV_SEQ_OSS_PASS_EVENTS: /* pass the event anyway */ return set_note_event(dp, dev, SNDRV_SEQ_EVENT_NOTEON, ch, note, vel, ev); case SNDRV_SEQ_OSS_PROCESS_KEYPRESS: if (note >= 128) /* key pressure: shifted by 128 */ return set_note_event(dp, dev, SNDRV_SEQ_EVENT_KEYPRESS, ch, note - 128, vel, ev); else /* normal note-on event */ return set_note_event(dp, dev, SNDRV_SEQ_EVENT_NOTEON, ch, note, vel, ev); } return -EINVAL; } /* * process note-off event for OSS synth */ static int note_off_event(struct seq_oss_devinfo *dp, int dev, int ch, int note, int vel, struct snd_seq_event *ev) { struct seq_oss_synthinfo *info; info = snd_seq_oss_synth_info(dp, dev); if (!info) return -ENXIO; switch (info->arg.event_passing) { case SNDRV_SEQ_OSS_PROCESS_EVENTS: if (! info->ch || ch < 0 || ch >= info->nr_voices) { /* pass directly */ return set_note_event(dp, dev, SNDRV_SEQ_EVENT_NOTEON, ch, note, vel, ev); } ch = array_index_nospec(ch, info->nr_voices); if (info->ch[ch].note >= 0) { note = info->ch[ch].note; info->ch[ch].vel = 0; info->ch[ch].note = -1; return set_note_event(dp, dev, SNDRV_SEQ_EVENT_NOTEOFF, ch, note, vel, ev); } return -EINVAL; /* invalid */ case SNDRV_SEQ_OSS_PASS_EVENTS: case SNDRV_SEQ_OSS_PROCESS_KEYPRESS: /* pass the event anyway */ return set_note_event(dp, dev, SNDRV_SEQ_EVENT_NOTEOFF, ch, note, vel, ev); } return -EINVAL; } /* * create a note event */ static int set_note_event(struct seq_oss_devinfo *dp, int dev, int type, int ch, int note, int vel, struct snd_seq_event *ev) { if (!snd_seq_oss_synth_info(dp, dev)) return -ENXIO; ev->type = type; snd_seq_oss_synth_addr(dp, dev, ev); ev->data.note.channel = ch; ev->data.note.note = note; ev->data.note.velocity = vel; return 0; } /* * create a control event */ static int set_control_event(struct seq_oss_devinfo *dp, int dev, int type, int ch, int param, int val, struct snd_seq_event *ev) { if (!snd_seq_oss_synth_info(dp, dev)) return -ENXIO; ev->type = type; snd_seq_oss_synth_addr(dp, dev, ev); ev->data.control.channel = ch; ev->data.control.param = param; ev->data.control.value = val; return 0; } /* * create an echo event */ static int set_echo_event(struct seq_oss_devinfo *dp, union evrec *rec, struct snd_seq_event *ev) { ev->type = SNDRV_SEQ_EVENT_ECHO; /* echo back to itself */ snd_seq_oss_fill_addr(dp, ev, dp->addr.client, dp->addr.port); memcpy(&ev->data, rec, LONG_EVENT_SIZE); return 0; } /* * event input callback from ALSA sequencer: * the echo event is processed here. 
*/ int snd_seq_oss_event_input(struct snd_seq_event *ev, int direct, void *private_data, int atomic, int hop) { struct seq_oss_devinfo *dp = (struct seq_oss_devinfo *)private_data; union evrec *rec; if (ev->type != SNDRV_SEQ_EVENT_ECHO) return snd_seq_oss_midi_input(ev, direct, private_data); if (ev->source.client != dp->cseq) return 0; /* ignored */ rec = (union evrec*)&ev->data; if (rec->s.code == SEQ_SYNCTIMER) { /* sync echo back */ snd_seq_oss_writeq_wakeup(dp->writeq, rec->t.time); } else { /* echo back event */ if (dp->readq == NULL) return 0; snd_seq_oss_readq_put_event(dp->readq, rec); } return 0; }
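/*
 * Example (not part of the sequencer code above): the OSS-to-ALSA value
 * conversions done in extended_event() and chn_common_event(), pulled
 * out as a standalone, checkable sketch. The helper names are invented
 * for illustration; the arithmetic matches the handlers above: balance
 * -128..127 -> pan 0..127, pitch bend 0..0x3fff -> -0x2000..0x1fff,
 * bender range 100 units per semitone -> 128 per semitone.
 */
#include <assert.h>

static int oss_balance_to_pan(unsigned char p1)
{
	int val = (signed char)p1;	/* same cast as SEQ_BALANCE */

	return (val + 128) / 2;
}

static int oss_pitchbend_to_alsa(int val)
{
	return val - 8192;		/* MIDI_PITCH_BEND: re-centre at 0 */
}

static int oss_bender_range_to_alsa(int val)
{
	return val * 128 / 100;		/* CTRL_PITCH_BENDER_RANGE */
}

int main(void)
{
	assert(oss_balance_to_pan(0) == 64);		/* centre */
	assert(oss_balance_to_pan(0x80) == 0);		/* -128: hard left */
	assert(oss_balance_to_pan(127) == 127);		/* hard right */
	assert(oss_pitchbend_to_alsa(0x2000) == 0);	/* no bend */
	assert(oss_bender_range_to_alsa(200) == 256);	/* two semitones */
	return 0;
}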
// SPDX-License-Identifier: GPL-2.0 /* * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc. * Copyright (c) 2008 Dave Chinner * All Rights Reserved. */ #include "xfs.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" #include "xfs_log_format.h" #include "xfs_trans_resv.h" #include "xfs_mount.h" #include "xfs_trans.h" #include "xfs_trans_priv.h" #include "xfs_trace.h" #include "xfs_errortag.h" #include "xfs_error.h" #include "xfs_log.h" #include "xfs_log_priv.h" #ifdef DEBUG /* * Check that the list is sorted as it should be. * * Called with the ail lock held, but we don't want to assert fail with it * held otherwise we'll lock everything up and won't be able to debug the * cause. Hence we sample and check the state under the AIL lock and return if * everything is fine, otherwise we drop the lock and run the ASSERT checks. * Asserts may not be fatal, so pick the lock back up and continue onwards. */ STATIC void xfs_ail_check( struct xfs_ail *ailp, struct xfs_log_item *lip) __must_hold(&ailp->ail_lock) { struct xfs_log_item *prev_lip; struct xfs_log_item *next_lip; xfs_lsn_t prev_lsn = NULLCOMMITLSN; xfs_lsn_t next_lsn = NULLCOMMITLSN; xfs_lsn_t lsn; bool in_ail; if (list_empty(&ailp->ail_head)) return; /* * Sample then check the next and previous entries are valid. */ in_ail = test_bit(XFS_LI_IN_AIL, &lip->li_flags); prev_lip = list_entry(lip->li_ail.prev, struct xfs_log_item, li_ail); if (&prev_lip->li_ail != &ailp->ail_head) prev_lsn = prev_lip->li_lsn; next_lip = list_entry(lip->li_ail.next, struct xfs_log_item, li_ail); if (&next_lip->li_ail != &ailp->ail_head) next_lsn = next_lip->li_lsn; lsn = lip->li_lsn; if (in_ail && (prev_lsn == NULLCOMMITLSN || XFS_LSN_CMP(prev_lsn, lsn) <= 0) && (next_lsn == NULLCOMMITLSN || XFS_LSN_CMP(next_lsn, lsn) >= 0)) return; spin_unlock(&ailp->ail_lock); ASSERT(in_ail); ASSERT(prev_lsn == NULLCOMMITLSN || XFS_LSN_CMP(prev_lsn, lsn) <= 0); ASSERT(next_lsn == NULLCOMMITLSN || XFS_LSN_CMP(next_lsn, lsn) >= 0); spin_lock(&ailp->ail_lock); } #else /* !DEBUG */ #define xfs_ail_check(a,l) #endif /* DEBUG */ /* * Return a pointer to the last item in the AIL. If the AIL is empty, then * return NULL. */ static struct xfs_log_item * xfs_ail_max( struct xfs_ail *ailp) { if (list_empty(&ailp->ail_head)) return NULL; return list_entry(ailp->ail_head.prev, struct xfs_log_item, li_ail); } /* * Return a pointer to the item which follows the given item in the AIL. If * the given item is the last item in the list, then return NULL. */ static struct xfs_log_item * xfs_ail_next( struct xfs_ail *ailp, struct xfs_log_item *lip) { if (lip->li_ail.next == &ailp->ail_head) return NULL; return list_first_entry(&lip->li_ail, struct xfs_log_item, li_ail); } /* * This is called by the log manager code to determine the LSN of the tail of * the log. This is exactly the LSN of the first item in the AIL. If the AIL * is empty, then this function returns 0. * * We need the AIL lock in order to get a coherent read of the lsn of the last * item in the AIL. 
*/ static xfs_lsn_t __xfs_ail_min_lsn( struct xfs_ail *ailp) { struct xfs_log_item *lip = xfs_ail_min(ailp); if (lip) return lip->li_lsn; return 0; } xfs_lsn_t xfs_ail_min_lsn( struct xfs_ail *ailp) { xfs_lsn_t lsn; spin_lock(&ailp->ail_lock); lsn = __xfs_ail_min_lsn(ailp); spin_unlock(&ailp->ail_lock); return lsn; } /* * The cursor keeps track of where our current traversal is up to by tracking * the next item in the list for us. However, for this to be safe, removing an * object from the AIL needs to invalidate any cursor that points to it. Hence * the traversal cursor needs to be linked to the struct xfs_ail so that * deletion can search all the active cursors for invalidation. */ STATIC void xfs_trans_ail_cursor_init( struct xfs_ail *ailp, struct xfs_ail_cursor *cur) { cur->item = NULL; list_add_tail(&cur->list, &ailp->ail_cursors); } /* * Get the next item in the traversal and advance the cursor. If the cursor * was invalidated (indicated by the low bit being set in the saved item * pointer), restart the traversal from the first item in the AIL. */ struct xfs_log_item * xfs_trans_ail_cursor_next( struct xfs_ail *ailp, struct xfs_ail_cursor *cur) { struct xfs_log_item *lip = cur->item; if ((uintptr_t)lip & 1) lip = xfs_ail_min(ailp); if (lip) cur->item = xfs_ail_next(ailp, lip); return lip; } /* * When the traversal is complete, we need to remove the cursor from the list * of traversing cursors. */ void xfs_trans_ail_cursor_done( struct xfs_ail_cursor *cur) { cur->item = NULL; list_del_init(&cur->list); } /* * Invalidate any cursor that is pointing to this item. This is called when an * item is removed from the AIL. Any cursor pointing to this object is now * invalid and the traversal needs to be terminated so it doesn't reference a * freed object. We set the low bit of the cursor item pointer so we can * distinguish between an invalidation and the end of the list when getting the * next item from the cursor. */ STATIC void xfs_trans_ail_cursor_clear( struct xfs_ail *ailp, struct xfs_log_item *lip) { struct xfs_ail_cursor *cur; list_for_each_entry(cur, &ailp->ail_cursors, list) { if (cur->item == lip) cur->item = (struct xfs_log_item *) ((uintptr_t)cur->item | 1); } } /* * Find the first item in the AIL with the given @lsn by searching in ascending * LSN order and initialise the cursor to point to the next item for an * ascending traversal. Pass a @lsn of zero to initialise the cursor to the * first item in the AIL. Returns NULL if the list is empty. */ struct xfs_log_item * xfs_trans_ail_cursor_first( struct xfs_ail *ailp, struct xfs_ail_cursor *cur, xfs_lsn_t lsn) { struct xfs_log_item *lip; xfs_trans_ail_cursor_init(ailp, cur); if (lsn == 0) { lip = xfs_ail_min(ailp); goto out; } list_for_each_entry(lip, &ailp->ail_head, li_ail) { if (XFS_LSN_CMP(lip->li_lsn, lsn) >= 0) goto out; } return NULL; out: if (lip) cur->item = xfs_ail_next(ailp, lip); return lip; } static struct xfs_log_item * __xfs_trans_ail_cursor_last( struct xfs_ail *ailp, xfs_lsn_t lsn) { struct xfs_log_item *lip; list_for_each_entry_reverse(lip, &ailp->ail_head, li_ail) { if (XFS_LSN_CMP(lip->li_lsn, lsn) <= 0) return lip; } return NULL; } /* * Find the last item in the AIL with the given @lsn by searching in descending * LSN order and initialise the cursor to point to that item. If there is no * item with the value of @lsn, then it sets the cursor to the last item with an * LSN lower than @lsn. Returns NULL if the list is empty. 
*/ struct xfs_log_item * xfs_trans_ail_cursor_last( struct xfs_ail *ailp, struct xfs_ail_cursor *cur, xfs_lsn_t lsn) { xfs_trans_ail_cursor_init(ailp, cur); cur->item = __xfs_trans_ail_cursor_last(ailp, lsn); return cur->item; } /* * Splice the log item list into the AIL at the given LSN. We splice to the * tail of the given LSN to maintain insert order for push traversals. The * cursor is optional, allowing repeated updates to the same LSN to avoid * repeated traversals. This should not be called with an empty list. */ static void xfs_ail_splice( struct xfs_ail *ailp, struct xfs_ail_cursor *cur, struct list_head *list, xfs_lsn_t lsn) { struct xfs_log_item *lip; ASSERT(!list_empty(list)); /* * Use the cursor to determine the insertion point if one is * provided. If not, or if the one we got is not valid, * find the place in the AIL where the items belong. */ lip = cur ? cur->item : NULL; if (!lip || (uintptr_t)lip & 1) lip = __xfs_trans_ail_cursor_last(ailp, lsn); /* * If a cursor is provided, we know we're processing the AIL * in lsn order, and future items to be spliced in will * follow the last one being inserted now. Update the * cursor to point to that last item, now while we have a * reliable pointer to it. */ if (cur) cur->item = list_entry(list->prev, struct xfs_log_item, li_ail); /* * Finally perform the splice. Unless the AIL was empty, * lip points to the item in the AIL _after_ which the new * items should go. If lip is null the AIL was empty, so * the new items go at the head of the AIL. */ if (lip) list_splice(list, &lip->li_ail); else list_splice(list, &ailp->ail_head); } /* * Delete the given item from the AIL. Return a pointer to the item. */ static void xfs_ail_delete( struct xfs_ail *ailp, struct xfs_log_item *lip) { xfs_ail_check(ailp, lip); list_del(&lip->li_ail); xfs_trans_ail_cursor_clear(ailp, lip); } /* * Requeue a failed buffer for writeback. * * We clear the log item failed state here as well, but we have to be careful * about reference counts because the only active reference counts on the buffer * may be the failed log items. Hence if we clear the log item failed state * before queuing the buffer for IO we can release all active references to * the buffer and free it, leading to use after free problems in * xfs_buf_delwri_queue. It makes no difference to the buffer or log items which * order we process them in - the buffer is locked, and we own the buffer list * so nothing on them is going to change while we are performing this action. * * Hence we can safely queue the buffer for IO before we clear the failed log * item state, therefore always having an active reference to the buffer and * avoiding the transient zero-reference state that leads to use-after-free. */ static inline int xfsaild_resubmit_item( struct xfs_log_item *lip, struct list_head *buffer_list) { struct xfs_buf *bp = lip->li_buf; if (!xfs_buf_trylock(bp)) return XFS_ITEM_LOCKED; if (!xfs_buf_delwri_queue(bp, buffer_list)) { xfs_buf_unlock(bp); return XFS_ITEM_FLUSHING; } /* protected by ail_lock */ list_for_each_entry(lip, &bp->b_li_list, li_bio_list) clear_bit(XFS_LI_FAILED, &lip->li_flags); xfs_buf_unlock(bp); return XFS_ITEM_SUCCESS; } static inline uint xfsaild_push_item( struct xfs_ail *ailp, struct xfs_log_item *lip) { /* * If log item pinning is enabled, skip the push and track the item as * pinned. This can help induce head-behind-tail conditions. 
*/ if (XFS_TEST_ERROR(false, ailp->ail_log->l_mp, XFS_ERRTAG_LOG_ITEM_PIN)) return XFS_ITEM_PINNED; /* * Consider the item pinned if a push callback is not defined so the * caller will force the log. This should only happen for intent items * as they are unpinned once the associated done item is committed to * the on-disk log. */ if (!lip->li_ops->iop_push) return XFS_ITEM_PINNED; if (test_bit(XFS_LI_FAILED, &lip->li_flags)) return xfsaild_resubmit_item(lip, &ailp->ail_buf_list); return lip->li_ops->iop_push(lip, &ailp->ail_buf_list); } /* * Compute the LSN that we'd need to push the log tail towards in order to have * at least 25% of the log space free. If the log free space already meets this * threshold, this function returns the lowest LSN in the AIL to slowly keep * writeback ticking over and the tail of the log moving forward. */ static xfs_lsn_t xfs_ail_calc_push_target( struct xfs_ail *ailp) { struct xlog *log = ailp->ail_log; struct xfs_log_item *lip; xfs_lsn_t target_lsn; xfs_lsn_t max_lsn; xfs_lsn_t min_lsn; int32_t free_bytes; uint32_t target_block; uint32_t target_cycle; lockdep_assert_held(&ailp->ail_lock); lip = xfs_ail_max(ailp); if (!lip) return NULLCOMMITLSN; max_lsn = lip->li_lsn; min_lsn = __xfs_ail_min_lsn(ailp); /* * If we are supposed to push all the items in the AIL, we want to push * to the current head. We then clear the push flag so that we don't * keep pushing newly queued items beyond where the push all command was * run. If the push waiter wants to empty the ail, it should queue * itself on the ail_empty wait queue. */ if (test_and_clear_bit(XFS_AIL_OPSTATE_PUSH_ALL, &ailp->ail_opstate)) return max_lsn; /* If someone wants the AIL empty, keep pushing everything we have. */ if (waitqueue_active(&ailp->ail_empty)) return max_lsn; /* * Background pushing - attempt to keep 25% of the log free and if we * have that much free retain the existing target. */ free_bytes = log->l_logsize - xlog_lsn_sub(log, max_lsn, min_lsn); if (free_bytes >= log->l_logsize >> 2) return ailp->ail_target; target_cycle = CYCLE_LSN(min_lsn); target_block = BLOCK_LSN(min_lsn) + (log->l_logBBsize >> 2); if (target_block >= log->l_logBBsize) { target_block -= log->l_logBBsize; target_cycle += 1; } target_lsn = xlog_assign_lsn(target_cycle, target_block); /* Cap the target to the highest LSN known to be in the AIL. */ if (XFS_LSN_CMP(target_lsn, max_lsn) > 0) return max_lsn; /* If the existing target is higher than the new target, keep it. */ if (XFS_LSN_CMP(ailp->ail_target, target_lsn) >= 0) return ailp->ail_target; return target_lsn; } static long xfsaild_push( struct xfs_ail *ailp) { struct xfs_mount *mp = ailp->ail_log->l_mp; struct xfs_ail_cursor cur; struct xfs_log_item *lip; xfs_lsn_t lsn; long tout; int stuck = 0; int flushing = 0; int count = 0; /* * If we encountered pinned items or did not finish writing out all * buffers the last time we ran, force a background CIL push to get the * items unpinned in the near future. We do not wait on the CIL push as * that could stall us for seconds if there is enough background IO * load. Stalling for that long when the tail of the log is pinned and * needs flushing will hard stop the transaction subsystem when log * space runs out. 
*/ if (ailp->ail_log_flush && ailp->ail_last_pushed_lsn == 0 && (!list_empty_careful(&ailp->ail_buf_list) || xfs_ail_min_lsn(ailp))) { ailp->ail_log_flush = 0; XFS_STATS_INC(mp, xs_push_ail_flush); xlog_cil_flush(ailp->ail_log); } spin_lock(&ailp->ail_lock); WRITE_ONCE(ailp->ail_target, xfs_ail_calc_push_target(ailp)); if (ailp->ail_target == NULLCOMMITLSN) goto out_done; /* we're done if the AIL is empty or our push has reached the end */ lip = xfs_trans_ail_cursor_first(ailp, &cur, ailp->ail_last_pushed_lsn); if (!lip) goto out_done_cursor; XFS_STATS_INC(mp, xs_push_ail); ASSERT(ailp->ail_target != NULLCOMMITLSN); lsn = lip->li_lsn; while ((XFS_LSN_CMP(lip->li_lsn, ailp->ail_target) <= 0)) { int lock_result; if (test_bit(XFS_LI_FLUSHING, &lip->li_flags)) goto next_item; /* * Note that iop_push may unlock and reacquire the AIL lock. We * rely on the AIL cursor implementation to be able to deal with * the dropped lock. */ lock_result = xfsaild_push_item(ailp, lip); switch (lock_result) { case XFS_ITEM_SUCCESS: XFS_STATS_INC(mp, xs_push_ail_success); trace_xfs_ail_push(lip); ailp->ail_last_pushed_lsn = lsn; break; case XFS_ITEM_FLUSHING: /* * The item or its backing buffer is already being * flushed. The typical reason for that is that an * inode buffer is locked because we already pushed the * updates to it as part of inode clustering. * * We do not want to stop flushing just because lots * of items are already being flushed, but we need to * re-try the flushing relatively soon if most of the * AIL is being flushed. */ XFS_STATS_INC(mp, xs_push_ail_flushing); trace_xfs_ail_flushing(lip); flushing++; ailp->ail_last_pushed_lsn = lsn; break; case XFS_ITEM_PINNED: XFS_STATS_INC(mp, xs_push_ail_pinned); trace_xfs_ail_pinned(lip); stuck++; ailp->ail_log_flush++; break; case XFS_ITEM_LOCKED: XFS_STATS_INC(mp, xs_push_ail_locked); trace_xfs_ail_locked(lip); stuck++; break; default: ASSERT(0); break; } count++; /* * Are there too many items we can't do anything with? * * If we are skipping too many items because we can't flush * them or they are already being flushed, we back off and * give them time to complete whatever operation is being * done. i.e. remove pressure from the AIL while we can't make * progress so traversals don't slow down further inserts and * removals to/from the AIL. * * The value of 100 is an arbitrary magic number based on * observation. */ if (stuck > 100) break; next_item: lip = xfs_trans_ail_cursor_next(ailp, &cur); if (lip == NULL) break; if (lip->li_lsn != lsn && count > 1000) break; lsn = lip->li_lsn; } out_done_cursor: xfs_trans_ail_cursor_done(&cur); out_done: spin_unlock(&ailp->ail_lock); if (xfs_buf_delwri_submit_nowait(&ailp->ail_buf_list)) ailp->ail_log_flush++; if (!count || XFS_LSN_CMP(lsn, ailp->ail_target) >= 0) { /* * We reached the target or the AIL is empty, so wait a bit * longer for I/O to complete and remove pushed items from the * AIL before we start the next scan from the start of the AIL. */ tout = 50; ailp->ail_last_pushed_lsn = 0; } else if (((stuck + flushing) * 100) / count > 90) { /* * Either there is a lot of contention on the AIL or we are * stuck due to operations in progress. "Stuck" in this case * is defined as >90% of the items we tried to push were stuck. * * Back off a bit more to allow some I/O to complete before * restarting from the start of the AIL. This prevents us from * spinning on the same items, and if they are pinned will allow * the restart to issue a log force to unpin the stuck items. 
*/ tout = 20; ailp->ail_last_pushed_lsn = 0; } else { /* * Assume we have more work to do in a short while. */ tout = 0; } return tout; } static int xfsaild( void *data) { struct xfs_ail *ailp = data; long tout = 0; /* milliseconds */ unsigned int noreclaim_flag; noreclaim_flag = memalloc_noreclaim_save(); set_freezable(); while (1) { /* * Long waits of 50ms or more occur when we've run out of items * to push, so we only want uninterruptible state if we're * actually blocked on something. */ if (tout && tout <= 20) set_current_state(TASK_KILLABLE|TASK_FREEZABLE); else set_current_state(TASK_INTERRUPTIBLE|TASK_FREEZABLE); /* * Check kthread_should_stop() after we set the task state to * guarantee that we either see the stop bit and exit or the * task state is reset to runnable such that it's not scheduled * out indefinitely and detects the stop bit at next iteration. * A memory barrier is included in the above task state set to * serialize against kthread_stop(). */ if (kthread_should_stop()) { __set_current_state(TASK_RUNNING); /* * The caller forces out the AIL before stopping the * thread in the common case, which means the delwri * queue is drained. In the shutdown case, the queue may * still hold relogged buffers that haven't been * submitted because they were pinned since added to the * queue. * * Log I/O error processing stales the underlying buffer * and clears the delwri state, expecting the buf to be * removed on the next submission attempt. That won't * happen if we're shutting down, so this is the last * opportunity to release such buffers from the queue. */ ASSERT(list_empty(&ailp->ail_buf_list) || xlog_is_shutdown(ailp->ail_log)); xfs_buf_delwri_cancel(&ailp->ail_buf_list); break; } /* Idle if the AIL is empty. */ spin_lock(&ailp->ail_lock); if (!xfs_ail_min(ailp) && list_empty(&ailp->ail_buf_list)) { spin_unlock(&ailp->ail_lock); schedule(); tout = 0; continue; } spin_unlock(&ailp->ail_lock); if (tout) schedule_timeout(msecs_to_jiffies(tout)); __set_current_state(TASK_RUNNING); try_to_freeze(); tout = xfsaild_push(ailp); } memalloc_noreclaim_restore(noreclaim_flag); return 0; } /* * Push out all items in the AIL immediately and wait until the AIL is empty. */ void xfs_ail_push_all_sync( struct xfs_ail *ailp) { DEFINE_WAIT(wait); spin_lock(&ailp->ail_lock); while (xfs_ail_max(ailp) != NULL) { prepare_to_wait(&ailp->ail_empty, &wait, TASK_UNINTERRUPTIBLE); wake_up_process(ailp->ail_task); spin_unlock(&ailp->ail_lock); schedule(); spin_lock(&ailp->ail_lock); } spin_unlock(&ailp->ail_lock); finish_wait(&ailp->ail_empty, &wait); } void __xfs_ail_assign_tail_lsn( struct xfs_ail *ailp) { struct xlog *log = ailp->ail_log; xfs_lsn_t tail_lsn; assert_spin_locked(&ailp->ail_lock); if (xlog_is_shutdown(log)) return; tail_lsn = __xfs_ail_min_lsn(ailp); if (!tail_lsn) tail_lsn = ailp->ail_head_lsn; WRITE_ONCE(log->l_tail_space, xlog_lsn_sub(log, ailp->ail_head_lsn, tail_lsn)); trace_xfs_log_assign_tail_lsn(log, tail_lsn); atomic64_set(&log->l_tail_lsn, tail_lsn); } /* * Callers should pass the original tail lsn so that we can detect if the tail * has moved as a result of the operation that was performed. If the caller * needs to force a tail space update, it should pass NULLCOMMITLSN to bypass * the "did the tail LSN change?" checks. If the caller wants to avoid a tail * update (e.g. it knows the tail did not change) it should pass an @old_lsn of * 0. 
*/ void xfs_ail_update_finish( struct xfs_ail *ailp, xfs_lsn_t old_lsn) __releases(ailp->ail_lock) { struct xlog *log = ailp->ail_log; /* If the tail lsn hasn't changed, don't do updates or wakeups. */ if (!old_lsn || old_lsn == __xfs_ail_min_lsn(ailp)) { spin_unlock(&ailp->ail_lock); return; } __xfs_ail_assign_tail_lsn(ailp); if (list_empty(&ailp->ail_head)) wake_up_all(&ailp->ail_empty); spin_unlock(&ailp->ail_lock); xfs_log_space_wake(log->l_mp); } /* * xfs_trans_ail_update - bulk AIL insertion operation. * * @xfs_trans_ail_update takes an array of log items that all need to be * positioned at the same LSN in the AIL. If an item is not in the AIL, it will * be added. Otherwise, it will be repositioned by removing it and re-adding * it to the AIL. If we move the first item in the AIL, update the log tail to * match the new minimum LSN in the AIL. * * This function executes the update operations on all the items in the array * under a single hold of the AIL lock, so once we have the AIL lock we need to * check each log item LSN to confirm it actually needs to be moved forward in * the AIL. * * To optimise the insert operation, we delete all the items from the AIL in * the first pass, moving them into a temporary list, then splice the temporary * list into the correct position in the AIL. This avoids needing to do an * insert operation on every item. * * This function must be called with the AIL lock held. The lock is dropped * before returning. */ void xfs_trans_ail_update_bulk( struct xfs_ail *ailp, struct xfs_ail_cursor *cur, struct xfs_log_item **log_items, int nr_items, xfs_lsn_t lsn) __releases(ailp->ail_lock) { struct xfs_log_item *mlip; xfs_lsn_t tail_lsn = 0; int i; LIST_HEAD(tmp); ASSERT(nr_items > 0); /* Not required, but true. */ mlip = xfs_ail_min(ailp); for (i = 0; i < nr_items; i++) { struct xfs_log_item *lip = log_items[i]; if (test_and_set_bit(XFS_LI_IN_AIL, &lip->li_flags)) { /* check if we really need to move the item */ if (XFS_LSN_CMP(lsn, lip->li_lsn) <= 0) continue; trace_xfs_ail_move(lip, lip->li_lsn, lsn); if (mlip == lip && !tail_lsn) tail_lsn = lip->li_lsn; xfs_ail_delete(ailp, lip); } else { trace_xfs_ail_insert(lip, 0, lsn); } lip->li_lsn = lsn; list_add_tail(&lip->li_ail, &tmp); } if (!list_empty(&tmp)) xfs_ail_splice(ailp, cur, &tmp, lsn); /* * If this is the first insert, wake up the push daemon so it can * actively scan for items to push. We also need to do a log tail * LSN update to ensure that it is correctly tracked by the log, so * set the tail_lsn to NULLCOMMITLSN so that xfs_ail_update_finish() * will see that the tail lsn has changed and will update the tail * appropriately. */ if (!mlip) { wake_up_process(ailp->ail_task); tail_lsn = NULLCOMMITLSN; } xfs_ail_update_finish(ailp, tail_lsn); } /* Insert a log item into the AIL. */ void xfs_trans_ail_insert( struct xfs_ail *ailp, struct xfs_log_item *lip, xfs_lsn_t lsn) { spin_lock(&ailp->ail_lock); xfs_trans_ail_update_bulk(ailp, NULL, &lip, 1, lsn); } /* * Delete one log item from the AIL. * * If this item was at the tail of the AIL, return the LSN of the log item so * that we can use it to check if the LSN of the tail of the log has moved * when finishing up the AIL delete process in xfs_ail_update_finish(). 
*/ xfs_lsn_t xfs_ail_delete_one( struct xfs_ail *ailp, struct xfs_log_item *lip) { struct xfs_log_item *mlip = xfs_ail_min(ailp); xfs_lsn_t lsn = lip->li_lsn; trace_xfs_ail_delete(lip, mlip->li_lsn, lip->li_lsn); xfs_ail_delete(ailp, lip); clear_bit(XFS_LI_IN_AIL, &lip->li_flags); lip->li_lsn = 0; if (mlip == lip) return lsn; return 0; } void xfs_trans_ail_delete( struct xfs_log_item *lip, int shutdown_type) { struct xfs_ail *ailp = lip->li_ailp; struct xlog *log = ailp->ail_log; xfs_lsn_t tail_lsn; spin_lock(&ailp->ail_lock); if (!test_bit(XFS_LI_IN_AIL, &lip->li_flags)) { spin_unlock(&ailp->ail_lock); if (shutdown_type && !xlog_is_shutdown(log)) { xfs_alert_tag(log->l_mp, XFS_PTAG_AILDELETE, "%s: attempting to delete a log item that is not in the AIL", __func__); xlog_force_shutdown(log, shutdown_type); } return; } /* xfs_ail_update_finish() drops the AIL lock */ xfs_clear_li_failed(lip); tail_lsn = xfs_ail_delete_one(ailp, lip); xfs_ail_update_finish(ailp, tail_lsn); } int xfs_trans_ail_init( xfs_mount_t *mp) { struct xfs_ail *ailp; ailp = kzalloc(sizeof(struct xfs_ail), GFP_KERNEL | __GFP_RETRY_MAYFAIL); if (!ailp) return -ENOMEM; ailp->ail_log = mp->m_log; INIT_LIST_HEAD(&ailp->ail_head); INIT_LIST_HEAD(&ailp->ail_cursors); spin_lock_init(&ailp->ail_lock); INIT_LIST_HEAD(&ailp->ail_buf_list); init_waitqueue_head(&ailp->ail_empty); ailp->ail_task = kthread_run(xfsaild, ailp, "xfsaild/%s", mp->m_super->s_id); if (IS_ERR(ailp->ail_task)) goto out_free_ailp; mp->m_ail = ailp; return 0; out_free_ailp: kfree(ailp); return -ENOMEM; } void xfs_trans_ail_destroy( xfs_mount_t *mp) { struct xfs_ail *ailp = mp->m_ail; kthread_stop(ailp->ail_task); kfree(ailp); } |
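/*
 * Example (not part of XFS): the low-bit pointer tagging used by
 * xfs_trans_ail_cursor_clear() and xfs_trans_ail_cursor_next() above,
 * reduced to a standalone sketch. Structure pointers are word aligned,
 * so bit 0 of a valid address is always clear and can carry an
 * "invalidated" flag without any extra storage. The types and names
 * here are invented for illustration.
 */
#include <assert.h>
#include <stdint.h>

struct item {
	int payload;
};

/* Tag a saved traversal position as invalidated, as cursor_clear() does. */
static struct item *cursor_invalidate(struct item *ip)
{
	return (struct item *)((uintptr_t)ip | 1);
}

/* Test for the tag before dereferencing, as cursor_next() does. */
static int cursor_is_invalid(const struct item *ip)
{
	return (uintptr_t)ip & 1;
}

int main(void)
{
	static struct item it = { 42 };
	struct item *cur = &it;

	assert(!cursor_is_invalid(cur));
	cur = cursor_invalidate(cur);
	assert(cursor_is_invalid(cur));
	/*
	 * A traversal that sees the tag restarts from the list minimum
	 * instead of dereferencing a possibly freed item.
	 */
	return 0;
}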
// SPDX-License-Identifier: GPL-2.0 #include <linux/ceph/ceph_debug.h> #include 
<linux/bvec.h> #include <linux/crc32c.h> #include <linux/net.h> #include <linux/socket.h> #include <net/sock.h> #include <linux/ceph/ceph_features.h> #include <linux/ceph/decode.h> #include <linux/ceph/libceph.h> #include <linux/ceph/messenger.h> /* static tag bytes (protocol control messages) */ static char tag_msg = CEPH_MSGR_TAG_MSG; static char tag_ack = CEPH_MSGR_TAG_ACK; static char tag_keepalive = CEPH_MSGR_TAG_KEEPALIVE; static char tag_keepalive2 = CEPH_MSGR_TAG_KEEPALIVE2; /* * If @buf is NULL, discard up to @len bytes. */ static int ceph_tcp_recvmsg(struct socket *sock, void *buf, size_t len) { struct kvec iov = {buf, len}; struct msghdr msg = { .msg_flags = MSG_DONTWAIT | MSG_NOSIGNAL }; int r; if (!buf) msg.msg_flags |= MSG_TRUNC; iov_iter_kvec(&msg.msg_iter, ITER_DEST, &iov, 1, len); r = sock_recvmsg(sock, &msg, msg.msg_flags); if (r == -EAGAIN) r = 0; return r; } static int ceph_tcp_recvpage(struct socket *sock, struct page *page, int page_offset, size_t length) { struct bio_vec bvec; struct msghdr msg = { .msg_flags = MSG_DONTWAIT | MSG_NOSIGNAL }; int r; BUG_ON(page_offset + length > PAGE_SIZE); bvec_set_page(&bvec, page, length, page_offset); iov_iter_bvec(&msg.msg_iter, ITER_DEST, &bvec, 1, length); r = sock_recvmsg(sock, &msg, msg.msg_flags); if (r == -EAGAIN) r = 0; return r; } /* * write something. @more is true if caller will be sending more data * shortly. */ static int ceph_tcp_sendmsg(struct socket *sock, struct kvec *iov, size_t kvlen, size_t len, bool more) { struct msghdr msg = { .msg_flags = MSG_DONTWAIT | MSG_NOSIGNAL }; int r; if (more) msg.msg_flags |= MSG_MORE; else msg.msg_flags |= MSG_EOR; /* superfluous, but what the hell */ r = kernel_sendmsg(sock, &msg, iov, kvlen, len); if (r == -EAGAIN) r = 0; return r; } /* * @more: MSG_MORE or 0. */ static int ceph_tcp_sendpage(struct socket *sock, struct page *page, int offset, size_t size, int more) { struct msghdr msg = { .msg_flags = MSG_DONTWAIT | MSG_NOSIGNAL | more, }; struct bio_vec bvec; int ret; /* * MSG_SPLICE_PAGES cannot properly handle pages with page_count == 0, * we need to fall back to sendmsg if that's the case. * * Same goes for slab pages: skb_can_coalesce() allows * coalescing neighboring slab objects into a single frag which * triggers one of hardened usercopy checks. */ if (sendpage_ok(page)) msg.msg_flags |= MSG_SPLICE_PAGES; bvec_set_page(&bvec, page, size, offset); iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, &bvec, 1, size); ret = sock_sendmsg(sock, &msg); if (ret == -EAGAIN) ret = 0; return ret; } static void con_out_kvec_reset(struct ceph_connection *con) { BUG_ON(con->v1.out_skip); con->v1.out_kvec_left = 0; con->v1.out_kvec_bytes = 0; con->v1.out_kvec_cur = &con->v1.out_kvec[0]; } static void con_out_kvec_add(struct ceph_connection *con, size_t size, void *data) { int index = con->v1.out_kvec_left; BUG_ON(con->v1.out_skip); BUG_ON(index >= ARRAY_SIZE(con->v1.out_kvec)); con->v1.out_kvec[index].iov_len = size; con->v1.out_kvec[index].iov_base = data; con->v1.out_kvec_left++; con->v1.out_kvec_bytes += size; } /* * Chop off a kvec from the end. Return residual number of bytes for * that kvec, i.e. how many bytes would have been written if the kvec * hadn't been nuked. 
*/ static int con_out_kvec_skip(struct ceph_connection *con) { int skip = 0; if (con->v1.out_kvec_bytes > 0) { skip = con->v1.out_kvec_cur[con->v1.out_kvec_left - 1].iov_len; BUG_ON(con->v1.out_kvec_bytes < skip); BUG_ON(!con->v1.out_kvec_left); con->v1.out_kvec_bytes -= skip; con->v1.out_kvec_left--; } return skip; } static size_t sizeof_footer(struct ceph_connection *con) { return (con->peer_features & CEPH_FEATURE_MSG_AUTH) ? sizeof(struct ceph_msg_footer) : sizeof(struct ceph_msg_footer_old); } static void prepare_message_data(struct ceph_msg *msg, u32 data_len) { /* Initialize data cursor if it's not a sparse read */ u64 len = msg->sparse_read_total ? : data_len; ceph_msg_data_cursor_init(&msg->cursor, msg, len); } /* * Prepare footer for currently outgoing message, and finish things * off. Assumes out_kvec* are already valid.. we just add on to the end. */ static void prepare_write_message_footer(struct ceph_connection *con) { struct ceph_msg *m = con->out_msg; m->footer.flags |= CEPH_MSG_FOOTER_COMPLETE; dout("prepare_write_message_footer %p\n", con); con_out_kvec_add(con, sizeof_footer(con), &m->footer); if (con->peer_features & CEPH_FEATURE_MSG_AUTH) { if (con->ops->sign_message) con->ops->sign_message(m); else m->footer.sig = 0; } else { m->old_footer.flags = m->footer.flags; } con->v1.out_more = m->more_to_follow; con->v1.out_msg_done = true; } /* * Prepare headers for the next outgoing message. */ static void prepare_write_message(struct ceph_connection *con) { struct ceph_msg *m; u32 crc; con_out_kvec_reset(con); con->v1.out_msg_done = false; /* Sneak an ack in there first? If we can get it into the same * TCP packet that's a good thing. */ if (con->in_seq > con->in_seq_acked) { con->in_seq_acked = con->in_seq; con_out_kvec_add(con, sizeof (tag_ack), &tag_ack); con->v1.out_temp_ack = cpu_to_le64(con->in_seq_acked); con_out_kvec_add(con, sizeof(con->v1.out_temp_ack), &con->v1.out_temp_ack); } ceph_con_get_out_msg(con); m = con->out_msg; dout("prepare_write_message %p seq %lld type %d len %d+%d+%zd\n", m, con->out_seq, le16_to_cpu(m->hdr.type), le32_to_cpu(m->hdr.front_len), le32_to_cpu(m->hdr.middle_len), m->data_length); WARN_ON(m->front.iov_len != le32_to_cpu(m->hdr.front_len)); WARN_ON(m->data_length != le32_to_cpu(m->hdr.data_len)); /* tag + hdr + front + middle */ con_out_kvec_add(con, sizeof (tag_msg), &tag_msg); con_out_kvec_add(con, sizeof(con->v1.out_hdr), &con->v1.out_hdr); con_out_kvec_add(con, m->front.iov_len, m->front.iov_base); if (m->middle) con_out_kvec_add(con, m->middle->vec.iov_len, m->middle->vec.iov_base); /* fill in hdr crc and finalize hdr */ crc = crc32c(0, &m->hdr, offsetof(struct ceph_msg_header, crc)); con->out_msg->hdr.crc = cpu_to_le32(crc); memcpy(&con->v1.out_hdr, &con->out_msg->hdr, sizeof(con->v1.out_hdr)); /* fill in front and middle crc, footer */ crc = crc32c(0, m->front.iov_base, m->front.iov_len); con->out_msg->footer.front_crc = cpu_to_le32(crc); if (m->middle) { crc = crc32c(0, m->middle->vec.iov_base, m->middle->vec.iov_len); con->out_msg->footer.middle_crc = cpu_to_le32(crc); } else con->out_msg->footer.middle_crc = 0; dout("%s front_crc %u middle_crc %u\n", __func__, le32_to_cpu(con->out_msg->footer.front_crc), le32_to_cpu(con->out_msg->footer.middle_crc)); con->out_msg->footer.flags = 0; /* is there a data payload? 
*/ con->out_msg->footer.data_crc = 0; if (m->data_length) { prepare_message_data(con->out_msg, m->data_length); con->v1.out_more = 1; /* data + footer will follow */ } else { /* no, queue up footer too and be done */ prepare_write_message_footer(con); } ceph_con_flag_set(con, CEPH_CON_F_WRITE_PENDING); } /* * Prepare an ack. */ static void prepare_write_ack(struct ceph_connection *con) { dout("prepare_write_ack %p %llu -> %llu\n", con, con->in_seq_acked, con->in_seq); con->in_seq_acked = con->in_seq; con_out_kvec_reset(con); con_out_kvec_add(con, sizeof (tag_ack), &tag_ack); con->v1.out_temp_ack = cpu_to_le64(con->in_seq_acked); con_out_kvec_add(con, sizeof(con->v1.out_temp_ack), &con->v1.out_temp_ack); con->v1.out_more = 1; /* more will follow.. eventually.. */ ceph_con_flag_set(con, CEPH_CON_F_WRITE_PENDING); } /* * Prepare to share the seq during handshake */ static void prepare_write_seq(struct ceph_connection *con) { dout("prepare_write_seq %p %llu -> %llu\n", con, con->in_seq_acked, con->in_seq); con->in_seq_acked = con->in_seq; con_out_kvec_reset(con); con->v1.out_temp_ack = cpu_to_le64(con->in_seq_acked); con_out_kvec_add(con, sizeof(con->v1.out_temp_ack), &con->v1.out_temp_ack); ceph_con_flag_set(con, CEPH_CON_F_WRITE_PENDING); } /* * Prepare to write keepalive byte. */ static void prepare_write_keepalive(struct ceph_connection *con) { dout("prepare_write_keepalive %p\n", con); con_out_kvec_reset(con); if (con->peer_features & CEPH_FEATURE_MSGR_KEEPALIVE2) { struct timespec64 now; ktime_get_real_ts64(&now); con_out_kvec_add(con, sizeof(tag_keepalive2), &tag_keepalive2); ceph_encode_timespec64(&con->v1.out_temp_keepalive2, &now); con_out_kvec_add(con, sizeof(con->v1.out_temp_keepalive2), &con->v1.out_temp_keepalive2); } else { con_out_kvec_add(con, sizeof(tag_keepalive), &tag_keepalive); } ceph_con_flag_set(con, CEPH_CON_F_WRITE_PENDING); } /* * Connection negotiation. */ static int get_connect_authorizer(struct ceph_connection *con) { struct ceph_auth_handshake *auth; int auth_proto; if (!con->ops->get_authorizer) { con->v1.auth = NULL; con->v1.out_connect.authorizer_protocol = CEPH_AUTH_UNKNOWN; con->v1.out_connect.authorizer_len = 0; return 0; } auth = con->ops->get_authorizer(con, &auth_proto, con->v1.auth_retry); if (IS_ERR(auth)) return PTR_ERR(auth); con->v1.auth = auth; con->v1.out_connect.authorizer_protocol = cpu_to_le32(auth_proto); con->v1.out_connect.authorizer_len = cpu_to_le32(auth->authorizer_buf_len); return 0; } /* * We connected to a peer and are saying hello. 
*/ static void prepare_write_banner(struct ceph_connection *con) { con_out_kvec_add(con, strlen(CEPH_BANNER), CEPH_BANNER); con_out_kvec_add(con, sizeof (con->msgr->my_enc_addr), &con->msgr->my_enc_addr); con->v1.out_more = 0; ceph_con_flag_set(con, CEPH_CON_F_WRITE_PENDING); } static void __prepare_write_connect(struct ceph_connection *con) { con_out_kvec_add(con, sizeof(con->v1.out_connect), &con->v1.out_connect); if (con->v1.auth) con_out_kvec_add(con, con->v1.auth->authorizer_buf_len, con->v1.auth->authorizer_buf); con->v1.out_more = 0; ceph_con_flag_set(con, CEPH_CON_F_WRITE_PENDING); } static int prepare_write_connect(struct ceph_connection *con) { unsigned int global_seq = ceph_get_global_seq(con->msgr, 0); int proto; int ret; switch (con->peer_name.type) { case CEPH_ENTITY_TYPE_MON: proto = CEPH_MONC_PROTOCOL; break; case CEPH_ENTITY_TYPE_OSD: proto = CEPH_OSDC_PROTOCOL; break; case CEPH_ENTITY_TYPE_MDS: proto = CEPH_MDSC_PROTOCOL; break; default: BUG(); } dout("prepare_write_connect %p cseq=%d gseq=%d proto=%d\n", con, con->v1.connect_seq, global_seq, proto); con->v1.out_connect.features = cpu_to_le64(from_msgr(con->msgr)->supported_features); con->v1.out_connect.host_type = cpu_to_le32(CEPH_ENTITY_TYPE_CLIENT); con->v1.out_connect.connect_seq = cpu_to_le32(con->v1.connect_seq); con->v1.out_connect.global_seq = cpu_to_le32(global_seq); con->v1.out_connect.protocol_version = cpu_to_le32(proto); con->v1.out_connect.flags = 0; ret = get_connect_authorizer(con); if (ret) return ret; __prepare_write_connect(con); return 0; } /* * write as much of pending kvecs to the socket as we can. * 1 -> done * 0 -> socket full, but more to do * <0 -> error */ static int write_partial_kvec(struct ceph_connection *con) { int ret; dout("write_partial_kvec %p %d left\n", con, con->v1.out_kvec_bytes); while (con->v1.out_kvec_bytes > 0) { ret = ceph_tcp_sendmsg(con->sock, con->v1.out_kvec_cur, con->v1.out_kvec_left, con->v1.out_kvec_bytes, con->v1.out_more); if (ret <= 0) goto out; con->v1.out_kvec_bytes -= ret; if (!con->v1.out_kvec_bytes) break; /* done */ /* account for full iov entries consumed */ while (ret >= con->v1.out_kvec_cur->iov_len) { BUG_ON(!con->v1.out_kvec_left); ret -= con->v1.out_kvec_cur->iov_len; con->v1.out_kvec_cur++; con->v1.out_kvec_left--; } /* and for a partially-consumed entry */ if (ret) { con->v1.out_kvec_cur->iov_len -= ret; con->v1.out_kvec_cur->iov_base += ret; } } con->v1.out_kvec_left = 0; ret = 1; out: dout("write_partial_kvec %p %d left in %d kvecs ret = %d\n", con, con->v1.out_kvec_bytes, con->v1.out_kvec_left, ret); return ret; /* done! */ } /* * Write as much message data payload as we can. If we finish, queue * up the footer. * 1 -> done, footer is now queued in out_kvec[]. * 0 -> socket full, but more to do * <0 -> error */ static int write_partial_message_data(struct ceph_connection *con) { struct ceph_msg *msg = con->out_msg; struct ceph_msg_data_cursor *cursor = &msg->cursor; bool do_datacrc = !ceph_test_opt(from_msgr(con->msgr), NOCRC); u32 crc; dout("%s %p msg %p\n", __func__, con, msg); if (!msg->num_data_items) return -EINVAL; /* * Iterate through each page that contains data to be * written, and send as much as possible for each. * * If we are calculating the data crc (the default), we will * need to map the page. If we have no pages, they have * been revoked, so use the zero page. */ crc = do_datacrc ? 
le32_to_cpu(msg->footer.data_crc) : 0; while (cursor->total_resid) { struct page *page; size_t page_offset; size_t length; int ret; if (!cursor->resid) { ceph_msg_data_advance(cursor, 0); continue; } page = ceph_msg_data_next(cursor, &page_offset, &length); ret = ceph_tcp_sendpage(con->sock, page, page_offset, length, MSG_MORE); if (ret <= 0) { if (do_datacrc) msg->footer.data_crc = cpu_to_le32(crc); return ret; } if (do_datacrc && cursor->need_crc) crc = ceph_crc32c_page(crc, page, page_offset, length); ceph_msg_data_advance(cursor, (size_t)ret); } dout("%s %p msg %p done\n", __func__, con, msg); /* prepare and queue up footer, too */ if (do_datacrc) msg->footer.data_crc = cpu_to_le32(crc); else msg->footer.flags |= CEPH_MSG_FOOTER_NOCRC; con_out_kvec_reset(con); prepare_write_message_footer(con); return 1; /* must return > 0 to indicate success */ } /* * write some zeros */ static int write_partial_skip(struct ceph_connection *con) { int ret; dout("%s %p %d left\n", __func__, con, con->v1.out_skip); while (con->v1.out_skip > 0) { size_t size = min(con->v1.out_skip, (int)PAGE_SIZE); ret = ceph_tcp_sendpage(con->sock, ceph_zero_page, 0, size, MSG_MORE); if (ret <= 0) goto out; con->v1.out_skip -= ret; } ret = 1; out: return ret; } /* * Prepare to read connection handshake, or an ack. */ static void prepare_read_banner(struct ceph_connection *con) { dout("prepare_read_banner %p\n", con); con->v1.in_base_pos = 0; } static void prepare_read_connect(struct ceph_connection *con) { dout("prepare_read_connect %p\n", con); con->v1.in_base_pos = 0; } static void prepare_read_ack(struct ceph_connection *con) { dout("prepare_read_ack %p\n", con); con->v1.in_base_pos = 0; } static void prepare_read_seq(struct ceph_connection *con) { dout("prepare_read_seq %p\n", con); con->v1.in_base_pos = 0; con->v1.in_tag = CEPH_MSGR_TAG_SEQ; } static void prepare_read_tag(struct ceph_connection *con) { dout("prepare_read_tag %p\n", con); con->v1.in_base_pos = 0; con->v1.in_tag = CEPH_MSGR_TAG_READY; } static void prepare_read_keepalive_ack(struct ceph_connection *con) { dout("prepare_read_keepalive_ack %p\n", con); con->v1.in_base_pos = 0; } /* * Prepare to read a message. 
*/ static int prepare_read_message(struct ceph_connection *con) { dout("prepare_read_message %p\n", con); BUG_ON(con->in_msg != NULL); con->v1.in_base_pos = 0; con->in_front_crc = con->in_middle_crc = con->in_data_crc = 0; return 0; } static int read_partial(struct ceph_connection *con, int end, int size, void *object) { while (con->v1.in_base_pos < end) { int left = end - con->v1.in_base_pos; int have = size - left; int ret = ceph_tcp_recvmsg(con->sock, object + have, left); if (ret <= 0) return ret; con->v1.in_base_pos += ret; } return 1; } /* * Read all or part of the connect-side handshake on a new connection */ static int read_partial_banner(struct ceph_connection *con) { int size; int end; int ret; dout("read_partial_banner %p at %d\n", con, con->v1.in_base_pos); /* peer's banner */ size = strlen(CEPH_BANNER); end = size; ret = read_partial(con, end, size, con->v1.in_banner); if (ret <= 0) goto out; size = sizeof(con->v1.actual_peer_addr); end += size; ret = read_partial(con, end, size, &con->v1.actual_peer_addr); if (ret <= 0) goto out; ceph_decode_banner_addr(&con->v1.actual_peer_addr); size = sizeof(con->v1.peer_addr_for_me); end += size; ret = read_partial(con, end, size, &con->v1.peer_addr_for_me); if (ret <= 0) goto out; ceph_decode_banner_addr(&con->v1.peer_addr_for_me); out: return ret; } static int read_partial_connect(struct ceph_connection *con) { int size; int end; int ret; dout("read_partial_connect %p at %d\n", con, con->v1.in_base_pos); size = sizeof(con->v1.in_reply); end = size; ret = read_partial(con, end, size, &con->v1.in_reply); if (ret <= 0) goto out; if (con->v1.auth) { size = le32_to_cpu(con->v1.in_reply.authorizer_len); if (size > con->v1.auth->authorizer_reply_buf_len) { pr_err("authorizer reply too big: %d > %zu\n", size, con->v1.auth->authorizer_reply_buf_len); ret = -EINVAL; goto out; } end += size; ret = read_partial(con, end, size, con->v1.auth->authorizer_reply_buf); if (ret <= 0) goto out; } dout("read_partial_connect %p tag %d, con_seq = %u, g_seq = %u\n", con, con->v1.in_reply.tag, le32_to_cpu(con->v1.in_reply.connect_seq), le32_to_cpu(con->v1.in_reply.global_seq)); out: return ret; } /* * Verify the hello banner looks okay. */ static int verify_hello(struct ceph_connection *con) { if (memcmp(con->v1.in_banner, CEPH_BANNER, strlen(CEPH_BANNER))) { pr_err("connect to %s got bad banner\n", ceph_pr_addr(&con->peer_addr)); con->error_msg = "protocol error, bad banner"; return -1; } return 0; } static int process_banner(struct ceph_connection *con) { struct ceph_entity_addr *my_addr = &con->msgr->inst.addr; dout("process_banner on %p\n", con); if (verify_hello(con) < 0) return -1; /* * Make sure the other end is who we wanted. note that the other * end may not yet know their ip address, so if it's 0.0.0.0, give * them the benefit of the doubt. */ if (memcmp(&con->peer_addr, &con->v1.actual_peer_addr, sizeof(con->peer_addr)) != 0 && !(ceph_addr_is_blank(&con->v1.actual_peer_addr) && con->v1.actual_peer_addr.nonce == con->peer_addr.nonce)) { pr_warn("wrong peer, want %s/%u, got %s/%u\n", ceph_pr_addr(&con->peer_addr), le32_to_cpu(con->peer_addr.nonce), ceph_pr_addr(&con->v1.actual_peer_addr), le32_to_cpu(con->v1.actual_peer_addr.nonce)); con->error_msg = "wrong peer at address"; return -1; } /* * did we learn our address? 
*/ if (ceph_addr_is_blank(my_addr)) { memcpy(&my_addr->in_addr, &con->v1.peer_addr_for_me.in_addr, sizeof(con->v1.peer_addr_for_me.in_addr)); ceph_addr_set_port(my_addr, 0); ceph_encode_my_addr(con->msgr); dout("process_banner learned my addr is %s\n", ceph_pr_addr(my_addr)); } return 0; } static int process_connect(struct ceph_connection *con) { u64 sup_feat = from_msgr(con->msgr)->supported_features; u64 req_feat = from_msgr(con->msgr)->required_features; u64 server_feat = le64_to_cpu(con->v1.in_reply.features); int ret; dout("process_connect on %p tag %d\n", con, con->v1.in_tag); if (con->v1.auth) { int len = le32_to_cpu(con->v1.in_reply.authorizer_len); /* * Any connection that defines ->get_authorizer() * should also define ->add_authorizer_challenge() and * ->verify_authorizer_reply(). * * See get_connect_authorizer(). */ if (con->v1.in_reply.tag == CEPH_MSGR_TAG_CHALLENGE_AUTHORIZER) { ret = con->ops->add_authorizer_challenge( con, con->v1.auth->authorizer_reply_buf, len); if (ret < 0) return ret; con_out_kvec_reset(con); __prepare_write_connect(con); prepare_read_connect(con); return 0; } if (len) { ret = con->ops->verify_authorizer_reply(con); if (ret < 0) { con->error_msg = "bad authorize reply"; return ret; } } } switch (con->v1.in_reply.tag) { case CEPH_MSGR_TAG_FEATURES: pr_err("%s%lld %s feature set mismatch," " my %llx < server's %llx, missing %llx\n", ENTITY_NAME(con->peer_name), ceph_pr_addr(&con->peer_addr), sup_feat, server_feat, server_feat & ~sup_feat); con->error_msg = "missing required protocol features"; return -1; case CEPH_MSGR_TAG_BADPROTOVER: pr_err("%s%lld %s protocol version mismatch," " my %d != server's %d\n", ENTITY_NAME(con->peer_name), ceph_pr_addr(&con->peer_addr), le32_to_cpu(con->v1.out_connect.protocol_version), le32_to_cpu(con->v1.in_reply.protocol_version)); con->error_msg = "protocol version mismatch"; return -1; case CEPH_MSGR_TAG_BADAUTHORIZER: con->v1.auth_retry++; dout("process_connect %p got BADAUTHORIZER attempt %d\n", con, con->v1.auth_retry); if (con->v1.auth_retry == 2) { con->error_msg = "connect authorization failure"; return -1; } con_out_kvec_reset(con); ret = prepare_write_connect(con); if (ret < 0) return ret; prepare_read_connect(con); break; case CEPH_MSGR_TAG_RESETSESSION: /* * If we connected with a large connect_seq but the peer * has no record of a session with us (no connection, or * connect_seq == 0), they will send RESETSESION to indicate * that they must have reset their session, and may have * dropped messages. */ dout("process_connect got RESET peer seq %u\n", le32_to_cpu(con->v1.in_reply.connect_seq)); pr_info("%s%lld %s session reset\n", ENTITY_NAME(con->peer_name), ceph_pr_addr(&con->peer_addr)); ceph_con_reset_session(con); con_out_kvec_reset(con); ret = prepare_write_connect(con); if (ret < 0) return ret; prepare_read_connect(con); /* Tell ceph about it. */ mutex_unlock(&con->mutex); if (con->ops->peer_reset) con->ops->peer_reset(con); mutex_lock(&con->mutex); if (con->state != CEPH_CON_S_V1_CONNECT_MSG) return -EAGAIN; break; case CEPH_MSGR_TAG_RETRY_SESSION: /* * If we sent a smaller connect_seq than the peer has, try * again with a larger value. 
*/ dout("process_connect got RETRY_SESSION my seq %u, peer %u\n", le32_to_cpu(con->v1.out_connect.connect_seq), le32_to_cpu(con->v1.in_reply.connect_seq)); con->v1.connect_seq = le32_to_cpu(con->v1.in_reply.connect_seq); con_out_kvec_reset(con); ret = prepare_write_connect(con); if (ret < 0) return ret; prepare_read_connect(con); break; case CEPH_MSGR_TAG_RETRY_GLOBAL: /* * If we sent a smaller global_seq than the peer has, try * again with a larger value. */ dout("process_connect got RETRY_GLOBAL my %u peer_gseq %u\n", con->v1.peer_global_seq, le32_to_cpu(con->v1.in_reply.global_seq)); ceph_get_global_seq(con->msgr, le32_to_cpu(con->v1.in_reply.global_seq)); con_out_kvec_reset(con); ret = prepare_write_connect(con); if (ret < 0) return ret; prepare_read_connect(con); break; case CEPH_MSGR_TAG_SEQ: case CEPH_MSGR_TAG_READY: if (req_feat & ~server_feat) { pr_err("%s%lld %s protocol feature mismatch," " my required %llx > server's %llx, need %llx\n", ENTITY_NAME(con->peer_name), ceph_pr_addr(&con->peer_addr), req_feat, server_feat, req_feat & ~server_feat); con->error_msg = "missing required protocol features"; return -1; } WARN_ON(con->state != CEPH_CON_S_V1_CONNECT_MSG); con->state = CEPH_CON_S_OPEN; con->v1.auth_retry = 0; /* we authenticated; clear flag */ con->v1.peer_global_seq = le32_to_cpu(con->v1.in_reply.global_seq); con->v1.connect_seq++; con->peer_features = server_feat; dout("process_connect got READY gseq %d cseq %d (%d)\n", con->v1.peer_global_seq, le32_to_cpu(con->v1.in_reply.connect_seq), con->v1.connect_seq); WARN_ON(con->v1.connect_seq != le32_to_cpu(con->v1.in_reply.connect_seq)); if (con->v1.in_reply.flags & CEPH_MSG_CONNECT_LOSSY) ceph_con_flag_set(con, CEPH_CON_F_LOSSYTX); con->delay = 0; /* reset backoff memory */ if (con->v1.in_reply.tag == CEPH_MSGR_TAG_SEQ) { prepare_write_seq(con); prepare_read_seq(con); } else { prepare_read_tag(con); } break; case CEPH_MSGR_TAG_WAIT: /* * If there is a connection race (we are opening * connections to each other), one of us may just have * to WAIT. This shouldn't happen if we are the * client. */ con->error_msg = "protocol error, got WAIT as client"; return -1; default: con->error_msg = "protocol error, garbage tag during connect"; return -1; } return 0; } /* * read (part of) an ack */ static int read_partial_ack(struct ceph_connection *con) { int size = sizeof(con->v1.in_temp_ack); int end = size; return read_partial(con, end, size, &con->v1.in_temp_ack); } /* * We can finally discard anything that's been acked. 
*/ static void process_ack(struct ceph_connection *con) { u64 ack = le64_to_cpu(con->v1.in_temp_ack); if (con->v1.in_tag == CEPH_MSGR_TAG_ACK) ceph_con_discard_sent(con, ack); else ceph_con_discard_requeued(con, ack); prepare_read_tag(con); } static int read_partial_message_chunk(struct ceph_connection *con, struct kvec *section, unsigned int sec_len, u32 *crc) { int ret, left; BUG_ON(!section); while (section->iov_len < sec_len) { BUG_ON(section->iov_base == NULL); left = sec_len - section->iov_len; ret = ceph_tcp_recvmsg(con->sock, (char *)section->iov_base + section->iov_len, left); if (ret <= 0) return ret; section->iov_len += ret; } if (section->iov_len == sec_len) *crc = crc32c(*crc, section->iov_base, section->iov_len); return 1; } static inline int read_partial_message_section(struct ceph_connection *con, struct kvec *section, unsigned int sec_len, u32 *crc) { *crc = 0; return read_partial_message_chunk(con, section, sec_len, crc); } static int read_partial_sparse_msg_extent(struct ceph_connection *con, u32 *crc) { struct ceph_msg_data_cursor *cursor = &con->in_msg->cursor; bool do_bounce = ceph_test_opt(from_msgr(con->msgr), RXBOUNCE); if (do_bounce && unlikely(!con->bounce_page)) { con->bounce_page = alloc_page(GFP_NOIO); if (!con->bounce_page) { pr_err("failed to allocate bounce page\n"); return -ENOMEM; } } while (cursor->sr_resid > 0) { struct page *page, *rpage; size_t off, len; int ret; page = ceph_msg_data_next(cursor, &off, &len); rpage = do_bounce ? con->bounce_page : page; /* clamp to what remains in extent */ len = min_t(int, len, cursor->sr_resid); ret = ceph_tcp_recvpage(con->sock, rpage, (int)off, len); if (ret <= 0) return ret; *crc = ceph_crc32c_page(*crc, rpage, off, ret); ceph_msg_data_advance(cursor, (size_t)ret); cursor->sr_resid -= ret; if (do_bounce) memcpy_page(page, off, rpage, off, ret); } return 1; } static int read_partial_sparse_msg_data(struct ceph_connection *con) { struct ceph_msg_data_cursor *cursor = &con->in_msg->cursor; bool do_datacrc = !ceph_test_opt(from_msgr(con->msgr), NOCRC); u32 crc = 0; int ret = 1; if (do_datacrc) crc = con->in_data_crc; while (cursor->total_resid) { if (con->v1.in_sr_kvec.iov_base) ret = read_partial_message_chunk(con, &con->v1.in_sr_kvec, con->v1.in_sr_len, &crc); else if (cursor->sr_resid > 0) ret = read_partial_sparse_msg_extent(con, &crc); if (ret <= 0) break; memset(&con->v1.in_sr_kvec, 0, sizeof(con->v1.in_sr_kvec)); ret = con->ops->sparse_read(con, cursor, (char **)&con->v1.in_sr_kvec.iov_base); if (ret <= 0) { ret = ret ? 
ret : 1; /* must return > 0 to indicate success */ break; } con->v1.in_sr_len = ret; } if (do_datacrc) con->in_data_crc = crc; return ret; } static int read_partial_msg_data(struct ceph_connection *con) { struct ceph_msg_data_cursor *cursor = &con->in_msg->cursor; bool do_datacrc = !ceph_test_opt(from_msgr(con->msgr), NOCRC); struct page *page; size_t page_offset; size_t length; u32 crc = 0; int ret; if (do_datacrc) crc = con->in_data_crc; while (cursor->total_resid) { if (!cursor->resid) { ceph_msg_data_advance(cursor, 0); continue; } page = ceph_msg_data_next(cursor, &page_offset, &length); ret = ceph_tcp_recvpage(con->sock, page, page_offset, length); if (ret <= 0) { if (do_datacrc) con->in_data_crc = crc; return ret; } if (do_datacrc) crc = ceph_crc32c_page(crc, page, page_offset, ret); ceph_msg_data_advance(cursor, (size_t)ret); } if (do_datacrc) con->in_data_crc = crc; return 1; /* must return > 0 to indicate success */ } static int read_partial_msg_data_bounce(struct ceph_connection *con) { struct ceph_msg_data_cursor *cursor = &con->in_msg->cursor; struct page *page; size_t off, len; u32 crc; int ret; if (unlikely(!con->bounce_page)) { con->bounce_page = alloc_page(GFP_NOIO); if (!con->bounce_page) { pr_err("failed to allocate bounce page\n"); return -ENOMEM; } } crc = con->in_data_crc; while (cursor->total_resid) { if (!cursor->resid) { ceph_msg_data_advance(cursor, 0); continue; } page = ceph_msg_data_next(cursor, &off, &len); ret = ceph_tcp_recvpage(con->sock, con->bounce_page, 0, len); if (ret <= 0) { con->in_data_crc = crc; return ret; } crc = crc32c(crc, page_address(con->bounce_page), ret); memcpy_to_page(page, off, page_address(con->bounce_page), ret); ceph_msg_data_advance(cursor, ret); } con->in_data_crc = crc; return 1; /* must return > 0 to indicate success */ } /* * read (part of) a message. */ static int read_partial_message(struct ceph_connection *con) { struct ceph_msg *m = con->in_msg; int size; int end; int ret; unsigned int front_len, middle_len, data_len; bool do_datacrc = !ceph_test_opt(from_msgr(con->msgr), NOCRC); bool need_sign = (con->peer_features & CEPH_FEATURE_MSG_AUTH); u64 seq; u32 crc; dout("read_partial_message con %p msg %p\n", con, m); /* header */ size = sizeof(con->v1.in_hdr); end = size; ret = read_partial(con, end, size, &con->v1.in_hdr); if (ret <= 0) return ret; crc = crc32c(0, &con->v1.in_hdr, offsetof(struct ceph_msg_header, crc)); if (cpu_to_le32(crc) != con->v1.in_hdr.crc) { pr_err("read_partial_message bad hdr crc %u != expected %u\n", crc, con->v1.in_hdr.crc); return -EBADMSG; } front_len = le32_to_cpu(con->v1.in_hdr.front_len); if (front_len > CEPH_MSG_MAX_FRONT_LEN) return -EIO; middle_len = le32_to_cpu(con->v1.in_hdr.middle_len); if (middle_len > CEPH_MSG_MAX_MIDDLE_LEN) return -EIO; data_len = le32_to_cpu(con->v1.in_hdr.data_len); if (data_len > CEPH_MSG_MAX_DATA_LEN) return -EIO; /* verify seq# */ seq = le64_to_cpu(con->v1.in_hdr.seq); if ((s64)seq - (s64)con->in_seq < 1) { pr_info("skipping %s%lld %s seq %lld expected %lld\n", ENTITY_NAME(con->peer_name), ceph_pr_addr(&con->peer_addr), seq, con->in_seq + 1); con->v1.in_base_pos = -front_len - middle_len - data_len - sizeof_footer(con); con->v1.in_tag = CEPH_MSGR_TAG_READY; return 1; } else if ((s64)seq - (s64)con->in_seq > 1) { pr_err("read_partial_message bad seq %lld expected %lld\n", seq, con->in_seq + 1); con->error_msg = "bad message sequence # for incoming message"; return -EBADE; } /* allocate message? 
*/ if (!con->in_msg) { int skip = 0; dout("got hdr type %d front %d data %d\n", con->v1.in_hdr.type, front_len, data_len); ret = ceph_con_in_msg_alloc(con, &con->v1.in_hdr, &skip); if (ret < 0) return ret; BUG_ON((!con->in_msg) ^ skip); if (skip) { /* skip this message */ dout("alloc_msg said skip message\n"); con->v1.in_base_pos = -front_len - middle_len - data_len - sizeof_footer(con); con->v1.in_tag = CEPH_MSGR_TAG_READY; con->in_seq++; return 1; } BUG_ON(!con->in_msg); BUG_ON(con->in_msg->con != con); m = con->in_msg; m->front.iov_len = 0; /* haven't read it yet */ if (m->middle) m->middle->vec.iov_len = 0; /* prepare for data payload, if any */ if (data_len) prepare_message_data(con->in_msg, data_len); } /* front */ ret = read_partial_message_section(con, &m->front, front_len, &con->in_front_crc); if (ret <= 0) return ret; /* middle */ if (m->middle) { ret = read_partial_message_section(con, &m->middle->vec, middle_len, &con->in_middle_crc); if (ret <= 0) return ret; } /* (page) data */ if (data_len) { if (!m->num_data_items) return -EIO; if (m->sparse_read_total) ret = read_partial_sparse_msg_data(con); else if (ceph_test_opt(from_msgr(con->msgr), RXBOUNCE)) ret = read_partial_msg_data_bounce(con); else ret = read_partial_msg_data(con); if (ret <= 0) return ret; } /* footer */ size = sizeof_footer(con); end += size; ret = read_partial(con, end, size, &m->footer); if (ret <= 0) return ret; if (!need_sign) { m->footer.flags = m->old_footer.flags; m->footer.sig = 0; } dout("read_partial_message got msg %p %d (%u) + %d (%u) + %d (%u)\n", m, front_len, m->footer.front_crc, middle_len, m->footer.middle_crc, data_len, m->footer.data_crc); /* crc ok? */ if (con->in_front_crc != le32_to_cpu(m->footer.front_crc)) { pr_err("read_partial_message %p front crc %u != exp. %u\n", m, con->in_front_crc, m->footer.front_crc); return -EBADMSG; } if (con->in_middle_crc != le32_to_cpu(m->footer.middle_crc)) { pr_err("read_partial_message %p middle crc %u != exp %u\n", m, con->in_middle_crc, m->footer.middle_crc); return -EBADMSG; } if (do_datacrc && (m->footer.flags & CEPH_MSG_FOOTER_NOCRC) == 0 && con->in_data_crc != le32_to_cpu(m->footer.data_crc)) { pr_err("read_partial_message %p data crc %u != exp. %u\n", m, con->in_data_crc, le32_to_cpu(m->footer.data_crc)); return -EBADMSG; } if (need_sign && con->ops->check_message_signature && con->ops->check_message_signature(m)) { pr_err("read_partial_message %p signature check failed\n", m); return -EBADMSG; } return 1; /* done! */ } static int read_keepalive_ack(struct ceph_connection *con) { struct ceph_timespec ceph_ts; size_t size = sizeof(ceph_ts); int ret = read_partial(con, size, size, &ceph_ts); if (ret <= 0) return ret; ceph_decode_timespec64(&con->last_keepalive_ack, &ceph_ts); prepare_read_tag(con); return 1; } /* * Read what we can from the socket. */ int ceph_con_v1_try_read(struct ceph_connection *con) { int ret = -1; more: dout("try_read start %p state %d\n", con, con->state); if (con->state != CEPH_CON_S_V1_BANNER && con->state != CEPH_CON_S_V1_CONNECT_MSG && con->state != CEPH_CON_S_OPEN) return 0; BUG_ON(!con->sock); dout("try_read tag %d in_base_pos %d\n", con->v1.in_tag, con->v1.in_base_pos); if (con->state == CEPH_CON_S_V1_BANNER) { ret = read_partial_banner(con); if (ret <= 0) goto out; ret = process_banner(con); if (ret < 0) goto out; con->state = CEPH_CON_S_V1_CONNECT_MSG; /* * Received banner is good, exchange connection info. * Do not reset out_kvec, as sending our banner raced * with receiving peer banner after connect completed. 
*/ ret = prepare_write_connect(con); if (ret < 0) goto out; prepare_read_connect(con); /* Send connection info before awaiting response */ goto out; } if (con->state == CEPH_CON_S_V1_CONNECT_MSG) { ret = read_partial_connect(con); if (ret <= 0) goto out; ret = process_connect(con); if (ret < 0) goto out; goto more; } WARN_ON(con->state != CEPH_CON_S_OPEN); if (con->v1.in_base_pos < 0) { /* * skipping + discarding content. */ ret = ceph_tcp_recvmsg(con->sock, NULL, -con->v1.in_base_pos); if (ret <= 0) goto out; dout("skipped %d / %d bytes\n", ret, -con->v1.in_base_pos); con->v1.in_base_pos += ret; if (con->v1.in_base_pos) goto more; } if (con->v1.in_tag == CEPH_MSGR_TAG_READY) { /* * what's next? */ ret = ceph_tcp_recvmsg(con->sock, &con->v1.in_tag, 1); if (ret <= 0) goto out; dout("try_read got tag %d\n", con->v1.in_tag); switch (con->v1.in_tag) { case CEPH_MSGR_TAG_MSG: prepare_read_message(con); break; case CEPH_MSGR_TAG_ACK: prepare_read_ack(con); break; case CEPH_MSGR_TAG_KEEPALIVE2_ACK: prepare_read_keepalive_ack(con); break; case CEPH_MSGR_TAG_CLOSE: ceph_con_close_socket(con); con->state = CEPH_CON_S_CLOSED; goto out; default: goto bad_tag; } } if (con->v1.in_tag == CEPH_MSGR_TAG_MSG) { ret = read_partial_message(con); if (ret <= 0) { switch (ret) { case -EBADMSG: con->error_msg = "bad crc/signature"; fallthrough; case -EBADE: ret = -EIO; break; case -EIO: con->error_msg = "io error"; break; } goto out; } if (con->v1.in_tag == CEPH_MSGR_TAG_READY) goto more; ceph_con_process_message(con); if (con->state == CEPH_CON_S_OPEN) prepare_read_tag(con); goto more; } if (con->v1.in_tag == CEPH_MSGR_TAG_ACK || con->v1.in_tag == CEPH_MSGR_TAG_SEQ) { /* * the final handshake seq exchange is semantically * equivalent to an ACK */ ret = read_partial_ack(con); if (ret <= 0) goto out; process_ack(con); goto more; } if (con->v1.in_tag == CEPH_MSGR_TAG_KEEPALIVE2_ACK) { ret = read_keepalive_ack(con); if (ret <= 0) goto out; goto more; } out: dout("try_read done on %p ret %d\n", con, ret); return ret; bad_tag: pr_err("try_read bad tag %d\n", con->v1.in_tag); con->error_msg = "protocol error, garbage tag"; ret = -1; goto out; } /* * Write something to the socket. Called in a worker thread when the * socket appears to be writeable and we have something ready to send. */ int ceph_con_v1_try_write(struct ceph_connection *con) { int ret = 1; dout("try_write start %p state %d\n", con, con->state); if (con->state != CEPH_CON_S_PREOPEN && con->state != CEPH_CON_S_V1_BANNER && con->state != CEPH_CON_S_V1_CONNECT_MSG && con->state != CEPH_CON_S_OPEN) return 0; /* open the socket first? */ if (con->state == CEPH_CON_S_PREOPEN) { BUG_ON(con->sock); con->state = CEPH_CON_S_V1_BANNER; con_out_kvec_reset(con); prepare_write_banner(con); prepare_read_banner(con); BUG_ON(con->in_msg); con->v1.in_tag = CEPH_MSGR_TAG_READY; dout("try_write initiating connect on %p new state %d\n", con, con->state); ret = ceph_tcp_connect(con); if (ret < 0) { con->error_msg = "connect error"; goto out; } } more: dout("try_write out_kvec_bytes %d\n", con->v1.out_kvec_bytes); BUG_ON(!con->sock); /* kvec data queued? */ if (con->v1.out_kvec_left) { ret = write_partial_kvec(con); if (ret <= 0) goto out; } if (con->v1.out_skip) { ret = write_partial_skip(con); if (ret <= 0) goto out; } /* msg pages? */ if (con->out_msg) { if (con->v1.out_msg_done) { ceph_msg_put(con->out_msg); con->out_msg = NULL; /* we're done with this one */ goto do_next; } ret = write_partial_message_data(con); if (ret == 1) goto more; /* we need to send the footer, too! 
*/ if (ret == 0) goto out; if (ret < 0) { dout("try_write write_partial_message_data err %d\n", ret); goto out; } } do_next: if (con->state == CEPH_CON_S_OPEN) { if (ceph_con_flag_test_and_clear(con, CEPH_CON_F_KEEPALIVE_PENDING)) { prepare_write_keepalive(con); goto more; } /* is anything else pending? */ if (!list_empty(&con->out_queue)) { prepare_write_message(con); goto more; } if (con->in_seq > con->in_seq_acked) { prepare_write_ack(con); goto more; } } /* Nothing to do! */ ceph_con_flag_clear(con, CEPH_CON_F_WRITE_PENDING); dout("try_write nothing else to write.\n"); ret = 0; out: dout("try_write done on %p ret %d\n", con, ret); return ret; } void ceph_con_v1_revoke(struct ceph_connection *con) { struct ceph_msg *msg = con->out_msg; WARN_ON(con->v1.out_skip); /* footer */ if (con->v1.out_msg_done) { con->v1.out_skip += con_out_kvec_skip(con); } else { WARN_ON(!msg->data_length); con->v1.out_skip += sizeof_footer(con); } /* data, middle, front */ if (msg->data_length) con->v1.out_skip += msg->cursor.total_resid; if (msg->middle) con->v1.out_skip += con_out_kvec_skip(con); con->v1.out_skip += con_out_kvec_skip(con); dout("%s con %p out_kvec_bytes %d out_skip %d\n", __func__, con, con->v1.out_kvec_bytes, con->v1.out_skip); } void ceph_con_v1_revoke_incoming(struct ceph_connection *con) { unsigned int front_len = le32_to_cpu(con->v1.in_hdr.front_len); unsigned int middle_len = le32_to_cpu(con->v1.in_hdr.middle_len); unsigned int data_len = le32_to_cpu(con->v1.in_hdr.data_len); /* skip rest of message */ con->v1.in_base_pos = con->v1.in_base_pos - sizeof(struct ceph_msg_header) - front_len - middle_len - data_len - sizeof(struct ceph_msg_footer); con->v1.in_tag = CEPH_MSGR_TAG_READY; con->in_seq++; dout("%s con %p in_base_pos %d\n", __func__, con, con->v1.in_base_pos); } bool ceph_con_v1_opened(struct ceph_connection *con) { return con->v1.connect_seq; } void ceph_con_v1_reset_session(struct ceph_connection *con) { con->v1.connect_seq = 0; con->v1.peer_global_seq = 0; } void ceph_con_v1_reset_protocol(struct ceph_connection *con) { con->v1.out_skip = 0; } |
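/*
 * A minimal user-space sketch of the resumable-read pattern used by
 * read_partial() above: '*pos' counts bytes consumed across all
 * sections so far, 'end' is the cumulative offset at which the current
 * section finishes, and 'size' is the section length, so
 * 'size - (end - *pos)' bytes of 'object' are already filled in.  The
 * helper name read_partial_fd() is invented for illustration and is
 * not part of the kernel sources; note that on a blocking fd a return
 * of 0 means EOF, whereas the kernel's ceph_tcp_recvmsg() maps -EAGAIN
 * to 0 to mean "try again later".
 */
#include <stddef.h>
#include <unistd.h>

static int read_partial_fd(int fd, int *pos, int end, int size, void *object)
{
	while (*pos < end) {
		int left = end - *pos;
		int have = size - left;
		ssize_t ret = read(fd, (char *)object + have, (size_t)left);

		if (ret <= 0)
			return (int)ret;	/* EOF or error */
		*pos += (int)ret;
	}
	return 1;	/* section complete, mirroring the kernel version */
}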
// SPDX-License-Identifier: GPL-2.0-only
/*
 * File: datagram.c
 *
 * Datagram (ISI) Phonet sockets
 *
 * Copyright (C) 2008 Nokia Corporation.
 *
 * Authors: Sakari Ailus <sakari.ailus@nokia.com>
 *          Rémi Denis-Courmont
 */

#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/socket.h>
#include <asm/ioctls.h>
#include <net/sock.h>

#include <linux/phonet.h>
#include <linux/export.h>
#include <net/phonet/phonet.h>

static int pn_backlog_rcv(struct sock *sk, struct sk_buff *skb);

/* associated socket ceases to exist */
static void pn_sock_close(struct sock *sk, long timeout)
{
	sk_common_release(sk);
}

static int pn_ioctl(struct sock *sk, int cmd, int *karg)
{
	struct sk_buff *skb;

	switch (cmd) {
	case SIOCINQ:
		spin_lock_bh(&sk->sk_receive_queue.lock);
		skb = skb_peek(&sk->sk_receive_queue);
		*karg = skb ? skb->len : 0;
		spin_unlock_bh(&sk->sk_receive_queue.lock);
		return 0;

	case SIOCPNADDRESOURCE:
	case SIOCPNDELRESOURCE: {
		u32 res = *karg;

		if (res >= 256)
			return -EINVAL;
		if (cmd == SIOCPNADDRESOURCE)
			return pn_sock_bind_res(sk, res);
		else
			return pn_sock_unbind_res(sk, res);
	}
	}

	return -ENOIOCTLCMD;
}

/* Destroy socket. All references are gone. */
static void pn_destruct(struct sock *sk)
{
	skb_queue_purge(&sk->sk_receive_queue);
}

static int pn_init(struct sock *sk)
{
	sk->sk_destruct = pn_destruct;
	return 0;
}

static int pn_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
{
	DECLARE_SOCKADDR(struct sockaddr_pn *, target, msg->msg_name);
	struct sk_buff *skb;
	int err;

	if (msg->msg_flags & ~(MSG_DONTWAIT|MSG_EOR|MSG_NOSIGNAL|
				MSG_CMSG_COMPAT))
		return -EOPNOTSUPP;

	if (target == NULL)
		return -EDESTADDRREQ;

	if (msg->msg_namelen < sizeof(struct sockaddr_pn))
		return -EINVAL;

	if (target->spn_family != AF_PHONET)
		return -EAFNOSUPPORT;

	skb = sock_alloc_send_skb(sk, MAX_PHONET_HEADER + len,
					msg->msg_flags & MSG_DONTWAIT, &err);
	if (skb == NULL)
		return err;
	skb_reserve(skb, MAX_PHONET_HEADER);

	err = memcpy_from_msg((void *)skb_put(skb, len), msg, len);
	if (err < 0) {
		kfree_skb(skb);
		return err;
	}

	/*
	 * Fill in the Phonet header and
	 * finally pass the packet forwards.
	 */
	err = pn_skb_send(sk, skb, target);

	/* If ok, return len. */
	return (err >= 0) ? len : err;
}

static int pn_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
		      int flags, int *addr_len)
{
	struct sk_buff *skb = NULL;
	struct sockaddr_pn sa;
	int rval = -EOPNOTSUPP;
	int copylen;

	if (flags & ~(MSG_PEEK|MSG_TRUNC|MSG_DONTWAIT|MSG_NOSIGNAL|
			MSG_CMSG_COMPAT))
		goto out_nofree;

	skb = skb_recv_datagram(sk, flags, &rval);
	if (skb == NULL)
		goto out_nofree;

	pn_skb_get_src_sockaddr(skb, &sa);

	copylen = skb->len;
	if (len < copylen) {
		msg->msg_flags |= MSG_TRUNC;
		copylen = len;
	}

	rval = skb_copy_datagram_msg(skb, 0, msg, copylen);
	if (rval) {
		rval = -EFAULT;
		goto out;
	}

	rval = (flags & MSG_TRUNC) ? skb->len : copylen;

	if (msg->msg_name != NULL) {
		__sockaddr_check_size(sizeof(sa));
		memcpy(msg->msg_name, &sa, sizeof(sa));
		*addr_len = sizeof(sa);
	}

out:
	skb_free_datagram(sk, skb);

out_nofree:
	return rval;
}

/* Queue an skb for a sock. */
static int pn_backlog_rcv(struct sock *sk, struct sk_buff *skb)
{
	int err = sock_queue_rcv_skb(sk, skb);

	if (err < 0)
		kfree_skb(skb);
	return err ? NET_RX_DROP : NET_RX_SUCCESS;
}

/* Module registration */
static struct proto pn_proto = {
	.close		= pn_sock_close,
	.ioctl		= pn_ioctl,
	.init		= pn_init,
	.sendmsg	= pn_sendmsg,
	.recvmsg	= pn_recvmsg,
	.backlog_rcv	= pn_backlog_rcv,
	.hash		= pn_sock_hash,
	.unhash		= pn_sock_unhash,
	.get_port	= pn_sock_get_port,
	.obj_size	= sizeof(struct pn_sock),
	.owner		= THIS_MODULE,
	.name		= "PHONET",
};

static const struct phonet_protocol pn_dgram_proto = {
	.ops		= &phonet_dgram_ops,
	.prot		= &pn_proto,
	.sock_type	= SOCK_DGRAM,
};

int __init isi_register(void)
{
	return phonet_proto_register(PN_PROTO_PHONET, &pn_dgram_proto);
}

void __exit isi_unregister(void)
{
	phonet_proto_unregister(PN_PROTO_PHONET, &pn_dgram_proto);
}
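/*
 * A short user-space sketch of how the pn_sendmsg() path above is
 * typically reached.  The device and resource values below are
 * placeholder assumptions -- real Phonet addressing depends on the
 * attached modem -- and phonet_dgram_example() is an invented name,
 * not part of the kernel sources.
 */
#include <string.h>
#include <sys/socket.h>
#include <unistd.h>
#include <linux/phonet.h>

int phonet_dgram_example(void)
{
	struct sockaddr_pn spn;
	const char buf[] = "ping";
	int ret, fd = socket(AF_PHONET, SOCK_DGRAM, 0);

	if (fd < 0)
		return -1;

	/* pn_sendmsg() requires a full AF_PHONET destination address */
	memset(&spn, 0, sizeof(spn));
	spn.spn_family = AF_PHONET;
	spn.spn_dev = 0x00;		/* placeholder device address */
	spn.spn_resource = 0x06;	/* placeholder resource id */

	ret = sendto(fd, buf, sizeof(buf), 0,
		     (struct sockaddr *)&spn, sizeof(spn));
	close(fd);
	return ret;
}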
/* * Copyright (c) 2006, 2019 Oracle and/or its affiliates. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenIB.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * */ #include <linux/kernel.h> #include <linux/in.h> #include <linux/if.h> #include <linux/netdevice.h> #include <linux/inetdevice.h> #include <linux/if_arp.h> #include <linux/delay.h> #include <linux/slab.h> #include <linux/module.h> #include <net/addrconf.h> #include "rds_single_path.h" #include "rds.h" #include "ib.h" #include "ib_mr.h" static unsigned int rds_ib_mr_1m_pool_size = RDS_MR_1M_POOL_SIZE; static unsigned int rds_ib_mr_8k_pool_size = RDS_MR_8K_POOL_SIZE; unsigned int rds_ib_retry_count = RDS_IB_DEFAULT_RETRY_COUNT; static atomic_t rds_ib_unloading; module_param(rds_ib_mr_1m_pool_size, int, 0444); MODULE_PARM_DESC(rds_ib_mr_1m_pool_size, " Max number of 1M mr per HCA"); module_param(rds_ib_mr_8k_pool_size, int, 0444); MODULE_PARM_DESC(rds_ib_mr_8k_pool_size, " Max number of 8K mr per HCA"); module_param(rds_ib_retry_count, int, 0444); MODULE_PARM_DESC(rds_ib_retry_count, " Number of hw retries before reporting an error"); /* * we have a clumsy combination of RCU and a rwsem protecting this list * because it is used both in the get_mr fast path and while blocking in * the FMR flushing path. */ DECLARE_RWSEM(rds_ib_devices_lock); struct list_head rds_ib_devices; /* NOTE: if also grabbing ibdev lock, grab this first */ DEFINE_SPINLOCK(ib_nodev_conns_lock); LIST_HEAD(ib_nodev_conns); static void rds_ib_nodev_connect(void) { struct rds_ib_connection *ic; spin_lock(&ib_nodev_conns_lock); list_for_each_entry(ic, &ib_nodev_conns, ib_node) rds_conn_connect_if_down(ic->conn); spin_unlock(&ib_nodev_conns_lock); } static void rds_ib_dev_shutdown(struct rds_ib_device *rds_ibdev) { struct rds_ib_connection *ic; unsigned long flags; spin_lock_irqsave(&rds_ibdev->spinlock, flags); list_for_each_entry(ic, &rds_ibdev->conn_list, ib_node) rds_conn_path_drop(&ic->conn->c_path[0], true); spin_unlock_irqrestore(&rds_ibdev->spinlock, flags); } /* * rds_ib_destroy_mr_pool() blocks on a few things and mrs drop references * from interrupt context so we push freeing off into a work struct in krdsd.
*/ static void rds_ib_dev_free(struct work_struct *work) { struct rds_ib_ipaddr *i_ipaddr, *i_next; struct rds_ib_device *rds_ibdev = container_of(work, struct rds_ib_device, free_work); if (rds_ibdev->mr_8k_pool) rds_ib_destroy_mr_pool(rds_ibdev->mr_8k_pool); if (rds_ibdev->mr_1m_pool) rds_ib_destroy_mr_pool(rds_ibdev->mr_1m_pool); if (rds_ibdev->pd) ib_dealloc_pd(rds_ibdev->pd); list_for_each_entry_safe(i_ipaddr, i_next, &rds_ibdev->ipaddr_list, list) { list_del(&i_ipaddr->list); kfree(i_ipaddr); } kfree(rds_ibdev->vector_load); kfree(rds_ibdev); } void rds_ib_dev_put(struct rds_ib_device *rds_ibdev) { BUG_ON(refcount_read(&rds_ibdev->refcount) == 0); if (refcount_dec_and_test(&rds_ibdev->refcount)) queue_work(rds_wq, &rds_ibdev->free_work); } static int rds_ib_add_one(struct ib_device *device) { struct rds_ib_device *rds_ibdev; int ret; /* Only handle IB (no iWARP) devices */ if (device->node_type != RDMA_NODE_IB_CA) return -EOPNOTSUPP; /* Device must support FRWR */ if (!(device->attrs.device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS)) return -EOPNOTSUPP; rds_ibdev = kzalloc_node(sizeof(struct rds_ib_device), GFP_KERNEL, ibdev_to_node(device)); if (!rds_ibdev) return -ENOMEM; spin_lock_init(&rds_ibdev->spinlock); refcount_set(&rds_ibdev->refcount, 1); INIT_WORK(&rds_ibdev->free_work, rds_ib_dev_free); INIT_LIST_HEAD(&rds_ibdev->ipaddr_list); INIT_LIST_HEAD(&rds_ibdev->conn_list); rds_ibdev->max_wrs = device->attrs.max_qp_wr; rds_ibdev->max_sge = min(device->attrs.max_send_sge, RDS_IB_MAX_SGE); rds_ibdev->odp_capable = !!(device->attrs.kernel_cap_flags & IBK_ON_DEMAND_PAGING) && !!(device->attrs.odp_caps.per_transport_caps.rc_odp_caps & IB_ODP_SUPPORT_WRITE) && !!(device->attrs.odp_caps.per_transport_caps.rc_odp_caps & IB_ODP_SUPPORT_READ); rds_ibdev->max_1m_mrs = device->attrs.max_mr ? min_t(unsigned int, (device->attrs.max_mr / 2), rds_ib_mr_1m_pool_size) : rds_ib_mr_1m_pool_size; rds_ibdev->max_8k_mrs = device->attrs.max_mr ? 
min_t(unsigned int, ((device->attrs.max_mr / 2) * RDS_MR_8K_SCALE), rds_ib_mr_8k_pool_size) : rds_ib_mr_8k_pool_size; rds_ibdev->max_initiator_depth = device->attrs.max_qp_init_rd_atom; rds_ibdev->max_responder_resources = device->attrs.max_qp_rd_atom; rds_ibdev->vector_load = kcalloc(device->num_comp_vectors, sizeof(int), GFP_KERNEL); if (!rds_ibdev->vector_load) { pr_err("RDS/IB: %s failed to allocate vector memory\n", __func__); ret = -ENOMEM; goto put_dev; } rds_ibdev->dev = device; rds_ibdev->pd = ib_alloc_pd(device, 0); if (IS_ERR(rds_ibdev->pd)) { ret = PTR_ERR(rds_ibdev->pd); rds_ibdev->pd = NULL; goto put_dev; } rds_ibdev->mr_1m_pool = rds_ib_create_mr_pool(rds_ibdev, RDS_IB_MR_1M_POOL); if (IS_ERR(rds_ibdev->mr_1m_pool)) { ret = PTR_ERR(rds_ibdev->mr_1m_pool); rds_ibdev->mr_1m_pool = NULL; goto put_dev; } rds_ibdev->mr_8k_pool = rds_ib_create_mr_pool(rds_ibdev, RDS_IB_MR_8K_POOL); if (IS_ERR(rds_ibdev->mr_8k_pool)) { ret = PTR_ERR(rds_ibdev->mr_8k_pool); rds_ibdev->mr_8k_pool = NULL; goto put_dev; } rdsdebug("RDS/IB: max_mr = %d, max_wrs = %d, max_sge = %d, max_1m_mrs = %d, max_8k_mrs = %d\n", device->attrs.max_mr, rds_ibdev->max_wrs, rds_ibdev->max_sge, rds_ibdev->max_1m_mrs, rds_ibdev->max_8k_mrs); pr_info("RDS/IB: %s: added\n", device->name); down_write(&rds_ib_devices_lock); list_add_tail_rcu(&rds_ibdev->list, &rds_ib_devices); up_write(&rds_ib_devices_lock); refcount_inc(&rds_ibdev->refcount); ib_set_client_data(device, &rds_ib_client, rds_ibdev); rds_ib_nodev_connect(); return 0; put_dev: rds_ib_dev_put(rds_ibdev); return ret; } /* * New connections use this to find the device to associate with the * connection. It's not in the fast path so we're not concerned about the * performance of the IB call. (As of this writing, it uses an interrupt * blocking spinlock to serialize walking a per-device list of all registered * clients.) * * RCU is used to handle incoming connections racing with device teardown. * Rather than use a lock to serialize removal from the client_data and * getting a new reference, we use an RCU grace period. The destruction * path removes the device from client_data and then waits for all RCU * readers to finish. * * A new connection can get NULL from this if its arriving on a * device that is in the process of being removed. */ struct rds_ib_device *rds_ib_get_client_data(struct ib_device *device) { struct rds_ib_device *rds_ibdev; rcu_read_lock(); rds_ibdev = ib_get_client_data(device, &rds_ib_client); if (rds_ibdev) refcount_inc(&rds_ibdev->refcount); rcu_read_unlock(); return rds_ibdev; } /* * The IB stack is letting us know that a device is going away. This can * happen if the underlying HCA driver is removed or if PCI hotplug is removing * the pci function, for example. * * This can be called at any time and can be racing with any other RDS path. */ static void rds_ib_remove_one(struct ib_device *device, void *client_data) { struct rds_ib_device *rds_ibdev = client_data; rds_ib_dev_shutdown(rds_ibdev); /* stop connection attempts from getting a reference to this device. */ ib_set_client_data(device, &rds_ib_client, NULL); down_write(&rds_ib_devices_lock); list_del_rcu(&rds_ibdev->list); up_write(&rds_ib_devices_lock); /* * This synchronize rcu is waiting for readers of both the ib * client data and the devices list to finish before we drop * both of those references. 
*/ synchronize_rcu(); rds_ib_dev_put(rds_ibdev); rds_ib_dev_put(rds_ibdev); } struct ib_client rds_ib_client = { .name = "rds_ib", .add = rds_ib_add_one, .remove = rds_ib_remove_one }; static int rds_ib_conn_info_visitor(struct rds_connection *conn, void *buffer) { struct rds_info_rdma_connection *iinfo = buffer; struct rds_ib_connection *ic = conn->c_transport_data; /* We will only ever look at IB transports */ if (conn->c_trans != &rds_ib_transport) return 0; if (conn->c_isv6) return 0; iinfo->src_addr = conn->c_laddr.s6_addr32[3]; iinfo->dst_addr = conn->c_faddr.s6_addr32[3]; if (ic) { iinfo->tos = conn->c_tos; iinfo->sl = ic->i_sl; } memset(&iinfo->src_gid, 0, sizeof(iinfo->src_gid)); memset(&iinfo->dst_gid, 0, sizeof(iinfo->dst_gid)); if (rds_conn_state(conn) == RDS_CONN_UP) { struct rds_ib_device *rds_ibdev; rdma_read_gids(ic->i_cm_id, (union ib_gid *)&iinfo->src_gid, (union ib_gid *)&iinfo->dst_gid); rds_ibdev = ic->rds_ibdev; iinfo->max_send_wr = ic->i_send_ring.w_nr; iinfo->max_recv_wr = ic->i_recv_ring.w_nr; iinfo->max_send_sge = rds_ibdev->max_sge; rds_ib_get_mr_info(rds_ibdev, iinfo); iinfo->cache_allocs = atomic_read(&ic->i_cache_allocs); } return 1; } #if IS_ENABLED(CONFIG_IPV6) /* IPv6 version of rds_ib_conn_info_visitor(). */ static int rds6_ib_conn_info_visitor(struct rds_connection *conn, void *buffer) { struct rds6_info_rdma_connection *iinfo6 = buffer; struct rds_ib_connection *ic = conn->c_transport_data; /* We will only ever look at IB transports */ if (conn->c_trans != &rds_ib_transport) return 0; iinfo6->src_addr = conn->c_laddr; iinfo6->dst_addr = conn->c_faddr; if (ic) { iinfo6->tos = conn->c_tos; iinfo6->sl = ic->i_sl; } memset(&iinfo6->src_gid, 0, sizeof(iinfo6->src_gid)); memset(&iinfo6->dst_gid, 0, sizeof(iinfo6->dst_gid)); if (rds_conn_state(conn) == RDS_CONN_UP) { struct rds_ib_device *rds_ibdev; rdma_read_gids(ic->i_cm_id, (union ib_gid *)&iinfo6->src_gid, (union ib_gid *)&iinfo6->dst_gid); rds_ibdev = ic->rds_ibdev; iinfo6->max_send_wr = ic->i_send_ring.w_nr; iinfo6->max_recv_wr = ic->i_recv_ring.w_nr; iinfo6->max_send_sge = rds_ibdev->max_sge; rds6_ib_get_mr_info(rds_ibdev, iinfo6); iinfo6->cache_allocs = atomic_read(&ic->i_cache_allocs); } return 1; } #endif static void rds_ib_ic_info(struct socket *sock, unsigned int len, struct rds_info_iterator *iter, struct rds_info_lengths *lens) { u64 buffer[(sizeof(struct rds_info_rdma_connection) + 7) / 8]; rds_for_each_conn_info(sock, len, iter, lens, rds_ib_conn_info_visitor, buffer, sizeof(struct rds_info_rdma_connection)); } #if IS_ENABLED(CONFIG_IPV6) /* IPv6 version of rds_ib_ic_info(). */ static void rds6_ib_ic_info(struct socket *sock, unsigned int len, struct rds_info_iterator *iter, struct rds_info_lengths *lens) { u64 buffer[(sizeof(struct rds6_info_rdma_connection) + 7) / 8]; rds_for_each_conn_info(sock, len, iter, lens, rds6_ib_conn_info_visitor, buffer, sizeof(struct rds6_info_rdma_connection)); } #endif /* * Early RDS/IB was built to only bind to an address if there is an IPoIB * device with that address set. * * If it were me, I'd advocate for something more flexible. Sending and * receiving should be device-agnostic. Transports would try and maintain * connections between peers who have messages queued. Userspace would be * allowed to influence which paths have priority. We could call userspace * asserting this policy "routing". 
*/ static int rds_ib_laddr_check(struct net *net, const struct in6_addr *addr, __u32 scope_id) { int ret; struct rdma_cm_id *cm_id; #if IS_ENABLED(CONFIG_IPV6) struct sockaddr_in6 sin6; #endif struct sockaddr_in sin; struct sockaddr *sa; bool isv4; isv4 = ipv6_addr_v4mapped(addr); /* Create a CMA ID and try to bind it. This catches both * IB and iWARP capable NICs. */ cm_id = rdma_create_id(&init_net, rds_rdma_cm_event_handler, NULL, RDMA_PS_TCP, IB_QPT_RC); if (IS_ERR(cm_id)) return PTR_ERR(cm_id); if (isv4) { memset(&sin, 0, sizeof(sin)); sin.sin_family = AF_INET; sin.sin_addr.s_addr = addr->s6_addr32[3]; sa = (struct sockaddr *)&sin; } else { #if IS_ENABLED(CONFIG_IPV6) memset(&sin6, 0, sizeof(sin6)); sin6.sin6_family = AF_INET6; sin6.sin6_addr = *addr; sin6.sin6_scope_id = scope_id; sa = (struct sockaddr *)&sin6; /* XXX Do a special IPv6 link local address check here. The * reason is that rdma_bind_addr() always succeeds with IPv6 * link local address regardless it is indeed configured in a * system. */ if (ipv6_addr_type(addr) & IPV6_ADDR_LINKLOCAL) { struct net_device *dev; if (scope_id == 0) { ret = -EADDRNOTAVAIL; goto out; } /* Use init_net for now as RDS is not network * name space aware. */ dev = dev_get_by_index(&init_net, scope_id); if (!dev) { ret = -EADDRNOTAVAIL; goto out; } if (!ipv6_chk_addr(&init_net, addr, dev, 1)) { dev_put(dev); ret = -EADDRNOTAVAIL; goto out; } dev_put(dev); } #else ret = -EADDRNOTAVAIL; goto out; #endif } /* rdma_bind_addr will only succeed for IB & iWARP devices */ ret = rdma_bind_addr(cm_id, sa); /* due to this, we will claim to support iWARP devices unless we check node_type. */ if (ret || !cm_id->device || cm_id->device->node_type != RDMA_NODE_IB_CA) ret = -EADDRNOTAVAIL; rdsdebug("addr %pI6c%%%u ret %d node type %d\n", addr, scope_id, ret, cm_id->device ? cm_id->device->node_type : -1); out: rdma_destroy_id(cm_id); return ret; } static void rds_ib_unregister_client(void) { ib_unregister_client(&rds_ib_client); /* wait for rds_ib_dev_free() to complete */ flush_workqueue(rds_wq); } static void rds_ib_set_unloading(void) { atomic_set(&rds_ib_unloading, 1); } static bool rds_ib_is_unloading(struct rds_connection *conn) { struct rds_conn_path *cp = &conn->c_path[0]; return (test_bit(RDS_DESTROY_PENDING, &cp->cp_flags) || atomic_read(&rds_ib_unloading) != 0); } void rds_ib_exit(void) { rds_ib_set_unloading(); synchronize_rcu(); rds_info_deregister_func(RDS_INFO_IB_CONNECTIONS, rds_ib_ic_info); #if IS_ENABLED(CONFIG_IPV6) rds_info_deregister_func(RDS6_INFO_IB_CONNECTIONS, rds6_ib_ic_info); #endif rds_ib_unregister_client(); rds_ib_destroy_nodev_conns(); rds_ib_sysctl_exit(); rds_ib_recv_exit(); rds_trans_unregister(&rds_ib_transport); rds_ib_mr_exit(); } static u8 rds_ib_get_tos_map(u8 tos) { /* 1:1 user to transport map for RDMA transport. * In future, if custom map is desired, hook can export * user configurable map. 
 */
	return tos;
}

struct rds_transport rds_ib_transport = {
	.laddr_check		= rds_ib_laddr_check,
	.xmit_path_complete	= rds_ib_xmit_path_complete,
	.xmit			= rds_ib_xmit,
	.xmit_rdma		= rds_ib_xmit_rdma,
	.xmit_atomic		= rds_ib_xmit_atomic,
	.recv_path		= rds_ib_recv_path,
	.conn_alloc		= rds_ib_conn_alloc,
	.conn_free		= rds_ib_conn_free,
	.conn_path_connect	= rds_ib_conn_path_connect,
	.conn_path_shutdown	= rds_ib_conn_path_shutdown,
	.inc_copy_to_user	= rds_ib_inc_copy_to_user,
	.inc_free		= rds_ib_inc_free,
	.cm_initiate_connect	= rds_ib_cm_initiate_connect,
	.cm_handle_connect	= rds_ib_cm_handle_connect,
	.cm_connect_complete	= rds_ib_cm_connect_complete,
	.stats_info_copy	= rds_ib_stats_info_copy,
	.exit			= rds_ib_exit,
	.get_mr			= rds_ib_get_mr,
	.sync_mr		= rds_ib_sync_mr,
	.free_mr		= rds_ib_free_mr,
	.flush_mrs		= rds_ib_flush_mrs,
	.get_tos_map		= rds_ib_get_tos_map,
	.t_owner		= THIS_MODULE,
	.t_name			= "infiniband",
	.t_unloading		= rds_ib_is_unloading,
	.t_type			= RDS_TRANS_IB
};

int rds_ib_init(void)
{
	int ret;

	INIT_LIST_HEAD(&rds_ib_devices);

	ret = rds_ib_mr_init();
	if (ret)
		goto out;

	ret = ib_register_client(&rds_ib_client);
	if (ret)
		goto out_mr_exit;

	ret = rds_ib_sysctl_init();
	if (ret)
		goto out_ibreg;

	ret = rds_ib_recv_init();
	if (ret)
		goto out_sysctl;

	rds_trans_register(&rds_ib_transport);

	rds_info_register_func(RDS_INFO_IB_CONNECTIONS, rds_ib_ic_info);
#if IS_ENABLED(CONFIG_IPV6)
	rds_info_register_func(RDS6_INFO_IB_CONNECTIONS, rds6_ib_ic_info);
#endif

	goto out;

out_sysctl:
	rds_ib_sysctl_exit();
out_ibreg:
	rds_ib_unregister_client();
out_mr_exit:
	rds_ib_mr_exit();
out:
	return ret;
}

MODULE_LICENSE("GPL");
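rds_ib_init() above layers four subsystems and, on failure, unwinds only the ones already set up, in reverse order, through a chain of labels reached by goto. Below is a minimal, self-contained sketch of that goto-unwind idiom; the foo_/bar_/baz_ subsystems are hypothetical stand-ins rather than RDS functions, and the sketch builds as plain userspace C.

#include <stdio.h>

/* Hypothetical subsystems; each init returns 0 on success. */
static int foo_init(void) { return 0; }
static void foo_exit(void) { }
static int bar_init(void) { return 0; }
static void bar_exit(void) { }
static int baz_init(void) { return -1; }	/* simulate a failure */

static int demo_init(void)
{
	int ret;

	ret = foo_init();
	if (ret)
		goto out;

	ret = bar_init();
	if (ret)
		goto out_foo;

	ret = baz_init();
	if (ret)
		goto out_bar;

	return 0;

	/* Tear down in reverse order of setup; fall through each label. */
out_bar:
	bar_exit();
out_foo:
	foo_exit();
out:
	return ret;
}

int main(void)
{
	/* baz_init() fails, so bar and foo are unwound and -1 is returned. */
	printf("demo_init: %d\n", demo_init());
	return 0;
}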
/* SPDX-License-Identifier: GPL-2.0 */
/*
 * Copyright (C) 2007 Oracle.  All rights reserved.
 */

#ifndef BTRFS_VOLUMES_H
#define BTRFS_VOLUMES_H

#include <linux/blk_types.h>
#include <linux/sizes.h>
#include <linux/atomic.h>
#include <linux/sort.h>
#include <linux/list.h>
#include <linux/mutex.h>
#include <linux/log2.h>
#include <linux/kobject.h>
#include <linux/refcount.h>
#include <linux/completion.h>
#include <linux/rbtree.h>
#include <uapi/linux/btrfs.h>
#include "messages.h"
#include "rcu-string.h"

struct block_device;
struct bdev_handle;
struct btrfs_fs_info;
struct btrfs_block_group;
struct btrfs_trans_handle;
struct btrfs_zoned_device_info;

#define BTRFS_MAX_DATA_CHUNK_SIZE	(10ULL * SZ_1G)

/*
 * Arbitrary maximum size of one discard request to limit potentially long time
 * spent in blkdev_issue_discard().
 */
#define BTRFS_MAX_DISCARD_CHUNK_SIZE	(SZ_1G)

extern struct mutex uuid_mutex;

#define BTRFS_STRIPE_LEN		SZ_64K
#define BTRFS_STRIPE_LEN_SHIFT		(16)
#define BTRFS_STRIPE_LEN_MASK		(BTRFS_STRIPE_LEN - 1)

static_assert(const_ilog2(BTRFS_STRIPE_LEN) == BTRFS_STRIPE_LEN_SHIFT);

/* Used by sanity check for btrfs_raid_types. */
#define const_ffs(n) (__builtin_ctzll(n) + 1)

/*
 * The conversion from BTRFS_BLOCK_GROUP_* bits to btrfs_raid_type requires
 * RAID0 always to be the lowest profile bit.
 * Although it's part of on-disk format and should never change, do extra
 * compile-time sanity checks.
 */
static_assert(const_ffs(BTRFS_BLOCK_GROUP_RAID0) <
	      const_ffs(BTRFS_BLOCK_GROUP_PROFILE_MASK & ~BTRFS_BLOCK_GROUP_RAID0));
static_assert(const_ilog2(BTRFS_BLOCK_GROUP_RAID0) >
	      ilog2(BTRFS_BLOCK_GROUP_TYPE_MASK));

/* ilog2() can handle both constants and variables */
#define BTRFS_BG_FLAG_TO_INDEX(profile)					\
	ilog2((profile) >> (ilog2(BTRFS_BLOCK_GROUP_RAID0) - 1))

enum btrfs_raid_types {
	/* SINGLE is the special one as it doesn't have on-disk bit. */
	BTRFS_RAID_SINGLE = 0,

	BTRFS_RAID_RAID0 = BTRFS_BG_FLAG_TO_INDEX(BTRFS_BLOCK_GROUP_RAID0),
	BTRFS_RAID_RAID1 = BTRFS_BG_FLAG_TO_INDEX(BTRFS_BLOCK_GROUP_RAID1),
	BTRFS_RAID_DUP	 = BTRFS_BG_FLAG_TO_INDEX(BTRFS_BLOCK_GROUP_DUP),
	BTRFS_RAID_RAID10 = BTRFS_BG_FLAG_TO_INDEX(BTRFS_BLOCK_GROUP_RAID10),
	BTRFS_RAID_RAID5 = BTRFS_BG_FLAG_TO_INDEX(BTRFS_BLOCK_GROUP_RAID5),
	BTRFS_RAID_RAID6 = BTRFS_BG_FLAG_TO_INDEX(BTRFS_BLOCK_GROUP_RAID6),
	BTRFS_RAID_RAID1C3 = BTRFS_BG_FLAG_TO_INDEX(BTRFS_BLOCK_GROUP_RAID1C3),
	BTRFS_RAID_RAID1C4 = BTRFS_BG_FLAG_TO_INDEX(BTRFS_BLOCK_GROUP_RAID1C4),

	BTRFS_NR_RAID_TYPES
};

/*
 * Use sequence counter to get consistent device stat data on
 * 32-bit processors.
 */
#if BITS_PER_LONG==32 && defined(CONFIG_SMP)
#include <linux/seqlock.h>
#define __BTRFS_NEED_DEVICE_DATA_ORDERED
#define btrfs_device_data_ordered_init(device)	\
	seqcount_init(&device->data_seqcount)
#else
#define btrfs_device_data_ordered_init(device) do { } while (0)
#endif

#define BTRFS_DEV_STATE_WRITEABLE	(0)
#define BTRFS_DEV_STATE_IN_FS_METADATA	(1)
#define BTRFS_DEV_STATE_MISSING		(2)
#define BTRFS_DEV_STATE_REPLACE_TGT	(3)
#define BTRFS_DEV_STATE_FLUSH_SENT	(4)
#define BTRFS_DEV_STATE_NO_READA	(5)

/* Special value encoding failure to write primary super block. */
#define BTRFS_SUPER_PRIMARY_WRITE_ERROR		(INT_MAX / 2)

struct btrfs_fs_devices;

struct btrfs_device {
	struct list_head dev_list; /* device_list_mutex */
	struct list_head dev_alloc_list; /* chunk mutex */
	struct list_head post_commit_list; /* chunk mutex */
	struct btrfs_fs_devices *fs_devices;
	struct btrfs_fs_info *fs_info;

	struct rcu_string __rcu *name;

	u64 generation;

	struct file *bdev_file;
	struct block_device *bdev;

	struct btrfs_zoned_device_info *zone_info;

	/*
	 * Device's major-minor number.
Must be set even if the device is not * opened (bdev == NULL), unless the device is missing. */ dev_t devt; unsigned long dev_state; blk_status_t last_flush_error; #ifdef __BTRFS_NEED_DEVICE_DATA_ORDERED seqcount_t data_seqcount; #endif /* the internal btrfs device id */ u64 devid; /* size of the device in memory */ u64 total_bytes; /* size of the device on disk */ u64 disk_total_bytes; /* bytes used */ u64 bytes_used; /* optimal io alignment for this device */ u32 io_align; /* optimal io width for this device */ u32 io_width; /* type and info about this device */ u64 type; /* * Counter of super block write errors, values larger than * BTRFS_SUPER_PRIMARY_WRITE_ERROR encode primary super block write failure. */ atomic_t sb_write_errors; /* minimal io size for this device */ u32 sector_size; /* physical drive uuid (or lvm uuid) */ u8 uuid[BTRFS_UUID_SIZE]; /* * size of the device on the current transaction * * This variant is update when committing the transaction, * and protected by chunk mutex */ u64 commit_total_bytes; /* bytes used on the current transaction */ u64 commit_bytes_used; /* Bio used for flushing device barriers */ struct bio flush_bio; struct completion flush_wait; /* per-device scrub information */ struct scrub_ctx *scrub_ctx; /* disk I/O failure stats. For detailed description refer to * enum btrfs_dev_stat_values in ioctl.h */ int dev_stats_valid; /* Counter to record the change of device stats */ atomic_t dev_stats_ccnt; atomic_t dev_stat_values[BTRFS_DEV_STAT_VALUES_MAX]; struct extent_io_tree alloc_state; struct completion kobj_unregister; /* For sysfs/FSID/devinfo/devid/ */ struct kobject devid_kobj; /* Bandwidth limit for scrub, in bytes */ u64 scrub_speed_max; }; /* * Block group or device which contains an active swapfile. Used for preventing * unsafe operations while a swapfile is active. * * These are sorted on (ptr, inode) (note that a block group or device can * contain more than one swapfile). We compare the pointer values because we * don't actually care what the object is, we just need a quick check whether * the object exists in the rbtree. */ struct btrfs_swapfile_pin { struct rb_node node; void *ptr; struct inode *inode; /* * If true, ptr points to a struct btrfs_block_group. Otherwise, ptr * points to a struct btrfs_device. */ bool is_block_group; /* * Only used when 'is_block_group' is true and it is the number of * extents used by a swapfile for this block group ('ptr' field). */ int bg_extent_count; }; /* * If we read those variants at the context of their own lock, we needn't * use the following helpers, reading them directly is safe. 
*/ #if BITS_PER_LONG==32 && defined(CONFIG_SMP) #define BTRFS_DEVICE_GETSET_FUNCS(name) \ static inline u64 \ btrfs_device_get_##name(const struct btrfs_device *dev) \ { \ u64 size; \ unsigned int seq; \ \ do { \ seq = read_seqcount_begin(&dev->data_seqcount); \ size = dev->name; \ } while (read_seqcount_retry(&dev->data_seqcount, seq)); \ return size; \ } \ \ static inline void \ btrfs_device_set_##name(struct btrfs_device *dev, u64 size) \ { \ preempt_disable(); \ write_seqcount_begin(&dev->data_seqcount); \ dev->name = size; \ write_seqcount_end(&dev->data_seqcount); \ preempt_enable(); \ } #elif BITS_PER_LONG==32 && defined(CONFIG_PREEMPTION) #define BTRFS_DEVICE_GETSET_FUNCS(name) \ static inline u64 \ btrfs_device_get_##name(const struct btrfs_device *dev) \ { \ u64 size; \ \ preempt_disable(); \ size = dev->name; \ preempt_enable(); \ return size; \ } \ \ static inline void \ btrfs_device_set_##name(struct btrfs_device *dev, u64 size) \ { \ preempt_disable(); \ dev->name = size; \ preempt_enable(); \ } #else #define BTRFS_DEVICE_GETSET_FUNCS(name) \ static inline u64 \ btrfs_device_get_##name(const struct btrfs_device *dev) \ { \ return dev->name; \ } \ \ static inline void \ btrfs_device_set_##name(struct btrfs_device *dev, u64 size) \ { \ dev->name = size; \ } #endif BTRFS_DEVICE_GETSET_FUNCS(total_bytes); BTRFS_DEVICE_GETSET_FUNCS(disk_total_bytes); BTRFS_DEVICE_GETSET_FUNCS(bytes_used); enum btrfs_chunk_allocation_policy { BTRFS_CHUNK_ALLOC_REGULAR, BTRFS_CHUNK_ALLOC_ZONED, }; #define BTRFS_DEFAULT_RR_MIN_CONTIG_READ (SZ_256K) /* Keep in sync with raid_attr table, current maximum is RAID1C4. */ #define BTRFS_RAID1_MAX_MIRRORS (4) /* * Read policies for mirrored block group profiles, read picks the stripe based * on these policies. */ enum btrfs_read_policy { /* Use process PID to choose the stripe */ BTRFS_READ_POLICY_PID, #ifdef CONFIG_BTRFS_EXPERIMENTAL /* Balancing RAID1 reads across all striped devices (round-robin). */ BTRFS_READ_POLICY_RR, /* Read from a specific device. */ BTRFS_READ_POLICY_DEVID, #endif BTRFS_NR_READ_POLICY, }; #ifdef CONFIG_BTRFS_EXPERIMENTAL /* * Checksum mode - offload it to workqueues or do it synchronously in * btrfs_submit_chunk(). */ enum btrfs_offload_csum_mode { /* * Choose offloading checksum or do it synchronously automatically. * Do it synchronously if the checksum is fast, or offload to workqueues * otherwise. */ BTRFS_OFFLOAD_CSUM_AUTO, /* Always offload checksum to workqueues. */ BTRFS_OFFLOAD_CSUM_FORCE_ON, /* Never offload checksum to workqueues. */ BTRFS_OFFLOAD_CSUM_FORCE_OFF, }; #endif struct btrfs_fs_devices { u8 fsid[BTRFS_FSID_SIZE]; /* FS specific uuid */ /* * UUID written into the btree blocks: * * - If metadata_uuid != fsid then super block must have * BTRFS_FEATURE_INCOMPAT_METADATA_UUID flag set. * * - Following shall be true at all times: * - metadata_uuid == btrfs_header::fsid * - metadata_uuid == btrfs_dev_item::fsid * * - Relations between fsid and metadata_uuid in sb and fs_devices: * - Normal: * fs_devices->fsid == fs_devices->metadata_uuid == sb->fsid * sb->metadata_uuid == 0 * * - When the BTRFS_FEATURE_INCOMPAT_METADATA_UUID flag is set: * fs_devices->fsid == sb->fsid * fs_devices->metadata_uuid == sb->metadata_uuid * * - When in-memory fs_devices->temp_fsid is true * fs_devices->fsid = random * fs_devices->metadata_uuid == sb->fsid */ u8 metadata_uuid[BTRFS_FSID_SIZE]; struct list_head fs_list; /* * Number of devices under this fsid including missing and * replace-target device and excludes seed devices. 
*/ u64 num_devices; /* * The number of devices that successfully opened, including * replace-target, excludes seed devices. */ u64 open_devices; /* The number of devices that are under the chunk allocation list. */ u64 rw_devices; /* Count of missing devices under this fsid excluding seed device. */ u64 missing_devices; u64 total_rw_bytes; /* * Count of devices from btrfs_super_block::num_devices for this fsid, * which includes the seed device, excludes the transient replace-target * device. */ u64 total_devices; /* Highest generation number of seen devices */ u64 latest_generation; /* * The mount device or a device with highest generation after removal * or replace. */ struct btrfs_device *latest_dev; /* * All of the devices in the filesystem, protected by a mutex so we can * safely walk it to write out the super blocks without worrying about * adding/removing by the multi-device code. Scrubbing super block can * kick off supers writing by holding this mutex lock. */ struct mutex device_list_mutex; /* List of all devices, protected by device_list_mutex */ struct list_head devices; /* Devices which can satisfy space allocation. Protected by * chunk_mutex. */ struct list_head alloc_list; struct list_head seed_list; /* Count fs-devices opened. */ int opened; /* Set when we find or add a device that doesn't have the nonrot flag set. */ bool rotating; /* Devices support TRIM/discard commands. */ bool discardable; /* The filesystem is a seed filesystem. */ bool seeding; /* The mount needs to use a randomly generated fsid. */ bool temp_fsid; /* Enable/disable the filesystem stats tracking. */ bool collect_fs_stats; struct btrfs_fs_info *fs_info; /* sysfs kobjects */ struct kobject fsid_kobj; struct kobject *devices_kobj; struct kobject *devinfo_kobj; struct completion kobj_unregister; enum btrfs_chunk_allocation_policy chunk_alloc_policy; /* Policy used to read the mirrored stripes. */ enum btrfs_read_policy read_policy; #ifdef CONFIG_BTRFS_EXPERIMENTAL /* * Minimum contiguous reads before switching to next device, the unit * is one block/sectorsize. */ u32 rr_min_contig_read; /* Device to be used for reading in case of RAID1. */ u64 read_devid; /* Checksum mode - offload it or do it synchronously. */ enum btrfs_offload_csum_mode offload_csum_mode; #endif }; #define BTRFS_MAX_DEVS(info) ((BTRFS_MAX_ITEM_SIZE(info) \ - sizeof(struct btrfs_chunk)) \ / sizeof(struct btrfs_stripe) + 1) #define BTRFS_MAX_DEVS_SYS_CHUNK ((BTRFS_SYSTEM_CHUNK_ARRAY_SIZE \ - 2 * sizeof(struct btrfs_disk_key) \ - 2 * sizeof(struct btrfs_chunk)) \ / sizeof(struct btrfs_stripe) + 1) struct btrfs_io_stripe { struct btrfs_device *dev; /* Block mapping. */ u64 physical; u64 length; bool rst_search_commit_root; /* For the endio handler. */ struct btrfs_io_context *bioc; }; struct btrfs_discard_stripe { struct btrfs_device *dev; u64 physical; u64 length; }; /* * Context for IO subsmission for device stripe. * * - Track the unfinished mirrors for mirror based profiles * Mirror based profiles are SINGLE/DUP/RAID1/RAID10. * * - Contain the logical -> physical mapping info * Used by submit_stripe_bio() for mapping logical bio * into physical device address. * * - Contain device replace info * Used by handle_ops_on_dev_replace() to copy logical bios * into the new device. * * - Contain RAID56 full stripe logical bytenrs */ struct btrfs_io_context { refcount_t refs; struct btrfs_fs_info *fs_info; /* Taken from struct btrfs_chunk_map::type. 
*/ u64 map_type; struct bio *orig_bio; atomic_t error; u16 max_errors; bool use_rst; u64 logical; u64 size; /* Raid stripe tree ordered entry. */ struct list_head rst_ordered_entry; /* * The total number of stripes, including the extra duplicated * stripe for replace. */ u16 num_stripes; /* * The mirror_num of this bioc. * * This is for reads which use 0 as mirror_num, thus we should return a * valid mirror_num (>0) for the reader. */ u16 mirror_num; /* * The following two members are for dev-replace case only. * * @replace_nr_stripes: Number of duplicated stripes which need to be * written to replace target. * Should be <= 2 (2 for DUP, otherwise <= 1). * @replace_stripe_src: The array indicates where the duplicated stripes * are from. * * The @replace_stripe_src[] array is mostly for RAID56 cases. * As non-RAID56 stripes share the same contents of the mapped range, * thus no need to bother where the duplicated ones are from. * * But for RAID56 case, all stripes contain different contents, thus * we need a way to know the mapping. * * There is an example for the two members, using a RAID5 write: * * num_stripes: 4 (3 + 1 duplicated write) * stripes[0]: dev = devid 1, physical = X * stripes[1]: dev = devid 2, physical = Y * stripes[2]: dev = devid 3, physical = Z * stripes[3]: dev = devid 0, physical = Y * * replace_nr_stripes = 1 * replace_stripe_src = 1 <- Means stripes[1] is involved in replace. * The duplicated stripe index would be * (@num_stripes - 1). * * Note, that we can still have cases replace_nr_stripes = 2 for DUP. * In that case, all stripes share the same content, thus we don't * need to bother @replace_stripe_src value at all. */ u16 replace_nr_stripes; s16 replace_stripe_src; /* * Logical bytenr of the full stripe start, only for RAID56 cases. * * When this value is set to other than (u64)-1, the stripes[] should * follow this pattern: * * (real_stripes = num_stripes - replace_nr_stripes) * (data_stripes = (is_raid6) ? (real_stripes - 2) : (real_stripes - 1)) * * stripes[0]: The first data stripe * stripes[1]: The second data stripe * ... * stripes[data_stripes - 1]: The last data stripe * stripes[data_stripes]: The P stripe * stripes[data_stripes + 1]: The Q stripe (only for RAID6). */ u64 full_stripe_logical; struct btrfs_io_stripe stripes[]; }; struct btrfs_device_info { struct btrfs_device *dev; u64 dev_offset; u64 max_avail; u64 total_avail; }; struct btrfs_raid_attr { u8 sub_stripes; /* sub_stripes info for map */ u8 dev_stripes; /* stripes per dev */ u8 devs_max; /* max devs to use */ u8 devs_min; /* min devs needed */ u8 tolerated_failures; /* max tolerated fail devs */ u8 devs_increment; /* ndevs has to be a multiple of this */ u8 ncopies; /* how many copies to data has */ u8 nparity; /* number of stripes worth of bytes to store * parity information */ u8 mindev_error; /* error code if min devs requisite is unmet */ const char raid_name[8]; /* name of the raid */ u64 bg_flag; /* block group flag of the raid */ }; extern const struct btrfs_raid_attr btrfs_raid_array[BTRFS_NR_RAID_TYPES]; struct btrfs_chunk_map { struct rb_node rb_node; /* For mount time dev extent verification. 
*/ int verified_stripes; refcount_t refs; u64 start; u64 chunk_len; u64 stripe_size; u64 type; int io_align; int io_width; int num_stripes; int sub_stripes; struct btrfs_io_stripe stripes[]; }; #define btrfs_chunk_map_size(n) (sizeof(struct btrfs_chunk_map) + \ (sizeof(struct btrfs_io_stripe) * (n))) static inline void btrfs_free_chunk_map(struct btrfs_chunk_map *map) { if (map && refcount_dec_and_test(&map->refs)) { ASSERT(RB_EMPTY_NODE(&map->rb_node)); kfree(map); } } struct btrfs_balance_control { struct btrfs_balance_args data; struct btrfs_balance_args meta; struct btrfs_balance_args sys; u64 flags; struct btrfs_balance_progress stat; }; /* * Search for a given device by the set parameters */ struct btrfs_dev_lookup_args { u64 devid; u8 *uuid; u8 *fsid; bool missing; }; /* We have to initialize to -1 because BTRFS_DEV_REPLACE_DEVID is 0 */ #define BTRFS_DEV_LOOKUP_ARGS_INIT { .devid = (u64)-1 } #define BTRFS_DEV_LOOKUP_ARGS(name) \ struct btrfs_dev_lookup_args name = BTRFS_DEV_LOOKUP_ARGS_INIT enum btrfs_map_op { BTRFS_MAP_READ, BTRFS_MAP_WRITE, BTRFS_MAP_GET_READ_MIRRORS, }; static inline enum btrfs_map_op btrfs_op(struct bio *bio) { switch (bio_op(bio)) { case REQ_OP_WRITE: case REQ_OP_ZONE_APPEND: return BTRFS_MAP_WRITE; default: WARN_ON_ONCE(1); fallthrough; case REQ_OP_READ: return BTRFS_MAP_READ; } } static inline unsigned long btrfs_chunk_item_size(int num_stripes) { ASSERT(num_stripes); return sizeof(struct btrfs_chunk) + sizeof(struct btrfs_stripe) * (num_stripes - 1); } /* * Do the type safe conversion from stripe_nr to offset inside the chunk. * * @stripe_nr is u32, with left shift it can overflow u32 for chunks larger * than 4G. This does the proper type cast to avoid overflow. */ static inline u64 btrfs_stripe_nr_to_offset(u32 stripe_nr) { return (u64)stripe_nr << BTRFS_STRIPE_LEN_SHIFT; } void btrfs_get_bioc(struct btrfs_io_context *bioc); void btrfs_put_bioc(struct btrfs_io_context *bioc); int btrfs_map_block(struct btrfs_fs_info *fs_info, enum btrfs_map_op op, u64 logical, u64 *length, struct btrfs_io_context **bioc_ret, struct btrfs_io_stripe *smap, int *mirror_num_ret); int btrfs_map_repair_block(struct btrfs_fs_info *fs_info, struct btrfs_io_stripe *smap, u64 logical, u32 length, int mirror_num); struct btrfs_discard_stripe *btrfs_map_discard(struct btrfs_fs_info *fs_info, u64 logical, u64 *length_ret, u32 *num_stripes); int btrfs_read_sys_array(struct btrfs_fs_info *fs_info); int btrfs_read_chunk_tree(struct btrfs_fs_info *fs_info); struct btrfs_block_group *btrfs_create_chunk(struct btrfs_trans_handle *trans, u64 type); void btrfs_mapping_tree_free(struct btrfs_fs_info *fs_info); int btrfs_open_devices(struct btrfs_fs_devices *fs_devices, blk_mode_t flags, void *holder); struct btrfs_device *btrfs_scan_one_device(const char *path, blk_mode_t flags, bool mount_arg_dev); int btrfs_forget_devices(dev_t devt); void btrfs_close_devices(struct btrfs_fs_devices *fs_devices); void btrfs_free_extra_devids(struct btrfs_fs_devices *fs_devices); void btrfs_assign_next_active_device(struct btrfs_device *device, struct btrfs_device *this_dev); struct btrfs_device *btrfs_find_device_by_devspec(struct btrfs_fs_info *fs_info, u64 devid, const char *devpath); int btrfs_get_dev_args_from_path(struct btrfs_fs_info *fs_info, struct btrfs_dev_lookup_args *args, const char *path); struct btrfs_device *btrfs_alloc_device(struct btrfs_fs_info *fs_info, const u64 *devid, const u8 *uuid, const char *path); void btrfs_put_dev_args_from_path(struct btrfs_dev_lookup_args *args); int 
btrfs_rm_device(struct btrfs_fs_info *fs_info, struct btrfs_dev_lookup_args *args, struct file **bdev_file); void __exit btrfs_cleanup_fs_uuids(void); int btrfs_num_copies(struct btrfs_fs_info *fs_info, u64 logical, u64 len); int btrfs_grow_device(struct btrfs_trans_handle *trans, struct btrfs_device *device, u64 new_size); struct btrfs_device *btrfs_find_device(const struct btrfs_fs_devices *fs_devices, const struct btrfs_dev_lookup_args *args); int btrfs_shrink_device(struct btrfs_device *device, u64 new_size); int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *path); int btrfs_balance(struct btrfs_fs_info *fs_info, struct btrfs_balance_control *bctl, struct btrfs_ioctl_balance_args *bargs); void btrfs_describe_block_groups(u64 flags, char *buf, u32 size_buf); int btrfs_resume_balance_async(struct btrfs_fs_info *fs_info); int btrfs_recover_balance(struct btrfs_fs_info *fs_info); int btrfs_pause_balance(struct btrfs_fs_info *fs_info); int btrfs_relocate_chunk(struct btrfs_fs_info *fs_info, u64 chunk_offset); int btrfs_cancel_balance(struct btrfs_fs_info *fs_info); bool btrfs_chunk_writeable(struct btrfs_fs_info *fs_info, u64 chunk_offset); void btrfs_dev_stat_inc_and_print(struct btrfs_device *dev, int index); int btrfs_get_dev_stats(struct btrfs_fs_info *fs_info, struct btrfs_ioctl_get_dev_stats *stats); int btrfs_init_devices_late(struct btrfs_fs_info *fs_info); int btrfs_init_dev_stats(struct btrfs_fs_info *fs_info); int btrfs_run_dev_stats(struct btrfs_trans_handle *trans); void btrfs_rm_dev_replace_remove_srcdev(struct btrfs_device *srcdev); void btrfs_rm_dev_replace_free_srcdev(struct btrfs_device *srcdev); void btrfs_destroy_dev_replace_tgtdev(struct btrfs_device *tgtdev); unsigned long btrfs_full_stripe_len(struct btrfs_fs_info *fs_info, u64 logical); u64 btrfs_calc_stripe_length(const struct btrfs_chunk_map *map); int btrfs_nr_parity_stripes(u64 type); int btrfs_chunk_alloc_add_chunk_item(struct btrfs_trans_handle *trans, struct btrfs_block_group *bg); int btrfs_remove_chunk(struct btrfs_trans_handle *trans, u64 chunk_offset); #ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS struct btrfs_chunk_map *btrfs_alloc_chunk_map(int num_stripes, gfp_t gfp); int btrfs_add_chunk_map(struct btrfs_fs_info *fs_info, struct btrfs_chunk_map *map); #endif struct btrfs_chunk_map *btrfs_find_chunk_map(struct btrfs_fs_info *fs_info, u64 logical, u64 length); struct btrfs_chunk_map *btrfs_find_chunk_map_nolock(struct btrfs_fs_info *fs_info, u64 logical, u64 length); struct btrfs_chunk_map *btrfs_get_chunk_map(struct btrfs_fs_info *fs_info, u64 logical, u64 length); void btrfs_remove_chunk_map(struct btrfs_fs_info *fs_info, struct btrfs_chunk_map *map); void btrfs_release_disk_super(struct btrfs_super_block *super); static inline void btrfs_dev_stat_inc(struct btrfs_device *dev, int index) { atomic_inc(dev->dev_stat_values + index); /* * This memory barrier orders stores updating statistics before stores * updating dev_stats_ccnt. * * It pairs with smp_rmb() in btrfs_run_dev_stats(). 
 */
	smp_mb__before_atomic();
	atomic_inc(&dev->dev_stats_ccnt);
}

static inline int btrfs_dev_stat_read(struct btrfs_device *dev, int index)
{
	return atomic_read(dev->dev_stat_values + index);
}

static inline int btrfs_dev_stat_read_and_reset(struct btrfs_device *dev,
						int index)
{
	int ret;

	ret = atomic_xchg(dev->dev_stat_values + index, 0);
	/*
	 * atomic_xchg implies a full memory barrier as per atomic_t.txt:
	 * - RMW operations that have a return value are fully ordered;
	 *
	 * This implicit memory barrier is paired with the smp_rmb() in
	 * btrfs_run_dev_stats().
	 */
	atomic_inc(&dev->dev_stats_ccnt);
	return ret;
}

static inline void btrfs_dev_stat_set(struct btrfs_device *dev, int index,
				      unsigned long val)
{
	atomic_set(dev->dev_stat_values + index, val);
	/*
	 * This memory barrier orders stores updating statistics before stores
	 * updating dev_stats_ccnt.
	 *
	 * It pairs with smp_rmb() in btrfs_run_dev_stats().
	 */
	smp_mb__before_atomic();
	atomic_inc(&dev->dev_stats_ccnt);
}

static inline const char *btrfs_dev_name(const struct btrfs_device *device)
{
	if (!device || test_bit(BTRFS_DEV_STATE_MISSING, &device->dev_state))
		return "<missing disk>";
	else
		return rcu_str_deref(device->name);
}

void btrfs_commit_device_sizes(struct btrfs_transaction *trans);

struct list_head * __attribute_const__ btrfs_get_fs_uuids(void);
bool btrfs_check_rw_degradable(struct btrfs_fs_info *fs_info,
			       struct btrfs_device *failing_dev);
void btrfs_scratch_superblocks(struct btrfs_fs_info *fs_info,
			       struct btrfs_device *device);

enum btrfs_raid_types __attribute_const__ btrfs_bg_flags_to_raid_index(u64 flags);
int btrfs_bg_type_to_factor(u64 flags);
const char *btrfs_bg_type_to_raid_name(u64 flags);
int btrfs_verify_dev_extents(struct btrfs_fs_info *fs_info);
bool btrfs_repair_one_zone(struct btrfs_fs_info *fs_info, u64 logical);

bool btrfs_pinned_by_swapfile(struct btrfs_fs_info *fs_info, void *ptr);
const u8 *btrfs_sb_fsid_ptr(const struct btrfs_super_block *sb);

#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
struct btrfs_io_context *alloc_btrfs_io_context(struct btrfs_fs_info *fs_info,
						u64 logical, u16 total_stripes);
#endif

#endif
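The BTRFS_DEVICE_GETSET_FUNCS() macros earlier in this header exist because a 64-bit field cannot be loaded atomically on a 32-bit CPU: the writer bumps a sequence count around the store, and readers retry until they observe the same even count on both sides of their load. Below is a minimal userspace sketch of that read-retry protocol; struct dev_size, size_read() and size_write() are illustrative names only, and the kernel's seqcount_t additionally provides the memory-ordering, lockdep and preemption handling (note the preempt_disable() in the real setter) that this simplification omits.

#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

/*
 * Simplified sequence counter protecting a 64-bit value that a 32-bit
 * CPU cannot load in one instruction. The writer makes the counter odd
 * before writing and even again afterwards; readers retry whenever the
 * counter is odd or changed across the read.
 */
struct dev_size {
	atomic_uint seq;
	uint64_t total_bytes;
};

static void size_write(struct dev_size *d, uint64_t val)
{
	atomic_fetch_add_explicit(&d->seq, 1, memory_order_acq_rel); /* odd */
	d->total_bytes = val;
	atomic_fetch_add_explicit(&d->seq, 1, memory_order_acq_rel); /* even */
}

static uint64_t size_read(struct dev_size *d)
{
	unsigned int start;
	uint64_t val;

	do {
		start = atomic_load_explicit(&d->seq, memory_order_acquire);
		val = d->total_bytes;
	} while (start != atomic_load_explicit(&d->seq, memory_order_acquire) ||
		 (start & 1));

	return val;
}

int main(void)
{
	struct dev_size d = { .total_bytes = 0 };

	atomic_init(&d.seq, 0);
	size_write(&d, 10ULL * (1ULL << 30)); /* 10 GiB, cf. BTRFS_MAX_DATA_CHUNK_SIZE */
	printf("total_bytes = %llu\n", (unsigned long long)size_read(&d));
	return 0;
}

On 64-bit kernels the accessors collapse to plain loads and stores, which is why the whole mechanism in this header is guarded by BITS_PER_LONG==32.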
// SPDX-License-Identifier: GPL-2.0+
/*
 * NILFS inode operations.
 *
 * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation.
 *
 * Written by Ryusuke Konishi.
 *
 */

#include <linux/buffer_head.h>
#include <linux/gfp.h>
#include <linux/mpage.h>
#include <linux/pagemap.h>
#include <linux/writeback.h>
#include <linux/uio.h>
#include <linux/fiemap.h>
#include <linux/random.h>
#include "nilfs.h"
#include "btnode.h"
#include "segment.h"
#include "page.h"
#include "mdt.h"
#include "cpfile.h"
#include "ifile.h"

/**
 * struct nilfs_iget_args - arguments used during comparison between inodes
 * @ino: inode number
 * @cno: checkpoint number
 * @root: pointer on NILFS root object (mounted checkpoint)
 * @type: inode type
 */
struct nilfs_iget_args {
	u64 ino;
	__u64 cno;
	struct nilfs_root *root;
	unsigned int type;
};

static int nilfs_iget_test(struct inode *inode, void *opaque);

void nilfs_inode_add_blocks(struct inode *inode, int n)
{
	struct nilfs_root *root = NILFS_I(inode)->i_root;

	inode_add_bytes(inode, i_blocksize(inode) * n);
	if (root)
		atomic64_add(n, &root->blocks_count);
}

void nilfs_inode_sub_blocks(struct inode *inode, int n)
{
	struct nilfs_root *root = NILFS_I(inode)->i_root;

	inode_sub_bytes(inode, i_blocksize(inode) * n);
	if (root)
		atomic64_sub(n, &root->blocks_count);
}

/**
 * nilfs_get_block() - get a file block on the filesystem (callback function)
 * @inode: inode struct of the target file
 * @blkoff: file block number
 * @bh_result: buffer head to be mapped on
 * @create: indicate whether allocating the block or not when it has not
 *	been allocated yet.
 *
 * This function does not issue actual read request of the specified data
 * block.
It is done by VFS. */ int nilfs_get_block(struct inode *inode, sector_t blkoff, struct buffer_head *bh_result, int create) { struct nilfs_inode_info *ii = NILFS_I(inode); struct the_nilfs *nilfs = inode->i_sb->s_fs_info; __u64 blknum = 0; int err = 0, ret; unsigned int maxblocks = bh_result->b_size >> inode->i_blkbits; down_read(&NILFS_MDT(nilfs->ns_dat)->mi_sem); ret = nilfs_bmap_lookup_contig(ii->i_bmap, blkoff, &blknum, maxblocks); up_read(&NILFS_MDT(nilfs->ns_dat)->mi_sem); if (ret >= 0) { /* found */ map_bh(bh_result, inode->i_sb, blknum); if (ret > 0) bh_result->b_size = (ret << inode->i_blkbits); goto out; } /* data block was not found */ if (ret == -ENOENT && create) { struct nilfs_transaction_info ti; bh_result->b_blocknr = 0; err = nilfs_transaction_begin(inode->i_sb, &ti, 1); if (unlikely(err)) goto out; err = nilfs_bmap_insert(ii->i_bmap, blkoff, (unsigned long)bh_result); if (unlikely(err != 0)) { if (err == -EEXIST) { /* * The get_block() function could be called * from multiple callers for an inode. * However, the page having this block must * be locked in this case. */ nilfs_warn(inode->i_sb, "%s (ino=%lu): a race condition while inserting a data block at offset=%llu", __func__, inode->i_ino, (unsigned long long)blkoff); err = -EAGAIN; } nilfs_transaction_abort(inode->i_sb); goto out; } nilfs_mark_inode_dirty_sync(inode); nilfs_transaction_commit(inode->i_sb); /* never fails */ /* Error handling should be detailed */ set_buffer_new(bh_result); set_buffer_delay(bh_result); map_bh(bh_result, inode->i_sb, 0); /* Disk block number must be changed to proper value */ } else if (ret == -ENOENT) { /* * not found is not error (e.g. hole); must return without * the mapped state flag. */ ; } else { err = ret; } out: return err; } /** * nilfs_read_folio() - implement read_folio() method of nilfs_aops {} * address_space_operations. 
* @file: file struct of the file to be read * @folio: the folio to be read */ static int nilfs_read_folio(struct file *file, struct folio *folio) { return mpage_read_folio(folio, nilfs_get_block); } static void nilfs_readahead(struct readahead_control *rac) { mpage_readahead(rac, nilfs_get_block); } static int nilfs_writepages(struct address_space *mapping, struct writeback_control *wbc) { struct inode *inode = mapping->host; int err = 0; if (sb_rdonly(inode->i_sb)) { nilfs_clear_dirty_pages(mapping); return -EROFS; } if (wbc->sync_mode == WB_SYNC_ALL) err = nilfs_construct_dsync_segment(inode->i_sb, inode, wbc->range_start, wbc->range_end); return err; } static bool nilfs_dirty_folio(struct address_space *mapping, struct folio *folio) { struct inode *inode = mapping->host; struct buffer_head *head; unsigned int nr_dirty = 0; bool ret = filemap_dirty_folio(mapping, folio); /* * The page may not be locked, eg if called from try_to_unmap_one() */ spin_lock(&mapping->i_private_lock); head = folio_buffers(folio); if (head) { struct buffer_head *bh = head; do { /* Do not mark hole blocks dirty */ if (buffer_dirty(bh) || !buffer_mapped(bh)) continue; set_buffer_dirty(bh); nr_dirty++; } while (bh = bh->b_this_page, bh != head); } else if (ret) { nr_dirty = 1 << (folio_shift(folio) - inode->i_blkbits); } spin_unlock(&mapping->i_private_lock); if (nr_dirty) nilfs_set_file_dirty(inode, nr_dirty); return ret; } void nilfs_write_failed(struct address_space *mapping, loff_t to) { struct inode *inode = mapping->host; if (to > inode->i_size) { truncate_pagecache(inode, inode->i_size); nilfs_truncate(inode); } } static int nilfs_write_begin(struct file *file, struct address_space *mapping, loff_t pos, unsigned len, struct folio **foliop, void **fsdata) { struct inode *inode = mapping->host; int err = nilfs_transaction_begin(inode->i_sb, NULL, 1); if (unlikely(err)) return err; err = block_write_begin(mapping, pos, len, foliop, nilfs_get_block); if (unlikely(err)) { nilfs_write_failed(mapping, pos + len); nilfs_transaction_abort(inode->i_sb); } return err; } static int nilfs_write_end(struct file *file, struct address_space *mapping, loff_t pos, unsigned len, unsigned copied, struct folio *folio, void *fsdata) { struct inode *inode = mapping->host; unsigned int start = pos & (PAGE_SIZE - 1); unsigned int nr_dirty; int err; nr_dirty = nilfs_page_count_clean_buffers(folio, start, start + copied); copied = generic_write_end(file, mapping, pos, len, copied, folio, fsdata); nilfs_set_file_dirty(inode, nr_dirty); err = nilfs_transaction_commit(inode->i_sb); return err ? 
: copied; } static ssize_t nilfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter) { struct inode *inode = file_inode(iocb->ki_filp); if (iov_iter_rw(iter) == WRITE) return 0; /* Needs synchronization with the cleaner */ return blockdev_direct_IO(iocb, inode, iter, nilfs_get_block); } const struct address_space_operations nilfs_aops = { .read_folio = nilfs_read_folio, .writepages = nilfs_writepages, .dirty_folio = nilfs_dirty_folio, .readahead = nilfs_readahead, .write_begin = nilfs_write_begin, .write_end = nilfs_write_end, .invalidate_folio = block_invalidate_folio, .direct_IO = nilfs_direct_IO, .migrate_folio = buffer_migrate_folio_norefs, .is_partially_uptodate = block_is_partially_uptodate, }; const struct address_space_operations nilfs_buffer_cache_aops = { .invalidate_folio = block_invalidate_folio, }; static int nilfs_insert_inode_locked(struct inode *inode, struct nilfs_root *root, unsigned long ino) { struct nilfs_iget_args args = { .ino = ino, .root = root, .cno = 0, .type = NILFS_I_TYPE_NORMAL }; return insert_inode_locked4(inode, ino, nilfs_iget_test, &args); } struct inode *nilfs_new_inode(struct inode *dir, umode_t mode) { struct super_block *sb = dir->i_sb; struct inode *inode; struct nilfs_inode_info *ii; struct nilfs_root *root; struct buffer_head *bh; int err = -ENOMEM; ino_t ino; inode = new_inode(sb); if (unlikely(!inode)) goto failed; mapping_set_gfp_mask(inode->i_mapping, mapping_gfp_constraint(inode->i_mapping, ~__GFP_FS)); root = NILFS_I(dir)->i_root; ii = NILFS_I(inode); ii->i_state = BIT(NILFS_I_NEW); ii->i_type = NILFS_I_TYPE_NORMAL; ii->i_root = root; err = nilfs_ifile_create_inode(root->ifile, &ino, &bh); if (unlikely(err)) goto failed_ifile_create_inode; /* reference count of i_bh inherits from nilfs_mdt_read_block() */ ii->i_bh = bh; atomic64_inc(&root->inodes_count); inode_init_owner(&nop_mnt_idmap, inode, dir, mode); inode->i_ino = ino; simple_inode_init_ts(inode); if (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode)) { err = nilfs_bmap_read(ii->i_bmap, NULL); if (err < 0) goto failed_after_creation; set_bit(NILFS_I_BMAP, &ii->i_state); /* No lock is needed; iget() ensures it. */ } ii->i_flags = nilfs_mask_flags( mode, NILFS_I(dir)->i_flags & NILFS_FL_INHERITED); /* ii->i_file_acl = 0; */ /* ii->i_dir_acl = 0; */ ii->i_dir_start_lookup = 0; nilfs_set_inode_flags(inode); inode->i_generation = get_random_u32(); if (nilfs_insert_inode_locked(inode, root, ino) < 0) { err = -EIO; goto failed_after_creation; } err = nilfs_init_acl(inode, dir); if (unlikely(err)) /* * Never occur. When supporting nilfs_init_acl(), * proper cancellation of above jobs should be considered. */ goto failed_after_creation; return inode; failed_after_creation: clear_nlink(inode); if (inode->i_state & I_NEW) unlock_new_inode(inode); iput(inode); /* * raw_inode will be deleted through * nilfs_evict_inode(). 
*/ goto failed; failed_ifile_create_inode: make_bad_inode(inode); iput(inode); failed: return ERR_PTR(err); } void nilfs_set_inode_flags(struct inode *inode) { unsigned int flags = NILFS_I(inode)->i_flags; unsigned int new_fl = 0; if (flags & FS_SYNC_FL) new_fl |= S_SYNC; if (flags & FS_APPEND_FL) new_fl |= S_APPEND; if (flags & FS_IMMUTABLE_FL) new_fl |= S_IMMUTABLE; if (flags & FS_NOATIME_FL) new_fl |= S_NOATIME; if (flags & FS_DIRSYNC_FL) new_fl |= S_DIRSYNC; inode_set_flags(inode, new_fl, S_SYNC | S_APPEND | S_IMMUTABLE | S_NOATIME | S_DIRSYNC); } int nilfs_read_inode_common(struct inode *inode, struct nilfs_inode *raw_inode) { struct nilfs_inode_info *ii = NILFS_I(inode); int err; inode->i_mode = le16_to_cpu(raw_inode->i_mode); i_uid_write(inode, le32_to_cpu(raw_inode->i_uid)); i_gid_write(inode, le32_to_cpu(raw_inode->i_gid)); set_nlink(inode, le16_to_cpu(raw_inode->i_links_count)); inode->i_size = le64_to_cpu(raw_inode->i_size); inode_set_atime(inode, le64_to_cpu(raw_inode->i_mtime), le32_to_cpu(raw_inode->i_mtime_nsec)); inode_set_ctime(inode, le64_to_cpu(raw_inode->i_ctime), le32_to_cpu(raw_inode->i_ctime_nsec)); inode_set_mtime(inode, le64_to_cpu(raw_inode->i_mtime), le32_to_cpu(raw_inode->i_mtime_nsec)); if (nilfs_is_metadata_file_inode(inode) && !S_ISREG(inode->i_mode)) return -EIO; /* this inode is for metadata and corrupted */ if (inode->i_nlink == 0) return -ESTALE; /* this inode is deleted */ inode->i_blocks = le64_to_cpu(raw_inode->i_blocks); ii->i_flags = le32_to_cpu(raw_inode->i_flags); #if 0 ii->i_file_acl = le32_to_cpu(raw_inode->i_file_acl); ii->i_dir_acl = S_ISREG(inode->i_mode) ? 0 : le32_to_cpu(raw_inode->i_dir_acl); #endif ii->i_dir_start_lookup = 0; inode->i_generation = le32_to_cpu(raw_inode->i_generation); if (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)) { err = nilfs_bmap_read(ii->i_bmap, raw_inode); if (err < 0) return err; set_bit(NILFS_I_BMAP, &ii->i_state); /* No lock is needed; iget() ensures it. 
*/ } return 0; } static int __nilfs_read_inode(struct super_block *sb, struct nilfs_root *root, unsigned long ino, struct inode *inode) { struct the_nilfs *nilfs = sb->s_fs_info; struct buffer_head *bh; struct nilfs_inode *raw_inode; int err; down_read(&NILFS_MDT(nilfs->ns_dat)->mi_sem); err = nilfs_ifile_get_inode_block(root->ifile, ino, &bh); if (unlikely(err)) goto bad_inode; raw_inode = nilfs_ifile_map_inode(root->ifile, ino, bh); err = nilfs_read_inode_common(inode, raw_inode); if (err) goto failed_unmap; if (S_ISREG(inode->i_mode)) { inode->i_op = &nilfs_file_inode_operations; inode->i_fop = &nilfs_file_operations; inode->i_mapping->a_ops = &nilfs_aops; } else if (S_ISDIR(inode->i_mode)) { inode->i_op = &nilfs_dir_inode_operations; inode->i_fop = &nilfs_dir_operations; inode->i_mapping->a_ops = &nilfs_aops; } else if (S_ISLNK(inode->i_mode)) { inode->i_op = &nilfs_symlink_inode_operations; inode_nohighmem(inode); inode->i_mapping->a_ops = &nilfs_aops; } else { inode->i_op = &nilfs_special_inode_operations; init_special_inode( inode, inode->i_mode, huge_decode_dev(le64_to_cpu(raw_inode->i_device_code))); } nilfs_ifile_unmap_inode(raw_inode); brelse(bh); up_read(&NILFS_MDT(nilfs->ns_dat)->mi_sem); nilfs_set_inode_flags(inode); mapping_set_gfp_mask(inode->i_mapping, mapping_gfp_constraint(inode->i_mapping, ~__GFP_FS)); return 0; failed_unmap: nilfs_ifile_unmap_inode(raw_inode); brelse(bh); bad_inode: up_read(&NILFS_MDT(nilfs->ns_dat)->mi_sem); return err; } static int nilfs_iget_test(struct inode *inode, void *opaque) { struct nilfs_iget_args *args = opaque; struct nilfs_inode_info *ii; if (args->ino != inode->i_ino || args->root != NILFS_I(inode)->i_root) return 0; ii = NILFS_I(inode); if (ii->i_type != args->type) return 0; return !(args->type & NILFS_I_TYPE_GC) || args->cno == ii->i_cno; } static int nilfs_iget_set(struct inode *inode, void *opaque) { struct nilfs_iget_args *args = opaque; inode->i_ino = args->ino; NILFS_I(inode)->i_cno = args->cno; NILFS_I(inode)->i_root = args->root; NILFS_I(inode)->i_type = args->type; if (args->root && args->ino == NILFS_ROOT_INO) nilfs_get_root(args->root); return 0; } struct inode *nilfs_ilookup(struct super_block *sb, struct nilfs_root *root, unsigned long ino) { struct nilfs_iget_args args = { .ino = ino, .root = root, .cno = 0, .type = NILFS_I_TYPE_NORMAL }; return ilookup5(sb, ino, nilfs_iget_test, &args); } struct inode *nilfs_iget_locked(struct super_block *sb, struct nilfs_root *root, unsigned long ino) { struct nilfs_iget_args args = { .ino = ino, .root = root, .cno = 0, .type = NILFS_I_TYPE_NORMAL }; return iget5_locked(sb, ino, nilfs_iget_test, nilfs_iget_set, &args); } struct inode *nilfs_iget(struct super_block *sb, struct nilfs_root *root, unsigned long ino) { struct inode *inode; int err; inode = nilfs_iget_locked(sb, root, ino); if (unlikely(!inode)) return ERR_PTR(-ENOMEM); if (!(inode->i_state & I_NEW)) { if (!inode->i_nlink) { iput(inode); return ERR_PTR(-ESTALE); } return inode; } err = __nilfs_read_inode(sb, root, ino, inode); if (unlikely(err)) { iget_failed(inode); return ERR_PTR(err); } unlock_new_inode(inode); return inode; } struct inode *nilfs_iget_for_gc(struct super_block *sb, unsigned long ino, __u64 cno) { struct nilfs_iget_args args = { .ino = ino, .root = NULL, .cno = cno, .type = NILFS_I_TYPE_GC }; struct inode *inode; int err; inode = iget5_locked(sb, ino, nilfs_iget_test, nilfs_iget_set, &args); if (unlikely(!inode)) return ERR_PTR(-ENOMEM); if (!(inode->i_state & I_NEW)) return inode; err = 
nilfs_init_gcinode(inode); if (unlikely(err)) { iget_failed(inode); return ERR_PTR(err); } unlock_new_inode(inode); return inode; } /** * nilfs_attach_btree_node_cache - attach a B-tree node cache to the inode * @inode: inode object * * nilfs_attach_btree_node_cache() attaches a B-tree node cache to @inode, * or does nothing if the inode already has it. This function allocates * an additional inode to maintain page cache of B-tree nodes one-on-one. * * Return Value: On success, 0 is returned. On errors, one of the following * negative error code is returned. * * %-ENOMEM - Insufficient memory available. */ int nilfs_attach_btree_node_cache(struct inode *inode) { struct nilfs_inode_info *ii = NILFS_I(inode); struct inode *btnc_inode; struct nilfs_iget_args args; if (ii->i_assoc_inode) return 0; args.ino = inode->i_ino; args.root = ii->i_root; args.cno = ii->i_cno; args.type = ii->i_type | NILFS_I_TYPE_BTNC; btnc_inode = iget5_locked(inode->i_sb, inode->i_ino, nilfs_iget_test, nilfs_iget_set, &args); if (unlikely(!btnc_inode)) return -ENOMEM; if (btnc_inode->i_state & I_NEW) { nilfs_init_btnc_inode(btnc_inode); unlock_new_inode(btnc_inode); } NILFS_I(btnc_inode)->i_assoc_inode = inode; NILFS_I(btnc_inode)->i_bmap = ii->i_bmap; ii->i_assoc_inode = btnc_inode; return 0; } /** * nilfs_detach_btree_node_cache - detach the B-tree node cache from the inode * @inode: inode object * * nilfs_detach_btree_node_cache() detaches the B-tree node cache and its * holder inode bound to @inode, or does nothing if @inode doesn't have it. */ void nilfs_detach_btree_node_cache(struct inode *inode) { struct nilfs_inode_info *ii = NILFS_I(inode); struct inode *btnc_inode = ii->i_assoc_inode; if (btnc_inode) { NILFS_I(btnc_inode)->i_assoc_inode = NULL; ii->i_assoc_inode = NULL; iput(btnc_inode); } } /** * nilfs_iget_for_shadow - obtain inode for shadow mapping * @inode: inode object that uses shadow mapping * * nilfs_iget_for_shadow() allocates a pair of inodes that holds page * caches for shadow mapping. The page cache for data pages is set up * in one inode and the one for b-tree node pages is set up in the * other inode, which is attached to the former inode. * * Return Value: On success, a pointer to the inode for data pages is * returned. On errors, one of the following negative error code is returned * in a pointer type. * * %-ENOMEM - Insufficient memory available. */ struct inode *nilfs_iget_for_shadow(struct inode *inode) { struct nilfs_iget_args args = { .ino = inode->i_ino, .root = NULL, .cno = 0, .type = NILFS_I_TYPE_SHADOW }; struct inode *s_inode; int err; s_inode = iget5_locked(inode->i_sb, inode->i_ino, nilfs_iget_test, nilfs_iget_set, &args); if (unlikely(!s_inode)) return ERR_PTR(-ENOMEM); if (!(s_inode->i_state & I_NEW)) return inode; NILFS_I(s_inode)->i_flags = 0; memset(NILFS_I(s_inode)->i_bmap, 0, sizeof(struct nilfs_bmap)); mapping_set_gfp_mask(s_inode->i_mapping, GFP_NOFS); s_inode->i_mapping->a_ops = &nilfs_buffer_cache_aops; err = nilfs_attach_btree_node_cache(s_inode); if (unlikely(err)) { iget_failed(s_inode); return ERR_PTR(err); } unlock_new_inode(s_inode); return s_inode; } /** * nilfs_write_inode_common - export common inode information to on-disk inode * @inode: inode object * @raw_inode: on-disk inode * * This function writes standard information from the on-memory inode @inode * to @raw_inode on ifile, cpfile or a super root block. Since inode bmap * data is not exported, nilfs_bmap_write() must be called separately during * log writing. 
*/ void nilfs_write_inode_common(struct inode *inode, struct nilfs_inode *raw_inode) { struct nilfs_inode_info *ii = NILFS_I(inode); raw_inode->i_mode = cpu_to_le16(inode->i_mode); raw_inode->i_uid = cpu_to_le32(i_uid_read(inode)); raw_inode->i_gid = cpu_to_le32(i_gid_read(inode)); raw_inode->i_links_count = cpu_to_le16(inode->i_nlink); raw_inode->i_size = cpu_to_le64(inode->i_size); raw_inode->i_ctime = cpu_to_le64(inode_get_ctime_sec(inode)); raw_inode->i_mtime = cpu_to_le64(inode_get_mtime_sec(inode)); raw_inode->i_ctime_nsec = cpu_to_le32(inode_get_ctime_nsec(inode)); raw_inode->i_mtime_nsec = cpu_to_le32(inode_get_mtime_nsec(inode)); raw_inode->i_blocks = cpu_to_le64(inode->i_blocks); raw_inode->i_flags = cpu_to_le32(ii->i_flags); raw_inode->i_generation = cpu_to_le32(inode->i_generation); /* * When extending inode, nilfs->ns_inode_size should be checked * for substitutions of appended fields. */ } void nilfs_update_inode(struct inode *inode, struct buffer_head *ibh, int flags) { ino_t ino = inode->i_ino; struct nilfs_inode_info *ii = NILFS_I(inode); struct inode *ifile = ii->i_root->ifile; struct nilfs_inode *raw_inode; raw_inode = nilfs_ifile_map_inode(ifile, ino, ibh); if (test_and_clear_bit(NILFS_I_NEW, &ii->i_state)) memset(raw_inode, 0, NILFS_MDT(ifile)->mi_entry_size); if (flags & I_DIRTY_DATASYNC) set_bit(NILFS_I_INODE_SYNC, &ii->i_state); nilfs_write_inode_common(inode, raw_inode); if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode)) raw_inode->i_device_code = cpu_to_le64(huge_encode_dev(inode->i_rdev)); nilfs_ifile_unmap_inode(raw_inode); } #define NILFS_MAX_TRUNCATE_BLOCKS 16384 /* 64MB for 4KB block */ static void nilfs_truncate_bmap(struct nilfs_inode_info *ii, unsigned long from) { __u64 b; int ret; if (!test_bit(NILFS_I_BMAP, &ii->i_state)) return; repeat: ret = nilfs_bmap_last_key(ii->i_bmap, &b); if (ret == -ENOENT) return; else if (ret < 0) goto failed; if (b < from) return; b -= min_t(__u64, NILFS_MAX_TRUNCATE_BLOCKS, b - from); ret = nilfs_bmap_truncate(ii->i_bmap, b); nilfs_relax_pressure_in_lock(ii->vfs_inode.i_sb); if (!ret || (ret == -ENOMEM && nilfs_bmap_truncate(ii->i_bmap, b) == 0)) goto repeat; failed: nilfs_warn(ii->vfs_inode.i_sb, "error %d truncating bmap (ino=%lu)", ret, ii->vfs_inode.i_ino); } void nilfs_truncate(struct inode *inode) { unsigned long blkoff; unsigned int blocksize; struct nilfs_transaction_info ti; struct super_block *sb = inode->i_sb; struct nilfs_inode_info *ii = NILFS_I(inode); if (!test_bit(NILFS_I_BMAP, &ii->i_state)) return; if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) return; blocksize = sb->s_blocksize; blkoff = (inode->i_size + blocksize - 1) >> sb->s_blocksize_bits; nilfs_transaction_begin(sb, &ti, 0); /* never fails */ block_truncate_page(inode->i_mapping, inode->i_size, nilfs_get_block); nilfs_truncate_bmap(ii, blkoff); inode_set_mtime_to_ts(inode, inode_set_ctime_current(inode)); if (IS_SYNC(inode)) nilfs_set_transaction_flag(NILFS_TI_SYNC); nilfs_mark_inode_dirty(inode); nilfs_set_file_dirty(inode, 0); nilfs_transaction_commit(sb); /* * May construct a logical segment and may fail in sync mode. * But truncate has no return value. */ } static void nilfs_clear_inode(struct inode *inode) { struct nilfs_inode_info *ii = NILFS_I(inode); /* * Free resources allocated in nilfs_read_inode(), here. 
*/ BUG_ON(!list_empty(&ii->i_dirty)); brelse(ii->i_bh); ii->i_bh = NULL; if (nilfs_is_metadata_file_inode(inode)) nilfs_mdt_clear(inode); if (test_bit(NILFS_I_BMAP, &ii->i_state)) nilfs_bmap_clear(ii->i_bmap); if (!(ii->i_type & NILFS_I_TYPE_BTNC)) nilfs_detach_btree_node_cache(inode); if (ii->i_root && inode->i_ino == NILFS_ROOT_INO) nilfs_put_root(ii->i_root); } void nilfs_evict_inode(struct inode *inode) { struct nilfs_transaction_info ti; struct super_block *sb = inode->i_sb; struct nilfs_inode_info *ii = NILFS_I(inode); struct the_nilfs *nilfs; int ret; if (inode->i_nlink || !ii->i_root || unlikely(is_bad_inode(inode))) { truncate_inode_pages_final(&inode->i_data); clear_inode(inode); nilfs_clear_inode(inode); return; } nilfs_transaction_begin(sb, &ti, 0); /* never fails */ truncate_inode_pages_final(&inode->i_data); nilfs = sb->s_fs_info; if (unlikely(sb_rdonly(sb) || !nilfs->ns_writer)) { /* * If this inode is about to be disposed after the file system * has been degraded to read-only due to file system corruption * or after the writer has been detached, do not make any * changes that cause writes, just clear it. * Do this check after read-locking ns_segctor_sem by * nilfs_transaction_begin() in order to avoid a race with * the writer detach operation. */ clear_inode(inode); nilfs_clear_inode(inode); nilfs_transaction_abort(sb); return; } /* TODO: some of the following operations may fail. */ nilfs_truncate_bmap(ii, 0); nilfs_mark_inode_dirty(inode); clear_inode(inode); ret = nilfs_ifile_delete_inode(ii->i_root->ifile, inode->i_ino); if (!ret) atomic64_dec(&ii->i_root->inodes_count); nilfs_clear_inode(inode); if (IS_SYNC(inode)) nilfs_set_transaction_flag(NILFS_TI_SYNC); nilfs_transaction_commit(sb); /* * May construct a logical segment and may fail in sync mode. * But delete_inode has no return value. 
*/ } int nilfs_setattr(struct mnt_idmap *idmap, struct dentry *dentry, struct iattr *iattr) { struct nilfs_transaction_info ti; struct inode *inode = d_inode(dentry); struct super_block *sb = inode->i_sb; int err; err = setattr_prepare(&nop_mnt_idmap, dentry, iattr); if (err) return err; err = nilfs_transaction_begin(sb, &ti, 0); if (unlikely(err)) return err; if ((iattr->ia_valid & ATTR_SIZE) && iattr->ia_size != i_size_read(inode)) { inode_dio_wait(inode); truncate_setsize(inode, iattr->ia_size); nilfs_truncate(inode); } setattr_copy(&nop_mnt_idmap, inode, iattr); mark_inode_dirty(inode); if (iattr->ia_valid & ATTR_MODE) { err = nilfs_acl_chmod(inode); if (unlikely(err)) goto out_err; } return nilfs_transaction_commit(sb); out_err: nilfs_transaction_abort(sb); return err; } int nilfs_permission(struct mnt_idmap *idmap, struct inode *inode, int mask) { struct nilfs_root *root = NILFS_I(inode)->i_root; if ((mask & MAY_WRITE) && root && root->cno != NILFS_CPTREE_CURRENT_CNO) return -EROFS; /* snapshot is not writable */ return generic_permission(&nop_mnt_idmap, inode, mask); } int nilfs_load_inode_block(struct inode *inode, struct buffer_head **pbh) { struct the_nilfs *nilfs = inode->i_sb->s_fs_info; struct nilfs_inode_info *ii = NILFS_I(inode); int err; spin_lock(&nilfs->ns_inode_lock); if (ii->i_bh == NULL || unlikely(!buffer_uptodate(ii->i_bh))) { spin_unlock(&nilfs->ns_inode_lock); err = nilfs_ifile_get_inode_block(ii->i_root->ifile, inode->i_ino, pbh); if (unlikely(err)) return err; spin_lock(&nilfs->ns_inode_lock); if (ii->i_bh == NULL) ii->i_bh = *pbh; else if (unlikely(!buffer_uptodate(ii->i_bh))) { __brelse(ii->i_bh); ii->i_bh = *pbh; } else { brelse(*pbh); *pbh = ii->i_bh; } } else *pbh = ii->i_bh; get_bh(*pbh); spin_unlock(&nilfs->ns_inode_lock); return 0; } int nilfs_inode_dirty(struct inode *inode) { struct nilfs_inode_info *ii = NILFS_I(inode); struct the_nilfs *nilfs = inode->i_sb->s_fs_info; int ret = 0; if (!list_empty(&ii->i_dirty)) { spin_lock(&nilfs->ns_inode_lock); ret = test_bit(NILFS_I_DIRTY, &ii->i_state) || test_bit(NILFS_I_BUSY, &ii->i_state); spin_unlock(&nilfs->ns_inode_lock); } return ret; } int nilfs_set_file_dirty(struct inode *inode, unsigned int nr_dirty) { struct nilfs_inode_info *ii = NILFS_I(inode); struct the_nilfs *nilfs = inode->i_sb->s_fs_info; atomic_add(nr_dirty, &nilfs->ns_ndirtyblks); if (test_and_set_bit(NILFS_I_DIRTY, &ii->i_state)) return 0; spin_lock(&nilfs->ns_inode_lock); if (!test_bit(NILFS_I_QUEUED, &ii->i_state) && !test_bit(NILFS_I_BUSY, &ii->i_state)) { /* * Because this routine may race with nilfs_dispose_list(), * we have to check NILFS_I_QUEUED here, too. */ if (list_empty(&ii->i_dirty) && igrab(inode) == NULL) { /* * This will happen when somebody is freeing * this inode. */ nilfs_warn(inode->i_sb, "cannot set file dirty (ino=%lu): the file is being freed", inode->i_ino); spin_unlock(&nilfs->ns_inode_lock); return -EINVAL; /* * NILFS_I_DIRTY may remain for * freeing inode. */ } list_move_tail(&ii->i_dirty, &nilfs->ns_dirty_files); set_bit(NILFS_I_QUEUED, &ii->i_state); } spin_unlock(&nilfs->ns_inode_lock); return 0; } int __nilfs_mark_inode_dirty(struct inode *inode, int flags) { struct the_nilfs *nilfs = inode->i_sb->s_fs_info; struct buffer_head *ibh; int err; /* * Do not dirty inodes after the log writer has been detached * and its nilfs_root struct has been freed. 
*/ if (unlikely(nilfs_purging(nilfs))) return 0; err = nilfs_load_inode_block(inode, &ibh); if (unlikely(err)) { nilfs_warn(inode->i_sb, "cannot mark inode dirty (ino=%lu): error %d loading inode block", inode->i_ino, err); return err; } nilfs_update_inode(inode, ibh, flags); mark_buffer_dirty(ibh); nilfs_mdt_mark_dirty(NILFS_I(inode)->i_root->ifile); brelse(ibh); return 0; } /** * nilfs_dirty_inode - reflect changes on given inode to an inode block. * @inode: inode of the file to be registered. * @flags: flags to determine the dirty state of the inode * * nilfs_dirty_inode() loads a inode block containing the specified * @inode and copies data from a nilfs_inode to a corresponding inode * entry in the inode block. This operation is excluded from the segment * construction. This function can be called both as a single operation * and as a part of indivisible file operations. */ void nilfs_dirty_inode(struct inode *inode, int flags) { struct nilfs_transaction_info ti; struct nilfs_mdt_info *mdi = NILFS_MDT(inode); if (is_bad_inode(inode)) { nilfs_warn(inode->i_sb, "tried to mark bad_inode dirty. ignored."); dump_stack(); return; } if (mdi) { nilfs_mdt_mark_dirty(inode); return; } nilfs_transaction_begin(inode->i_sb, &ti, 0); __nilfs_mark_inode_dirty(inode, flags); nilfs_transaction_commit(inode->i_sb); /* never fails */ } int nilfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, __u64 start, __u64 len) { struct the_nilfs *nilfs = inode->i_sb->s_fs_info; __u64 logical = 0, phys = 0, size = 0; __u32 flags = 0; loff_t isize; sector_t blkoff, end_blkoff; sector_t delalloc_blkoff; unsigned long delalloc_blklen; unsigned int blkbits = inode->i_blkbits; int ret, n; ret = fiemap_prep(inode, fieinfo, start, &len, 0); if (ret) return ret; inode_lock(inode); isize = i_size_read(inode); blkoff = start >> blkbits; end_blkoff = (start + len - 1) >> blkbits; delalloc_blklen = nilfs_find_uncommitted_extent(inode, blkoff, &delalloc_blkoff); do { __u64 blkphy; unsigned int maxblocks; if (delalloc_blklen && blkoff == delalloc_blkoff) { if (size) { /* End of the current extent */ ret = fiemap_fill_next_extent( fieinfo, logical, phys, size, flags); if (ret) break; } if (blkoff > end_blkoff) break; flags = FIEMAP_EXTENT_MERGED | FIEMAP_EXTENT_DELALLOC; logical = blkoff << blkbits; phys = 0; size = delalloc_blklen << blkbits; blkoff = delalloc_blkoff + delalloc_blklen; delalloc_blklen = nilfs_find_uncommitted_extent( inode, blkoff, &delalloc_blkoff); continue; } /* * Limit the number of blocks that we look up so as * not to get into the next delayed allocation extent. 
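 *
 * Editor's worked example (not in the original): with blkoff == 100 and
 * the next delayed-allocation extent starting at delalloc_blkoff == 164,
 * the clamp below yields maxblocks == 64, so nilfs_bmap_lookup_contig()
 * stops exactly at the start of the delalloc run handled by the branch
 * above.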
*/ maxblocks = INT_MAX; if (delalloc_blklen) maxblocks = min_t(sector_t, delalloc_blkoff - blkoff, maxblocks); blkphy = 0; down_read(&NILFS_MDT(nilfs->ns_dat)->mi_sem); n = nilfs_bmap_lookup_contig( NILFS_I(inode)->i_bmap, blkoff, &blkphy, maxblocks); up_read(&NILFS_MDT(nilfs->ns_dat)->mi_sem); if (n < 0) { int past_eof; if (unlikely(n != -ENOENT)) break; /* error */ /* HOLE */ blkoff++; past_eof = ((blkoff << blkbits) >= isize); if (size) { /* End of the current extent */ if (past_eof) flags |= FIEMAP_EXTENT_LAST; ret = fiemap_fill_next_extent( fieinfo, logical, phys, size, flags); if (ret) break; size = 0; } if (blkoff > end_blkoff || past_eof) break; } else { if (size) { if (phys && blkphy << blkbits == phys + size) { /* The current extent goes on */ size += n << blkbits; } else { /* Terminate the current extent */ ret = fiemap_fill_next_extent( fieinfo, logical, phys, size, flags); if (ret || blkoff > end_blkoff) break; /* Start another extent */ flags = FIEMAP_EXTENT_MERGED; logical = blkoff << blkbits; phys = blkphy << blkbits; size = n << blkbits; } } else { /* Start a new extent */ flags = FIEMAP_EXTENT_MERGED; logical = blkoff << blkbits; phys = blkphy << blkbits; size = n << blkbits; } blkoff += n; } cond_resched(); } while (true); /* If ret is 1 then we just hit the end of the extent array */ if (ret == 1) ret = 0; inode_unlock(inode); return ret; } |
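/*
 * Editor's illustrative sketch (not part of the original file): the
 * nilfs_fiemap() implementation above is reached through the generic
 * FS_IOC_FIEMAP ioctl, so a minimal userspace caller (hypothetical helper
 * name, error handling trimmed) would look roughly like this:
 */
#if 0	/* userspace example, not kernel code */
#include <stdio.h>
#include <stdlib.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/fs.h>
#include <linux/fiemap.h>

static int dump_extents(const char *path)
{
	struct fiemap *fm;
	unsigned int i;
	int fd;

	fd = open(path, O_RDONLY);
	if (fd < 0)
		return -1;

	/* Header plus room for 32 extent records. */
	fm = calloc(1, sizeof(*fm) + 32 * sizeof(struct fiemap_extent));
	if (!fm) {
		close(fd);
		return -1;
	}
	fm->fm_start = 0;
	fm->fm_length = FIEMAP_MAX_OFFSET;	/* map the whole file */
	fm->fm_extent_count = 32;

	if (ioctl(fd, FS_IOC_FIEMAP, fm) == 0) {
		for (i = 0; i < fm->fm_mapped_extents; i++)
			printf("logical %llu phys %llu len %llu flags %#x\n",
			       (unsigned long long)fm->fm_extents[i].fe_logical,
			       (unsigned long long)fm->fm_extents[i].fe_physical,
			       (unsigned long long)fm->fm_extents[i].fe_length,
			       fm->fm_extents[i].fe_flags);
	}
	free(fm);
	close(fd);
	return 0;
}
#endif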
// SPDX-License-Identifier: GPL-2.0-or-later /* * The AEGIS-128 Authenticated-Encryption Algorithm * * Copyright (c) 2017-2018 Ondrej Mosnacek <omosnacek@gmail.com> * Copyright (C) 2017-2018 Red Hat, Inc. All rights reserved.
*/ #include <crypto/algapi.h> #include <crypto/internal/aead.h> #include <crypto/internal/simd.h> #include <crypto/internal/skcipher.h> #include <crypto/scatterwalk.h> #include <linux/err.h> #include <linux/init.h> #include <linux/jump_label.h> #include <linux/kernel.h> #include <linux/module.h> #include <linux/scatterlist.h> #include <asm/simd.h> #include "aegis.h" #define AEGIS128_NONCE_SIZE 16 #define AEGIS128_STATE_BLOCKS 5 #define AEGIS128_KEY_SIZE 16 #define AEGIS128_MIN_AUTH_SIZE 8 #define AEGIS128_MAX_AUTH_SIZE 16 struct aegis_state { union aegis_block blocks[AEGIS128_STATE_BLOCKS]; }; struct aegis_ctx { union aegis_block key; }; static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_simd); static const union aegis_block crypto_aegis_const[2] = { { .words64 = { cpu_to_le64(U64_C(0x0d08050302010100)), cpu_to_le64(U64_C(0x6279e99059372215)), } }, { .words64 = { cpu_to_le64(U64_C(0xf12fc26d55183ddb)), cpu_to_le64(U64_C(0xdd28b57342311120)), } }, }; static bool aegis128_do_simd(void) { #ifdef CONFIG_CRYPTO_AEGIS128_SIMD if (static_branch_likely(&have_simd)) return crypto_simd_usable(); #endif return false; } static void crypto_aegis128_update(struct aegis_state *state) { union aegis_block tmp; unsigned int i; tmp = state->blocks[AEGIS128_STATE_BLOCKS - 1]; for (i = AEGIS128_STATE_BLOCKS - 1; i > 0; i--) crypto_aegis_aesenc(&state->blocks[i], &state->blocks[i - 1], &state->blocks[i]); crypto_aegis_aesenc(&state->blocks[0], &tmp, &state->blocks[0]); } static void crypto_aegis128_update_a(struct aegis_state *state, const union aegis_block *msg, bool do_simd) { if (IS_ENABLED(CONFIG_CRYPTO_AEGIS128_SIMD) && do_simd) { crypto_aegis128_update_simd(state, msg); return; } crypto_aegis128_update(state); crypto_aegis_block_xor(&state->blocks[0], msg); } static void crypto_aegis128_update_u(struct aegis_state *state, const void *msg, bool do_simd) { if (IS_ENABLED(CONFIG_CRYPTO_AEGIS128_SIMD) && do_simd) { crypto_aegis128_update_simd(state, msg); return; } crypto_aegis128_update(state); crypto_xor(state->blocks[0].bytes, msg, AEGIS_BLOCK_SIZE); } static void crypto_aegis128_init(struct aegis_state *state, const union aegis_block *key, const u8 *iv) { union aegis_block key_iv; unsigned int i; key_iv = *key; crypto_xor(key_iv.bytes, iv, AEGIS_BLOCK_SIZE); state->blocks[0] = key_iv; state->blocks[1] = crypto_aegis_const[1]; state->blocks[2] = crypto_aegis_const[0]; state->blocks[3] = *key; state->blocks[4] = *key; crypto_aegis_block_xor(&state->blocks[3], &crypto_aegis_const[0]); crypto_aegis_block_xor(&state->blocks[4], &crypto_aegis_const[1]); for (i = 0; i < 5; i++) { crypto_aegis128_update_a(state, key, false); crypto_aegis128_update_a(state, &key_iv, false); } } static void crypto_aegis128_ad(struct aegis_state *state, const u8 *src, unsigned int size, bool do_simd) { if (AEGIS_ALIGNED(src)) { const union aegis_block *src_blk = (const union aegis_block *)src; while (size >= AEGIS_BLOCK_SIZE) { crypto_aegis128_update_a(state, src_blk, do_simd); size -= AEGIS_BLOCK_SIZE; src_blk++; } } else { while (size >= AEGIS_BLOCK_SIZE) { crypto_aegis128_update_u(state, src, do_simd); size -= AEGIS_BLOCK_SIZE; src += AEGIS_BLOCK_SIZE; } } } static void crypto_aegis128_wipe_chunk(struct aegis_state *state, u8 *dst, const u8 *src, unsigned int size) { memzero_explicit(dst, size); } static void crypto_aegis128_encrypt_chunk(struct aegis_state *state, u8 *dst, const u8 *src, unsigned int size) { union aegis_block tmp; if (AEGIS_ALIGNED(src) && AEGIS_ALIGNED(dst)) { while (size >= AEGIS_BLOCK_SIZE) { union 
aegis_block *dst_blk = (union aegis_block *)dst; const union aegis_block *src_blk = (const union aegis_block *)src; tmp = state->blocks[2]; crypto_aegis_block_and(&tmp, &state->blocks[3]); crypto_aegis_block_xor(&tmp, &state->blocks[4]); crypto_aegis_block_xor(&tmp, &state->blocks[1]); crypto_aegis_block_xor(&tmp, src_blk); crypto_aegis128_update_a(state, src_blk, false); *dst_blk = tmp; size -= AEGIS_BLOCK_SIZE; src += AEGIS_BLOCK_SIZE; dst += AEGIS_BLOCK_SIZE; } } else { while (size >= AEGIS_BLOCK_SIZE) { tmp = state->blocks[2]; crypto_aegis_block_and(&tmp, &state->blocks[3]); crypto_aegis_block_xor(&tmp, &state->blocks[4]); crypto_aegis_block_xor(&tmp, &state->blocks[1]); crypto_xor(tmp.bytes, src, AEGIS_BLOCK_SIZE); crypto_aegis128_update_u(state, src, false); memcpy(dst, tmp.bytes, AEGIS_BLOCK_SIZE); size -= AEGIS_BLOCK_SIZE; src += AEGIS_BLOCK_SIZE; dst += AEGIS_BLOCK_SIZE; } } if (size > 0) { union aegis_block msg = {}; memcpy(msg.bytes, src, size); tmp = state->blocks[2]; crypto_aegis_block_and(&tmp, &state->blocks[3]); crypto_aegis_block_xor(&tmp, &state->blocks[4]); crypto_aegis_block_xor(&tmp, &state->blocks[1]); crypto_aegis128_update_a(state, &msg, false); crypto_aegis_block_xor(&msg, &tmp); memcpy(dst, msg.bytes, size); } } static void crypto_aegis128_decrypt_chunk(struct aegis_state *state, u8 *dst, const u8 *src, unsigned int size) { union aegis_block tmp; if (AEGIS_ALIGNED(src) && AEGIS_ALIGNED(dst)) { while (size >= AEGIS_BLOCK_SIZE) { union aegis_block *dst_blk = (union aegis_block *)dst; const union aegis_block *src_blk = (const union aegis_block *)src; tmp = state->blocks[2]; crypto_aegis_block_and(&tmp, &state->blocks[3]); crypto_aegis_block_xor(&tmp, &state->blocks[4]); crypto_aegis_block_xor(&tmp, &state->blocks[1]); crypto_aegis_block_xor(&tmp, src_blk); crypto_aegis128_update_a(state, &tmp, false); *dst_blk = tmp; size -= AEGIS_BLOCK_SIZE; src += AEGIS_BLOCK_SIZE; dst += AEGIS_BLOCK_SIZE; } } else { while (size >= AEGIS_BLOCK_SIZE) { tmp = state->blocks[2]; crypto_aegis_block_and(&tmp, &state->blocks[3]); crypto_aegis_block_xor(&tmp, &state->blocks[4]); crypto_aegis_block_xor(&tmp, &state->blocks[1]); crypto_xor(tmp.bytes, src, AEGIS_BLOCK_SIZE); crypto_aegis128_update_a(state, &tmp, false); memcpy(dst, tmp.bytes, AEGIS_BLOCK_SIZE); size -= AEGIS_BLOCK_SIZE; src += AEGIS_BLOCK_SIZE; dst += AEGIS_BLOCK_SIZE; } } if (size > 0) { union aegis_block msg = {}; memcpy(msg.bytes, src, size); tmp = state->blocks[2]; crypto_aegis_block_and(&tmp, &state->blocks[3]); crypto_aegis_block_xor(&tmp, &state->blocks[4]); crypto_aegis_block_xor(&tmp, &state->blocks[1]); crypto_aegis_block_xor(&msg, &tmp); memset(msg.bytes + size, 0, AEGIS_BLOCK_SIZE - size); crypto_aegis128_update_a(state, &msg, false); memcpy(dst, msg.bytes, size); } } static void crypto_aegis128_process_ad(struct aegis_state *state, struct scatterlist *sg_src, unsigned int assoclen, bool do_simd) { struct scatter_walk walk; union aegis_block buf; unsigned int pos = 0; scatterwalk_start(&walk, sg_src); while (assoclen != 0) { unsigned int size = scatterwalk_clamp(&walk, assoclen); unsigned int left = size; void *mapped = scatterwalk_map(&walk); const u8 *src = (const u8 *)mapped; if (pos + size >= AEGIS_BLOCK_SIZE) { if (pos > 0) { unsigned int fill = AEGIS_BLOCK_SIZE - pos; memcpy(buf.bytes + pos, src, fill); crypto_aegis128_update_a(state, &buf, do_simd); pos = 0; left -= fill; src += fill; } crypto_aegis128_ad(state, src, left, do_simd); src += left & ~(AEGIS_BLOCK_SIZE - 1); left &= AEGIS_BLOCK_SIZE - 1; } 
memcpy(buf.bytes + pos, src, left); pos += left; assoclen -= size; scatterwalk_unmap(mapped); scatterwalk_advance(&walk, size); scatterwalk_done(&walk, 0, assoclen); } if (pos > 0) { memset(buf.bytes + pos, 0, AEGIS_BLOCK_SIZE - pos); crypto_aegis128_update_a(state, &buf, do_simd); } } static __always_inline int crypto_aegis128_process_crypt(struct aegis_state *state, struct skcipher_walk *walk, void (*crypt)(struct aegis_state *state, u8 *dst, const u8 *src, unsigned int size)) { int err = 0; while (walk->nbytes) { unsigned int nbytes = walk->nbytes; if (nbytes < walk->total) nbytes = round_down(nbytes, walk->stride); crypt(state, walk->dst.virt.addr, walk->src.virt.addr, nbytes); err = skcipher_walk_done(walk, walk->nbytes - nbytes); } return err; } static void crypto_aegis128_final(struct aegis_state *state, union aegis_block *tag_xor, u64 assoclen, u64 cryptlen) { u64 assocbits = assoclen * 8; u64 cryptbits = cryptlen * 8; union aegis_block tmp; unsigned int i; tmp.words64[0] = cpu_to_le64(assocbits); tmp.words64[1] = cpu_to_le64(cryptbits); crypto_aegis_block_xor(&tmp, &state->blocks[3]); for (i = 0; i < 7; i++) crypto_aegis128_update_a(state, &tmp, false); for (i = 0; i < AEGIS128_STATE_BLOCKS; i++) crypto_aegis_block_xor(tag_xor, &state->blocks[i]); } static int crypto_aegis128_setkey(struct crypto_aead *aead, const u8 *key, unsigned int keylen) { struct aegis_ctx *ctx = crypto_aead_ctx(aead); if (keylen != AEGIS128_KEY_SIZE) return -EINVAL; memcpy(ctx->key.bytes, key, AEGIS128_KEY_SIZE); return 0; } static int crypto_aegis128_setauthsize(struct crypto_aead *tfm, unsigned int authsize) { if (authsize > AEGIS128_MAX_AUTH_SIZE) return -EINVAL; if (authsize < AEGIS128_MIN_AUTH_SIZE) return -EINVAL; return 0; } static int crypto_aegis128_encrypt_generic(struct aead_request *req) { struct crypto_aead *tfm = crypto_aead_reqtfm(req); union aegis_block tag = {}; unsigned int authsize = crypto_aead_authsize(tfm); struct aegis_ctx *ctx = crypto_aead_ctx(tfm); unsigned int cryptlen = req->cryptlen; struct skcipher_walk walk; struct aegis_state state; skcipher_walk_aead_encrypt(&walk, req, false); crypto_aegis128_init(&state, &ctx->key, req->iv); crypto_aegis128_process_ad(&state, req->src, req->assoclen, false); crypto_aegis128_process_crypt(&state, &walk, crypto_aegis128_encrypt_chunk); crypto_aegis128_final(&state, &tag, req->assoclen, cryptlen); scatterwalk_map_and_copy(tag.bytes, req->dst, req->assoclen + cryptlen, authsize, 1); return 0; } static int crypto_aegis128_decrypt_generic(struct aead_request *req) { static const u8 zeros[AEGIS128_MAX_AUTH_SIZE] = {}; struct crypto_aead *tfm = crypto_aead_reqtfm(req); union aegis_block tag; unsigned int authsize = crypto_aead_authsize(tfm); unsigned int cryptlen = req->cryptlen - authsize; struct aegis_ctx *ctx = crypto_aead_ctx(tfm); struct skcipher_walk walk; struct aegis_state state; scatterwalk_map_and_copy(tag.bytes, req->src, req->assoclen + cryptlen, authsize, 0); skcipher_walk_aead_decrypt(&walk, req, false); crypto_aegis128_init(&state, &ctx->key, req->iv); crypto_aegis128_process_ad(&state, req->src, req->assoclen, false); crypto_aegis128_process_crypt(&state, &walk, crypto_aegis128_decrypt_chunk); crypto_aegis128_final(&state, &tag, req->assoclen, cryptlen); if (unlikely(crypto_memneq(tag.bytes, zeros, authsize))) { /* * From Chapter 4. 'Security Analysis' of the AEGIS spec [0] * * "3. If verification fails, the decrypted plaintext and the * wrong authentication tag should not be given as output." 
* * [0] https://competitions.cr.yp.to/round3/aegisv11.pdf */ skcipher_walk_aead_decrypt(&walk, req, false); crypto_aegis128_process_crypt(NULL, &walk, crypto_aegis128_wipe_chunk); memzero_explicit(&tag, sizeof(tag)); return -EBADMSG; } return 0; } static int crypto_aegis128_encrypt_simd(struct aead_request *req) { struct crypto_aead *tfm = crypto_aead_reqtfm(req); union aegis_block tag = {}; unsigned int authsize = crypto_aead_authsize(tfm); struct aegis_ctx *ctx = crypto_aead_ctx(tfm); unsigned int cryptlen = req->cryptlen; struct skcipher_walk walk; struct aegis_state state; if (!aegis128_do_simd()) return crypto_aegis128_encrypt_generic(req); skcipher_walk_aead_encrypt(&walk, req, false); crypto_aegis128_init_simd(&state, &ctx->key, req->iv); crypto_aegis128_process_ad(&state, req->src, req->assoclen, true); crypto_aegis128_process_crypt(&state, &walk, crypto_aegis128_encrypt_chunk_simd); crypto_aegis128_final_simd(&state, &tag, req->assoclen, cryptlen, 0); scatterwalk_map_and_copy(tag.bytes, req->dst, req->assoclen + cryptlen, authsize, 1); return 0; } static int crypto_aegis128_decrypt_simd(struct aead_request *req) { struct crypto_aead *tfm = crypto_aead_reqtfm(req); union aegis_block tag; unsigned int authsize = crypto_aead_authsize(tfm); unsigned int cryptlen = req->cryptlen - authsize; struct aegis_ctx *ctx = crypto_aead_ctx(tfm); struct skcipher_walk walk; struct aegis_state state; if (!aegis128_do_simd()) return crypto_aegis128_decrypt_generic(req); scatterwalk_map_and_copy(tag.bytes, req->src, req->assoclen + cryptlen, authsize, 0); skcipher_walk_aead_decrypt(&walk, req, false); crypto_aegis128_init_simd(&state, &ctx->key, req->iv); crypto_aegis128_process_ad(&state, req->src, req->assoclen, true); crypto_aegis128_process_crypt(&state, &walk, crypto_aegis128_decrypt_chunk_simd); if (unlikely(crypto_aegis128_final_simd(&state, &tag, req->assoclen, cryptlen, authsize))) { skcipher_walk_aead_decrypt(&walk, req, false); crypto_aegis128_process_crypt(NULL, &walk, crypto_aegis128_wipe_chunk); return -EBADMSG; } return 0; } static struct aead_alg crypto_aegis128_alg_generic = { .setkey = crypto_aegis128_setkey, .setauthsize = crypto_aegis128_setauthsize, .encrypt = crypto_aegis128_encrypt_generic, .decrypt = crypto_aegis128_decrypt_generic, .ivsize = AEGIS128_NONCE_SIZE, .maxauthsize = AEGIS128_MAX_AUTH_SIZE, .chunksize = AEGIS_BLOCK_SIZE, .base.cra_blocksize = 1, .base.cra_ctxsize = sizeof(struct aegis_ctx), .base.cra_priority = 100, .base.cra_name = "aegis128", .base.cra_driver_name = "aegis128-generic", .base.cra_module = THIS_MODULE, }; static struct aead_alg crypto_aegis128_alg_simd = { .setkey = crypto_aegis128_setkey, .setauthsize = crypto_aegis128_setauthsize, .encrypt = crypto_aegis128_encrypt_simd, .decrypt = crypto_aegis128_decrypt_simd, .ivsize = AEGIS128_NONCE_SIZE, .maxauthsize = AEGIS128_MAX_AUTH_SIZE, .chunksize = AEGIS_BLOCK_SIZE, .base.cra_blocksize = 1, .base.cra_ctxsize = sizeof(struct aegis_ctx), .base.cra_priority = 200, .base.cra_name = "aegis128", .base.cra_driver_name = "aegis128-simd", .base.cra_module = THIS_MODULE, }; static int __init crypto_aegis128_module_init(void) { int ret; ret = crypto_register_aead(&crypto_aegis128_alg_generic); if (ret) return ret; if (IS_ENABLED(CONFIG_CRYPTO_AEGIS128_SIMD) && crypto_aegis128_have_simd()) { ret = crypto_register_aead(&crypto_aegis128_alg_simd); if (ret) { crypto_unregister_aead(&crypto_aegis128_alg_generic); return ret; } static_branch_enable(&have_simd); } return 0; } static void __exit 
crypto_aegis128_module_exit(void) { if (IS_ENABLED(CONFIG_CRYPTO_AEGIS128_SIMD) && crypto_aegis128_have_simd()) crypto_unregister_aead(&crypto_aegis128_alg_simd); crypto_unregister_aead(&crypto_aegis128_alg_generic); } subsys_initcall(crypto_aegis128_module_init); module_exit(crypto_aegis128_module_exit); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Ondrej Mosnacek <omosnacek@gmail.com>"); MODULE_DESCRIPTION("AEGIS-128 AEAD algorithm"); MODULE_ALIAS_CRYPTO("aegis128"); MODULE_ALIAS_CRYPTO("aegis128-generic"); MODULE_ALIAS_CRYPTO("aegis128-simd"); |
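/*
 * Editor's illustrative sketch (not part of the original file): once the
 * module above is registered, the cipher is reachable through the kernel's
 * AEAD API under the name "aegis128". A minimal in-kernel encryption call
 * (hypothetical demo function, simplified error handling) would look
 * roughly like this:
 */
#if 0	/* example only */
#include <crypto/aead.h>
#include <linux/scatterlist.h>

static int aegis128_demo_encrypt(void)
{
	u8 key[16] = {}, iv[16] = {};
	u8 buf[64 + 16] = {};		/* 64B plaintext + 16B tag */
	DECLARE_CRYPTO_WAIT(wait);
	struct crypto_aead *tfm;
	struct aead_request *req;
	struct scatterlist sg;
	int err;

	tfm = crypto_alloc_aead("aegis128", 0, 0);
	if (IS_ERR(tfm))
		return PTR_ERR(tfm);

	err = crypto_aead_setkey(tfm, key, sizeof(key));
	if (err)
		goto out_free_tfm;
	err = crypto_aead_setauthsize(tfm, 16);
	if (err)
		goto out_free_tfm;

	req = aead_request_alloc(tfm, GFP_KERNEL);
	if (!req) {
		err = -ENOMEM;
		goto out_free_tfm;
	}

	sg_init_one(&sg, buf, sizeof(buf));
	aead_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG |
				  CRYPTO_TFM_REQ_MAY_SLEEP,
				  crypto_req_done, &wait);
	aead_request_set_ad(req, 0);			/* no associated data */
	aead_request_set_crypt(req, &sg, &sg, 64, iv);	/* in-place */

	/* On success the tag is written right after the ciphertext. */
	err = crypto_wait_req(crypto_aead_encrypt(req), &wait);

	aead_request_free(req);
out_free_tfm:
	crypto_free_aead(tfm);
	return err;
}
#endif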
// SPDX-License-Identifier: GPL-2.0-only /* * INET An implementation of the TCP/IP protocol suite for the LINUX * operating system. INET is implemented using the BSD Socket * interface as the means of communication with the user level. * * Implementation of the Transmission Control Protocol (TCP). * * Authors: Ross Biro * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> * Mark Evans, <evansmp@uhura.aston.ac.uk> * Corey Minyard <wf-rch!minyard@relay.EU.net> * Florian La Roche, <flla@stud.uni-sb.de> * Charles Hedrick, <hedrick@klinzhai.rutgers.edu> * Linus Torvalds, <torvalds@cs.helsinki.fi> * Alan Cox, <gw4pts@gw4pts.ampr.org> * Matthew Dillon, <dillon@apollo.west.oic.com> * Arnt Gulbrandsen, <agulbra@nvg.unit.no> * Jorge Cwik, <jorge@laser.satlink.net> */ #include <net/tcp.h> #include <net/xfrm.h> #include <net/busy_poll.h> #include <net/rstreason.h> static bool tcp_in_window(u32 seq, u32 end_seq, u32 s_win, u32 e_win) { if (seq == s_win) return true; if (after(end_seq, s_win) && before(seq, e_win)) return true; return seq == e_win && seq == end_seq; } static enum tcp_tw_status tcp_timewait_check_oow_rate_limit(struct inet_timewait_sock *tw, const struct sk_buff *skb, int mib_idx) { struct tcp_timewait_sock *tcptw = tcp_twsk((struct sock *)tw); if (!tcp_oow_rate_limited(twsk_net(tw), skb, mib_idx, &tcptw->tw_last_oow_ack_time)) { /* Send ACK. Note, we do not put the bucket, * it will be released by the caller. */ return TCP_TW_ACK; } /* We are rate-limiting, so just release the tw sock and drop the skb. */ inet_twsk_put(tw); return TCP_TW_SUCCESS; } static void twsk_rcv_nxt_update(struct tcp_timewait_sock *tcptw, u32 seq, u32 rcv_nxt) { #ifdef CONFIG_TCP_AO struct tcp_ao_info *ao; ao = rcu_dereference(tcptw->ao_info); if (unlikely(ao && seq < rcv_nxt)) WRITE_ONCE(ao->rcv_sne, ao->rcv_sne + 1); #endif WRITE_ONCE(tcptw->tw_rcv_nxt, seq); } /* * * The main purpose of the TIME-WAIT state is to close the connection gracefully, * when one of the ends sits in LAST-ACK or CLOSING retransmitting FIN * (and, probably, a tail of data) and one or more of our ACKs are lost. * * What is the TIME-WAIT timeout? It is associated with maximal packet * lifetime in the internet, which leads to the wrong conclusion that * it is set to catch "old duplicate segments" wandering out of their path. * That is not quite correct. This timeout is calculated so that it exceeds the * maximal retransmission timeout by enough to allow for the loss of one (or more) * segments sent by the peer and of our ACKs. This time may be calculated from the RTO. * * When a TIME-WAIT socket receives an RST, it means that the other end * has finally closed and we are allowed to kill TIME-WAIT too. * * The second purpose of TIME-WAIT is catching old duplicate segments. * Well, certainly it is pure paranoia, but if we load TIME-WAIT * with this semantics, we MUST NOT kill TIME-WAIT state with RSTs. * * If we invented some more clever way to catch duplicates * (f.e. based on PAWS), we could truncate TIME-WAIT to several RTOs. * * The algorithm below is based on a FORMAL INTERPRETATION of the RFCs. * When you compare it to the RFCs, please read section SEGMENT ARRIVES * from the very beginning. * * NOTE. With recycling (and later with fin-wait-2) the TW bucket * is _not_ stateless. It means that, strictly speaking, we must * spinlock it. I do not want!
Well, probability of misbehaviour * is ridiculously low and, seems, we could use some mb() tricks * to avoid misread sequence numbers, states etc. --ANK * * We don't need to initialize tmp_out.sack_ok as we don't use the results */ enum tcp_tw_status tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb, const struct tcphdr *th, u32 *tw_isn) { struct tcp_timewait_sock *tcptw = tcp_twsk((struct sock *)tw); u32 rcv_nxt = READ_ONCE(tcptw->tw_rcv_nxt); struct tcp_options_received tmp_opt; bool paws_reject = false; int ts_recent_stamp; tmp_opt.saw_tstamp = 0; ts_recent_stamp = READ_ONCE(tcptw->tw_ts_recent_stamp); if (th->doff > (sizeof(*th) >> 2) && ts_recent_stamp) { tcp_parse_options(twsk_net(tw), skb, &tmp_opt, 0, NULL); if (tmp_opt.saw_tstamp) { if (tmp_opt.rcv_tsecr) tmp_opt.rcv_tsecr -= tcptw->tw_ts_offset; tmp_opt.ts_recent = READ_ONCE(tcptw->tw_ts_recent); tmp_opt.ts_recent_stamp = ts_recent_stamp; paws_reject = tcp_paws_reject(&tmp_opt, th->rst); } } if (READ_ONCE(tw->tw_substate) == TCP_FIN_WAIT2) { /* Just repeat all the checks of tcp_rcv_state_process() */ /* Out of window, send ACK */ if (paws_reject || !tcp_in_window(TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq, rcv_nxt, rcv_nxt + tcptw->tw_rcv_wnd)) return tcp_timewait_check_oow_rate_limit( tw, skb, LINUX_MIB_TCPACKSKIPPEDFINWAIT2); if (th->rst) goto kill; if (th->syn && !before(TCP_SKB_CB(skb)->seq, rcv_nxt)) return TCP_TW_RST; /* Dup ACK? */ if (!th->ack || !after(TCP_SKB_CB(skb)->end_seq, rcv_nxt) || TCP_SKB_CB(skb)->end_seq == TCP_SKB_CB(skb)->seq) { inet_twsk_put(tw); return TCP_TW_SUCCESS; } /* New data or FIN. If new data arrive after half-duplex close, * reset. */ if (!th->fin || TCP_SKB_CB(skb)->end_seq != rcv_nxt + 1) return TCP_TW_RST; /* FIN arrived, enter true time-wait state. */ WRITE_ONCE(tw->tw_substate, TCP_TIME_WAIT); twsk_rcv_nxt_update(tcptw, TCP_SKB_CB(skb)->end_seq, rcv_nxt); if (tmp_opt.saw_tstamp) { u64 ts = tcp_clock_ms(); WRITE_ONCE(tw->tw_entry_stamp, ts); WRITE_ONCE(tcptw->tw_ts_recent_stamp, div_u64(ts, MSEC_PER_SEC)); WRITE_ONCE(tcptw->tw_ts_recent, tmp_opt.rcv_tsval); } inet_twsk_reschedule(tw, TCP_TIMEWAIT_LEN); return TCP_TW_ACK; } /* * Now real TIME-WAIT state. * * RFC 1122: * "When a connection is [...] on TIME-WAIT state [...] * [a TCP] MAY accept a new SYN from the remote TCP to * reopen the connection directly, if it: * * (1) assigns its initial sequence number for the new * connection to be larger than the largest sequence * number it used on the previous connection incarnation, * and * * (2) returns to TIME-WAIT state if the SYN turns out * to be an old duplicate". */ if (!paws_reject && (TCP_SKB_CB(skb)->seq == rcv_nxt && (TCP_SKB_CB(skb)->seq == TCP_SKB_CB(skb)->end_seq || th->rst))) { /* In window segment, it may be only reset or bare ack. */ if (th->rst) { /* This is TIME_WAIT assassination, in two flavors. * Oh well... nobody has a sufficient solution to this * protocol bug yet. */ if (!READ_ONCE(twsk_net(tw)->ipv4.sysctl_tcp_rfc1337)) { kill: inet_twsk_deschedule_put(tw); return TCP_TW_SUCCESS; } } else { inet_twsk_reschedule(tw, TCP_TIMEWAIT_LEN); } if (tmp_opt.saw_tstamp) { WRITE_ONCE(tcptw->tw_ts_recent, tmp_opt.rcv_tsval); WRITE_ONCE(tcptw->tw_ts_recent_stamp, ktime_get_seconds()); } inet_twsk_put(tw); return TCP_TW_SUCCESS; } /* Out of window segment. All the segments are ACKed immediately. The only exception is new SYN. We accept it, if it is not old duplicate and we are not in danger to be killed by delayed old duplicates. 
RFC check is that it has newer sequence number works at rates <40Mbit/sec. However, if paws works, it is reliable AND even more, we even may relax silly seq space cutoff. RED-PEN: we violate main RFC requirement, if this SYN will appear old duplicate (i.e. we receive RST in reply to SYN-ACK), we must return socket to time-wait state. It is not good, but not fatal yet. */ if (th->syn && !th->rst && !th->ack && !paws_reject && (after(TCP_SKB_CB(skb)->seq, rcv_nxt) || (tmp_opt.saw_tstamp && (s32)(READ_ONCE(tcptw->tw_ts_recent) - tmp_opt.rcv_tsval) < 0))) { u32 isn = tcptw->tw_snd_nxt + 65535 + 2; if (isn == 0) isn++; *tw_isn = isn; return TCP_TW_SYN; } if (paws_reject) __NET_INC_STATS(twsk_net(tw), LINUX_MIB_PAWSESTABREJECTED); if (!th->rst) { /* In this case we must reset the TIMEWAIT timer. * * If it is ACKless SYN it may be both old duplicate * and new good SYN with random sequence number <rcv_nxt. * Do not reschedule in the last case. */ if (paws_reject || th->ack) inet_twsk_reschedule(tw, TCP_TIMEWAIT_LEN); return tcp_timewait_check_oow_rate_limit( tw, skb, LINUX_MIB_TCPACKSKIPPEDTIMEWAIT); } inet_twsk_put(tw); return TCP_TW_SUCCESS; } EXPORT_SYMBOL(tcp_timewait_state_process); static void tcp_time_wait_init(struct sock *sk, struct tcp_timewait_sock *tcptw) { #ifdef CONFIG_TCP_MD5SIG const struct tcp_sock *tp = tcp_sk(sk); struct tcp_md5sig_key *key; /* * The timewait bucket does not have the key DB from the * sock structure. We just make a quick copy of the * md5 key being used (if indeed we are using one) * so the timewait ack generating code has the key. */ tcptw->tw_md5_key = NULL; if (!static_branch_unlikely(&tcp_md5_needed.key)) return; key = tp->af_specific->md5_lookup(sk, sk); if (key) { tcptw->tw_md5_key = kmemdup(key, sizeof(*key), GFP_ATOMIC); if (!tcptw->tw_md5_key) return; if (!static_key_fast_inc_not_disabled(&tcp_md5_needed.key.key)) goto out_free; tcp_md5_add_sigpool(); } return; out_free: WARN_ON_ONCE(1); kfree(tcptw->tw_md5_key); tcptw->tw_md5_key = NULL; #endif } /* * Move a socket to time-wait or dead fin-wait-2 state. 
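 *
 * Editor's illustrative note (not in the original): the rto value computed
 * below is (icsk_rto << 2) - (icsk_rto >> 1), i.e. 3.5x the current RTO,
 * so with a 200ms RTO a dead FIN-WAIT-2 socket lingers at least 700ms,
 * while a true TIME-WAIT socket always waits the full TCP_TIMEWAIT_LEN.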
*/ void tcp_time_wait(struct sock *sk, int state, int timeo) { const struct inet_connection_sock *icsk = inet_csk(sk); struct tcp_sock *tp = tcp_sk(sk); struct net *net = sock_net(sk); struct inet_timewait_sock *tw; tw = inet_twsk_alloc(sk, &net->ipv4.tcp_death_row, state); if (tw) { struct tcp_timewait_sock *tcptw = tcp_twsk((struct sock *)tw); const int rto = (icsk->icsk_rto << 2) - (icsk->icsk_rto >> 1); tw->tw_transparent = inet_test_bit(TRANSPARENT, sk); tw->tw_mark = sk->sk_mark; tw->tw_priority = READ_ONCE(sk->sk_priority); tw->tw_rcv_wscale = tp->rx_opt.rcv_wscale; /* refreshed when we enter true TIME-WAIT state */ tw->tw_entry_stamp = tcp_time_stamp_ms(tp); tcptw->tw_rcv_nxt = tp->rcv_nxt; tcptw->tw_snd_nxt = tp->snd_nxt; tcptw->tw_rcv_wnd = tcp_receive_window(tp); tcptw->tw_ts_recent = tp->rx_opt.ts_recent; tcptw->tw_ts_recent_stamp = tp->rx_opt.ts_recent_stamp; tcptw->tw_ts_offset = tp->tsoffset; tw->tw_usec_ts = tp->tcp_usec_ts; tcptw->tw_last_oow_ack_time = 0; tcptw->tw_tx_delay = tp->tcp_tx_delay; tw->tw_txhash = sk->sk_txhash; tw->tw_tx_queue_mapping = sk->sk_tx_queue_mapping; #ifdef CONFIG_SOCK_RX_QUEUE_MAPPING tw->tw_rx_queue_mapping = sk->sk_rx_queue_mapping; #endif #if IS_ENABLED(CONFIG_IPV6) if (tw->tw_family == PF_INET6) { struct ipv6_pinfo *np = inet6_sk(sk); tw->tw_v6_daddr = sk->sk_v6_daddr; tw->tw_v6_rcv_saddr = sk->sk_v6_rcv_saddr; tw->tw_tclass = np->tclass; tw->tw_flowlabel = be32_to_cpu(np->flow_label & IPV6_FLOWLABEL_MASK); tw->tw_ipv6only = sk->sk_ipv6only; } #endif tcp_time_wait_init(sk, tcptw); tcp_ao_time_wait(tcptw, tp); /* Get the TIME_WAIT timeout firing. */ if (timeo < rto) timeo = rto; if (state == TCP_TIME_WAIT) timeo = TCP_TIMEWAIT_LEN; /* Linkage updates. * Note that access to tw after this point is illegal. */ inet_twsk_hashdance_schedule(tw, sk, net->ipv4.tcp_death_row.hashinfo, timeo); } else { /* Sorry, if we're out of memory, just CLOSE this * socket up. We've got bigger problems than * non-graceful socket closings. */ NET_INC_STATS(net, LINUX_MIB_TCPTIMEWAITOVERFLOW); } tcp_update_metrics(sk); tcp_done(sk); } EXPORT_SYMBOL(tcp_time_wait); #ifdef CONFIG_TCP_MD5SIG static void tcp_md5_twsk_free_rcu(struct rcu_head *head) { struct tcp_md5sig_key *key; key = container_of(head, struct tcp_md5sig_key, rcu); kfree(key); static_branch_slow_dec_deferred(&tcp_md5_needed); tcp_md5_release_sigpool(); } #endif void tcp_twsk_destructor(struct sock *sk) { #ifdef CONFIG_TCP_MD5SIG if (static_branch_unlikely(&tcp_md5_needed.key)) { struct tcp_timewait_sock *twsk = tcp_twsk(sk); if (twsk->tw_md5_key) call_rcu(&twsk->tw_md5_key->rcu, tcp_md5_twsk_free_rcu); } #endif tcp_ao_destroy_sock(sk, true); } EXPORT_SYMBOL_GPL(tcp_twsk_destructor); void tcp_twsk_purge(struct list_head *net_exit_list) { bool purged_once = false; struct net *net; list_for_each_entry(net, net_exit_list, exit_list) { if (net->ipv4.tcp_death_row.hashinfo->pernet) { /* Even if tw_refcount == 1, we must clean up kernel reqsk */ inet_twsk_purge(net->ipv4.tcp_death_row.hashinfo); } else if (!purged_once) { inet_twsk_purge(&tcp_hashinfo); purged_once = true; } } } /* Warning : This function is called without sk_listener being locked. * Be sure to read socket fields once, as their value could change under us. 
*/ void tcp_openreq_init_rwin(struct request_sock *req, const struct sock *sk_listener, const struct dst_entry *dst) { struct inet_request_sock *ireq = inet_rsk(req); const struct tcp_sock *tp = tcp_sk(sk_listener); int full_space = tcp_full_space(sk_listener); u32 window_clamp; __u8 rcv_wscale; u32 rcv_wnd; int mss; mss = tcp_mss_clamp(tp, dst_metric_advmss(dst)); window_clamp = READ_ONCE(tp->window_clamp); /* Set this up on the first call only */ req->rsk_window_clamp = window_clamp ? : dst_metric(dst, RTAX_WINDOW); /* limit the window selection if the user enforce a smaller rx buffer */ if (sk_listener->sk_userlocks & SOCK_RCVBUF_LOCK && (req->rsk_window_clamp > full_space || req->rsk_window_clamp == 0)) req->rsk_window_clamp = full_space; rcv_wnd = tcp_rwnd_init_bpf((struct sock *)req); if (rcv_wnd == 0) rcv_wnd = dst_metric(dst, RTAX_INITRWND); else if (full_space < rcv_wnd * mss) full_space = rcv_wnd * mss; /* tcp_full_space because it is guaranteed to be the first packet */ tcp_select_initial_window(sk_listener, full_space, mss - (ireq->tstamp_ok ? TCPOLEN_TSTAMP_ALIGNED : 0), &req->rsk_rcv_wnd, &req->rsk_window_clamp, ireq->wscale_ok, &rcv_wscale, rcv_wnd); ireq->rcv_wscale = rcv_wscale; } EXPORT_SYMBOL(tcp_openreq_init_rwin); static void tcp_ecn_openreq_child(struct tcp_sock *tp, const struct request_sock *req) { tp->ecn_flags = inet_rsk(req)->ecn_ok ? TCP_ECN_OK : 0; } void tcp_ca_openreq_child(struct sock *sk, const struct dst_entry *dst) { struct inet_connection_sock *icsk = inet_csk(sk); u32 ca_key = dst_metric(dst, RTAX_CC_ALGO); bool ca_got_dst = false; if (ca_key != TCP_CA_UNSPEC) { const struct tcp_congestion_ops *ca; rcu_read_lock(); ca = tcp_ca_find_key(ca_key); if (likely(ca && bpf_try_module_get(ca, ca->owner))) { icsk->icsk_ca_dst_locked = tcp_ca_dst_locked(dst); icsk->icsk_ca_ops = ca; ca_got_dst = true; } rcu_read_unlock(); } /* If no valid choice made yet, assign current system default ca. */ if (!ca_got_dst && (!icsk->icsk_ca_setsockopt || !bpf_try_module_get(icsk->icsk_ca_ops, icsk->icsk_ca_ops->owner))) tcp_assign_congestion_control(sk); tcp_set_ca_state(sk, TCP_CA_Open); } EXPORT_SYMBOL_GPL(tcp_ca_openreq_child); static void smc_check_reset_syn_req(const struct tcp_sock *oldtp, struct request_sock *req, struct tcp_sock *newtp) { #if IS_ENABLED(CONFIG_SMC) struct inet_request_sock *ireq; if (static_branch_unlikely(&tcp_have_smc)) { ireq = inet_rsk(req); if (oldtp->syn_smc && !ireq->smc_ok) newtp->syn_smc = 0; } #endif } /* This is not only more efficient than what we used to do, it eliminates * a lot of code duplication between IPv4/IPv6 SYN recv processing. -DaveM * * Actually, we could lots of memory writes here. tp of listening * socket contains all necessary default parameters. 
*/ struct sock *tcp_create_openreq_child(const struct sock *sk, struct request_sock *req, struct sk_buff *skb) { struct sock *newsk = inet_csk_clone_lock(sk, req, GFP_ATOMIC); const struct inet_request_sock *ireq = inet_rsk(req); struct tcp_request_sock *treq = tcp_rsk(req); struct inet_connection_sock *newicsk; const struct tcp_sock *oldtp; struct tcp_sock *newtp; u32 seq; if (!newsk) return NULL; newicsk = inet_csk(newsk); newtp = tcp_sk(newsk); oldtp = tcp_sk(sk); smc_check_reset_syn_req(oldtp, req, newtp); /* Now setup tcp_sock */ newtp->pred_flags = 0; seq = treq->rcv_isn + 1; newtp->rcv_wup = seq; WRITE_ONCE(newtp->copied_seq, seq); WRITE_ONCE(newtp->rcv_nxt, seq); newtp->segs_in = 1; seq = treq->snt_isn + 1; newtp->snd_sml = newtp->snd_una = seq; WRITE_ONCE(newtp->snd_nxt, seq); newtp->snd_up = seq; INIT_LIST_HEAD(&newtp->tsq_node); INIT_LIST_HEAD(&newtp->tsorted_sent_queue); tcp_init_wl(newtp, treq->rcv_isn); minmax_reset(&newtp->rtt_min, tcp_jiffies32, ~0U); newicsk->icsk_ack.lrcvtime = tcp_jiffies32; newtp->lsndtime = tcp_jiffies32; newsk->sk_txhash = READ_ONCE(treq->txhash); newtp->total_retrans = req->num_retrans; tcp_init_xmit_timers(newsk); WRITE_ONCE(newtp->write_seq, newtp->pushed_seq = treq->snt_isn + 1); if (sock_flag(newsk, SOCK_KEEPOPEN)) inet_csk_reset_keepalive_timer(newsk, keepalive_time_when(newtp)); newtp->rx_opt.tstamp_ok = ireq->tstamp_ok; newtp->rx_opt.sack_ok = ireq->sack_ok; newtp->window_clamp = req->rsk_window_clamp; newtp->rcv_ssthresh = req->rsk_rcv_wnd; newtp->rcv_wnd = req->rsk_rcv_wnd; newtp->rx_opt.wscale_ok = ireq->wscale_ok; if (newtp->rx_opt.wscale_ok) { newtp->rx_opt.snd_wscale = ireq->snd_wscale; newtp->rx_opt.rcv_wscale = ireq->rcv_wscale; } else { newtp->rx_opt.snd_wscale = newtp->rx_opt.rcv_wscale = 0; newtp->window_clamp = min(newtp->window_clamp, 65535U); } newtp->snd_wnd = ntohs(tcp_hdr(skb)->window) << newtp->rx_opt.snd_wscale; newtp->max_window = newtp->snd_wnd; if (newtp->rx_opt.tstamp_ok) { newtp->tcp_usec_ts = treq->req_usec_ts; newtp->rx_opt.ts_recent = READ_ONCE(req->ts_recent); newtp->rx_opt.ts_recent_stamp = ktime_get_seconds(); newtp->tcp_header_len = sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED; } else { newtp->tcp_usec_ts = 0; newtp->rx_opt.ts_recent_stamp = 0; newtp->tcp_header_len = sizeof(struct tcphdr); } if (req->num_timeout) { newtp->total_rto = req->num_timeout; newtp->undo_marker = treq->snt_isn; if (newtp->tcp_usec_ts) { newtp->retrans_stamp = treq->snt_synack; newtp->total_rto_time = (u32)(tcp_clock_us() - newtp->retrans_stamp) / USEC_PER_MSEC; } else { newtp->retrans_stamp = div_u64(treq->snt_synack, USEC_PER_SEC / TCP_TS_HZ); newtp->total_rto_time = tcp_clock_ms() - newtp->retrans_stamp; } newtp->total_rto_recoveries = 1; } newtp->tsoffset = treq->ts_off; #ifdef CONFIG_TCP_MD5SIG newtp->md5sig_info = NULL; /*XXX*/ #endif #ifdef CONFIG_TCP_AO newtp->ao_info = NULL; if (tcp_rsk_used_ao(req)) { struct tcp_ao_key *ao_key; ao_key = treq->af_specific->ao_lookup(sk, req, tcp_rsk(req)->ao_keyid, -1); if (ao_key) newtp->tcp_header_len += tcp_ao_len_aligned(ao_key); } #endif if (skb->len >= TCP_MSS_DEFAULT + newtp->tcp_header_len) newicsk->icsk_ack.last_seg_size = skb->len - newtp->tcp_header_len; newtp->rx_opt.mss_clamp = req->mss; tcp_ecn_openreq_child(newtp, req); newtp->fastopen_req = NULL; RCU_INIT_POINTER(newtp->fastopen_rsk, NULL); newtp->bpf_chg_cc_inprogress = 0; tcp_bpf_clone(sk, newsk); __TCP_INC_STATS(sock_net(sk), TCP_MIB_PASSIVEOPENS); xa_init_flags(&newsk->sk_user_frags, XA_FLAGS_ALLOC1); return newsk; } 
EXPORT_SYMBOL(tcp_create_openreq_child); /* * Process an incoming packet for SYN_RECV sockets represented as a * request_sock. Normally sk is the listener socket but for TFO it * points to the child socket. * * XXX (TFO) - The current impl contains a special check for ack * validation and inside tcp_v4_reqsk_send_ack(). Can we do better? * * We don't need to initialize tmp_opt.sack_ok as we don't use the results * * Note: If @fastopen is true, this can be called from process context. * Otherwise, this is from BH context. */ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb, struct request_sock *req, bool fastopen, bool *req_stolen) { struct tcp_options_received tmp_opt; struct sock *child; const struct tcphdr *th = tcp_hdr(skb); __be32 flg = tcp_flag_word(th) & (TCP_FLAG_RST|TCP_FLAG_SYN|TCP_FLAG_ACK); bool paws_reject = false; bool own_req; tmp_opt.saw_tstamp = 0; if (th->doff > (sizeof(struct tcphdr)>>2)) { tcp_parse_options(sock_net(sk), skb, &tmp_opt, 0, NULL); if (tmp_opt.saw_tstamp) { tmp_opt.ts_recent = READ_ONCE(req->ts_recent); if (tmp_opt.rcv_tsecr) tmp_opt.rcv_tsecr -= tcp_rsk(req)->ts_off; /* We do not store true stamp, but it is not required, * it can be estimated (approximately) * from another data. */ tmp_opt.ts_recent_stamp = ktime_get_seconds() - reqsk_timeout(req, TCP_RTO_MAX) / HZ; paws_reject = tcp_paws_reject(&tmp_opt, th->rst); } } /* Check for pure retransmitted SYN. */ if (TCP_SKB_CB(skb)->seq == tcp_rsk(req)->rcv_isn && flg == TCP_FLAG_SYN && !paws_reject) { /* * RFC793 draws (Incorrectly! It was fixed in RFC1122) * this case on figure 6 and figure 8, but formal * protocol description says NOTHING. * To be more exact, it says that we should send ACK, * because this segment (at least, if it has no data) * is out of window. * * CONCLUSION: RFC793 (even with RFC1122) DOES NOT * describe SYN-RECV state. All the description * is wrong, we cannot believe to it and should * rely only on common sense and implementation * experience. * * Enforce "SYN-ACK" according to figure 8, figure 6 * of RFC793, fixed by RFC1122. * * Note that even if there is new data in the SYN packet * they will be thrown away too. * * Reset timer after retransmitting SYNACK, similar to * the idea of fast retransmit in recovery. */ if (!tcp_oow_rate_limited(sock_net(sk), skb, LINUX_MIB_TCPACKSKIPPEDSYNRECV, &tcp_rsk(req)->last_oow_ack_time) && !inet_rtx_syn_ack(sk, req)) { unsigned long expires = jiffies; expires += reqsk_timeout(req, TCP_RTO_MAX); if (!fastopen) mod_timer_pending(&req->rsk_timer, expires); else req->rsk_timer.expires = expires; } return NULL; } /* Further reproduces section "SEGMENT ARRIVES" for state SYN-RECEIVED of RFC793. It is broken, however, it does not work only when SYNs are crossed. You would think that SYN crossing is impossible here, since we should have a SYN_SENT socket (from connect()) on our end, but this is not true if the crossed SYNs were sent to both ends by a malicious third party. We must defend against this, and to do that we first verify the ACK (as per RFC793, page 36) and reset if it is invalid. Is this a true full defense? To convince ourselves, let us consider a way in which the ACK test can still pass in this 'malicious crossed SYNs' case. 
Malicious sender sends identical SYNs (and thus identical sequence numbers) to both A and B: A: gets SYN, seq=7 B: gets SYN, seq=7 By our good fortune, both A and B select the same initial send sequence number of seven :-) A: sends SYN|ACK, seq=7, ack_seq=8 B: sends SYN|ACK, seq=7, ack_seq=8 So we are now A eating this SYN|ACK, ACK test passes. So does sequence test, SYN is truncated, and thus we consider it a bare ACK. If icsk->icsk_accept_queue.rskq_defer_accept, we silently drop this bare ACK. Otherwise, we create an established connection. Both ends (listening sockets) accept the new incoming connection and try to talk to each other. 8-) Note: This case is both harmless, and rare. Possibility is about the same as us discovering intelligent life on another plant tomorrow. But generally, we should (RFC lies!) to accept ACK from SYNACK both here and in tcp_rcv_state_process(). tcp_rcv_state_process() does not, hence, we do not too. Note that the case is absolutely generic: we cannot optimize anything here without violating protocol. All the checks must be made before attempt to create socket. */ /* RFC793 page 36: "If the connection is in any non-synchronized state ... * and the incoming segment acknowledges something not yet * sent (the segment carries an unacceptable ACK) ... * a reset is sent." * * Invalid ACK: reset will be sent by listening socket. * Note that the ACK validity check for a Fast Open socket is done * elsewhere and is checked directly against the child socket rather * than req because user data may have been sent out. */ if ((flg & TCP_FLAG_ACK) && !fastopen && (TCP_SKB_CB(skb)->ack_seq != tcp_rsk(req)->snt_isn + 1)) return sk; /* Also, it would be not so bad idea to check rcv_tsecr, which * is essentially ACK extension and too early or too late values * should cause reset in unsynchronized states. */ /* RFC793: "first check sequence number". */ if (paws_reject || !tcp_in_window(TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq, tcp_rsk(req)->rcv_nxt, tcp_rsk(req)->rcv_nxt + tcp_synack_window(req))) { /* Out of window: send ACK and drop. */ if (!(flg & TCP_FLAG_RST) && !tcp_oow_rate_limited(sock_net(sk), skb, LINUX_MIB_TCPACKSKIPPEDSYNRECV, &tcp_rsk(req)->last_oow_ack_time)) req->rsk_ops->send_ack(sk, skb, req); if (paws_reject) NET_INC_STATS(sock_net(sk), LINUX_MIB_PAWSESTABREJECTED); return NULL; } /* In sequence, PAWS is OK. */ /* TODO: We probably should defer ts_recent change once * we take ownership of @req. */ if (tmp_opt.saw_tstamp && !after(TCP_SKB_CB(skb)->seq, tcp_rsk(req)->rcv_nxt)) WRITE_ONCE(req->ts_recent, tmp_opt.rcv_tsval); if (TCP_SKB_CB(skb)->seq == tcp_rsk(req)->rcv_isn) { /* Truncate SYN, it is out of window starting at tcp_rsk(req)->rcv_isn + 1. */ flg &= ~TCP_FLAG_SYN; } /* RFC793: "second check the RST bit" and * "fourth, check the SYN bit" */ if (flg & (TCP_FLAG_RST|TCP_FLAG_SYN)) { TCP_INC_STATS(sock_net(sk), TCP_MIB_ATTEMPTFAILS); goto embryonic_reset; } /* ACK sequence verified above, just make sure ACK is * set. If ACK not set, just silently drop the packet. * * XXX (TFO) - if we ever allow "data after SYN", the * following check needs to be removed. */ if (!(flg & TCP_FLAG_ACK)) return NULL; /* For Fast Open no more processing is needed (sk is the * child socket). */ if (fastopen) return sk; /* While TCP_DEFER_ACCEPT is active, drop bare ACK. 
*/ if (req->num_timeout < READ_ONCE(inet_csk(sk)->icsk_accept_queue.rskq_defer_accept) && TCP_SKB_CB(skb)->end_seq == tcp_rsk(req)->rcv_isn + 1) { inet_rsk(req)->acked = 1; __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPDEFERACCEPTDROP); return NULL; } /* OK, ACK is valid, create big socket and * feed this segment to it. It will repeat all * the tests. THIS SEGMENT MUST MOVE SOCKET TO * ESTABLISHED STATE. If it will be dropped after * socket is created, wait for troubles. */ child = inet_csk(sk)->icsk_af_ops->syn_recv_sock(sk, skb, req, NULL, req, &own_req); if (!child) goto listen_overflow; if (own_req && rsk_drop_req(req)) { reqsk_queue_removed(&inet_csk(req->rsk_listener)->icsk_accept_queue, req); inet_csk_reqsk_queue_drop_and_put(req->rsk_listener, req); return child; } sock_rps_save_rxhash(child, skb); tcp_synack_rtt_meas(child, req); *req_stolen = !own_req; return inet_csk_complete_hashdance(sk, child, req, own_req); listen_overflow: if (sk != req->rsk_listener) __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMIGRATEREQFAILURE); if (!READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_abort_on_overflow)) { inet_rsk(req)->acked = 1; return NULL; } embryonic_reset: if (!(flg & TCP_FLAG_RST)) { /* Received a bad SYN pkt - for TFO We try not to reset * the local connection unless it's really necessary to * avoid becoming vulnerable to outside attack aiming at * resetting legit local connections. */ req->rsk_ops->send_reset(sk, skb, SK_RST_REASON_INVALID_SYN); } else if (fastopen) { /* received a valid RST pkt */ reqsk_fastopen_remove(sk, req, true); tcp_reset(sk, skb); } if (!fastopen) { bool unlinked = inet_csk_reqsk_queue_drop(sk, req); if (unlinked) __NET_INC_STATS(sock_net(sk), LINUX_MIB_EMBRYONICRSTS); *req_stolen = !unlinked; } return NULL; } EXPORT_SYMBOL(tcp_check_req); /* * Queue segment on the new socket if the new socket is active, * otherwise we just shortcircuit this and continue with * the new socket. * * For the vast majority of cases child->sk_state will be TCP_SYN_RECV * when entering. But other states are possible due to a race condition * where after __inet_lookup_established() fails but before the listener * locked is obtained, other packets cause the same connection to * be created. */ enum skb_drop_reason tcp_child_process(struct sock *parent, struct sock *child, struct sk_buff *skb) __releases(&((child)->sk_lock.slock)) { enum skb_drop_reason reason = SKB_NOT_DROPPED_YET; int state = child->sk_state; /* record sk_napi_id and sk_rx_queue_mapping of child. */ sk_mark_napi_id_set(child, skb); tcp_segs_in(tcp_sk(child), skb); if (!sock_owned_by_user(child)) { reason = tcp_rcv_state_process(child, skb); /* Wakeup parent, send SIGIO */ if (state == TCP_SYN_RECV && child->sk_state != state) parent->sk_data_ready(parent); } else { /* Alas, it is possible again, because we do lookup * in main socket hash table and lock on listening * socket does not protect us more. */ __sk_add_backlog(child, skb); } bh_unlock_sock(child); sock_put(child); return reason; } EXPORT_SYMBOL(tcp_child_process); |
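/*
 * Editor's illustrative sketch (not part of the original file): the
 * tcp_in_window() helper at the top of this file relies on the mod-2^32
 * before()/after() comparisons, so the check keeps working across
 * sequence-number wraparound. A standalone rendering of the same logic,
 * with a few self-checks (hypothetical names):
 */
#if 0	/* standalone example, not kernel code */
#include <stdbool.h>
#include <stdint.h>
#include <assert.h>

static bool seq_before(uint32_t a, uint32_t b)
{
	return (int32_t)(a - b) < 0;	/* the kernel's before() */
}

static bool demo_in_window(uint32_t seq, uint32_t end_seq,
			   uint32_t s_win, uint32_t e_win)
{
	if (seq == s_win)
		return true;
	/* after(end_seq, s_win) is before(s_win, end_seq) */
	if (seq_before(s_win, end_seq) && seq_before(seq, e_win))
		return true;
	return seq == e_win && seq == end_seq;
}

int main(void)
{
	/* Zero-length segment exactly at the left window edge: accepted. */
	assert(demo_in_window(1000, 1000, 1000, 2000));
	/* Segment straddling a 2^32 wrap is still judged correctly. */
	assert(demo_in_window(0xfffffff0u, 0x10u, 0xffffffe0u, 0x100u));
	/* Entirely to the left of the window: rejected. */
	assert(!demo_in_window(500, 600, 1000, 2000));
	return 0;
}
#endif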
/* BlueZ - Bluetooth protocol stack for Linux Copyright (C) 2000-2001 Qualcomm Incorporated Written 2000,2001 by Maxim Krasnyansky <maxk@qualcomm.com> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License version 2 as published by the Free Software Foundation; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS. IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) AND AUTHOR(S) BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. ALL LIABILITY, INCLUDING LIABILITY FOR INFRINGEMENT OF ANY PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER RIGHTS, RELATING TO USE OF THIS SOFTWARE IS DISCLAIMED. */ /* Bluetooth address family and sockets. */ #include <linux/module.h> #include <linux/debugfs.h> #include <linux/stringify.h> #include <linux/sched/signal.h> #include <asm/ioctls.h> #include <net/bluetooth/bluetooth.h> #include <linux/proc_fs.h> #include "leds.h" #include "selftest.h" /* Bluetooth sockets */ #define BT_MAX_PROTO (BTPROTO_LAST + 1) static const struct net_proto_family *bt_proto[BT_MAX_PROTO]; static DEFINE_RWLOCK(bt_proto_lock); static struct lock_class_key bt_lock_key[BT_MAX_PROTO]; static const char *const bt_key_strings[BT_MAX_PROTO] = { "sk_lock-AF_BLUETOOTH-BTPROTO_L2CAP", "sk_lock-AF_BLUETOOTH-BTPROTO_HCI", "sk_lock-AF_BLUETOOTH-BTPROTO_SCO", "sk_lock-AF_BLUETOOTH-BTPROTO_RFCOMM", "sk_lock-AF_BLUETOOTH-BTPROTO_BNEP", "sk_lock-AF_BLUETOOTH-BTPROTO_CMTP", "sk_lock-AF_BLUETOOTH-BTPROTO_HIDP", "sk_lock-AF_BLUETOOTH-BTPROTO_AVDTP", "sk_lock-AF_BLUETOOTH-BTPROTO_ISO", }; static struct lock_class_key bt_slock_key[BT_MAX_PROTO]; static const char *const bt_slock_key_strings[BT_MAX_PROTO] = { "slock-AF_BLUETOOTH-BTPROTO_L2CAP", "slock-AF_BLUETOOTH-BTPROTO_HCI", "slock-AF_BLUETOOTH-BTPROTO_SCO", "slock-AF_BLUETOOTH-BTPROTO_RFCOMM", "slock-AF_BLUETOOTH-BTPROTO_BNEP", "slock-AF_BLUETOOTH-BTPROTO_CMTP", "slock-AF_BLUETOOTH-BTPROTO_HIDP", "slock-AF_BLUETOOTH-BTPROTO_AVDTP", "slock-AF_BLUETOOTH-BTPROTO_ISO", }; void bt_sock_reclassify_lock(struct sock *sk, int proto) { BUG_ON(!sk); BUG_ON(!sock_allow_reclassification(sk)); sock_lock_init_class_and_name(sk, bt_slock_key_strings[proto], &bt_slock_key[proto], bt_key_strings[proto], &bt_lock_key[proto]); } EXPORT_SYMBOL(bt_sock_reclassify_lock); int bt_sock_register(int proto, const struct net_proto_family *ops) { int err = 0; if (proto < 0 || proto >= BT_MAX_PROTO) return -EINVAL; write_lock(&bt_proto_lock); if (bt_proto[proto]) err = -EEXIST; else bt_proto[proto] = ops; write_unlock(&bt_proto_lock); return err; } EXPORT_SYMBOL(bt_sock_register); void bt_sock_unregister(int proto) { if (proto < 0 || proto >= BT_MAX_PROTO) return; write_lock(&bt_proto_lock); bt_proto[proto] = NULL; write_unlock(&bt_proto_lock); } EXPORT_SYMBOL(bt_sock_unregister); static int bt_sock_create(struct net *net, struct socket *sock, int proto, int kern) { int err; if (net != &init_net) return -EAFNOSUPPORT; if (proto < 0 || proto >= BT_MAX_PROTO) return -EINVAL; if (!bt_proto[proto]) request_module("bt-proto-%d",
proto); err = -EPROTONOSUPPORT; read_lock(&bt_proto_lock); if (bt_proto[proto] && try_module_get(bt_proto[proto]->owner)) { err = bt_proto[proto]->create(net, sock, proto, kern); if (!err) bt_sock_reclassify_lock(sock->sk, proto); module_put(bt_proto[proto]->owner); } read_unlock(&bt_proto_lock); return err; } struct sock *bt_sock_alloc(struct net *net, struct socket *sock, struct proto *prot, int proto, gfp_t prio, int kern) { struct sock *sk; sk = sk_alloc(net, PF_BLUETOOTH, prio, prot, kern); if (!sk) return NULL; sock_init_data(sock, sk); INIT_LIST_HEAD(&bt_sk(sk)->accept_q); sock_reset_flag(sk, SOCK_ZAPPED); sk->sk_protocol = proto; sk->sk_state = BT_OPEN; /* Init peer information so it can be properly monitored */ if (!kern) { spin_lock(&sk->sk_peer_lock); sk->sk_peer_pid = get_pid(task_tgid(current)); sk->sk_peer_cred = get_current_cred(); spin_unlock(&sk->sk_peer_lock); } return sk; } EXPORT_SYMBOL(bt_sock_alloc); void bt_sock_link(struct bt_sock_list *l, struct sock *sk) { write_lock(&l->lock); sk_add_node(sk, &l->head); write_unlock(&l->lock); } EXPORT_SYMBOL(bt_sock_link); void bt_sock_unlink(struct bt_sock_list *l, struct sock *sk) { write_lock(&l->lock); sk_del_node_init(sk); write_unlock(&l->lock); } EXPORT_SYMBOL(bt_sock_unlink); bool bt_sock_linked(struct bt_sock_list *l, struct sock *s) { struct sock *sk; if (!l || !s) return false; read_lock(&l->lock); sk_for_each(sk, &l->head) { if (s == sk) { read_unlock(&l->lock); return true; } } read_unlock(&l->lock); return false; } EXPORT_SYMBOL(bt_sock_linked); void bt_accept_enqueue(struct sock *parent, struct sock *sk, bool bh) { const struct cred *old_cred; struct pid *old_pid; BT_DBG("parent %p, sk %p", parent, sk); sock_hold(sk); if (bh) bh_lock_sock_nested(sk); else lock_sock_nested(sk, SINGLE_DEPTH_NESTING); list_add_tail(&bt_sk(sk)->accept_q, &bt_sk(parent)->accept_q); bt_sk(sk)->parent = parent; /* Copy credentials from parent since for incoming connections the * socket is allocated by the kernel. */ spin_lock(&sk->sk_peer_lock); old_pid = sk->sk_peer_pid; old_cred = sk->sk_peer_cred; sk->sk_peer_pid = get_pid(parent->sk_peer_pid); sk->sk_peer_cred = get_cred(parent->sk_peer_cred); spin_unlock(&sk->sk_peer_lock); put_pid(old_pid); put_cred(old_cred); if (bh) bh_unlock_sock(sk); else release_sock(sk); sk_acceptq_added(parent); } EXPORT_SYMBOL(bt_accept_enqueue); /* Calling function must hold the sk lock. * bt_sk(sk)->parent must be non-NULL meaning sk is in the parent list. */ void bt_accept_unlink(struct sock *sk) { BT_DBG("sk %p state %d", sk, sk->sk_state); list_del_init(&bt_sk(sk)->accept_q); sk_acceptq_removed(bt_sk(sk)->parent); bt_sk(sk)->parent = NULL; sock_put(sk); } EXPORT_SYMBOL(bt_accept_unlink); struct sock *bt_accept_dequeue(struct sock *parent, struct socket *newsock) { struct bt_sock *s, *n; struct sock *sk; BT_DBG("parent %p", parent); restart: list_for_each_entry_safe(s, n, &bt_sk(parent)->accept_q, accept_q) { sk = (struct sock *)s; /* Prevent early freeing of sk due to unlink and sock_kill */ sock_hold(sk); lock_sock(sk); /* Check sk has not already been unlinked via * bt_accept_unlink() due to serialisation caused by sk locking */ if (!bt_sk(sk)->parent) { BT_DBG("sk %p, already unlinked", sk); release_sock(sk); sock_put(sk); /* Restart the loop as sk is no longer in the list * and also avoid a potential infinite loop because * list_for_each_entry_safe() is not thread safe. 
*/ goto restart; } /* sk is safely in the parent list so reduce reference count */ sock_put(sk); /* FIXME: Is this check still needed */ if (sk->sk_state == BT_CLOSED) { bt_accept_unlink(sk); release_sock(sk); continue; } if (sk->sk_state == BT_CONNECTED || !newsock || test_bit(BT_SK_DEFER_SETUP, &bt_sk(parent)->flags)) { bt_accept_unlink(sk); if (newsock) sock_graft(sk, newsock); release_sock(sk); return sk; } release_sock(sk); } return NULL; } EXPORT_SYMBOL(bt_accept_dequeue); int bt_sock_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, int flags) { struct sock *sk = sock->sk; struct sk_buff *skb; size_t copied; size_t skblen; int err; BT_DBG("sock %p sk %p len %zu", sock, sk, len); if (flags & MSG_OOB) return -EOPNOTSUPP; skb = skb_recv_datagram(sk, flags, &err); if (!skb) { if (sk->sk_shutdown & RCV_SHUTDOWN) err = 0; return err; } skblen = skb->len; copied = skb->len; if (len < copied) { msg->msg_flags |= MSG_TRUNC; copied = len; } skb_reset_transport_header(skb); err = skb_copy_datagram_msg(skb, 0, msg, copied); if (err == 0) { sock_recv_cmsgs(msg, sk, skb); if (msg->msg_name && bt_sk(sk)->skb_msg_name) bt_sk(sk)->skb_msg_name(skb, msg->msg_name, &msg->msg_namelen); if (test_bit(BT_SK_PKT_STATUS, &bt_sk(sk)->flags)) { u8 pkt_status = hci_skb_pkt_status(skb); put_cmsg(msg, SOL_BLUETOOTH, BT_SCM_PKT_STATUS, sizeof(pkt_status), &pkt_status); } } skb_free_datagram(sk, skb); if (flags & MSG_TRUNC) copied = skblen; return err ? : copied; } EXPORT_SYMBOL(bt_sock_recvmsg); static long bt_sock_data_wait(struct sock *sk, long timeo) { DECLARE_WAITQUEUE(wait, current); add_wait_queue(sk_sleep(sk), &wait); for (;;) { set_current_state(TASK_INTERRUPTIBLE); if (!skb_queue_empty(&sk->sk_receive_queue)) break; if (sk->sk_err || (sk->sk_shutdown & RCV_SHUTDOWN)) break; if (signal_pending(current) || !timeo) break; sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk); release_sock(sk); timeo = schedule_timeout(timeo); lock_sock(sk); sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk); } __set_current_state(TASK_RUNNING); remove_wait_queue(sk_sleep(sk), &wait); return timeo; } int bt_sock_stream_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, int flags) { struct sock *sk = sock->sk; int err = 0; size_t target, copied = 0; long timeo; if (flags & MSG_OOB) return -EOPNOTSUPP; BT_DBG("sk %p size %zu", sk, size); lock_sock(sk); target = sock_rcvlowat(sk, flags & MSG_WAITALL, size); timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT); do { struct sk_buff *skb; int chunk; skb = skb_dequeue(&sk->sk_receive_queue); if (!skb) { if (copied >= target) break; err = sock_error(sk); if (err) break; if (sk->sk_shutdown & RCV_SHUTDOWN) break; err = -EAGAIN; if (!timeo) break; timeo = bt_sock_data_wait(sk, timeo); if (signal_pending(current)) { err = sock_intr_errno(timeo); goto out; } continue; } chunk = min_t(unsigned int, skb->len, size); if (skb_copy_datagram_msg(skb, 0, msg, chunk)) { skb_queue_head(&sk->sk_receive_queue, skb); if (!copied) copied = -EFAULT; break; } copied += chunk; size -= chunk; sock_recv_cmsgs(msg, sk, skb); if (!(flags & MSG_PEEK)) { int skb_len = skb_headlen(skb); if (chunk <= skb_len) { __skb_pull(skb, chunk); } else { struct sk_buff *frag; __skb_pull(skb, skb_len); chunk -= skb_len; skb_walk_frags(skb, frag) { if (chunk <= frag->len) { /* Pulling partial data */ skb->len -= chunk; skb->data_len -= chunk; __skb_pull(frag, chunk); break; } else if (frag->len) { /* Pulling all frag data */ chunk -= frag->len; skb->len -= frag->len; skb->data_len -= frag->len; __skb_pull(frag, frag->len); } } } if 
(skb->len) { skb_queue_head(&sk->sk_receive_queue, skb); break; } kfree_skb(skb); } else { /* put message back and return */ skb_queue_head(&sk->sk_receive_queue, skb); break; } } while (size); out: release_sock(sk); return copied ? : err; } EXPORT_SYMBOL(bt_sock_stream_recvmsg); static inline __poll_t bt_accept_poll(struct sock *parent) { struct bt_sock *s, *n; struct sock *sk; list_for_each_entry_safe(s, n, &bt_sk(parent)->accept_q, accept_q) { sk = (struct sock *)s; if (sk->sk_state == BT_CONNECTED || (test_bit(BT_SK_DEFER_SETUP, &bt_sk(parent)->flags) && sk->sk_state == BT_CONNECT2)) return EPOLLIN | EPOLLRDNORM; } return 0; } __poll_t bt_sock_poll(struct file *file, struct socket *sock, poll_table *wait) { struct sock *sk = sock->sk; __poll_t mask = 0; poll_wait(file, sk_sleep(sk), wait); if (sk->sk_state == BT_LISTEN) return bt_accept_poll(sk); if (sk->sk_err || !skb_queue_empty_lockless(&sk->sk_error_queue)) mask |= EPOLLERR | (sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? EPOLLPRI : 0); if (sk->sk_shutdown & RCV_SHUTDOWN) mask |= EPOLLRDHUP | EPOLLIN | EPOLLRDNORM; if (sk->sk_shutdown == SHUTDOWN_MASK) mask |= EPOLLHUP; if (!skb_queue_empty_lockless(&sk->sk_receive_queue)) mask |= EPOLLIN | EPOLLRDNORM; if (sk->sk_state == BT_CLOSED) mask |= EPOLLHUP; if (sk->sk_state == BT_CONNECT || sk->sk_state == BT_CONNECT2 || sk->sk_state == BT_CONFIG) return mask; if (!test_bit(BT_SK_SUSPEND, &bt_sk(sk)->flags) && sock_writeable(sk)) mask |= EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND; else sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk); return mask; } EXPORT_SYMBOL(bt_sock_poll); int bt_sock_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) { struct sock *sk = sock->sk; struct sk_buff *skb; long amount; int err; BT_DBG("sk %p cmd %x arg %lx", sk, cmd, arg); switch (cmd) { case TIOCOUTQ: if (sk->sk_state == BT_LISTEN) return -EINVAL; amount = sk->sk_sndbuf - sk_wmem_alloc_get(sk); if (amount < 0) amount = 0; err = put_user(amount, (int __user *)arg); break; case TIOCINQ: if (sk->sk_state == BT_LISTEN) return -EINVAL; spin_lock(&sk->sk_receive_queue.lock); skb = skb_peek(&sk->sk_receive_queue); amount = skb ? 
skb->len : 0; spin_unlock(&sk->sk_receive_queue.lock); err = put_user(amount, (int __user *)arg); break; default: err = -ENOIOCTLCMD; break; } return err; } EXPORT_SYMBOL(bt_sock_ioctl); /* This function expects the sk lock to be held when called */ int bt_sock_wait_state(struct sock *sk, int state, unsigned long timeo) { DECLARE_WAITQUEUE(wait, current); int err = 0; BT_DBG("sk %p", sk); add_wait_queue(sk_sleep(sk), &wait); set_current_state(TASK_INTERRUPTIBLE); while (sk->sk_state != state) { if (!timeo) { err = -EINPROGRESS; break; } if (signal_pending(current)) { err = sock_intr_errno(timeo); break; } release_sock(sk); timeo = schedule_timeout(timeo); lock_sock(sk); set_current_state(TASK_INTERRUPTIBLE); err = sock_error(sk); if (err) break; } __set_current_state(TASK_RUNNING); remove_wait_queue(sk_sleep(sk), &wait); return err; } EXPORT_SYMBOL(bt_sock_wait_state); /* This function expects the sk lock to be held when called */ int bt_sock_wait_ready(struct sock *sk, unsigned int msg_flags) { DECLARE_WAITQUEUE(wait, current); unsigned long timeo; int err = 0; BT_DBG("sk %p", sk); timeo = sock_sndtimeo(sk, !!(msg_flags & MSG_DONTWAIT)); add_wait_queue(sk_sleep(sk), &wait); set_current_state(TASK_INTERRUPTIBLE); while (test_bit(BT_SK_SUSPEND, &bt_sk(sk)->flags)) { if (!timeo) { err = -EAGAIN; break; } if (signal_pending(current)) { err = sock_intr_errno(timeo); break; } release_sock(sk); timeo = schedule_timeout(timeo); lock_sock(sk); set_current_state(TASK_INTERRUPTIBLE); err = sock_error(sk); if (err) break; } __set_current_state(TASK_RUNNING); remove_wait_queue(sk_sleep(sk), &wait); return err; } EXPORT_SYMBOL(bt_sock_wait_ready); #ifdef CONFIG_PROC_FS static void *bt_seq_start(struct seq_file *seq, loff_t *pos) __acquires(seq->private->l->lock) { struct bt_sock_list *l = pde_data(file_inode(seq->file)); read_lock(&l->lock); return seq_hlist_start_head(&l->head, *pos); } static void *bt_seq_next(struct seq_file *seq, void *v, loff_t *pos) { struct bt_sock_list *l = pde_data(file_inode(seq->file)); return seq_hlist_next(v, &l->head, pos); } static void bt_seq_stop(struct seq_file *seq, void *v) __releases(seq->private->l->lock) { struct bt_sock_list *l = pde_data(file_inode(seq->file)); read_unlock(&l->lock); } static int bt_seq_show(struct seq_file *seq, void *v) { struct bt_sock_list *l = pde_data(file_inode(seq->file)); if (v == SEQ_START_TOKEN) { seq_puts(seq, "sk RefCnt Rmem Wmem User Inode Parent"); if (l->custom_seq_show) { seq_putc(seq, ' '); l->custom_seq_show(seq, v); } seq_putc(seq, '\n'); } else { struct sock *sk = sk_entry(v); struct bt_sock *bt = bt_sk(sk); seq_printf(seq, "%pK %-6d %-6u %-6u %-6u %-6lu %-6lu", sk, refcount_read(&sk->sk_refcnt), sk_rmem_alloc_get(sk), sk_wmem_alloc_get(sk), from_kuid(seq_user_ns(seq), sock_i_uid(sk)), sock_i_ino(sk), bt->parent ? 
sock_i_ino(bt->parent) : 0LU); if (l->custom_seq_show) { seq_putc(seq, ' '); l->custom_seq_show(seq, v); } seq_putc(seq, '\n'); } return 0; } static const struct seq_operations bt_seq_ops = { .start = bt_seq_start, .next = bt_seq_next, .stop = bt_seq_stop, .show = bt_seq_show, }; int bt_procfs_init(struct net *net, const char *name, struct bt_sock_list *sk_list, int (*seq_show)(struct seq_file *, void *)) { sk_list->custom_seq_show = seq_show; if (!proc_create_seq_data(name, 0, net->proc_net, &bt_seq_ops, sk_list)) return -ENOMEM; return 0; } void bt_procfs_cleanup(struct net *net, const char *name) { remove_proc_entry(name, net->proc_net); } #else int bt_procfs_init(struct net *net, const char *name, struct bt_sock_list *sk_list, int (*seq_show)(struct seq_file *, void *)) { return 0; } void bt_procfs_cleanup(struct net *net, const char *name) { } #endif EXPORT_SYMBOL(bt_procfs_init); EXPORT_SYMBOL(bt_procfs_cleanup); static const struct net_proto_family bt_sock_family_ops = { .owner = THIS_MODULE, .family = PF_BLUETOOTH, .create = bt_sock_create, }; struct dentry *bt_debugfs; EXPORT_SYMBOL_GPL(bt_debugfs); #define VERSION __stringify(BT_SUBSYS_VERSION) "." \ __stringify(BT_SUBSYS_REVISION) static int __init bt_init(void) { int err; sock_skb_cb_check_size(sizeof(struct bt_skb_cb)); BT_INFO("Core ver %s", VERSION); err = bt_selftest(); if (err < 0) return err; bt_debugfs = debugfs_create_dir("bluetooth", NULL); bt_leds_init(); err = bt_sysfs_init(); if (err < 0) goto cleanup_led; err = sock_register(&bt_sock_family_ops); if (err) goto cleanup_sysfs; BT_INFO("HCI device and connection manager initialized"); err = hci_sock_init(); if (err) goto unregister_socket; err = l2cap_init(); if (err) goto cleanup_socket; err = sco_init(); if (err) goto cleanup_cap; err = mgmt_init(); if (err) goto cleanup_sco; return 0; cleanup_sco: sco_exit(); cleanup_cap: l2cap_exit(); cleanup_socket: hci_sock_cleanup(); unregister_socket: sock_unregister(PF_BLUETOOTH); cleanup_sysfs: bt_sysfs_cleanup(); cleanup_led: bt_leds_cleanup(); debugfs_remove_recursive(bt_debugfs); return err; } static void __exit bt_exit(void) { iso_exit(); mgmt_exit(); sco_exit(); l2cap_exit(); hci_sock_cleanup(); sock_unregister(PF_BLUETOOTH); bt_sysfs_cleanup(); bt_leds_cleanup(); debugfs_remove_recursive(bt_debugfs); } subsys_initcall(bt_init); module_exit(bt_exit); MODULE_AUTHOR("Marcel Holtmann <marcel@holtmann.org>"); MODULE_DESCRIPTION("Bluetooth Core ver " VERSION); MODULE_VERSION(VERSION); MODULE_LICENSE("GPL"); MODULE_ALIAS_NETPROTO(PF_BLUETOOTH); |
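For context, a hypothetical protocol module would hook into the registration functions above roughly as follows. This is an illustrative sketch, not a real in-tree module; the my_proto_* names are invented, and a real protocol would also allocate its sock with bt_sock_alloc() and fill in sock->ops:

#include <linux/module.h>
#include <net/bluetooth/bluetooth.h>

static int my_proto_create(struct net *net, struct socket *sock,
			   int protocol, int kern)
{
	return -ENOSYS;		/* placeholder: would call bt_sock_alloc() */
}

static const struct net_proto_family my_proto_family = {
	.family	= PF_BLUETOOTH,
	.owner	= THIS_MODULE,
	.create	= my_proto_create,
};

static int __init my_proto_init(void)
{
	/* BTPROTO_HIDP chosen purely as an example slot */
	return bt_sock_register(BTPROTO_HIDP, &my_proto_family);
}

static void __exit my_proto_exit(void)
{
	bt_sock_unregister(BTPROTO_HIDP);
}

module_init(my_proto_init);
module_exit(my_proto_exit);
MODULE_LICENSE("GPL");

With this in place, a socket(AF_BLUETOOTH, ..., BTPROTO_HIDP) call reaches bt_sock_create(), which takes bt_proto_lock, finds the registered ops, and dispatches to my_proto_create().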
/* SPDX-License-Identifier: GPL-2.0-or-later */ /* * include/linux/eventpoll.h ( Efficient event polling implementation ) * Copyright (C) 2001,...,2006 Davide Libenzi * * Davide Libenzi <davidel@xmailserver.org> */ #ifndef _LINUX_EVENTPOLL_H #define _LINUX_EVENTPOLL_H #include <uapi/linux/eventpoll.h> #include <uapi/linux/kcmp.h> /* Forward declarations to avoid compiler errors */ struct file; #ifdef CONFIG_EPOLL #ifdef CONFIG_KCMP struct file *get_epoll_tfile_raw_ptr(struct file *file, int tfd, unsigned long toff); #endif /* Used to release the epoll bits inside the "struct file" */ void eventpoll_release_file(struct file *file); /* * This is called from inside fs/file_table.c:__fput() to unlink files * from the eventpoll interface. We need to have this facility to correctly * clean up files that are closed without being removed from the eventpoll * interface. */ static inline void eventpoll_release(struct file *file) { /* * Fast check to avoid the get/release of the semaphore. Since * we're doing this outside the semaphore lock, it might return * false negatives, but we don't care. It'll help in 99.99% of cases * to avoid the semaphore lock. False positives simply cannot happen * because the file is on the way to being removed and nobody (but * eventpoll) still has a reference to this file. */ if (likely(!READ_ONCE(file->f_ep))) return; /* * The file is being closed while it is still linked to an epoll * descriptor. We need to handle this by correctly unlinking it * from its containers. */ eventpoll_release_file(file); } int do_epoll_ctl(int epfd, int op, int fd, struct epoll_event *epds, bool nonblock); /* Tells if the epoll_ctl(2) operation needs an event copy from userspace */ static inline int ep_op_has_event(int op) { return op != EPOLL_CTL_DEL; } #else static inline void eventpoll_release(struct file *file) {} #endif #if defined(CONFIG_ARM) && defined(CONFIG_OABI_COMPAT) /* ARM OABI has an incompatible struct layout and needs a special handler */ extern struct epoll_event __user * epoll_put_uevent(__poll_t revents, __u64 data, struct epoll_event __user *uevent); #else static inline struct epoll_event __user * epoll_put_uevent(__poll_t revents, __u64 data, struct epoll_event __user *uevent) { if (__put_user(revents, &uevent->events) || __put_user(data, &uevent->data)) return NULL; return uevent+1; } #endif #endif /* #ifndef _LINUX_EVENTPOLL_H */ |
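The header above is the kernel-internal half of epoll; from userspace the same machinery is driven through the epoll syscalls, ending up in the event-copy path built around epoll_put_uevent(). A minimal userspace sketch (error handling abbreviated, timeout arbitrary):

#include <sys/epoll.h>
#include <unistd.h>

/* Returns 1 if fd became readable within 1s, 0 on timeout, -1 on error. */
int wait_readable(int fd)
{
	struct epoll_event ev = { .events = EPOLLIN, .data.fd = fd };
	struct epoll_event out;
	int epfd = epoll_create1(0);
	int n;

	if (epfd < 0)
		return -1;
	if (epoll_ctl(epfd, EPOLL_CTL_ADD, fd, &ev) < 0) {
		close(epfd);
		return -1;
	}
	n = epoll_wait(epfd, &out, 1, 1000);
	close(epfd);
	return n;
}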
// SPDX-License-Identifier: GPL-2.0 /* * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc. * All Rights Reserved.
*/ #include "xfs.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" #include "xfs_log_format.h" #include "xfs_trans_resv.h" #include "xfs_mount.h" #include "xfs_trans.h" #include "xfs_buf_item.h" #include "xfs_trans_priv.h" #include "xfs_trace.h" /* * Check to see if a buffer matching the given parameters is already * a part of the given transaction. */ STATIC struct xfs_buf * xfs_trans_buf_item_match( struct xfs_trans *tp, struct xfs_buftarg *target, struct xfs_buf_map *map, int nmaps) { struct xfs_log_item *lip; struct xfs_buf_log_item *blip; int len = 0; int i; for (i = 0; i < nmaps; i++) len += map[i].bm_len; list_for_each_entry(lip, &tp->t_items, li_trans) { blip = (struct xfs_buf_log_item *)lip; if (blip->bli_item.li_type == XFS_LI_BUF && blip->bli_buf->b_target == target && xfs_buf_daddr(blip->bli_buf) == map[0].bm_bn && blip->bli_buf->b_length == len) { ASSERT(blip->bli_buf->b_map_count == nmaps); return blip->bli_buf; } } return NULL; } /* * Add the locked buffer to the transaction. * * The buffer must be locked, and it cannot be associated with any * transaction. * * If the buffer does not yet have a buf log item associated with it, * then allocate one for it. Then add the buf item to the transaction. */ STATIC void _xfs_trans_bjoin( struct xfs_trans *tp, struct xfs_buf *bp, int reset_recur) { struct xfs_buf_log_item *bip; ASSERT(bp->b_transp == NULL); /* * The xfs_buf_log_item pointer is stored in b_log_item. If * it doesn't have one yet, then allocate one and initialize it. * The checks to see if one is there are in xfs_buf_item_init(). */ xfs_buf_item_init(bp, tp->t_mountp); bip = bp->b_log_item; ASSERT(!(bip->bli_flags & XFS_BLI_STALE)); ASSERT(!(bip->__bli_format.blf_flags & XFS_BLF_CANCEL)); ASSERT(!(bip->bli_flags & XFS_BLI_LOGGED)); if (reset_recur) bip->bli_recur = 0; /* * Take a reference for this transaction on the buf item. */ atomic_inc(&bip->bli_refcount); /* * Attach the item to the transaction so we can find it in * xfs_trans_get_buf() and friends. */ xfs_trans_add_item(tp, &bip->bli_item); bp->b_transp = tp; } void xfs_trans_bjoin( struct xfs_trans *tp, struct xfs_buf *bp) { _xfs_trans_bjoin(tp, bp, 0); trace_xfs_trans_bjoin(bp->b_log_item); } /* * Get and lock the buffer for the caller if it is not already * locked within the given transaction. If it is already locked * within the transaction, just increment its lock recursion count * and return a pointer to it. * * If the transaction pointer is NULL, make this just a normal * get_buf() call. */ int xfs_trans_get_buf_map( struct xfs_trans *tp, struct xfs_buftarg *target, struct xfs_buf_map *map, int nmaps, xfs_buf_flags_t flags, struct xfs_buf **bpp) { struct xfs_buf *bp; struct xfs_buf_log_item *bip; int error; *bpp = NULL; if (!tp) return xfs_buf_get_map(target, map, nmaps, flags, bpp); /* * If we find the buffer in the cache with this transaction * pointer in its b_fsprivate2 field, then we know we already * have it locked. In this case we just increment the lock * recursion count and return the buffer to the caller. 
*/ bp = xfs_trans_buf_item_match(tp, target, map, nmaps); if (bp != NULL) { ASSERT(xfs_buf_islocked(bp)); if (xfs_is_shutdown(tp->t_mountp)) { xfs_buf_stale(bp); bp->b_flags |= XBF_DONE; } ASSERT(bp->b_transp == tp); bip = bp->b_log_item; ASSERT(bip != NULL); ASSERT(atomic_read(&bip->bli_refcount) > 0); bip->bli_recur++; trace_xfs_trans_get_buf_recur(bip); *bpp = bp; return 0; } error = xfs_buf_get_map(target, map, nmaps, flags, &bp); if (error) return error; ASSERT(!bp->b_error); _xfs_trans_bjoin(tp, bp, 1); trace_xfs_trans_get_buf(bp->b_log_item); *bpp = bp; return 0; } /* * Get and lock the superblock buffer for the given transaction. */ static struct xfs_buf * __xfs_trans_getsb( struct xfs_trans *tp, struct xfs_buf *bp) { /* * Just increment the lock recursion count if the buffer is already * attached to this transaction. */ if (bp->b_transp == tp) { struct xfs_buf_log_item *bip = bp->b_log_item; ASSERT(bip != NULL); ASSERT(atomic_read(&bip->bli_refcount) > 0); bip->bli_recur++; trace_xfs_trans_getsb_recur(bip); } else { xfs_buf_lock(bp); xfs_buf_hold(bp); _xfs_trans_bjoin(tp, bp, 1); trace_xfs_trans_getsb(bp->b_log_item); } return bp; } struct xfs_buf * xfs_trans_getsb( struct xfs_trans *tp) { return __xfs_trans_getsb(tp, tp->t_mountp->m_sb_bp); } struct xfs_buf * xfs_trans_getrtsb( struct xfs_trans *tp) { if (!tp->t_mountp->m_rtsb_bp) return NULL; return __xfs_trans_getsb(tp, tp->t_mountp->m_rtsb_bp); } /* * Get and lock the buffer for the caller if it is not already * locked within the given transaction. If it has not yet been * read in, read it from disk. If it is already locked * within the transaction and already read in, just increment its * lock recursion count and return a pointer to it. * * If the transaction pointer is NULL, make this just a normal * read_buf() call. */ int xfs_trans_read_buf_map( struct xfs_mount *mp, struct xfs_trans *tp, struct xfs_buftarg *target, struct xfs_buf_map *map, int nmaps, xfs_buf_flags_t flags, struct xfs_buf **bpp, const struct xfs_buf_ops *ops) { struct xfs_buf *bp = NULL; struct xfs_buf_log_item *bip; int error; *bpp = NULL; /* * If we find the buffer in the cache with this transaction * pointer in its b_fsprivate2 field, then we know we already * have it locked. If it is already read in we just increment * the lock recursion count and return the buffer to the caller. * If the buffer is not yet read in, then we read it in, increment * the lock recursion count, and return it to the caller. */ if (tp) bp = xfs_trans_buf_item_match(tp, target, map, nmaps); if (bp) { ASSERT(xfs_buf_islocked(bp)); ASSERT(bp->b_transp == tp); ASSERT(bp->b_log_item != NULL); ASSERT(!bp->b_error); ASSERT(bp->b_flags & XBF_DONE); /* * We never locked this buf ourselves, so we shouldn't * brelse it either. Just get out. */ if (xfs_is_shutdown(mp)) { trace_xfs_trans_read_buf_shut(bp, _RET_IP_); return -EIO; } /* * Check if the caller is trying to read a buffer that is * already attached to the transaction yet has no buffer ops * assigned. Ops are usually attached when the buffer is * attached to the transaction, or by the read caller in * special circumstances. That didn't happen, which is not * how this is supposed to go. * * If the buffer passes verification we'll let this go, but if * not we have to shut down. Let the transaction cleanup code * release this buffer when it kills the transaction.
*/ ASSERT(bp->b_ops != NULL); error = xfs_buf_reverify(bp, ops); if (error) { xfs_buf_ioerror_alert(bp, __return_address); if (tp->t_flags & XFS_TRANS_DIRTY) xfs_force_shutdown(tp->t_mountp, SHUTDOWN_META_IO_ERROR); /* bad CRC means corrupted metadata */ if (error == -EFSBADCRC) error = -EFSCORRUPTED; return error; } bip = bp->b_log_item; bip->bli_recur++; ASSERT(atomic_read(&bip->bli_refcount) > 0); trace_xfs_trans_read_buf_recur(bip); ASSERT(bp->b_ops != NULL || ops == NULL); *bpp = bp; return 0; } error = xfs_buf_read_map(target, map, nmaps, flags, &bp, ops, __return_address); switch (error) { case 0: break; default: if (tp && (tp->t_flags & XFS_TRANS_DIRTY)) xfs_force_shutdown(tp->t_mountp, SHUTDOWN_META_IO_ERROR); fallthrough; case -ENOMEM: case -EAGAIN: return error; } if (xfs_is_shutdown(mp)) { xfs_buf_relse(bp); trace_xfs_trans_read_buf_shut(bp, _RET_IP_); return -EIO; } if (tp) { _xfs_trans_bjoin(tp, bp, 1); trace_xfs_trans_read_buf(bp->b_log_item); } ASSERT(bp->b_ops != NULL || ops == NULL); *bpp = bp; return 0; } /* Has this buffer been dirtied by anyone? */ bool xfs_trans_buf_is_dirty( struct xfs_buf *bp) { struct xfs_buf_log_item *bip = bp->b_log_item; if (!bip) return false; ASSERT(bip->bli_item.li_type == XFS_LI_BUF); return test_bit(XFS_LI_DIRTY, &bip->bli_item.li_flags); } /* * Release a buffer previously joined to the transaction. If the buffer is * modified within this transaction, decrement the recursion count but do not * release the buffer even if the count goes to 0. If the buffer is not modified * within the transaction, decrement the recursion count and release the buffer * if the recursion count goes to 0. * * If the buffer is to be released and it was not already dirty before this * transaction began, then also free the buf_log_item associated with it. * * If the transaction pointer is NULL, this is a normal xfs_buf_relse() call. */ void xfs_trans_brelse( struct xfs_trans *tp, struct xfs_buf *bp) { struct xfs_buf_log_item *bip = bp->b_log_item; ASSERT(bp->b_transp == tp); if (!tp) { xfs_buf_relse(bp); return; } trace_xfs_trans_brelse(bip); ASSERT(bip->bli_item.li_type == XFS_LI_BUF); ASSERT(atomic_read(&bip->bli_refcount) > 0); /* * If the release is for a recursive lookup, then decrement the count * and return. */ if (bip->bli_recur > 0) { bip->bli_recur--; return; } /* * If the buffer is invalidated or dirty in this transaction, we can't * release it until we commit. */ if (test_bit(XFS_LI_DIRTY, &bip->bli_item.li_flags)) return; if (bip->bli_flags & XFS_BLI_STALE) return; /* * Unlink the log item from the transaction and clear the hold flag, if * set. We wouldn't want the next user of the buffer to get confused. */ ASSERT(!(bip->bli_flags & XFS_BLI_LOGGED)); xfs_trans_del_item(&bip->bli_item); bip->bli_flags &= ~XFS_BLI_HOLD; /* drop the reference to the bli */ xfs_buf_item_put(bip); bp->b_transp = NULL; xfs_buf_relse(bp); } /* * Forcibly detach a buffer previously joined to the transaction. The caller * will retain its locked reference to the buffer after this function returns. * The buffer must be completely clean and must not be held to the transaction. */ void xfs_trans_bdetach( struct xfs_trans *tp, struct xfs_buf *bp) { struct xfs_buf_log_item *bip = bp->b_log_item; ASSERT(tp != NULL); ASSERT(bp->b_transp == tp); ASSERT(bip->bli_item.li_type == XFS_LI_BUF); ASSERT(atomic_read(&bip->bli_refcount) > 0); trace_xfs_trans_bdetach(bip); /* * Reset the recursion count, since we're removing this buffer from the * transaction.
*/ bip->bli_recur = 0; /* * The buffer must be completely clean. Specifically, it had better * not be dirty, stale, logged, ordered, or held to the transaction. */ ASSERT(!test_bit(XFS_LI_DIRTY, &bip->bli_item.li_flags)); ASSERT(!(bip->bli_flags & XFS_BLI_DIRTY)); ASSERT(!(bip->bli_flags & XFS_BLI_HOLD)); ASSERT(!(bip->bli_flags & XFS_BLI_LOGGED)); ASSERT(!(bip->bli_flags & XFS_BLI_ORDERED)); ASSERT(!(bip->bli_flags & XFS_BLI_STALE)); /* Unlink the log item from the transaction and drop the log item. */ xfs_trans_del_item(&bip->bli_item); xfs_buf_item_put(bip); bp->b_transp = NULL; } /* * Mark the buffer as not needing to be unlocked when the buf item's * iop_committing() routine is called. The buffer must already be locked * and associated with the given transaction. */ /* ARGSUSED */ void xfs_trans_bhold( xfs_trans_t *tp, struct xfs_buf *bp) { struct xfs_buf_log_item *bip = bp->b_log_item; ASSERT(bp->b_transp == tp); ASSERT(bip != NULL); ASSERT(!(bip->bli_flags & XFS_BLI_STALE)); ASSERT(!(bip->__bli_format.blf_flags & XFS_BLF_CANCEL)); ASSERT(atomic_read(&bip->bli_refcount) > 0); bip->bli_flags |= XFS_BLI_HOLD; trace_xfs_trans_bhold(bip); } /* * Cancel the previous buffer hold request made on this buffer * for this transaction. */ void xfs_trans_bhold_release( xfs_trans_t *tp, struct xfs_buf *bp) { struct xfs_buf_log_item *bip = bp->b_log_item; ASSERT(bp->b_transp == tp); ASSERT(bip != NULL); ASSERT(!(bip->bli_flags & XFS_BLI_STALE)); ASSERT(!(bip->__bli_format.blf_flags & XFS_BLF_CANCEL)); ASSERT(atomic_read(&bip->bli_refcount) > 0); ASSERT(bip->bli_flags & XFS_BLI_HOLD); bip->bli_flags &= ~XFS_BLI_HOLD; trace_xfs_trans_bhold_release(bip); } /* * Mark a buffer dirty in the transaction. */ void xfs_trans_dirty_buf( struct xfs_trans *tp, struct xfs_buf *bp) { struct xfs_buf_log_item *bip = bp->b_log_item; ASSERT(bp->b_transp == tp); ASSERT(bip != NULL); /* * Mark the buffer as needing to be written out eventually, * and set its iodone function to remove the buffer's buf log * item from the AIL and free it when the buffer is flushed * to disk. */ bp->b_flags |= XBF_DONE; ASSERT(atomic_read(&bip->bli_refcount) > 0); /* * If we invalidated the buffer within this transaction, then * cancel the invalidation now that we're dirtying the buffer * again. There are no races with the code in xfs_buf_item_unpin(), * because we have a reference to the buffer this entire time. */ if (bip->bli_flags & XFS_BLI_STALE) { bip->bli_flags &= ~XFS_BLI_STALE; ASSERT(bp->b_flags & XBF_STALE); bp->b_flags &= ~XBF_STALE; bip->__bli_format.blf_flags &= ~XFS_BLF_CANCEL; } bip->bli_flags |= XFS_BLI_DIRTY | XFS_BLI_LOGGED; tp->t_flags |= XFS_TRANS_DIRTY; set_bit(XFS_LI_DIRTY, &bip->bli_item.li_flags); } /* * This is called to mark bytes first through last inclusive of the given * buffer as needing to be logged when the transaction is committed. * The buffer must already be associated with the given transaction. * * First and last are numbers relative to the beginning of this buffer, * so the first byte in the buffer is numbered 0 regardless of the * value of b_blkno. */ void xfs_trans_log_buf( struct xfs_trans *tp, struct xfs_buf *bp, uint first, uint last) { struct xfs_buf_log_item *bip = bp->b_log_item; ASSERT(first <= last && last < BBTOB(bp->b_length)); ASSERT(!(bip->bli_flags & XFS_BLI_ORDERED)); xfs_trans_dirty_buf(tp, bp); trace_xfs_trans_log_buf(bip); xfs_buf_item_log(bip, first, last); } /* * Invalidate a buffer that is being used within a transaction. 
* * Typically this is because the blocks in the buffer are being freed, so we * need to prevent it from being written out when we're done. Allowing it * to be written again might overwrite data in the free blocks if they are * reallocated to a file. * * We prevent the buffer from being written out by marking it stale. We can't * get rid of the buf log item at this point because the buffer may still be * pinned by another transaction. If that is the case, then we'll wait until * the buffer is committed to disk for the last time (we can tell by the ref * count) and free it in xfs_buf_item_unpin(). Until that happens we will * keep the buffer locked so that the buffer and buf log item are not reused. * * We also set the XFS_BLF_CANCEL flag in the buf log format structure and log * the buf item. This will be used at recovery time to determine that copies * of the buffer in the log before this should not be replayed. * * We mark the item descriptor and the transaction dirty so that we'll hold * the buffer until after the commit. * * Since we're invalidating the buffer, we also clear the state about which * parts of the buffer have been logged. We also clear the flag indicating * that this is an inode buffer since the data in the buffer will no longer * be valid. * * We set the stale bit in the buffer as well since we're getting rid of it. */ void xfs_trans_binval( xfs_trans_t *tp, struct xfs_buf *bp) { struct xfs_buf_log_item *bip = bp->b_log_item; int i; ASSERT(bp->b_transp == tp); ASSERT(bip != NULL); ASSERT(atomic_read(&bip->bli_refcount) > 0); trace_xfs_trans_binval(bip); if (bip->bli_flags & XFS_BLI_STALE) { /* * If the buffer is already invalidated, then * just return. */ ASSERT(bp->b_flags & XBF_STALE); ASSERT(!(bip->bli_flags & (XFS_BLI_LOGGED | XFS_BLI_DIRTY))); ASSERT(!(bip->__bli_format.blf_flags & XFS_BLF_INODE_BUF)); ASSERT(!(bip->__bli_format.blf_flags & XFS_BLFT_MASK)); ASSERT(bip->__bli_format.blf_flags & XFS_BLF_CANCEL); ASSERT(test_bit(XFS_LI_DIRTY, &bip->bli_item.li_flags)); ASSERT(tp->t_flags & XFS_TRANS_DIRTY); return; } xfs_buf_stale(bp); bip->bli_flags |= XFS_BLI_STALE; bip->bli_flags &= ~(XFS_BLI_INODE_BUF | XFS_BLI_LOGGED | XFS_BLI_DIRTY); bip->__bli_format.blf_flags &= ~XFS_BLF_INODE_BUF; bip->__bli_format.blf_flags |= XFS_BLF_CANCEL; bip->__bli_format.blf_flags &= ~XFS_BLFT_MASK; for (i = 0; i < bip->bli_format_count; i++) { memset(bip->bli_formats[i].blf_data_map, 0, (bip->bli_formats[i].blf_map_size * sizeof(uint))); } set_bit(XFS_LI_DIRTY, &bip->bli_item.li_flags); tp->t_flags |= XFS_TRANS_DIRTY; } /* * This call is used to indicate that the buffer contains on-disk inodes which * must be handled specially during recovery. They require special handling * because only the di_next_unlinked from the inodes in the buffer should be * recovered. The rest of the data in the buffer is logged via the inodes * themselves. * * All we do is set the XFS_BLI_INODE_BUF flag in the items flags so it can be * transferred to the buffer's log format structure so that we'll know what to * do at recovery time. */ void xfs_trans_inode_buf( xfs_trans_t *tp, struct xfs_buf *bp) { struct xfs_buf_log_item *bip = bp->b_log_item; ASSERT(bp->b_transp == tp); ASSERT(bip != NULL); ASSERT(atomic_read(&bip->bli_refcount) > 0); bip->bli_flags |= XFS_BLI_INODE_BUF; bp->b_iodone = xfs_buf_inode_iodone; xfs_trans_buf_set_type(tp, bp, XFS_BLFT_DINO_BUF); } /* * This call is used to indicate that the buffer is going to * be staled and was an inode buffer. 
This means it gets * special processing during unpin - where any inodes * associated with the buffer should be removed from the AIL. * There is also special processing during recovery: * any replay of the inodes in the buffer needs to be * prevented, as the buffer may have been reused. */ void xfs_trans_stale_inode_buf( xfs_trans_t *tp, struct xfs_buf *bp) { struct xfs_buf_log_item *bip = bp->b_log_item; ASSERT(bp->b_transp == tp); ASSERT(bip != NULL); ASSERT(atomic_read(&bip->bli_refcount) > 0); bip->bli_flags |= XFS_BLI_STALE_INODE; bp->b_iodone = xfs_buf_inode_iodone; xfs_trans_buf_set_type(tp, bp, XFS_BLFT_DINO_BUF); } /* * Mark the buffer as being one which contains newly allocated * inodes. We need to make sure that even if this buffer is * relogged as an 'inode buf' we still recover all of the inode * images in the face of a crash. This works in coordination with * xfs_buf_item_committed() to ensure that the buffer remains in the * AIL at its original location even after it has been relogged. */ /* ARGSUSED */ void xfs_trans_inode_alloc_buf( xfs_trans_t *tp, struct xfs_buf *bp) { struct xfs_buf_log_item *bip = bp->b_log_item; ASSERT(bp->b_transp == tp); ASSERT(bip != NULL); ASSERT(atomic_read(&bip->bli_refcount) > 0); bip->bli_flags |= XFS_BLI_INODE_ALLOC_BUF; bp->b_iodone = xfs_buf_inode_iodone; xfs_trans_buf_set_type(tp, bp, XFS_BLFT_DINO_BUF); } /* * Mark the buffer as ordered for this transaction. This means that the contents * of the buffer are not recorded in the transaction but it is tracked in the * AIL as though it was. This allows us to record logical changes in * transactions rather than the physical changes we make to the buffer without * changing writeback ordering constraints of metadata buffers. */ bool xfs_trans_ordered_buf( struct xfs_trans *tp, struct xfs_buf *bp) { struct xfs_buf_log_item *bip = bp->b_log_item; ASSERT(bp->b_transp == tp); ASSERT(bip != NULL); ASSERT(atomic_read(&bip->bli_refcount) > 0); if (xfs_buf_item_dirty_format(bip)) return false; bip->bli_flags |= XFS_BLI_ORDERED; trace_xfs_buf_item_ordered(bip); /* * We don't log a dirty range of an ordered buffer but it still needs * to be marked dirty and that it has been logged. */ xfs_trans_dirty_buf(tp, bp); return true; } /* * Set the type of the buffer for log recovery so that recovery can correctly * identify the buffer and hence attach the correct buffer ops to it after * replay. */ void xfs_trans_buf_set_type( struct xfs_trans *tp, struct xfs_buf *bp, enum xfs_blft type) { struct xfs_buf_log_item *bip = bp->b_log_item; if (!tp) return; ASSERT(bp->b_transp == tp); ASSERT(bip != NULL); ASSERT(atomic_read(&bip->bli_refcount) > 0); xfs_blft_to_flags(&bip->__bli_format, type); } void xfs_trans_buf_copy_type( struct xfs_buf *dst_bp, struct xfs_buf *src_bp) { struct xfs_buf_log_item *sbip = src_bp->b_log_item; struct xfs_buf_log_item *dbip = dst_bp->b_log_item; enum xfs_blft type; type = xfs_blft_from_flags(&sbip->__bli_format); xfs_blft_to_flags(&dbip->__bli_format, type); } /* * Similar to xfs_trans_inode_buf(), this marks the buffer as a cluster of * dquots. However, unlike in inode buffer recovery, dquot buffers get * recovered in their entirety. (Hence, no XFS_BLI_DQUOT_ALLOC_BUF flag). * The only thing that makes dquot buffers different from regular * buffers is that we must not replay dquot bufs when recovering * if a _corresponding_ quotaoff has happened. We also have to distinguish * between usr dquot bufs and grp dquot bufs, because usr and grp quotas * can be turned off independently.
*/ /* ARGSUSED */ void xfs_trans_dquot_buf( xfs_trans_t *tp, struct xfs_buf *bp, uint type) { struct xfs_buf_log_item *bip = bp->b_log_item; ASSERT(type == XFS_BLF_UDQUOT_BUF || type == XFS_BLF_PDQUOT_BUF || type == XFS_BLF_GDQUOT_BUF); bip->__bli_format.blf_flags |= type; switch (type) { case XFS_BLF_UDQUOT_BUF: type = XFS_BLFT_UDQUOT_BUF; break; case XFS_BLF_PDQUOT_BUF: type = XFS_BLFT_PDQUOT_BUF; break; case XFS_BLF_GDQUOT_BUF: type = XFS_BLFT_GDQUOT_BUF; break; default: type = XFS_BLFT_UNKNOWN_BUF; break; } bp->b_iodone = xfs_buf_dquot_iodone; xfs_trans_buf_set_type(tp, bp, type); } |
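A recurring pattern in the file above is the bli_recur counter: the first join of a buffer to a transaction takes the real buffer lock, and later lookups within the same transaction only bump a recursion count that xfs_trans_brelse() unwinds before the lock is finally dropped. A self-contained userspace model of just that counting discipline (illustrative only, not XFS API; the struct and function names are invented):

#include <assert.h>
#include <stdio.h>

struct buf {
	int locked;	/* stands in for the buffer lock */
	int recur;	/* stands in for bip->bli_recur */
};

static void trans_get_buf(struct buf *bp)
{
	if (bp->locked)
		bp->recur++;	/* recursive lookup: count only */
	else
		bp->locked = 1;	/* first join: take the lock */
}

static void trans_brelse(struct buf *bp)
{
	assert(bp->locked);
	if (bp->recur > 0)
		bp->recur--;	/* undo a recursive lookup */
	else
		bp->locked = 0;	/* last release drops the lock */
}

int main(void)
{
	struct buf b = { 0, 0 };

	trans_get_buf(&b);
	trans_get_buf(&b);
	trans_brelse(&b);
	printf("locked=%d recur=%d\n", b.locked, b.recur);	/* locked=1 recur=0 */
	trans_brelse(&b);
	printf("locked=%d recur=%d\n", b.locked, b.recur);	/* locked=0 recur=0 */
	return 0;
}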
// SPDX-License-Identifier: GPL-2.0 #include <linux/netdevice.h> #include <linux/proc_fs.h> #include <linux/seq_file.h> #include <net/wext.h> #include <net/hotdata.h> #include "dev.h" static void *dev_seq_from_index(struct seq_file *seq, loff_t *pos) { unsigned long ifindex = *pos; struct net_device *dev; for_each_netdev_dump(seq_file_net(seq), dev, ifindex) { *pos = dev->ifindex; return dev; } return NULL; } static void *dev_seq_start(struct seq_file *seq, loff_t *pos) __acquires(RCU) { rcu_read_lock(); if (!*pos) return SEQ_START_TOKEN; return dev_seq_from_index(seq, pos); } static void *dev_seq_next(struct seq_file *seq, void *v, loff_t *pos) { ++*pos; return dev_seq_from_index(seq, pos); } static void dev_seq_stop(struct seq_file *seq, void *v) __releases(RCU) { rcu_read_unlock(); } static void dev_seq_printf_stats(struct seq_file *seq, struct net_device *dev) { struct rtnl_link_stats64 temp; const struct rtnl_link_stats64 *stats = dev_get_stats(dev, &temp); seq_printf(seq, "%6s: %7llu %7llu %4llu %4llu %4llu %5llu %10llu %9llu " "%8llu %7llu %4llu %4llu %4llu %5llu %7llu %10llu\n", dev->name, stats->rx_bytes, stats->rx_packets, stats->rx_errors, stats->rx_dropped + stats->rx_missed_errors, stats->rx_fifo_errors, stats->rx_length_errors + stats->rx_over_errors + stats->rx_crc_errors + stats->rx_frame_errors, stats->rx_compressed, stats->multicast, stats->tx_bytes, stats->tx_packets, stats->tx_errors, stats->tx_dropped, stats->tx_fifo_errors, stats->collisions, stats->tx_carrier_errors + stats->tx_aborted_errors + stats->tx_window_errors + stats->tx_heartbeat_errors, stats->tx_compressed); } /* * Called from the PROCfs module.
This now uses the new arbitrary sized * /proc/net interface to create /proc/net/dev */ static int dev_seq_show(struct seq_file *seq, void *v) { if (v == SEQ_START_TOKEN) seq_puts(seq, "Inter-| Receive " " | Transmit\n" " face |bytes packets errs drop fifo frame " "compressed multicast|bytes packets errs " "drop fifo colls carrier compressed\n"); else dev_seq_printf_stats(seq, v); return 0; } static u32 softnet_input_pkt_queue_len(struct softnet_data *sd) { return skb_queue_len_lockless(&sd->input_pkt_queue); } static u32 softnet_process_queue_len(struct softnet_data *sd) { return skb_queue_len_lockless(&sd->process_queue); } static struct softnet_data *softnet_get_online(loff_t *pos) { struct softnet_data *sd = NULL; while (*pos < nr_cpu_ids) if (cpu_online(*pos)) { sd = &per_cpu(softnet_data, *pos); break; } else ++*pos; return sd; } static void *softnet_seq_start(struct seq_file *seq, loff_t *pos) { return softnet_get_online(pos); } static void *softnet_seq_next(struct seq_file *seq, void *v, loff_t *pos) { ++*pos; return softnet_get_online(pos); } static void softnet_seq_stop(struct seq_file *seq, void *v) { } static int softnet_seq_show(struct seq_file *seq, void *v) { struct softnet_data *sd = v; u32 input_qlen = softnet_input_pkt_queue_len(sd); u32 process_qlen = softnet_process_queue_len(sd); unsigned int flow_limit_count = 0; #ifdef CONFIG_NET_FLOW_LIMIT struct sd_flow_limit *fl; rcu_read_lock(); fl = rcu_dereference(sd->flow_limit); if (fl) flow_limit_count = fl->count; rcu_read_unlock(); #endif /* the index is the CPU id owning this sd. Since offline CPUs are not * displayed, it would otherwise not be trivial for user-space to map * the data to a specific CPU */ seq_printf(seq, "%08x %08x %08x %08x %08x %08x %08x %08x %08x %08x %08x %08x %08x " "%08x %08x\n", sd->processed, atomic_read(&sd->dropped), sd->time_squeeze, 0, 0, 0, 0, 0, /* was fastroute */ 0, /* was cpu_collision */ sd->received_rps, flow_limit_count, input_qlen + process_qlen, (int)seq->index, input_qlen, process_qlen); return 0; } static const struct seq_operations dev_seq_ops = { .start = dev_seq_start, .next = dev_seq_next, .stop = dev_seq_stop, .show = dev_seq_show, }; static const struct seq_operations softnet_seq_ops = { .start = softnet_seq_start, .next = softnet_seq_next, .stop = softnet_seq_stop, .show = softnet_seq_show, }; static void *ptype_get_idx(struct seq_file *seq, loff_t pos) { struct list_head *ptype_list = NULL; struct packet_type *pt = NULL; struct net_device *dev; loff_t i = 0; int t; for_each_netdev_rcu(seq_file_net(seq), dev) { ptype_list = &dev->ptype_all; list_for_each_entry_rcu(pt, ptype_list, list) { if (i == pos) return pt; ++i; } } list_for_each_entry_rcu(pt, &net_hotdata.ptype_all, list) { if (i == pos) return pt; ++i; } for (t = 0; t < PTYPE_HASH_SIZE; t++) { list_for_each_entry_rcu(pt, &ptype_base[t], list) { if (i == pos) return pt; ++i; } } return NULL; } static void *ptype_seq_start(struct seq_file *seq, loff_t *pos) __acquires(RCU) { rcu_read_lock(); return *pos ?
ptype_get_idx(seq, *pos - 1) : SEQ_START_TOKEN; } static void *ptype_seq_next(struct seq_file *seq, void *v, loff_t *pos) { struct net_device *dev; struct packet_type *pt; struct list_head *nxt; int hash; ++*pos; if (v == SEQ_START_TOKEN) return ptype_get_idx(seq, 0); pt = v; nxt = pt->list.next; if (pt->dev) { if (nxt != &pt->dev->ptype_all) goto found; dev = pt->dev; for_each_netdev_continue_rcu(seq_file_net(seq), dev) { if (!list_empty(&dev->ptype_all)) { nxt = dev->ptype_all.next; goto found; } } nxt = net_hotdata.ptype_all.next; goto ptype_all; } if (pt->type == htons(ETH_P_ALL)) { ptype_all: if (nxt != &net_hotdata.ptype_all) goto found; hash = 0; nxt = ptype_base[0].next; } else hash = ntohs(pt->type) & PTYPE_HASH_MASK; while (nxt == &ptype_base[hash]) { if (++hash >= PTYPE_HASH_SIZE) return NULL; nxt = ptype_base[hash].next; } found: return list_entry(nxt, struct packet_type, list); } static void ptype_seq_stop(struct seq_file *seq, void *v) __releases(RCU) { rcu_read_unlock(); } static int ptype_seq_show(struct seq_file *seq, void *v) { struct packet_type *pt = v; if (v == SEQ_START_TOKEN) seq_puts(seq, "Type Device Function\n"); else if ((!pt->af_packet_net || net_eq(pt->af_packet_net, seq_file_net(seq))) && (!pt->dev || net_eq(dev_net(pt->dev), seq_file_net(seq)))) { if (pt->type == htons(ETH_P_ALL)) seq_puts(seq, "ALL "); else seq_printf(seq, "%04x", ntohs(pt->type)); seq_printf(seq, " %-8s %ps\n", pt->dev ? pt->dev->name : "", pt->func); } return 0; } static const struct seq_operations ptype_seq_ops = { .start = ptype_seq_start, .next = ptype_seq_next, .stop = ptype_seq_stop, .show = ptype_seq_show, }; static int __net_init dev_proc_net_init(struct net *net) { int rc = -ENOMEM; if (!proc_create_net("dev", 0444, net->proc_net, &dev_seq_ops, sizeof(struct seq_net_private))) goto out; if (!proc_create_seq("softnet_stat", 0444, net->proc_net, &softnet_seq_ops)) goto out_dev; if (!proc_create_net("ptype", 0444, net->proc_net, &ptype_seq_ops, sizeof(struct seq_net_private))) goto out_softnet; if (wext_proc_init(net)) goto out_ptype; rc = 0; out: return rc; out_ptype: remove_proc_entry("ptype", net->proc_net); out_softnet: remove_proc_entry("softnet_stat", net->proc_net); out_dev: remove_proc_entry("dev", net->proc_net); goto out; } static void __net_exit dev_proc_net_exit(struct net *net) { wext_proc_exit(net); remove_proc_entry("ptype", net->proc_net); remove_proc_entry("softnet_stat", net->proc_net); remove_proc_entry("dev", net->proc_net); } static struct pernet_operations __net_initdata dev_proc_ops = { .init = dev_proc_net_init, .exit = dev_proc_net_exit, }; static int dev_mc_seq_show(struct seq_file *seq, void *v) { struct netdev_hw_addr *ha; struct net_device *dev = v; if (v == SEQ_START_TOKEN) return 0; netif_addr_lock_bh(dev); netdev_for_each_mc_addr(ha, dev) { seq_printf(seq, "%-4d %-15s %-5d %-5d %*phN\n", dev->ifindex, dev->name, ha->refcount, ha->global_use, (int)dev->addr_len, ha->addr); } netif_addr_unlock_bh(dev); return 0; } static const struct seq_operations dev_mc_seq_ops = { .start = dev_seq_start, .next = dev_seq_next, .stop = dev_seq_stop, .show = dev_mc_seq_show, }; static int __net_init dev_mc_net_init(struct net *net) { if (!proc_create_net("dev_mcast", 0, net->proc_net, &dev_mc_seq_ops, sizeof(struct seq_net_private))) return -ENOMEM; return 0; } static void __net_exit dev_mc_net_exit(struct net *net) { remove_proc_entry("dev_mcast", net->proc_net); } static struct pernet_operations __net_initdata dev_mc_net_ops = { .init = dev_mc_net_init, .exit = 
dev_mc_net_exit,
};

int __init dev_proc_init(void)
{
	int ret = register_pernet_subsys(&dev_proc_ops);

	if (!ret)
		return register_pernet_subsys(&dev_mc_net_ops);

	return ret;
}
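The softnet_seq_show() format above is consumed by tooling that has to know the column order by heart. As a sanity check, here is a minimal userspace sketch (written for illustration, not part of the kernel sources) that parses the fifteen hex columns of /proc/net/softnet_stat and picks out the CPU-id column emitted via (int)seq->index:

#include <stdio.h>

int main(void)
{
	FILE *f = fopen("/proc/net/softnet_stat", "r");
	unsigned int v[15];

	if (!f) {
		perror("fopen");
		return 1;
	}
	while (fscanf(f, "%x %x %x %x %x %x %x %x %x %x %x %x %x %x %x",
		      &v[0], &v[1], &v[2], &v[3], &v[4], &v[5], &v[6], &v[7],
		      &v[8], &v[9], &v[10], &v[11], &v[12], &v[13],
		      &v[14]) == 15) {
		/* Per the seq_printf() order above: v[0]=processed,
		 * v[1]=dropped, v[2]=time_squeeze, v[9]=received_rps,
		 * v[10]=flow_limit_count, v[11]=total queue length,
		 * v[12]=cpu id, v[13]=input_qlen, v[14]=process_qlen.
		 */
		printf("cpu %u: processed=%u dropped=%u squeezed=%u\n",
		       v[12], v[0], v[1], v[2]);
	}
	fclose(f);
	return 0;
}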
/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
 * Copyright (C) 2002, 2004, 2005 Oracle.  All rights reserved.
 */

#ifndef OCFS2_AOPS_H
#define OCFS2_AOPS_H

#include <linux/fs.h>

handle_t *ocfs2_start_walk_page_trans(struct inode *inode,
				      struct page *page,
				      unsigned from,
				      unsigned to);

int ocfs2_map_page_blocks(struct page *page, u64 *p_blkno,
			  struct inode *inode, unsigned int from,
			  unsigned int to, int new);

void ocfs2_unlock_and_free_pages(struct page **pages, int num_pages);

int walk_page_buffers(handle_t *handle,
		      struct buffer_head *head,
		      unsigned from,
		      unsigned to,
		      int *partial,
		      int (*fn)(handle_t *handle,
				struct buffer_head *bh));

int ocfs2_write_end_nolock(struct address_space *mapping, loff_t pos,
			   unsigned len, unsigned copied, void *fsdata);

typedef enum {
	OCFS2_WRITE_BUFFER = 0,
	OCFS2_WRITE_DIRECT,
	OCFS2_WRITE_MMAP,
} ocfs2_write_type_t;

int ocfs2_write_begin_nolock(struct address_space *mapping,
			     loff_t pos, unsigned len, ocfs2_write_type_t type,
			     struct folio **foliop, void **fsdata,
			     struct buffer_head *di_bh, struct page *mmap_page);

int ocfs2_read_inline_data(struct inode *inode, struct page *page,
			   struct buffer_head *di_bh);
int ocfs2_size_fits_inline_data(struct buffer_head *di_bh, u64 new_size);

int ocfs2_get_block(struct inode *inode, sector_t iblock,
		    struct buffer_head *bh_result, int create);

/* all ocfs2_dio_end_io()'s fault */
#define ocfs2_iocb_is_rw_locked(iocb) \
	test_bit(0, (unsigned long *)&iocb->private)

static inline void ocfs2_iocb_set_rw_locked(struct kiocb *iocb, int level)
{
	set_bit(0, (unsigned long *)&iocb->private);
	if (level)
		set_bit(1, (unsigned long *)&iocb->private);
	else
		clear_bit(1, (unsigned long *)&iocb->private);
}

/*
 * Using a named enum representing lock types in terms of #N bit stored in
 * iocb->private, which is going to be used for communication between
 * ocfs2_dio_end_io() and ocfs2_file_write/read_iter().
 */
enum ocfs2_iocb_lock_bits {
	OCFS2_IOCB_RW_LOCK = 0,
	OCFS2_IOCB_RW_LOCK_LEVEL,
	OCFS2_IOCB_NUM_LOCKS
};

#define ocfs2_iocb_init_rw_locked(iocb) \
	(iocb->private = NULL)
#define ocfs2_iocb_clear_rw_locked(iocb) \
	clear_bit(OCFS2_IOCB_RW_LOCK, (unsigned long *)&iocb->private)
#define ocfs2_iocb_rw_locked_level(iocb) \
	test_bit(OCFS2_IOCB_RW_LOCK_LEVEL, (unsigned long *)&iocb->private)

#endif /* OCFS2_AOPS_H */
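The helpers above pack two lock-state bits into the pointer-sized iocb->private field. A hedged userspace model of that technique follows; the *_ul helpers are hypothetical stand-ins for the kernel's atomic set_bit()/clear_bit()/test_bit(), and a plain unsigned long models the iocb->private slot:

#include <stdio.h>

enum { RW_LOCK_BIT = 0, RW_LOCK_LEVEL_BIT = 1 };

/* Non-atomic illustrations of the kernel bitops used above. */
static void set_bit_ul(int nr, unsigned long *word)
{
	*word |= 1UL << nr;
}

static void clear_bit_ul(int nr, unsigned long *word)
{
	*word &= ~(1UL << nr);
}

static int test_bit_ul(int nr, const unsigned long *word)
{
	return (*word >> nr) & 1;
}

int main(void)
{
	unsigned long private = 0;	/* models iocb->private */

	/* Like ocfs2_iocb_set_rw_locked(iocb, 1): locked, write level. */
	set_bit_ul(RW_LOCK_BIT, &private);
	set_bit_ul(RW_LOCK_LEVEL_BIT, &private);
	printf("locked=%d level=%d\n",
	       test_bit_ul(RW_LOCK_BIT, &private),
	       test_bit_ul(RW_LOCK_LEVEL_BIT, &private));

	/* Like ocfs2_iocb_clear_rw_locked(iocb). */
	clear_bit_ul(RW_LOCK_BIT, &private);
	printf("locked=%d\n", test_bit_ul(RW_LOCK_BIT, &private));
	return 0;
}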
// SPDX-License-Identifier: GPL-2.0+
/*
 * Helpers for controlling modem lines via GPIO
 *
 * Copyright (C) 2014 Paratronic S.A.
 */

#include <linux/err.h>
#include <linux/device.h>
#include <linux/irq.h>
#include <linux/gpio/consumer.h>
#include <linux/termios.h>
#include <linux/serial_core.h>
#include <linux/module.h>
#include <linux/property.h>

#include "serial_mctrl_gpio.h"

struct mctrl_gpios {
	struct uart_port *port;
	struct gpio_desc *gpio[UART_GPIO_MAX];
	int irq[UART_GPIO_MAX];
	unsigned int mctrl_prev;
	bool mctrl_on;
};

static const struct {
	const char *name;
	unsigned int mctrl;
	enum gpiod_flags flags;
} mctrl_gpios_desc[UART_GPIO_MAX] = {
	{ "cts", TIOCM_CTS, GPIOD_IN, },
	{ "dsr", TIOCM_DSR, GPIOD_IN, },
	{ "dcd", TIOCM_CD, GPIOD_IN, },
	{ "rng", TIOCM_RNG, GPIOD_IN, },
	{ "rts", TIOCM_RTS, GPIOD_OUT_LOW, },
	{ "dtr", TIOCM_DTR, GPIOD_OUT_LOW, },
};

static bool mctrl_gpio_flags_is_dir_out(unsigned int idx)
{
	return mctrl_gpios_desc[idx].flags & GPIOD_FLAGS_BIT_DIR_OUT;
}

/**
 * mctrl_gpio_set - set gpios according to mctrl state
 * @gpios: gpios to set
 * @mctrl: state to set
 *
 * Set the gpios according to the mctrl state.
*/ void mctrl_gpio_set(struct mctrl_gpios *gpios, unsigned int mctrl) { enum mctrl_gpio_idx i; struct gpio_desc *desc_array[UART_GPIO_MAX]; DECLARE_BITMAP(values, UART_GPIO_MAX); unsigned int count = 0; if (gpios == NULL) return; for (i = 0; i < UART_GPIO_MAX; i++) if (gpios->gpio[i] && mctrl_gpio_flags_is_dir_out(i)) { desc_array[count] = gpios->gpio[i]; __assign_bit(count, values, mctrl & mctrl_gpios_desc[i].mctrl); count++; } gpiod_set_array_value(count, desc_array, NULL, values); } EXPORT_SYMBOL_GPL(mctrl_gpio_set); /** * mctrl_gpio_to_gpiod - obtain gpio_desc of modem line index * @gpios: gpios to look into * @gidx: index of the modem line * Returns: the gpio_desc structure associated to the modem line index */ struct gpio_desc *mctrl_gpio_to_gpiod(struct mctrl_gpios *gpios, enum mctrl_gpio_idx gidx) { if (gpios == NULL) return NULL; return gpios->gpio[gidx]; } EXPORT_SYMBOL_GPL(mctrl_gpio_to_gpiod); /** * mctrl_gpio_get - update mctrl with the gpios values. * @gpios: gpios to get the info from * @mctrl: mctrl to set * Returns: modified mctrl (the same value as in @mctrl) * * Update mctrl with the gpios values. */ unsigned int mctrl_gpio_get(struct mctrl_gpios *gpios, unsigned int *mctrl) { enum mctrl_gpio_idx i; if (gpios == NULL) return *mctrl; for (i = 0; i < UART_GPIO_MAX; i++) { if (gpios->gpio[i] && !mctrl_gpio_flags_is_dir_out(i)) { if (gpiod_get_value(gpios->gpio[i])) *mctrl |= mctrl_gpios_desc[i].mctrl; else *mctrl &= ~mctrl_gpios_desc[i].mctrl; } } return *mctrl; } EXPORT_SYMBOL_GPL(mctrl_gpio_get); unsigned int mctrl_gpio_get_outputs(struct mctrl_gpios *gpios, unsigned int *mctrl) { enum mctrl_gpio_idx i; if (gpios == NULL) return *mctrl; for (i = 0; i < UART_GPIO_MAX; i++) { if (gpios->gpio[i] && mctrl_gpio_flags_is_dir_out(i)) { if (gpiod_get_value(gpios->gpio[i])) *mctrl |= mctrl_gpios_desc[i].mctrl; else *mctrl &= ~mctrl_gpios_desc[i].mctrl; } } return *mctrl; } EXPORT_SYMBOL_GPL(mctrl_gpio_get_outputs); struct mctrl_gpios *mctrl_gpio_init_noauto(struct device *dev, unsigned int idx) { struct mctrl_gpios *gpios; enum mctrl_gpio_idx i; gpios = devm_kzalloc(dev, sizeof(*gpios), GFP_KERNEL); if (!gpios) return ERR_PTR(-ENOMEM); for (i = 0; i < UART_GPIO_MAX; i++) { char *gpio_str; bool present; /* Check if GPIO property exists and continue if not */ gpio_str = kasprintf(GFP_KERNEL, "%s-gpios", mctrl_gpios_desc[i].name); if (!gpio_str) continue; present = device_property_present(dev, gpio_str); kfree(gpio_str); if (!present) continue; gpios->gpio[i] = devm_gpiod_get_index_optional(dev, mctrl_gpios_desc[i].name, idx, mctrl_gpios_desc[i].flags); if (IS_ERR(gpios->gpio[i])) return ERR_CAST(gpios->gpio[i]); } return gpios; } EXPORT_SYMBOL_GPL(mctrl_gpio_init_noauto); #define MCTRL_ANY_DELTA (TIOCM_RI | TIOCM_DSR | TIOCM_CD | TIOCM_CTS) static irqreturn_t mctrl_gpio_irq_handle(int irq, void *context) { struct mctrl_gpios *gpios = context; struct uart_port *port = gpios->port; u32 mctrl = gpios->mctrl_prev; u32 mctrl_diff; unsigned long flags; mctrl_gpio_get(gpios, &mctrl); uart_port_lock_irqsave(port, &flags); mctrl_diff = mctrl ^ gpios->mctrl_prev; gpios->mctrl_prev = mctrl; if (mctrl_diff & MCTRL_ANY_DELTA && port->state != NULL) { if ((mctrl_diff & mctrl) & TIOCM_RI) port->icount.rng++; if ((mctrl_diff & mctrl) & TIOCM_DSR) port->icount.dsr++; if (mctrl_diff & TIOCM_CD) uart_handle_dcd_change(port, mctrl & TIOCM_CD); if (mctrl_diff & TIOCM_CTS) uart_handle_cts_change(port, mctrl & TIOCM_CTS); wake_up_interruptible(&port->state->port.delta_msr_wait); } 
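	/*
	 * Everything above -- the icount updates, the DCD/CTS propagation
	 * and the delta_msr_wait wakeup -- ran under the port lock taken
	 * with uart_port_lock_irqsave(); all that is left is to drop the
	 * lock and acknowledge the interrupt.
	 */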
uart_port_unlock_irqrestore(port, flags); return IRQ_HANDLED; } /** * mctrl_gpio_init - initialize uart gpios * @port: port to initialize gpios for * @idx: index of the gpio in the @port's device * * This will get the {cts,rts,...}-gpios from device tree if they are present * and request them, set direction etc, and return an allocated structure. * `devm_*` functions are used, so there's no need to call mctrl_gpio_free(). * As this sets up the irq handling, make sure to not handle changes to the * gpio input lines in your driver, too. */ struct mctrl_gpios *mctrl_gpio_init(struct uart_port *port, unsigned int idx) { struct mctrl_gpios *gpios; enum mctrl_gpio_idx i; gpios = mctrl_gpio_init_noauto(port->dev, idx); if (IS_ERR(gpios)) return gpios; gpios->port = port; for (i = 0; i < UART_GPIO_MAX; ++i) { int ret; if (!gpios->gpio[i] || mctrl_gpio_flags_is_dir_out(i)) continue; ret = gpiod_to_irq(gpios->gpio[i]); if (ret < 0) { dev_err(port->dev, "failed to find corresponding irq for %s (idx=%d, err=%d)\n", mctrl_gpios_desc[i].name, idx, ret); return ERR_PTR(ret); } gpios->irq[i] = ret; /* irqs should only be enabled in .enable_ms */ irq_set_status_flags(gpios->irq[i], IRQ_NOAUTOEN); ret = devm_request_irq(port->dev, gpios->irq[i], mctrl_gpio_irq_handle, IRQ_TYPE_EDGE_BOTH, dev_name(port->dev), gpios); if (ret) { /* alternatively implement polling */ dev_err(port->dev, "failed to request irq for %s (idx=%d, err=%d)\n", mctrl_gpios_desc[i].name, idx, ret); return ERR_PTR(ret); } } return gpios; } EXPORT_SYMBOL_GPL(mctrl_gpio_init); /** * mctrl_gpio_free - explicitly free uart gpios * @dev: uart port's device * @gpios: gpios structure to be freed * * This will free the requested gpios in mctrl_gpio_init(). As `devm_*` * functions are used, there's generally no need to call this function. 
*/ void mctrl_gpio_free(struct device *dev, struct mctrl_gpios *gpios) { enum mctrl_gpio_idx i; if (gpios == NULL) return; for (i = 0; i < UART_GPIO_MAX; i++) { if (gpios->irq[i]) devm_free_irq(gpios->port->dev, gpios->irq[i], gpios); if (gpios->gpio[i]) devm_gpiod_put(dev, gpios->gpio[i]); } devm_kfree(dev, gpios); } EXPORT_SYMBOL_GPL(mctrl_gpio_free); /** * mctrl_gpio_enable_ms - enable irqs and handling of changes to the ms lines * @gpios: gpios to enable */ void mctrl_gpio_enable_ms(struct mctrl_gpios *gpios) { enum mctrl_gpio_idx i; if (gpios == NULL) return; /* .enable_ms may be called multiple times */ if (gpios->mctrl_on) return; gpios->mctrl_on = true; /* get initial status of modem lines GPIOs */ mctrl_gpio_get(gpios, &gpios->mctrl_prev); for (i = 0; i < UART_GPIO_MAX; ++i) { if (!gpios->irq[i]) continue; enable_irq(gpios->irq[i]); } } EXPORT_SYMBOL_GPL(mctrl_gpio_enable_ms); /** * mctrl_gpio_disable_ms - disable irqs and handling of changes to the ms lines * @gpios: gpios to disable */ void mctrl_gpio_disable_ms(struct mctrl_gpios *gpios) { enum mctrl_gpio_idx i; if (gpios == NULL) return; if (!gpios->mctrl_on) return; gpios->mctrl_on = false; for (i = 0; i < UART_GPIO_MAX; ++i) { if (!gpios->irq[i]) continue; disable_irq(gpios->irq[i]); } } EXPORT_SYMBOL_GPL(mctrl_gpio_disable_ms); void mctrl_gpio_enable_irq_wake(struct mctrl_gpios *gpios) { enum mctrl_gpio_idx i; if (!gpios) return; if (!gpios->mctrl_on) return; for (i = 0; i < UART_GPIO_MAX; ++i) { if (!gpios->irq[i]) continue; enable_irq_wake(gpios->irq[i]); } } EXPORT_SYMBOL_GPL(mctrl_gpio_enable_irq_wake); void mctrl_gpio_disable_irq_wake(struct mctrl_gpios *gpios) { enum mctrl_gpio_idx i; if (!gpios) return; if (!gpios->mctrl_on) return; for (i = 0; i < UART_GPIO_MAX; ++i) { if (!gpios->irq[i]) continue; disable_irq_wake(gpios->irq[i]); } } EXPORT_SYMBOL_GPL(mctrl_gpio_disable_irq_wake); MODULE_DESCRIPTION("Helpers for controlling modem lines via GPIO"); MODULE_LICENSE("GPL"); |
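A sketch of how a serial driver would typically wire these helpers into its uart_ops callbacks. The foo_uart_* names and the wrapper struct are hypothetical; only the mctrl_gpio_* calls are the real API exported above:

#include <linux/err.h>
#include <linux/serial_core.h>

#include "serial_mctrl_gpio.h"

/* Hypothetical driver state embedding the helper handle. */
struct foo_uart_port {
	struct uart_port port;
	struct mctrl_gpios *gpios;
};

static void foo_uart_set_mctrl(struct uart_port *port, unsigned int mctrl)
{
	struct foo_uart_port *s = container_of(port, struct foo_uart_port, port);

	/* Drives RTS/DTR GPIOs; input lines are skipped by mctrl_gpio_set(). */
	mctrl_gpio_set(s->gpios, mctrl);
}

static unsigned int foo_uart_get_mctrl(struct uart_port *port)
{
	struct foo_uart_port *s = container_of(port, struct foo_uart_port, port);
	unsigned int mctrl = 0;

	/* Merges CTS/DSR/DCD/RI GPIO state into the returned TIOCM_* bits. */
	return mctrl_gpio_get(s->gpios, &mctrl);
}

static void foo_uart_enable_ms(struct uart_port *port)
{
	struct foo_uart_port *s = container_of(port, struct foo_uart_port, port);

	/* Unmasks the edge irqs that mctrl_gpio_init() requested. */
	mctrl_gpio_enable_ms(s->gpios);
}

static int foo_uart_probe_mctrl(struct foo_uart_port *s)
{
	/* Looks up {cts,rts,...}-gpios and installs the irq handler. */
	s->gpios = mctrl_gpio_init(&s->port, 0);
	if (IS_ERR(s->gpios))
		return PTR_ERR(s->gpios);
	return 0;
}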
// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Driver for Zarlink DVB-T ZL10353 demodulator
 *
 * Copyright (C) 2006, 2007 Christopher Pascoe <c.pascoe@itee.uq.edu.au>
 */

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/delay.h>
#include <linux/string.h>
#include <linux/slab.h>
#include <asm/div64.h>

#include <media/dvb_frontend.h>
#include "zl10353_priv.h"
#include "zl10353.h"

struct zl10353_state {
	struct i2c_adapter *i2c;
	struct dvb_frontend frontend;

	struct zl10353_config config;

	u32 bandwidth;
	u32 ucblocks;
	u32 frequency;
};

static int debug;
#define dprintk(args...)
\ do { \ if (debug) printk(KERN_DEBUG "zl10353: " args); \ } while (0) static int debug_regs; static int zl10353_single_write(struct dvb_frontend *fe, u8 reg, u8 val) { struct zl10353_state *state = fe->demodulator_priv; u8 buf[2] = { reg, val }; struct i2c_msg msg = { .addr = state->config.demod_address, .flags = 0, .buf = buf, .len = 2 }; int err = i2c_transfer(state->i2c, &msg, 1); if (err != 1) { printk("zl10353: write to reg %x failed (err = %d)!\n", reg, err); return err; } return 0; } static int zl10353_write(struct dvb_frontend *fe, const u8 ibuf[], int ilen) { int err, i; for (i = 0; i < ilen - 1; i++) if ((err = zl10353_single_write(fe, ibuf[0] + i, ibuf[i + 1]))) return err; return 0; } static int zl10353_read_register(struct zl10353_state *state, u8 reg) { int ret; u8 b0[1] = { reg }; u8 b1[1] = { 0 }; struct i2c_msg msg[2] = { { .addr = state->config.demod_address, .flags = 0, .buf = b0, .len = 1 }, { .addr = state->config.demod_address, .flags = I2C_M_RD, .buf = b1, .len = 1 } }; ret = i2c_transfer(state->i2c, msg, 2); if (ret != 2) { printk("%s: readreg error (reg=%d, ret==%i)\n", __func__, reg, ret); return ret; } return b1[0]; } static void zl10353_dump_regs(struct dvb_frontend *fe) { struct zl10353_state *state = fe->demodulator_priv; int ret; u8 reg; /* Dump all registers. */ for (reg = 0; ; reg++) { if (reg % 16 == 0) { if (reg) printk(KERN_CONT "\n"); printk(KERN_DEBUG "%02x:", reg); } ret = zl10353_read_register(state, reg); if (ret >= 0) printk(KERN_CONT " %02x", (u8)ret); else printk(KERN_CONT " --"); if (reg == 0xff) break; } printk(KERN_CONT "\n"); } static void zl10353_calc_nominal_rate(struct dvb_frontend *fe, u32 bandwidth, u16 *nominal_rate) { struct zl10353_state *state = fe->demodulator_priv; u32 adc_clock = 450560; /* 45.056 MHz */ u64 value; u8 bw = bandwidth / 1000000; if (state->config.adc_clock) adc_clock = state->config.adc_clock; value = (u64)10 * (1 << 23) / 7 * 125; value = (bw * value) + adc_clock / 2; *nominal_rate = div_u64(value, adc_clock); dprintk("%s: bw %d, adc_clock %d => 0x%x\n", __func__, bw, adc_clock, *nominal_rate); } static void zl10353_calc_input_freq(struct dvb_frontend *fe, u16 *input_freq) { struct zl10353_state *state = fe->demodulator_priv; u32 adc_clock = 450560; /* 45.056 MHz */ int if2 = 361667; /* 36.1667 MHz */ int ife; u64 value; if (state->config.adc_clock) adc_clock = state->config.adc_clock; if (state->config.if2) if2 = state->config.if2; if (adc_clock >= if2 * 2) ife = if2; else { ife = adc_clock - (if2 % adc_clock); if (ife > adc_clock / 2) ife = adc_clock - ife; } value = div_u64((u64)65536 * ife + adc_clock / 2, adc_clock); *input_freq = -value; dprintk("%s: if2 %d, ife %d, adc_clock %d => %d / 0x%x\n", __func__, if2, ife, adc_clock, -(int)value, *input_freq); } static int zl10353_sleep(struct dvb_frontend *fe) { static u8 zl10353_softdown[] = { 0x50, 0x0C, 0x44 }; zl10353_write(fe, zl10353_softdown, sizeof(zl10353_softdown)); return 0; } static int zl10353_set_parameters(struct dvb_frontend *fe) { struct dtv_frontend_properties *c = &fe->dtv_property_cache; struct zl10353_state *state = fe->demodulator_priv; u16 nominal_rate, input_freq; u8 pllbuf[6] = { 0x67 }, acq_ctl = 0; u16 tps = 0; state->frequency = c->frequency; zl10353_single_write(fe, RESET, 0x80); udelay(200); zl10353_single_write(fe, 0xEA, 0x01); udelay(200); zl10353_single_write(fe, 0xEA, 0x00); zl10353_single_write(fe, AGC_TARGET, 0x28); if (c->transmission_mode != TRANSMISSION_MODE_AUTO) acq_ctl |= (1 << 0); if (c->guard_interval != 
GUARD_INTERVAL_AUTO) acq_ctl |= (1 << 1); zl10353_single_write(fe, ACQ_CTL, acq_ctl); switch (c->bandwidth_hz) { case 6000000: /* These are extrapolated from the 7 and 8MHz values */ zl10353_single_write(fe, MCLK_RATIO, 0x97); zl10353_single_write(fe, 0x64, 0x34); zl10353_single_write(fe, 0xcc, 0xdd); break; case 7000000: zl10353_single_write(fe, MCLK_RATIO, 0x86); zl10353_single_write(fe, 0x64, 0x35); zl10353_single_write(fe, 0xcc, 0x73); break; default: c->bandwidth_hz = 8000000; fallthrough; case 8000000: zl10353_single_write(fe, MCLK_RATIO, 0x75); zl10353_single_write(fe, 0x64, 0x36); zl10353_single_write(fe, 0xcc, 0x73); } zl10353_calc_nominal_rate(fe, c->bandwidth_hz, &nominal_rate); zl10353_single_write(fe, TRL_NOMINAL_RATE_1, msb(nominal_rate)); zl10353_single_write(fe, TRL_NOMINAL_RATE_0, lsb(nominal_rate)); state->bandwidth = c->bandwidth_hz; zl10353_calc_input_freq(fe, &input_freq); zl10353_single_write(fe, INPUT_FREQ_1, msb(input_freq)); zl10353_single_write(fe, INPUT_FREQ_0, lsb(input_freq)); /* Hint at TPS settings */ switch (c->code_rate_HP) { case FEC_2_3: tps |= (1 << 7); break; case FEC_3_4: tps |= (2 << 7); break; case FEC_5_6: tps |= (3 << 7); break; case FEC_7_8: tps |= (4 << 7); break; case FEC_1_2: case FEC_AUTO: break; default: return -EINVAL; } switch (c->code_rate_LP) { case FEC_2_3: tps |= (1 << 4); break; case FEC_3_4: tps |= (2 << 4); break; case FEC_5_6: tps |= (3 << 4); break; case FEC_7_8: tps |= (4 << 4); break; case FEC_1_2: case FEC_AUTO: break; case FEC_NONE: if (c->hierarchy == HIERARCHY_AUTO || c->hierarchy == HIERARCHY_NONE) break; fallthrough; default: return -EINVAL; } switch (c->modulation) { case QPSK: break; case QAM_AUTO: case QAM_16: tps |= (1 << 13); break; case QAM_64: tps |= (2 << 13); break; default: return -EINVAL; } switch (c->transmission_mode) { case TRANSMISSION_MODE_2K: case TRANSMISSION_MODE_AUTO: break; case TRANSMISSION_MODE_8K: tps |= (1 << 0); break; default: return -EINVAL; } switch (c->guard_interval) { case GUARD_INTERVAL_1_32: case GUARD_INTERVAL_AUTO: break; case GUARD_INTERVAL_1_16: tps |= (1 << 2); break; case GUARD_INTERVAL_1_8: tps |= (2 << 2); break; case GUARD_INTERVAL_1_4: tps |= (3 << 2); break; default: return -EINVAL; } switch (c->hierarchy) { case HIERARCHY_AUTO: case HIERARCHY_NONE: break; case HIERARCHY_1: tps |= (1 << 10); break; case HIERARCHY_2: tps |= (2 << 10); break; case HIERARCHY_4: tps |= (3 << 10); break; default: return -EINVAL; } zl10353_single_write(fe, TPS_GIVEN_1, msb(tps)); zl10353_single_write(fe, TPS_GIVEN_0, lsb(tps)); if (fe->ops.i2c_gate_ctrl) fe->ops.i2c_gate_ctrl(fe, 0); /* * If there is no tuner attached to the secondary I2C bus, we call * set_params to program a potential tuner attached somewhere else. * Otherwise, we update the PLL registers via calc_regs. */ if (state->config.no_tuner) { if (fe->ops.tuner_ops.set_params) { fe->ops.tuner_ops.set_params(fe); if (fe->ops.i2c_gate_ctrl) fe->ops.i2c_gate_ctrl(fe, 0); } } else if (fe->ops.tuner_ops.calc_regs) { fe->ops.tuner_ops.calc_regs(fe, pllbuf + 1, 5); pllbuf[1] <<= 1; zl10353_write(fe, pllbuf, sizeof(pllbuf)); } zl10353_single_write(fe, 0x5F, 0x13); /* If no attached tuner or invalid PLL registers, just start the FSM. 
*/ if (state->config.no_tuner || fe->ops.tuner_ops.calc_regs == NULL) zl10353_single_write(fe, FSM_GO, 0x01); else zl10353_single_write(fe, TUNER_GO, 0x01); return 0; } static int zl10353_get_parameters(struct dvb_frontend *fe, struct dtv_frontend_properties *c) { struct zl10353_state *state = fe->demodulator_priv; int s6, s9; u16 tps; static const u8 tps_fec_to_api[8] = { FEC_1_2, FEC_2_3, FEC_3_4, FEC_5_6, FEC_7_8, FEC_AUTO, FEC_AUTO, FEC_AUTO }; s6 = zl10353_read_register(state, STATUS_6); s9 = zl10353_read_register(state, STATUS_9); if (s6 < 0 || s9 < 0) return -EREMOTEIO; if ((s6 & (1 << 5)) == 0 || (s9 & (1 << 4)) == 0) return -EINVAL; /* no FE or TPS lock */ tps = zl10353_read_register(state, TPS_RECEIVED_1) << 8 | zl10353_read_register(state, TPS_RECEIVED_0); c->code_rate_HP = tps_fec_to_api[(tps >> 7) & 7]; c->code_rate_LP = tps_fec_to_api[(tps >> 4) & 7]; switch ((tps >> 13) & 3) { case 0: c->modulation = QPSK; break; case 1: c->modulation = QAM_16; break; case 2: c->modulation = QAM_64; break; default: c->modulation = QAM_AUTO; break; } c->transmission_mode = (tps & 0x01) ? TRANSMISSION_MODE_8K : TRANSMISSION_MODE_2K; switch ((tps >> 2) & 3) { case 0: c->guard_interval = GUARD_INTERVAL_1_32; break; case 1: c->guard_interval = GUARD_INTERVAL_1_16; break; case 2: c->guard_interval = GUARD_INTERVAL_1_8; break; case 3: c->guard_interval = GUARD_INTERVAL_1_4; break; default: c->guard_interval = GUARD_INTERVAL_AUTO; break; } switch ((tps >> 10) & 7) { case 0: c->hierarchy = HIERARCHY_NONE; break; case 1: c->hierarchy = HIERARCHY_1; break; case 2: c->hierarchy = HIERARCHY_2; break; case 3: c->hierarchy = HIERARCHY_4; break; default: c->hierarchy = HIERARCHY_AUTO; break; } c->frequency = state->frequency; c->bandwidth_hz = state->bandwidth; c->inversion = INVERSION_AUTO; return 0; } static int zl10353_read_status(struct dvb_frontend *fe, enum fe_status *status) { struct zl10353_state *state = fe->demodulator_priv; int s6, s7, s8; if ((s6 = zl10353_read_register(state, STATUS_6)) < 0) return -EREMOTEIO; if ((s7 = zl10353_read_register(state, STATUS_7)) < 0) return -EREMOTEIO; if ((s8 = zl10353_read_register(state, STATUS_8)) < 0) return -EREMOTEIO; *status = 0; if (s6 & (1 << 2)) *status |= FE_HAS_CARRIER; if (s6 & (1 << 1)) *status |= FE_HAS_VITERBI; if (s6 & (1 << 5)) *status |= FE_HAS_LOCK; if (s7 & (1 << 4)) *status |= FE_HAS_SYNC; if (s8 & (1 << 6)) *status |= FE_HAS_SIGNAL; if ((*status & (FE_HAS_CARRIER | FE_HAS_VITERBI | FE_HAS_SYNC)) != (FE_HAS_CARRIER | FE_HAS_VITERBI | FE_HAS_SYNC)) *status &= ~FE_HAS_LOCK; return 0; } static int zl10353_read_ber(struct dvb_frontend *fe, u32 *ber) { struct zl10353_state *state = fe->demodulator_priv; *ber = zl10353_read_register(state, RS_ERR_CNT_2) << 16 | zl10353_read_register(state, RS_ERR_CNT_1) << 8 | zl10353_read_register(state, RS_ERR_CNT_0); return 0; } static int zl10353_read_signal_strength(struct dvb_frontend *fe, u16 *strength) { struct zl10353_state *state = fe->demodulator_priv; u16 signal = zl10353_read_register(state, AGC_GAIN_1) << 10 | zl10353_read_register(state, AGC_GAIN_0) << 2 | 3; *strength = ~signal; return 0; } static int zl10353_read_snr(struct dvb_frontend *fe, u16 *snr) { struct zl10353_state *state = fe->demodulator_priv; u8 _snr; if (debug_regs) zl10353_dump_regs(fe); _snr = zl10353_read_register(state, SNR); *snr = 10 * _snr / 8; return 0; } static int zl10353_read_ucblocks(struct dvb_frontend *fe, u32 *ucblocks) { struct zl10353_state *state = fe->demodulator_priv; u32 ubl = 0; ubl = zl10353_read_register(state, 
RS_UBC_1) << 8 | zl10353_read_register(state, RS_UBC_0); state->ucblocks += ubl; *ucblocks = state->ucblocks; return 0; } static int zl10353_get_tune_settings(struct dvb_frontend *fe, struct dvb_frontend_tune_settings *fe_tune_settings) { fe_tune_settings->min_delay_ms = 1000; fe_tune_settings->step_size = 0; fe_tune_settings->max_drift = 0; return 0; } static int zl10353_init(struct dvb_frontend *fe) { struct zl10353_state *state = fe->demodulator_priv; u8 zl10353_reset_attach[6] = { 0x50, 0x03, 0x64, 0x46, 0x15, 0x0F }; if (debug_regs) zl10353_dump_regs(fe); if (state->config.parallel_ts) zl10353_reset_attach[2] &= ~0x20; if (state->config.clock_ctl_1) zl10353_reset_attach[3] = state->config.clock_ctl_1; if (state->config.pll_0) zl10353_reset_attach[4] = state->config.pll_0; /* Do a "hard" reset if not already done */ if (zl10353_read_register(state, 0x50) != zl10353_reset_attach[1] || zl10353_read_register(state, 0x51) != zl10353_reset_attach[2]) { zl10353_write(fe, zl10353_reset_attach, sizeof(zl10353_reset_attach)); if (debug_regs) zl10353_dump_regs(fe); } return 0; } static int zl10353_i2c_gate_ctrl(struct dvb_frontend* fe, int enable) { struct zl10353_state *state = fe->demodulator_priv; u8 val = 0x0a; if (state->config.disable_i2c_gate_ctrl) { /* No tuner attached to the internal I2C bus */ /* If set enable I2C bridge, the main I2C bus stopped hardly */ return 0; } if (enable) val |= 0x10; return zl10353_single_write(fe, 0x62, val); } static void zl10353_release(struct dvb_frontend *fe) { struct zl10353_state *state = fe->demodulator_priv; kfree(state); } static const struct dvb_frontend_ops zl10353_ops; struct dvb_frontend *zl10353_attach(const struct zl10353_config *config, struct i2c_adapter *i2c) { struct zl10353_state *state = NULL; int id; /* allocate memory for the internal state */ state = kzalloc(sizeof(struct zl10353_state), GFP_KERNEL); if (state == NULL) goto error; /* setup the state */ state->i2c = i2c; memcpy(&state->config, config, sizeof(struct zl10353_config)); /* check if the demod is there */ id = zl10353_read_register(state, CHIP_ID); if ((id != ID_ZL10353) && (id != ID_CE6230) && (id != ID_CE6231)) goto error; /* create dvb_frontend */ memcpy(&state->frontend.ops, &zl10353_ops, sizeof(struct dvb_frontend_ops)); state->frontend.demodulator_priv = state; return &state->frontend; error: kfree(state); return NULL; } static const struct dvb_frontend_ops zl10353_ops = { .delsys = { SYS_DVBT }, .info = { .name = "Zarlink ZL10353 DVB-T", .frequency_min_hz = 174 * MHz, .frequency_max_hz = 862 * MHz, .frequency_stepsize_hz = 166667, .caps = FE_CAN_FEC_1_2 | FE_CAN_FEC_2_3 | FE_CAN_FEC_3_4 | FE_CAN_FEC_5_6 | FE_CAN_FEC_7_8 | FE_CAN_FEC_AUTO | FE_CAN_QPSK | FE_CAN_QAM_16 | FE_CAN_QAM_64 | FE_CAN_QAM_AUTO | FE_CAN_TRANSMISSION_MODE_AUTO | FE_CAN_GUARD_INTERVAL_AUTO | FE_CAN_HIERARCHY_AUTO | FE_CAN_RECOVER | FE_CAN_MUTE_TS }, .release = zl10353_release, .init = zl10353_init, .sleep = zl10353_sleep, .i2c_gate_ctrl = zl10353_i2c_gate_ctrl, .write = zl10353_write, .set_frontend = zl10353_set_parameters, .get_frontend = zl10353_get_parameters, .get_tune_settings = zl10353_get_tune_settings, .read_status = zl10353_read_status, .read_ber = zl10353_read_ber, .read_signal_strength = zl10353_read_signal_strength, .read_snr = zl10353_read_snr, .read_ucblocks = zl10353_read_ucblocks, }; module_param(debug, int, 0644); MODULE_PARM_DESC(debug, "Turn on/off frontend debugging (default:off)."); module_param(debug_regs, int, 0644); MODULE_PARM_DESC(debug_regs, "Turn on/off frontend 
register dumps (default:off).");

MODULE_DESCRIPTION("Zarlink ZL10353 DVB-T demodulator driver");
MODULE_AUTHOR("Chris Pascoe");
MODULE_LICENSE("GPL");

EXPORT_SYMBOL_GPL(zl10353_attach);
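The fixed-point arithmetic in zl10353_calc_nominal_rate() is easy to sanity-check offline. Below is a small host-side C restatement of that math (an illustrative sketch, assuming the driver's default 45.056 MHz ADC clock, expressed in units of 100 Hz as in the driver):

#include <stdio.h>
#include <stdint.h>

#define DEFAULT_ADC_CLOCK 450560u	/* 45.056 MHz, in 100 Hz units */

static uint16_t nominal_rate(uint32_t bandwidth_hz, uint32_t adc_clock)
{
	uint64_t value;
	uint8_t bw = bandwidth_hz / 1000000;

	/* Same steps as the driver: value = 10 * 2^23 / 7 * 125,
	 * scaled by the bandwidth in MHz and rounded to the ADC clock.
	 */
	value = (uint64_t)10 * (1 << 23) / 7 * 125;
	value = (bw * value) + adc_clock / 2;
	return (uint16_t)(value / adc_clock);
}

int main(void)
{
	uint32_t bw;

	/* The three bandwidths handled in zl10353_set_parameters(). */
	for (bw = 6000000; bw <= 8000000; bw += 1000000)
		printf("%u MHz -> TRL nominal rate 0x%04x\n",
		       bw / 1000000, nominal_rate(bw, DEFAULT_ADC_CLOCK));
	return 0;
}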
// SPDX-License-Identifier: GPL-2.0-only
/*
 * Packet matching code.
 *
 * Copyright (C) 1999 Paul `Rusty' Russell & Michael J. Neuling
 * Copyright (C) 2000-2005 Netfilter Core Team <coreteam@netfilter.org>
 * Copyright (c) 2006-2010 Patrick McHardy <kaber@trash.net>
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/kernel.h>
#include <linux/capability.h>
#include <linux/in.h>
#include <linux/skbuff.h>
#include <linux/kmod.h>
#include <linux/vmalloc.h>
#include <linux/netdevice.h>
#include <linux/module.h>
#include <linux/poison.h>
#include <net/ipv6.h>
#include <net/compat.h>
#include <linux/uaccess.h>
#include <linux/mutex.h>
#include <linux/proc_fs.h>
#include <linux/err.h>
#include <linux/cpumask.h>

#include <linux/netfilter_ipv6/ip6_tables.h>
#include <linux/netfilter/x_tables.h>
#include <net/netfilter/nf_log.h>
#include "../../netfilter/xt_repldata.h"

MODULE_LICENSE("GPL");
MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
MODULE_DESCRIPTION("IPv6 packet filter");

void *ip6t_alloc_initial_table(const struct xt_table *info)
{
	return xt_alloc_initial_table(ip6t, IP6T);
}
EXPORT_SYMBOL_GPL(ip6t_alloc_initial_table);

/* Returns whether matches rule or not.
*/ /* Performance critical - called for every packet */ static inline bool ip6_packet_match(const struct sk_buff *skb, const char *indev, const char *outdev, const struct ip6t_ip6 *ip6info, unsigned int *protoff, u16 *fragoff, bool *hotdrop) { unsigned long ret; const struct ipv6hdr *ipv6 = ipv6_hdr(skb); if (NF_INVF(ip6info, IP6T_INV_SRCIP, ipv6_masked_addr_cmp(&ipv6->saddr, &ip6info->smsk, &ip6info->src)) || NF_INVF(ip6info, IP6T_INV_DSTIP, ipv6_masked_addr_cmp(&ipv6->daddr, &ip6info->dmsk, &ip6info->dst))) return false; ret = ifname_compare_aligned(indev, ip6info->iniface, ip6info->iniface_mask); if (NF_INVF(ip6info, IP6T_INV_VIA_IN, ret != 0)) return false; ret = ifname_compare_aligned(outdev, ip6info->outiface, ip6info->outiface_mask); if (NF_INVF(ip6info, IP6T_INV_VIA_OUT, ret != 0)) return false; /* ... might want to do something with class and flowlabel here ... */ /* look for the desired protocol header */ if (ip6info->flags & IP6T_F_PROTO) { int protohdr; unsigned short _frag_off; protohdr = ipv6_find_hdr(skb, protoff, -1, &_frag_off, NULL); if (protohdr < 0) { if (_frag_off == 0) *hotdrop = true; return false; } *fragoff = _frag_off; if (ip6info->proto == protohdr) { if (ip6info->invflags & IP6T_INV_PROTO) return false; return true; } /* We need match for the '-p all', too! */ if ((ip6info->proto != 0) && !(ip6info->invflags & IP6T_INV_PROTO)) return false; } return true; } /* should be ip6 safe */ static bool ip6_checkentry(const struct ip6t_ip6 *ipv6) { if (ipv6->flags & ~IP6T_F_MASK) return false; if (ipv6->invflags & ~IP6T_INV_MASK) return false; return true; } static unsigned int ip6t_error(struct sk_buff *skb, const struct xt_action_param *par) { net_info_ratelimited("error: `%s'\n", (const char *)par->targinfo); return NF_DROP; } static inline struct ip6t_entry * get_entry(const void *base, unsigned int offset) { return (struct ip6t_entry *)(base + offset); } /* All zeroes == unconditional rule. */ /* Mildly perf critical (only if packet tracing is on) */ static inline bool unconditional(const struct ip6t_entry *e) { static const struct ip6t_ip6 uncond; return e->target_offset == sizeof(struct ip6t_entry) && memcmp(&e->ipv6, &uncond, sizeof(uncond)) == 0; } static inline const struct xt_entry_target * ip6t_get_target_c(const struct ip6t_entry *e) { return ip6t_get_target((struct ip6t_entry *)e); } #if IS_ENABLED(CONFIG_NETFILTER_XT_TARGET_TRACE) /* This cries for unification! 
*/ static const char *const hooknames[] = { [NF_INET_PRE_ROUTING] = "PREROUTING", [NF_INET_LOCAL_IN] = "INPUT", [NF_INET_FORWARD] = "FORWARD", [NF_INET_LOCAL_OUT] = "OUTPUT", [NF_INET_POST_ROUTING] = "POSTROUTING", }; enum nf_ip_trace_comments { NF_IP6_TRACE_COMMENT_RULE, NF_IP6_TRACE_COMMENT_RETURN, NF_IP6_TRACE_COMMENT_POLICY, }; static const char *const comments[] = { [NF_IP6_TRACE_COMMENT_RULE] = "rule", [NF_IP6_TRACE_COMMENT_RETURN] = "return", [NF_IP6_TRACE_COMMENT_POLICY] = "policy", }; static const struct nf_loginfo trace_loginfo = { .type = NF_LOG_TYPE_LOG, .u = { .log = { .level = LOGLEVEL_WARNING, .logflags = NF_LOG_DEFAULT_MASK, }, }, }; /* Mildly perf critical (only if packet tracing is on) */ static inline int get_chainname_rulenum(const struct ip6t_entry *s, const struct ip6t_entry *e, const char *hookname, const char **chainname, const char **comment, unsigned int *rulenum) { const struct xt_standard_target *t = (void *)ip6t_get_target_c(s); if (strcmp(t->target.u.kernel.target->name, XT_ERROR_TARGET) == 0) { /* Head of user chain: ERROR target with chainname */ *chainname = t->target.data; (*rulenum) = 0; } else if (s == e) { (*rulenum)++; if (unconditional(s) && strcmp(t->target.u.kernel.target->name, XT_STANDARD_TARGET) == 0 && t->verdict < 0) { /* Tail of chains: STANDARD target (return/policy) */ *comment = *chainname == hookname ? comments[NF_IP6_TRACE_COMMENT_POLICY] : comments[NF_IP6_TRACE_COMMENT_RETURN]; } return 1; } else (*rulenum)++; return 0; } static void trace_packet(struct net *net, const struct sk_buff *skb, unsigned int hook, const struct net_device *in, const struct net_device *out, const char *tablename, const struct xt_table_info *private, const struct ip6t_entry *e) { const struct ip6t_entry *root; const char *hookname, *chainname, *comment; const struct ip6t_entry *iter; unsigned int rulenum = 0; root = get_entry(private->entries, private->hook_entry[hook]); hookname = chainname = hooknames[hook]; comment = comments[NF_IP6_TRACE_COMMENT_RULE]; xt_entry_foreach(iter, root, private->size - private->hook_entry[hook]) if (get_chainname_rulenum(iter, e, hookname, &chainname, &comment, &rulenum) != 0) break; nf_log_trace(net, AF_INET6, hook, skb, in, out, &trace_loginfo, "TRACE: %s:%s:%s:%u ", tablename, chainname, comment, rulenum); } #endif static inline struct ip6t_entry * ip6t_next_entry(const struct ip6t_entry *entry) { return (void *)entry + entry->next_offset; } /* Returns one of the generic firewall policies, like NF_ACCEPT. */ unsigned int ip6t_do_table(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { const struct xt_table *table = priv; unsigned int hook = state->hook; static const char nulldevname[IFNAMSIZ] __attribute__((aligned(sizeof(long)))); /* Initializing verdict to NF_DROP keeps gcc happy. */ unsigned int verdict = NF_DROP; const char *indev, *outdev; const void *table_base; struct ip6t_entry *e, **jumpstack; unsigned int stackidx, cpu; const struct xt_table_info *private; struct xt_action_param acpar; unsigned int addend; /* Initialization */ stackidx = 0; indev = state->in ? state->in->name : nulldevname; outdev = state->out ? state->out->name : nulldevname; /* We handle fragments by dealing with the first fragment as * if it was a normal packet. All other fragments are treated * normally, except that they will NEVER match rules that ask * things we don't know, ie. tcp syn flag or ports). If the * rule is also a fragment-specific rule, non-fragments won't * match it. 
*/ acpar.fragoff = 0; acpar.hotdrop = false; acpar.state = state; WARN_ON(!(table->valid_hooks & (1 << hook))); local_bh_disable(); addend = xt_write_recseq_begin(); private = READ_ONCE(table->private); /* Address dependency. */ cpu = smp_processor_id(); table_base = private->entries; jumpstack = (struct ip6t_entry **)private->jumpstack[cpu]; /* Switch to alternate jumpstack if we're being invoked via TEE. * TEE issues XT_CONTINUE verdict on original skb so we must not * clobber the jumpstack. * * For recursion via REJECT or SYNPROXY the stack will be clobbered * but it is no problem since absolute verdict is issued by these. */ if (static_key_false(&xt_tee_enabled)) jumpstack += private->stacksize * __this_cpu_read(nf_skb_duplicated); e = get_entry(table_base, private->hook_entry[hook]); do { const struct xt_entry_target *t; const struct xt_entry_match *ematch; struct xt_counters *counter; WARN_ON(!e); acpar.thoff = 0; if (!ip6_packet_match(skb, indev, outdev, &e->ipv6, &acpar.thoff, &acpar.fragoff, &acpar.hotdrop)) { no_match: e = ip6t_next_entry(e); continue; } xt_ematch_foreach(ematch, e) { acpar.match = ematch->u.kernel.match; acpar.matchinfo = ematch->data; if (!acpar.match->match(skb, &acpar)) goto no_match; } counter = xt_get_this_cpu_counter(&e->counters); ADD_COUNTER(*counter, skb->len, 1); t = ip6t_get_target_c(e); WARN_ON(!t->u.kernel.target); #if IS_ENABLED(CONFIG_NETFILTER_XT_TARGET_TRACE) /* The packet is traced: log it */ if (unlikely(skb->nf_trace)) trace_packet(state->net, skb, hook, state->in, state->out, table->name, private, e); #endif /* Standard target? */ if (!t->u.kernel.target->target) { int v; v = ((struct xt_standard_target *)t)->verdict; if (v < 0) { /* Pop from stack? */ if (v != XT_RETURN) { verdict = (unsigned int)(-v) - 1; break; } if (stackidx == 0) e = get_entry(table_base, private->underflow[hook]); else e = ip6t_next_entry(jumpstack[--stackidx]); continue; } if (table_base + v != ip6t_next_entry(e) && !(e->ipv6.flags & IP6T_F_GOTO)) { if (unlikely(stackidx >= private->stacksize)) { verdict = NF_DROP; break; } jumpstack[stackidx++] = e; } e = get_entry(table_base, v); continue; } acpar.target = t->u.kernel.target; acpar.targinfo = t->data; verdict = t->u.kernel.target->target(skb, &acpar); if (verdict == XT_CONTINUE) e = ip6t_next_entry(e); else /* Verdict */ break; } while (!acpar.hotdrop); xt_write_recseq_end(addend); local_bh_enable(); if (acpar.hotdrop) return NF_DROP; else return verdict; } /* Figures out from what hook each rule can be called: returns 0 if there are loops. Puts hook bitmask in comefrom. */ static int mark_source_chains(const struct xt_table_info *newinfo, unsigned int valid_hooks, void *entry0, unsigned int *offsets) { unsigned int hook; /* No recursion; use packet counter to save back ptrs (reset to 0 as we leave), and comefrom to save source hook bitmask */ for (hook = 0; hook < NF_INET_NUMHOOKS; hook++) { unsigned int pos = newinfo->hook_entry[hook]; struct ip6t_entry *e = entry0 + pos; if (!(valid_hooks & (1 << hook))) continue; /* Set initial back pointer. */ e->counters.pcnt = pos; for (;;) { const struct xt_standard_target *t = (void *)ip6t_get_target_c(e); int visited = e->comefrom & (1 << hook); if (e->comefrom & (1 << NF_INET_NUMHOOKS)) return 0; e->comefrom |= ((1 << hook) | (1 << NF_INET_NUMHOOKS)); /* Unconditional return/END. */ if ((unconditional(e) && (strcmp(t->target.u.user.name, XT_STANDARD_TARGET) == 0) && t->verdict < 0) || visited) { unsigned int oldpos, size; /* Return: backtrack through the last big jump. 
*/ do { e->comefrom ^= (1<<NF_INET_NUMHOOKS); oldpos = pos; pos = e->counters.pcnt; e->counters.pcnt = 0; /* We're at the start. */ if (pos == oldpos) goto next; e = entry0 + pos; } while (oldpos == pos + e->next_offset); /* Move along one */ size = e->next_offset; e = entry0 + pos + size; if (pos + size >= newinfo->size) return 0; e->counters.pcnt = pos; pos += size; } else { int newpos = t->verdict; if (strcmp(t->target.u.user.name, XT_STANDARD_TARGET) == 0 && newpos >= 0) { /* This a jump; chase it. */ if (!xt_find_jump_offset(offsets, newpos, newinfo->number)) return 0; } else { /* ... this is a fallthru */ newpos = pos + e->next_offset; if (newpos >= newinfo->size) return 0; } e = entry0 + newpos; e->counters.pcnt = pos; pos = newpos; } } next: ; } return 1; } static void cleanup_match(struct xt_entry_match *m, struct net *net) { struct xt_mtdtor_param par; par.net = net; par.match = m->u.kernel.match; par.matchinfo = m->data; par.family = NFPROTO_IPV6; if (par.match->destroy != NULL) par.match->destroy(&par); module_put(par.match->me); } static int check_match(struct xt_entry_match *m, struct xt_mtchk_param *par) { const struct ip6t_ip6 *ipv6 = par->entryinfo; par->match = m->u.kernel.match; par->matchinfo = m->data; return xt_check_match(par, m->u.match_size - sizeof(*m), ipv6->proto, ipv6->invflags & IP6T_INV_PROTO); } static int find_check_match(struct xt_entry_match *m, struct xt_mtchk_param *par) { struct xt_match *match; int ret; match = xt_request_find_match(NFPROTO_IPV6, m->u.user.name, m->u.user.revision); if (IS_ERR(match)) return PTR_ERR(match); m->u.kernel.match = match; ret = check_match(m, par); if (ret) goto err; return 0; err: module_put(m->u.kernel.match->me); return ret; } static int check_target(struct ip6t_entry *e, struct net *net, const char *name) { struct xt_entry_target *t = ip6t_get_target(e); struct xt_tgchk_param par = { .net = net, .table = name, .entryinfo = e, .target = t->u.kernel.target, .targinfo = t->data, .hook_mask = e->comefrom, .family = NFPROTO_IPV6, }; return xt_check_target(&par, t->u.target_size - sizeof(*t), e->ipv6.proto, e->ipv6.invflags & IP6T_INV_PROTO); } static int find_check_entry(struct ip6t_entry *e, struct net *net, const char *name, unsigned int size, struct xt_percpu_counter_alloc_state *alloc_state) { struct xt_entry_target *t; struct xt_target *target; int ret; unsigned int j; struct xt_mtchk_param mtpar; struct xt_entry_match *ematch; if (!xt_percpu_counter_alloc(alloc_state, &e->counters)) return -ENOMEM; j = 0; memset(&mtpar, 0, sizeof(mtpar)); mtpar.net = net; mtpar.table = name; mtpar.entryinfo = &e->ipv6; mtpar.hook_mask = e->comefrom; mtpar.family = NFPROTO_IPV6; xt_ematch_foreach(ematch, e) { ret = find_check_match(ematch, &mtpar); if (ret != 0) goto cleanup_matches; ++j; } t = ip6t_get_target(e); target = xt_request_find_target(NFPROTO_IPV6, t->u.user.name, t->u.user.revision); if (IS_ERR(target)) { ret = PTR_ERR(target); goto cleanup_matches; } t->u.kernel.target = target; ret = check_target(e, net, name); if (ret) goto err; return 0; err: module_put(t->u.kernel.target->me); cleanup_matches: xt_ematch_foreach(ematch, e) { if (j-- == 0) break; cleanup_match(ematch, net); } xt_percpu_counter_free(&e->counters); return ret; } static bool check_underflow(const struct ip6t_entry *e) { const struct xt_entry_target *t; unsigned int verdict; if (!unconditional(e)) return false; t = ip6t_get_target_c(e); if (strcmp(t->u.user.name, XT_STANDARD_TARGET) != 0) return false; verdict = ((struct xt_standard_target *)t)->verdict; 
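	/*
	 * Standard-target verdicts below zero encode the builtin result as
	 * -(NF_xxx) - 1 (so NF_DROP is stored as -1, NF_ACCEPT as -2); the
	 * next line inverts that mapping before comparing.
	 */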
verdict = -verdict - 1; return verdict == NF_DROP || verdict == NF_ACCEPT; } static int check_entry_size_and_hooks(struct ip6t_entry *e, struct xt_table_info *newinfo, const unsigned char *base, const unsigned char *limit, const unsigned int *hook_entries, const unsigned int *underflows, unsigned int valid_hooks) { unsigned int h; int err; if ((unsigned long)e % __alignof__(struct ip6t_entry) != 0 || (unsigned char *)e + sizeof(struct ip6t_entry) >= limit || (unsigned char *)e + e->next_offset > limit) return -EINVAL; if (e->next_offset < sizeof(struct ip6t_entry) + sizeof(struct xt_entry_target)) return -EINVAL; if (!ip6_checkentry(&e->ipv6)) return -EINVAL; err = xt_check_entry_offsets(e, e->elems, e->target_offset, e->next_offset); if (err) return err; /* Check hooks & underflows */ for (h = 0; h < NF_INET_NUMHOOKS; h++) { if (!(valid_hooks & (1 << h))) continue; if ((unsigned char *)e - base == hook_entries[h]) newinfo->hook_entry[h] = hook_entries[h]; if ((unsigned char *)e - base == underflows[h]) { if (!check_underflow(e)) return -EINVAL; newinfo->underflow[h] = underflows[h]; } } /* Clear counters and comefrom */ e->counters = ((struct xt_counters) { 0, 0 }); e->comefrom = 0; return 0; } static void cleanup_entry(struct ip6t_entry *e, struct net *net) { struct xt_tgdtor_param par; struct xt_entry_target *t; struct xt_entry_match *ematch; /* Cleanup all matches */ xt_ematch_foreach(ematch, e) cleanup_match(ematch, net); t = ip6t_get_target(e); par.net = net; par.target = t->u.kernel.target; par.targinfo = t->data; par.family = NFPROTO_IPV6; if (par.target->destroy != NULL) par.target->destroy(&par); module_put(par.target->me); xt_percpu_counter_free(&e->counters); } /* Checks and translates the user-supplied table segment (held in newinfo) */ static int translate_table(struct net *net, struct xt_table_info *newinfo, void *entry0, const struct ip6t_replace *repl) { struct xt_percpu_counter_alloc_state alloc_state = { 0 }; struct ip6t_entry *iter; unsigned int *offsets; unsigned int i; int ret = 0; newinfo->size = repl->size; newinfo->number = repl->num_entries; /* Init all hooks to impossible value. */ for (i = 0; i < NF_INET_NUMHOOKS; i++) { newinfo->hook_entry[i] = 0xFFFFFFFF; newinfo->underflow[i] = 0xFFFFFFFF; } offsets = xt_alloc_entry_offsets(newinfo->number); if (!offsets) return -ENOMEM; i = 0; /* Walk through entries, checking offsets. 
*/ xt_entry_foreach(iter, entry0, newinfo->size) { ret = check_entry_size_and_hooks(iter, newinfo, entry0, entry0 + repl->size, repl->hook_entry, repl->underflow, repl->valid_hooks); if (ret != 0) goto out_free; if (i < repl->num_entries) offsets[i] = (void *)iter - entry0; ++i; if (strcmp(ip6t_get_target(iter)->u.user.name, XT_ERROR_TARGET) == 0) ++newinfo->stacksize; } ret = -EINVAL; if (i != repl->num_entries) goto out_free; ret = xt_check_table_hooks(newinfo, repl->valid_hooks); if (ret) goto out_free; if (!mark_source_chains(newinfo, repl->valid_hooks, entry0, offsets)) { ret = -ELOOP; goto out_free; } kvfree(offsets); /* Finally, each sanity check must pass */ i = 0; xt_entry_foreach(iter, entry0, newinfo->size) { ret = find_check_entry(iter, net, repl->name, repl->size, &alloc_state); if (ret != 0) break; ++i; } if (ret != 0) { xt_entry_foreach(iter, entry0, newinfo->size) { if (i-- == 0) break; cleanup_entry(iter, net); } return ret; } return ret; out_free: kvfree(offsets); return ret; } static void get_counters(const struct xt_table_info *t, struct xt_counters counters[]) { struct ip6t_entry *iter; unsigned int cpu; unsigned int i; for_each_possible_cpu(cpu) { seqcount_t *s = &per_cpu(xt_recseq, cpu); i = 0; xt_entry_foreach(iter, t->entries, t->size) { struct xt_counters *tmp; u64 bcnt, pcnt; unsigned int start; tmp = xt_get_per_cpu_counter(&iter->counters, cpu); do { start = read_seqcount_begin(s); bcnt = tmp->bcnt; pcnt = tmp->pcnt; } while (read_seqcount_retry(s, start)); ADD_COUNTER(counters[i], bcnt, pcnt); ++i; cond_resched(); } } } static void get_old_counters(const struct xt_table_info *t, struct xt_counters counters[]) { struct ip6t_entry *iter; unsigned int cpu, i; for_each_possible_cpu(cpu) { i = 0; xt_entry_foreach(iter, t->entries, t->size) { const struct xt_counters *tmp; tmp = xt_get_per_cpu_counter(&iter->counters, cpu); ADD_COUNTER(counters[i], tmp->bcnt, tmp->pcnt); ++i; } cond_resched(); } } static struct xt_counters *alloc_counters(const struct xt_table *table) { unsigned int countersize; struct xt_counters *counters; const struct xt_table_info *private = table->private; /* We need atomic snapshot of counters: rest doesn't change (other than comefrom, which userspace doesn't care about). */ countersize = sizeof(struct xt_counters) * private->number; counters = vzalloc(countersize); if (counters == NULL) return ERR_PTR(-ENOMEM); get_counters(private, counters); return counters; } static int copy_entries_to_user(unsigned int total_size, const struct xt_table *table, void __user *userptr) { unsigned int off, num; const struct ip6t_entry *e; struct xt_counters *counters; const struct xt_table_info *private = table->private; int ret = 0; const void *loc_cpu_entry; counters = alloc_counters(table); if (IS_ERR(counters)) return PTR_ERR(counters); loc_cpu_entry = private->entries; /* FIXME: use iterator macros --RR */ /* ... 
then go back and fix counters and names */ for (off = 0, num = 0; off < total_size; off += e->next_offset, num++){ unsigned int i; const struct xt_entry_match *m; const struct xt_entry_target *t; e = loc_cpu_entry + off; if (copy_to_user(userptr + off, e, sizeof(*e))) { ret = -EFAULT; goto free_counters; } if (copy_to_user(userptr + off + offsetof(struct ip6t_entry, counters), &counters[num], sizeof(counters[num])) != 0) { ret = -EFAULT; goto free_counters; } for (i = sizeof(struct ip6t_entry); i < e->target_offset; i += m->u.match_size) { m = (void *)e + i; if (xt_match_to_user(m, userptr + off + i)) { ret = -EFAULT; goto free_counters; } } t = ip6t_get_target_c(e); if (xt_target_to_user(t, userptr + off + e->target_offset)) { ret = -EFAULT; goto free_counters; } } free_counters: vfree(counters); return ret; } #ifdef CONFIG_NETFILTER_XTABLES_COMPAT static void compat_standard_from_user(void *dst, const void *src) { int v = *(compat_int_t *)src; if (v > 0) v += xt_compat_calc_jump(AF_INET6, v); memcpy(dst, &v, sizeof(v)); } static int compat_standard_to_user(void __user *dst, const void *src) { compat_int_t cv = *(int *)src; if (cv > 0) cv -= xt_compat_calc_jump(AF_INET6, cv); return copy_to_user(dst, &cv, sizeof(cv)) ? -EFAULT : 0; } static int compat_calc_entry(const struct ip6t_entry *e, const struct xt_table_info *info, const void *base, struct xt_table_info *newinfo) { const struct xt_entry_match *ematch; const struct xt_entry_target *t; unsigned int entry_offset; int off, i, ret; off = sizeof(struct ip6t_entry) - sizeof(struct compat_ip6t_entry); entry_offset = (void *)e - base; xt_ematch_foreach(ematch, e) off += xt_compat_match_offset(ematch->u.kernel.match); t = ip6t_get_target_c(e); off += xt_compat_target_offset(t->u.kernel.target); newinfo->size -= off; ret = xt_compat_add_offset(AF_INET6, entry_offset, off); if (ret) return ret; for (i = 0; i < NF_INET_NUMHOOKS; i++) { if (info->hook_entry[i] && (e < (struct ip6t_entry *)(base + info->hook_entry[i]))) newinfo->hook_entry[i] -= off; if (info->underflow[i] && (e < (struct ip6t_entry *)(base + info->underflow[i]))) newinfo->underflow[i] -= off; } return 0; } static int compat_table_info(const struct xt_table_info *info, struct xt_table_info *newinfo) { struct ip6t_entry *iter; const void *loc_cpu_entry; int ret; if (!newinfo || !info) return -EINVAL; /* we dont care about newinfo->entries */ memcpy(newinfo, info, offsetof(struct xt_table_info, entries)); newinfo->initial_entries = 0; loc_cpu_entry = info->entries; ret = xt_compat_init_offsets(AF_INET6, info->number); if (ret) return ret; xt_entry_foreach(iter, loc_cpu_entry, info->size) { ret = compat_calc_entry(iter, info, loc_cpu_entry, newinfo); if (ret != 0) return ret; } return 0; } #endif static int get_info(struct net *net, void __user *user, const int *len) { char name[XT_TABLE_MAXNAMELEN]; struct xt_table *t; int ret; if (*len != sizeof(struct ip6t_getinfo)) return -EINVAL; if (copy_from_user(name, user, sizeof(name)) != 0) return -EFAULT; name[XT_TABLE_MAXNAMELEN-1] = '\0'; #ifdef CONFIG_NETFILTER_XTABLES_COMPAT if (in_compat_syscall()) xt_compat_lock(AF_INET6); #endif t = xt_request_find_table_lock(net, AF_INET6, name); if (!IS_ERR(t)) { struct ip6t_getinfo info; const struct xt_table_info *private = t->private; #ifdef CONFIG_NETFILTER_XTABLES_COMPAT struct xt_table_info tmp; if (in_compat_syscall()) { ret = compat_table_info(private, &tmp); xt_compat_flush_offsets(AF_INET6); private = &tmp; } #endif memset(&info, 0, sizeof(info)); info.valid_hooks = t->valid_hooks; 
memcpy(info.hook_entry, private->hook_entry, sizeof(info.hook_entry)); memcpy(info.underflow, private->underflow, sizeof(info.underflow)); info.num_entries = private->number; info.size = private->size; strcpy(info.name, name); if (copy_to_user(user, &info, *len) != 0) ret = -EFAULT; else ret = 0; xt_table_unlock(t); module_put(t->me); } else ret = PTR_ERR(t); #ifdef CONFIG_NETFILTER_XTABLES_COMPAT if (in_compat_syscall()) xt_compat_unlock(AF_INET6); #endif return ret; } static int get_entries(struct net *net, struct ip6t_get_entries __user *uptr, const int *len) { int ret; struct ip6t_get_entries get; struct xt_table *t; if (*len < sizeof(get)) return -EINVAL; if (copy_from_user(&get, uptr, sizeof(get)) != 0) return -EFAULT; if (*len != sizeof(struct ip6t_get_entries) + get.size) return -EINVAL; get.name[sizeof(get.name) - 1] = '\0'; t = xt_find_table_lock(net, AF_INET6, get.name); if (!IS_ERR(t)) { struct xt_table_info *private = t->private; if (get.size == private->size) ret = copy_entries_to_user(private->size, t, uptr->entrytable); else ret = -EAGAIN; module_put(t->me); xt_table_unlock(t); } else ret = PTR_ERR(t); return ret; } static int __do_replace(struct net *net, const char *name, unsigned int valid_hooks, struct xt_table_info *newinfo, unsigned int num_counters, void __user *counters_ptr) { int ret; struct xt_table *t; struct xt_table_info *oldinfo; struct xt_counters *counters; struct ip6t_entry *iter; counters = xt_counters_alloc(num_counters); if (!counters) { ret = -ENOMEM; goto out; } t = xt_request_find_table_lock(net, AF_INET6, name); if (IS_ERR(t)) { ret = PTR_ERR(t); goto free_newinfo_counters_untrans; } /* You lied! */ if (valid_hooks != t->valid_hooks) { ret = -EINVAL; goto put_module; } oldinfo = xt_replace_table(t, num_counters, newinfo, &ret); if (!oldinfo) goto put_module; /* Update module usage count based on number of rules */ if ((oldinfo->number > oldinfo->initial_entries) || (newinfo->number <= oldinfo->initial_entries)) module_put(t->me); if ((oldinfo->number > oldinfo->initial_entries) && (newinfo->number <= oldinfo->initial_entries)) module_put(t->me); xt_table_unlock(t); get_old_counters(oldinfo, counters); /* Decrease module usage counts and free resource */ xt_entry_foreach(iter, oldinfo->entries, oldinfo->size) cleanup_entry(iter, net); xt_free_table_info(oldinfo); if (copy_to_user(counters_ptr, counters, sizeof(struct xt_counters) * num_counters) != 0) { /* Silent error, can't fail, new table is already in place */ net_warn_ratelimited("ip6tables: counters copy to user failed while replacing table\n"); } vfree(counters); return 0; put_module: module_put(t->me); xt_table_unlock(t); free_newinfo_counters_untrans: vfree(counters); out: return ret; } static int do_replace(struct net *net, sockptr_t arg, unsigned int len) { int ret; struct ip6t_replace tmp; struct xt_table_info *newinfo; void *loc_cpu_entry; struct ip6t_entry *iter; if (len < sizeof(tmp)) return -EINVAL; if (copy_from_sockptr(&tmp, arg, sizeof(tmp)) != 0) return -EFAULT; /* overflow check */ if (tmp.num_counters >= INT_MAX / sizeof(struct xt_counters)) return -ENOMEM; if (tmp.num_counters == 0) return -EINVAL; if ((u64)len < (u64)tmp.size + sizeof(tmp)) return -EINVAL; tmp.name[sizeof(tmp.name)-1] = 0; newinfo = xt_alloc_table_info(tmp.size); if (!newinfo) return -ENOMEM; loc_cpu_entry = newinfo->entries; if (copy_from_sockptr_offset(loc_cpu_entry, arg, sizeof(tmp), tmp.size) != 0) { ret = -EFAULT; goto free_newinfo; } ret = translate_table(net, newinfo, loc_cpu_entry, &tmp); if (ret != 0) 
goto free_newinfo; ret = __do_replace(net, tmp.name, tmp.valid_hooks, newinfo, tmp.num_counters, tmp.counters); if (ret) goto free_newinfo_untrans; return 0; free_newinfo_untrans: xt_entry_foreach(iter, loc_cpu_entry, newinfo->size) cleanup_entry(iter, net); free_newinfo: xt_free_table_info(newinfo); return ret; } static int do_add_counters(struct net *net, sockptr_t arg, unsigned int len) { unsigned int i; struct xt_counters_info tmp; struct xt_counters *paddc; struct xt_table *t; const struct xt_table_info *private; int ret = 0; struct ip6t_entry *iter; unsigned int addend; paddc = xt_copy_counters(arg, len, &tmp); if (IS_ERR(paddc)) return PTR_ERR(paddc); t = xt_find_table_lock(net, AF_INET6, tmp.name); if (IS_ERR(t)) { ret = PTR_ERR(t); goto free; } local_bh_disable(); private = t->private; if (private->number != tmp.num_counters) { ret = -EINVAL; goto unlock_up_free; } i = 0; addend = xt_write_recseq_begin(); xt_entry_foreach(iter, private->entries, private->size) { struct xt_counters *tmp; tmp = xt_get_this_cpu_counter(&iter->counters); ADD_COUNTER(*tmp, paddc[i].bcnt, paddc[i].pcnt); ++i; } xt_write_recseq_end(addend); unlock_up_free: local_bh_enable(); xt_table_unlock(t); module_put(t->me); free: vfree(paddc); return ret; } #ifdef CONFIG_NETFILTER_XTABLES_COMPAT struct compat_ip6t_replace { char name[XT_TABLE_MAXNAMELEN]; u32 valid_hooks; u32 num_entries; u32 size; u32 hook_entry[NF_INET_NUMHOOKS]; u32 underflow[NF_INET_NUMHOOKS]; u32 num_counters; compat_uptr_t counters; /* struct xt_counters * */ struct compat_ip6t_entry entries[]; }; static int compat_copy_entry_to_user(struct ip6t_entry *e, void __user **dstptr, unsigned int *size, struct xt_counters *counters, unsigned int i) { struct xt_entry_target *t; struct compat_ip6t_entry __user *ce; u_int16_t target_offset, next_offset; compat_uint_t origsize; const struct xt_entry_match *ematch; int ret = 0; origsize = *size; ce = *dstptr; if (copy_to_user(ce, e, sizeof(struct ip6t_entry)) != 0 || copy_to_user(&ce->counters, &counters[i], sizeof(counters[i])) != 0) return -EFAULT; *dstptr += sizeof(struct compat_ip6t_entry); *size -= sizeof(struct ip6t_entry) - sizeof(struct compat_ip6t_entry); xt_ematch_foreach(ematch, e) { ret = xt_compat_match_to_user(ematch, dstptr, size); if (ret != 0) return ret; } target_offset = e->target_offset - (origsize - *size); t = ip6t_get_target(e); ret = xt_compat_target_to_user(t, dstptr, size); if (ret) return ret; next_offset = e->next_offset - (origsize - *size); if (put_user(target_offset, &ce->target_offset) != 0 || put_user(next_offset, &ce->next_offset) != 0) return -EFAULT; return 0; } static int compat_find_calc_match(struct xt_entry_match *m, const struct ip6t_ip6 *ipv6, int *size) { struct xt_match *match; match = xt_request_find_match(NFPROTO_IPV6, m->u.user.name, m->u.user.revision); if (IS_ERR(match)) return PTR_ERR(match); m->u.kernel.match = match; *size += xt_compat_match_offset(match); return 0; } static void compat_release_entry(struct compat_ip6t_entry *e) { struct xt_entry_target *t; struct xt_entry_match *ematch; /* Cleanup all matches */ xt_ematch_foreach(ematch, e) module_put(ematch->u.kernel.match->me); t = compat_ip6t_get_target(e); module_put(t->u.kernel.target->me); } static int check_compat_entry_size_and_hooks(struct compat_ip6t_entry *e, struct xt_table_info *newinfo, unsigned int *size, const unsigned char *base, const unsigned char *limit) { struct xt_entry_match *ematch; struct xt_entry_target *t; struct xt_target *target; unsigned int entry_offset; unsigned int j; 
int ret, off; if ((unsigned long)e % __alignof__(struct compat_ip6t_entry) != 0 || (unsigned char *)e + sizeof(struct compat_ip6t_entry) >= limit || (unsigned char *)e + e->next_offset > limit) return -EINVAL; if (e->next_offset < sizeof(struct compat_ip6t_entry) + sizeof(struct compat_xt_entry_target)) return -EINVAL; if (!ip6_checkentry(&e->ipv6)) return -EINVAL; ret = xt_compat_check_entry_offsets(e, e->elems, e->target_offset, e->next_offset); if (ret) return ret; off = sizeof(struct ip6t_entry) - sizeof(struct compat_ip6t_entry); entry_offset = (void *)e - (void *)base; j = 0; xt_ematch_foreach(ematch, e) { ret = compat_find_calc_match(ematch, &e->ipv6, &off); if (ret != 0) goto release_matches; ++j; } t = compat_ip6t_get_target(e); target = xt_request_find_target(NFPROTO_IPV6, t->u.user.name, t->u.user.revision); if (IS_ERR(target)) { ret = PTR_ERR(target); goto release_matches; } t->u.kernel.target = target; off += xt_compat_target_offset(target); *size += off; ret = xt_compat_add_offset(AF_INET6, entry_offset, off); if (ret) goto out; return 0; out: module_put(t->u.kernel.target->me); release_matches: xt_ematch_foreach(ematch, e) { if (j-- == 0) break; module_put(ematch->u.kernel.match->me); } return ret; } static void compat_copy_entry_from_user(struct compat_ip6t_entry *e, void **dstptr, unsigned int *size, struct xt_table_info *newinfo, unsigned char *base) { struct xt_entry_target *t; struct ip6t_entry *de; unsigned int origsize; int h; struct xt_entry_match *ematch; origsize = *size; de = *dstptr; memcpy(de, e, sizeof(struct ip6t_entry)); memcpy(&de->counters, &e->counters, sizeof(e->counters)); *dstptr += sizeof(struct ip6t_entry); *size += sizeof(struct ip6t_entry) - sizeof(struct compat_ip6t_entry); xt_ematch_foreach(ematch, e) xt_compat_match_from_user(ematch, dstptr, size); de->target_offset = e->target_offset - (origsize - *size); t = compat_ip6t_get_target(e); xt_compat_target_from_user(t, dstptr, size); de->next_offset = e->next_offset - (origsize - *size); for (h = 0; h < NF_INET_NUMHOOKS; h++) { if ((unsigned char *)de - base < newinfo->hook_entry[h]) newinfo->hook_entry[h] -= origsize - *size; if ((unsigned char *)de - base < newinfo->underflow[h]) newinfo->underflow[h] -= origsize - *size; } } static int translate_compat_table(struct net *net, struct xt_table_info **pinfo, void **pentry0, const struct compat_ip6t_replace *compatr) { unsigned int i, j; struct xt_table_info *newinfo, *info; void *pos, *entry0, *entry1; struct compat_ip6t_entry *iter0; struct ip6t_replace repl; unsigned int size; int ret; info = *pinfo; entry0 = *pentry0; size = compatr->size; info->number = compatr->num_entries; j = 0; xt_compat_lock(AF_INET6); ret = xt_compat_init_offsets(AF_INET6, compatr->num_entries); if (ret) goto out_unlock; /* Walk through entries, checking offsets. 
*/ xt_entry_foreach(iter0, entry0, compatr->size) { ret = check_compat_entry_size_and_hooks(iter0, info, &size, entry0, entry0 + compatr->size); if (ret != 0) goto out_unlock; ++j; } ret = -EINVAL; if (j != compatr->num_entries) goto out_unlock; ret = -ENOMEM; newinfo = xt_alloc_table_info(size); if (!newinfo) goto out_unlock; memset(newinfo->entries, 0, size); newinfo->number = compatr->num_entries; for (i = 0; i < NF_INET_NUMHOOKS; i++) { newinfo->hook_entry[i] = compatr->hook_entry[i]; newinfo->underflow[i] = compatr->underflow[i]; } entry1 = newinfo->entries; pos = entry1; size = compatr->size; xt_entry_foreach(iter0, entry0, compatr->size) compat_copy_entry_from_user(iter0, &pos, &size, newinfo, entry1); /* all module references in entry0 are now gone. */ xt_compat_flush_offsets(AF_INET6); xt_compat_unlock(AF_INET6); memcpy(&repl, compatr, sizeof(*compatr)); for (i = 0; i < NF_INET_NUMHOOKS; i++) { repl.hook_entry[i] = newinfo->hook_entry[i]; repl.underflow[i] = newinfo->underflow[i]; } repl.num_counters = 0; repl.counters = NULL; repl.size = newinfo->size; ret = translate_table(net, newinfo, entry1, &repl); if (ret) goto free_newinfo; *pinfo = newinfo; *pentry0 = entry1; xt_free_table_info(info); return 0; free_newinfo: xt_free_table_info(newinfo); return ret; out_unlock: xt_compat_flush_offsets(AF_INET6); xt_compat_unlock(AF_INET6); xt_entry_foreach(iter0, entry0, compatr->size) { if (j-- == 0) break; compat_release_entry(iter0); } return ret; } static int compat_do_replace(struct net *net, sockptr_t arg, unsigned int len) { int ret; struct compat_ip6t_replace tmp; struct xt_table_info *newinfo; void *loc_cpu_entry; struct ip6t_entry *iter; if (len < sizeof(tmp)) return -EINVAL; if (copy_from_sockptr(&tmp, arg, sizeof(tmp)) != 0) return -EFAULT; /* overflow check */ if (tmp.num_counters >= INT_MAX / sizeof(struct xt_counters)) return -ENOMEM; if (tmp.num_counters == 0) return -EINVAL; if ((u64)len < (u64)tmp.size + sizeof(tmp)) return -EINVAL; tmp.name[sizeof(tmp.name)-1] = 0; newinfo = xt_alloc_table_info(tmp.size); if (!newinfo) return -ENOMEM; loc_cpu_entry = newinfo->entries; if (copy_from_sockptr_offset(loc_cpu_entry, arg, sizeof(tmp), tmp.size) != 0) { ret = -EFAULT; goto free_newinfo; } ret = translate_compat_table(net, &newinfo, &loc_cpu_entry, &tmp); if (ret != 0) goto free_newinfo; ret = __do_replace(net, tmp.name, tmp.valid_hooks, newinfo, tmp.num_counters, compat_ptr(tmp.counters)); if (ret) goto free_newinfo_untrans; return 0; free_newinfo_untrans: xt_entry_foreach(iter, loc_cpu_entry, newinfo->size) cleanup_entry(iter, net); free_newinfo: xt_free_table_info(newinfo); return ret; } struct compat_ip6t_get_entries { char name[XT_TABLE_MAXNAMELEN]; compat_uint_t size; struct compat_ip6t_entry entrytable[]; }; static int compat_copy_entries_to_user(unsigned int total_size, struct xt_table *table, void __user *userptr) { struct xt_counters *counters; const struct xt_table_info *private = table->private; void __user *pos; unsigned int size; int ret = 0; unsigned int i = 0; struct ip6t_entry *iter; counters = alloc_counters(table); if (IS_ERR(counters)) return PTR_ERR(counters); pos = userptr; size = total_size; xt_entry_foreach(iter, private->entries, total_size) { ret = compat_copy_entry_to_user(iter, &pos, &size, counters, i++); if (ret != 0) break; } vfree(counters); return ret; } static int compat_get_entries(struct net *net, struct compat_ip6t_get_entries __user *uptr, int *len) { int ret; struct compat_ip6t_get_entries get; struct xt_table *t; if (*len < sizeof(get)) 
return -EINVAL; if (copy_from_user(&get, uptr, sizeof(get)) != 0) return -EFAULT; if (*len != sizeof(struct compat_ip6t_get_entries) + get.size) return -EINVAL; get.name[sizeof(get.name) - 1] = '\0'; xt_compat_lock(AF_INET6); t = xt_find_table_lock(net, AF_INET6, get.name); if (!IS_ERR(t)) { const struct xt_table_info *private = t->private; struct xt_table_info info; ret = compat_table_info(private, &info); if (!ret && get.size == info.size) ret = compat_copy_entries_to_user(private->size, t, uptr->entrytable); else if (!ret) ret = -EAGAIN; xt_compat_flush_offsets(AF_INET6); module_put(t->me); xt_table_unlock(t); } else ret = PTR_ERR(t); xt_compat_unlock(AF_INET6); return ret; } #endif static int do_ip6t_set_ctl(struct sock *sk, int cmd, sockptr_t arg, unsigned int len) { int ret; if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) return -EPERM; switch (cmd) { case IP6T_SO_SET_REPLACE: #ifdef CONFIG_NETFILTER_XTABLES_COMPAT if (in_compat_syscall()) ret = compat_do_replace(sock_net(sk), arg, len); else #endif ret = do_replace(sock_net(sk), arg, len); break; case IP6T_SO_SET_ADD_COUNTERS: ret = do_add_counters(sock_net(sk), arg, len); break; default: ret = -EINVAL; } return ret; } static int do_ip6t_get_ctl(struct sock *sk, int cmd, void __user *user, int *len) { int ret; if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) return -EPERM; switch (cmd) { case IP6T_SO_GET_INFO: ret = get_info(sock_net(sk), user, len); break; case IP6T_SO_GET_ENTRIES: #ifdef CONFIG_NETFILTER_XTABLES_COMPAT if (in_compat_syscall()) ret = compat_get_entries(sock_net(sk), user, len); else #endif ret = get_entries(sock_net(sk), user, len); break; case IP6T_SO_GET_REVISION_MATCH: case IP6T_SO_GET_REVISION_TARGET: { struct xt_get_revision rev; int target; if (*len != sizeof(rev)) { ret = -EINVAL; break; } if (copy_from_user(&rev, user, sizeof(rev)) != 0) { ret = -EFAULT; break; } rev.name[sizeof(rev.name)-1] = 0; if (cmd == IP6T_SO_GET_REVISION_TARGET) target = 1; else target = 0; try_then_request_module(xt_find_revision(AF_INET6, rev.name, rev.revision, target, &ret), "ip6t_%s", rev.name); break; } default: ret = -EINVAL; } return ret; } static void __ip6t_unregister_table(struct net *net, struct xt_table *table) { struct xt_table_info *private; void *loc_cpu_entry; struct module *table_owner = table->me; struct ip6t_entry *iter; private = xt_unregister_table(table); /* Decrease module usage counts and free resources */ loc_cpu_entry = private->entries; xt_entry_foreach(iter, loc_cpu_entry, private->size) cleanup_entry(iter, net); if (private->number > private->initial_entries) module_put(table_owner); xt_free_table_info(private); } int ip6t_register_table(struct net *net, const struct xt_table *table, const struct ip6t_replace *repl, const struct nf_hook_ops *template_ops) { struct nf_hook_ops *ops; unsigned int num_ops; int ret, i; struct xt_table_info *newinfo; struct xt_table_info bootstrap = {0}; void *loc_cpu_entry; struct xt_table *new_table; newinfo = xt_alloc_table_info(repl->size); if (!newinfo) return -ENOMEM; loc_cpu_entry = newinfo->entries; memcpy(loc_cpu_entry, repl->entries, repl->size); ret = translate_table(net, newinfo, loc_cpu_entry, repl); if (ret != 0) { xt_free_table_info(newinfo); return ret; } new_table = xt_register_table(net, table, &bootstrap, newinfo); if (IS_ERR(new_table)) { struct ip6t_entry *iter; xt_entry_foreach(iter, loc_cpu_entry, newinfo->size) cleanup_entry(iter, net); xt_free_table_info(newinfo); return PTR_ERR(new_table); } if (!template_ops) return 0; num_ops = 
hweight32(table->valid_hooks); if (num_ops == 0) { ret = -EINVAL; goto out_free; } ops = kmemdup_array(template_ops, num_ops, sizeof(*ops), GFP_KERNEL); if (!ops) { ret = -ENOMEM; goto out_free; } for (i = 0; i < num_ops; i++) ops[i].priv = new_table; new_table->ops = ops; ret = nf_register_net_hooks(net, ops, num_ops); if (ret != 0) goto out_free; return ret; out_free: __ip6t_unregister_table(net, new_table); return ret; } void ip6t_unregister_table_pre_exit(struct net *net, const char *name) { struct xt_table *table = xt_find_table(net, NFPROTO_IPV6, name); if (table) nf_unregister_net_hooks(net, table->ops, hweight32(table->valid_hooks)); } void ip6t_unregister_table_exit(struct net *net, const char *name) { struct xt_table *table = xt_find_table(net, NFPROTO_IPV6, name); if (table) __ip6t_unregister_table(net, table); } /* The built-in targets: standard (NULL) and error. */ static struct xt_target ip6t_builtin_tg[] __read_mostly = { { .name = XT_STANDARD_TARGET, .targetsize = sizeof(int), .family = NFPROTO_IPV6, #ifdef CONFIG_NETFILTER_XTABLES_COMPAT .compatsize = sizeof(compat_int_t), .compat_from_user = compat_standard_from_user, .compat_to_user = compat_standard_to_user, #endif }, { .name = XT_ERROR_TARGET, .target = ip6t_error, .targetsize = XT_FUNCTION_MAXNAMELEN, .family = NFPROTO_IPV6, }, }; static struct nf_sockopt_ops ip6t_sockopts = { .pf = PF_INET6, .set_optmin = IP6T_BASE_CTL, .set_optmax = IP6T_SO_SET_MAX+1, .set = do_ip6t_set_ctl, .get_optmin = IP6T_BASE_CTL, .get_optmax = IP6T_SO_GET_MAX+1, .get = do_ip6t_get_ctl, .owner = THIS_MODULE, }; static int __net_init ip6_tables_net_init(struct net *net) { return xt_proto_init(net, NFPROTO_IPV6); } static void __net_exit ip6_tables_net_exit(struct net *net) { xt_proto_fini(net, NFPROTO_IPV6); } static struct pernet_operations ip6_tables_net_ops = { .init = ip6_tables_net_init, .exit = ip6_tables_net_exit, }; static int __init ip6_tables_init(void) { int ret; ret = register_pernet_subsys(&ip6_tables_net_ops); if (ret < 0) goto err1; /* No one else will be downing sem now, so we won't sleep */ ret = xt_register_targets(ip6t_builtin_tg, ARRAY_SIZE(ip6t_builtin_tg)); if (ret < 0) goto err2; /* Register setsockopt */ ret = nf_register_sockopt(&ip6t_sockopts); if (ret < 0) goto err4; return 0; err4: xt_unregister_targets(ip6t_builtin_tg, ARRAY_SIZE(ip6t_builtin_tg)); err2: unregister_pernet_subsys(&ip6_tables_net_ops); err1: return ret; } static void __exit ip6_tables_fini(void) { nf_unregister_sockopt(&ip6t_sockopts); xt_unregister_targets(ip6t_builtin_tg, ARRAY_SIZE(ip6t_builtin_tg)); unregister_pernet_subsys(&ip6_tables_net_ops); } EXPORT_SYMBOL(ip6t_register_table); EXPORT_SYMBOL(ip6t_unregister_table_pre_exit); EXPORT_SYMBOL(ip6t_unregister_table_exit); EXPORT_SYMBOL(ip6t_do_table); module_init(ip6_tables_init); module_exit(ip6_tables_fini);
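/*
 * Illustrative sketch (not from the original file): the usual shape of a
 * table module built on ip6t_register_table(). All "example_" names are
 * hypothetical; a real module obtains the initial entries blob from
 * ip6t_alloc_initial_table(), as ip6table_filter does, and the
 * ip6t_do_table() prototype is assumed to match the nf_hookfn form used
 * by current kernels.
 */
static const struct xt_table example_table = {
	.name		= "example",
	.valid_hooks	= 1 << NF_INET_LOCAL_IN,
	.me		= THIS_MODULE,
	.af		= NFPROTO_IPV6,
	.priority	= NF_IP6_PRI_FILTER,
};

static unsigned int
example_hook(void *priv, struct sk_buff *skb,
	     const struct nf_hook_state *state)
{
	/* priv was set to the registered table by ip6t_register_table() */
	return ip6t_do_table(priv, skb, state);
}

/* One template op per bit set in valid_hooks; the core copies them */
static const struct nf_hook_ops example_ops = {
	.hook		= example_hook,
	.pf		= NFPROTO_IPV6,
	.hooknum	= NF_INET_LOCAL_IN,
	.priority	= NF_IP6_PRI_FILTER,
};

static int __net_init example_net_init(struct net *net)
{
	struct ip6t_replace *repl = ip6t_alloc_initial_table(&example_table);
	int err;

	if (!repl)
		return -ENOMEM;
	err = ip6t_register_table(net, &example_table, repl, &example_ops);
	kfree(repl);
	return err;
}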
/* * net/tipc/name_distr.c: TIPC name distribution code * * Copyright (c) 2000-2006, 2014-2019, Ericsson AB * Copyright (c) 2005, 2010-2011, Wind River Systems * Copyright (c) 2020-2021, Red Hat Inc * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the names of the copyright holders nor the names of its * contributors may be used to endorse or promote products derived from * this software without specific prior written permission. * * Alternatively, this software may be distributed under the terms of the * GNU General Public License ("GPL") version 2 as published by the Free * Software Foundation. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE.
*/ #include "core.h" #include "link.h" #include "name_distr.h" int sysctl_tipc_named_timeout __read_mostly = 2000; /** * publ_to_item - add publication info to a publication message * @p: publication info * @i: location of item in the message */ static void publ_to_item(struct distr_item *i, struct publication *p) { i->type = htonl(p->sr.type); i->lower = htonl(p->sr.lower); i->upper = htonl(p->sr.upper); i->port = htonl(p->sk.ref); i->key = htonl(p->key); } /** * named_prepare_buf - allocate & initialize a publication message * @net: the associated network namespace * @type: message type * @size: payload size * @dest: destination node * * The buffer returned is of size INT_H_SIZE + payload size */ static struct sk_buff *named_prepare_buf(struct net *net, u32 type, u32 size, u32 dest) { struct sk_buff *buf = tipc_buf_acquire(INT_H_SIZE + size, GFP_ATOMIC); u32 self = tipc_own_addr(net); struct tipc_msg *msg; if (buf != NULL) { msg = buf_msg(buf); tipc_msg_init(self, msg, NAME_DISTRIBUTOR, type, INT_H_SIZE, dest); msg_set_size(msg, INT_H_SIZE + size); } return buf; } /** * tipc_named_publish - tell other nodes about a new publication by this node * @net: the associated network namespace * @p: the new publication */ struct sk_buff *tipc_named_publish(struct net *net, struct publication *p) { struct name_table *nt = tipc_name_table(net); struct distr_item *item; struct sk_buff *skb; if (p->scope == TIPC_NODE_SCOPE) { list_add_tail_rcu(&p->binding_node, &nt->node_scope); return NULL; } write_lock_bh(&nt->cluster_scope_lock); list_add_tail(&p->binding_node, &nt->cluster_scope); write_unlock_bh(&nt->cluster_scope_lock); skb = named_prepare_buf(net, PUBLICATION, ITEM_SIZE, 0); if (!skb) { pr_warn("Publication distribution failure\n"); return NULL; } msg_set_named_seqno(buf_msg(skb), nt->snd_nxt++); msg_set_non_legacy(buf_msg(skb)); item = (struct distr_item *)msg_data(buf_msg(skb)); publ_to_item(item, p); return skb; } /** * tipc_named_withdraw - tell other nodes about a withdrawn publication by this node * @net: the associated network namespace * @p: the withdrawn publication */ struct sk_buff *tipc_named_withdraw(struct net *net, struct publication *p) { struct name_table *nt = tipc_name_table(net); struct distr_item *item; struct sk_buff *skb; write_lock_bh(&nt->cluster_scope_lock); list_del(&p->binding_node); write_unlock_bh(&nt->cluster_scope_lock); if (p->scope == TIPC_NODE_SCOPE) return NULL; skb = named_prepare_buf(net, WITHDRAWAL, ITEM_SIZE, 0); if (!skb) { pr_warn("Withdrawal distribution failure\n"); return NULL; } msg_set_named_seqno(buf_msg(skb), nt->snd_nxt++); msg_set_non_legacy(buf_msg(skb)); item = (struct distr_item *)msg_data(buf_msg(skb)); publ_to_item(item, p); return skb; } /** * named_distribute - prepare name info for bulk distribution to another node * @net: the associated network namespace * @list: list of messages (buffers) to be returned from this function * @dnode: node to be updated * @pls: linked list of publication items to be packed into buffer chain * @seqno: sequence number for this message */ static void named_distribute(struct net *net, struct sk_buff_head *list, u32 dnode, struct list_head *pls, u16 seqno) { struct publication *publ; struct sk_buff *skb = NULL; struct distr_item *item = NULL; u32 msg_dsz = ((tipc_node_get_mtu(net, dnode, 0, false) - INT_H_SIZE) / ITEM_SIZE) * ITEM_SIZE; u32 msg_rem = msg_dsz; struct tipc_msg *hdr; list_for_each_entry(publ, pls, binding_node) { /* Prepare next buffer: */ if (!skb) { skb = named_prepare_buf(net, PUBLICATION, 
msg_rem, dnode); if (!skb) { pr_warn("Bulk publication failure\n"); return; } hdr = buf_msg(skb); msg_set_bc_ack_invalid(hdr, true); msg_set_bulk(hdr); msg_set_non_legacy(hdr); item = (struct distr_item *)msg_data(hdr); } /* Pack publication into message: */ publ_to_item(item, publ); item++; msg_rem -= ITEM_SIZE; /* Append full buffer to list: */ if (!msg_rem) { __skb_queue_tail(list, skb); skb = NULL; msg_rem = msg_dsz; } } if (skb) { hdr = buf_msg(skb); msg_set_size(hdr, INT_H_SIZE + (msg_dsz - msg_rem)); skb_trim(skb, INT_H_SIZE + (msg_dsz - msg_rem)); __skb_queue_tail(list, skb); } hdr = buf_msg(skb_peek_tail(list)); msg_set_last_bulk(hdr); msg_set_named_seqno(hdr, seqno); } /** * tipc_named_node_up - tell specified node about all publications by this node * @net: the associated network namespace * @dnode: destination node * @capabilities: peer node's capabilities */ void tipc_named_node_up(struct net *net, u32 dnode, u16 capabilities) { struct name_table *nt = tipc_name_table(net); struct tipc_net *tn = tipc_net(net); struct sk_buff_head head; u16 seqno; __skb_queue_head_init(&head); spin_lock_bh(&tn->nametbl_lock); if (!(capabilities & TIPC_NAMED_BCAST)) nt->rc_dests++; seqno = nt->snd_nxt; spin_unlock_bh(&tn->nametbl_lock); read_lock_bh(&nt->cluster_scope_lock); named_distribute(net, &head, dnode, &nt->cluster_scope, seqno); tipc_node_xmit(net, &head, dnode, 0); read_unlock_bh(&nt->cluster_scope_lock); } /** * tipc_publ_purge - remove publication associated with a failed node * @net: the associated network namespace * @p: the publication to remove * @addr: failed node's address * * Invoked for each publication issued by a newly failed node. * Removes publication structure from name table & deletes it. */ static void tipc_publ_purge(struct net *net, struct publication *p, u32 addr) { struct tipc_net *tn = tipc_net(net); struct publication *_p; struct tipc_uaddr ua; tipc_uaddr(&ua, TIPC_SERVICE_RANGE, p->scope, p->sr.type, p->sr.lower, p->sr.upper); spin_lock_bh(&tn->nametbl_lock); _p = tipc_nametbl_remove_publ(net, &ua, &p->sk, p->key); if (_p) tipc_node_unsubscribe(net, &_p->binding_node, addr); spin_unlock_bh(&tn->nametbl_lock); if (_p) kfree_rcu(_p, rcu); } void tipc_publ_notify(struct net *net, struct list_head *nsub_list, u32 addr, u16 capabilities) { struct name_table *nt = tipc_name_table(net); struct tipc_net *tn = tipc_net(net); struct publication *publ, *tmp; list_for_each_entry_safe(publ, tmp, nsub_list, binding_node) tipc_publ_purge(net, publ, addr); spin_lock_bh(&tn->nametbl_lock); if (!(capabilities & TIPC_NAMED_BCAST)) nt->rc_dests--; spin_unlock_bh(&tn->nametbl_lock); } /** * tipc_update_nametbl - try to process a nametable update and notify * subscribers * @net: the associated network namespace * @i: location of item in the message * @node: node address * @dtype: name distributor message type * * tipc_nametbl_lock must be held. * Return: true if the name table was updated, otherwise false.
*/ static bool tipc_update_nametbl(struct net *net, struct distr_item *i, u32 node, u32 dtype) { struct publication *p = NULL; struct tipc_socket_addr sk; struct tipc_uaddr ua; u32 key = ntohl(i->key); tipc_uaddr(&ua, TIPC_SERVICE_RANGE, TIPC_CLUSTER_SCOPE, ntohl(i->type), ntohl(i->lower), ntohl(i->upper)); sk.ref = ntohl(i->port); sk.node = node; if (dtype == PUBLICATION) { p = tipc_nametbl_insert_publ(net, &ua, &sk, key); if (p) { tipc_node_subscribe(net, &p->binding_node, node); return true; } } else if (dtype == WITHDRAWAL) { p = tipc_nametbl_remove_publ(net, &ua, &sk, key); if (p) { tipc_node_unsubscribe(net, &p->binding_node, node); kfree_rcu(p, rcu); return true; } pr_warn_ratelimited("Failed to remove binding %u,%u from %u\n", ua.sr.type, ua.sr.lower, node); } else { pr_warn_ratelimited("Unknown name table message received\n"); } return false; } static struct sk_buff *tipc_named_dequeue(struct sk_buff_head *namedq, u16 *rcv_nxt, bool *open) { struct sk_buff *skb, *tmp; struct tipc_msg *hdr; u16 seqno; spin_lock_bh(&namedq->lock); skb_queue_walk_safe(namedq, skb, tmp) { if (unlikely(skb_linearize(skb))) { __skb_unlink(skb, namedq); kfree_skb(skb); continue; } hdr = buf_msg(skb); seqno = msg_named_seqno(hdr); if (msg_is_last_bulk(hdr)) { *rcv_nxt = seqno; *open = true; } if (msg_is_bulk(hdr) || msg_is_legacy(hdr)) { __skb_unlink(skb, namedq); spin_unlock_bh(&namedq->lock); return skb; } if (*open && (*rcv_nxt == seqno)) { (*rcv_nxt)++; __skb_unlink(skb, namedq); spin_unlock_bh(&namedq->lock); return skb; } if (less(seqno, *rcv_nxt)) { __skb_unlink(skb, namedq); kfree_skb(skb); continue; } } spin_unlock_bh(&namedq->lock); return NULL; } /** * tipc_named_rcv - process name table update messages sent by another node * @net: the associated network namespace * @namedq: queue to receive from * @rcv_nxt: store last received seqno here * @open: last bulk msg was received (FIXME) */ void tipc_named_rcv(struct net *net, struct sk_buff_head *namedq, u16 *rcv_nxt, bool *open) { struct tipc_net *tn = tipc_net(net); struct distr_item *item; struct tipc_msg *hdr; struct sk_buff *skb; u32 count, node; spin_lock_bh(&tn->nametbl_lock); while ((skb = tipc_named_dequeue(namedq, rcv_nxt, open))) { hdr = buf_msg(skb); node = msg_orignode(hdr); item = (struct distr_item *)msg_data(hdr); count = msg_data_sz(hdr) / ITEM_SIZE; while (count--) { tipc_update_nametbl(net, item, node, msg_type(hdr)); item++; } kfree_skb(skb); } spin_unlock_bh(&tn->nametbl_lock); } /** * tipc_named_reinit - re-initialize local publications * @net: the associated network namespace * * This routine is called whenever TIPC networking is enabled. * All name table entries published by this node are updated to reflect * the node's new network address. */ void tipc_named_reinit(struct net *net) { struct name_table *nt = tipc_name_table(net); struct tipc_net *tn = tipc_net(net); struct publication *p; u32 self = tipc_own_addr(net); spin_lock_bh(&tn->nametbl_lock); list_for_each_entry_rcu(p, &nt->node_scope, binding_node) p->sk.node = self; list_for_each_entry_rcu(p, &nt->cluster_scope, binding_node) p->sk.node = self; nt->rc_dests = 0; spin_unlock_bh(&tn->nametbl_lock); }
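/*
 * Illustrative sketch (not part of the original file): receivers undo
 * publ_to_item()'s htonl() conversions field by field, exactly as
 * tipc_update_nametbl() does above. item_to_host() is a hypothetical
 * helper that makes the wire layout of struct distr_item explicit.
 */
static inline void item_to_host(const struct distr_item *i, u32 *type,
				u32 *lower, u32 *upper, u32 *port, u32 *key)
{
	*type	= ntohl(i->type);	/* service type */
	*lower	= ntohl(i->lower);	/* lower bound of the service range */
	*upper	= ntohl(i->upper);	/* upper bound of the service range */
	*port	= ntohl(i->port);	/* publishing socket reference */
	*key	= ntohl(i->key);	/* publication key */
}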
/* SPDX-License-Identifier: GPL-2.0-only */ /* * Media entity * * Copyright (C) 2010 Nokia Corporation * * Contacts: Laurent Pinchart <laurent.pinchart@ideasonboard.com> * Sakari Ailus <sakari.ailus@iki.fi> */ #ifndef _MEDIA_ENTITY_H #define _MEDIA_ENTITY_H #include <linux/bitmap.h> #include <linux/bug.h> #include <linux/container_of.h> #include <linux/fwnode.h> #include <linux/list.h> #include <linux/media.h> #include <linux/minmax.h> #include <linux/types.h> /* Enums used internally at the media controller to represent graphs */ /** * enum media_gobj_type - type of a graph object * * @MEDIA_GRAPH_ENTITY: Identify a media entity * @MEDIA_GRAPH_PAD: Identify a media pad * @MEDIA_GRAPH_LINK: Identify a media link * @MEDIA_GRAPH_INTF_DEVNODE: Identify a media Kernel API interface via * a device node */ enum media_gobj_type { MEDIA_GRAPH_ENTITY, MEDIA_GRAPH_PAD, MEDIA_GRAPH_LINK, MEDIA_GRAPH_INTF_DEVNODE, }; #define MEDIA_BITS_PER_TYPE 8 #define
MEDIA_BITS_PER_ID (32 - MEDIA_BITS_PER_TYPE) #define MEDIA_ID_MASK GENMASK_ULL(MEDIA_BITS_PER_ID - 1, 0) /* Structs to represent the objects that belong to a media graph */ /** * struct media_gobj - Define a graph object. * * @mdev: Pointer to the struct &media_device that owns the object * @id: Non-zero object ID. The ID must be unique inside a media_device, * as it is composed of %MEDIA_BITS_PER_TYPE bits to store the type * plus %MEDIA_BITS_PER_ID bits to store the ID * @list: List entry stored in one of the per-type mdev object lists * * All objects in the media graph should have this struct embedded */ struct media_gobj { struct media_device *mdev; u32 id; struct list_head list; }; #define MEDIA_ENTITY_ENUM_MAX_DEPTH 16 /** * struct media_entity_enum - An enumeration of media entities. * * @bmap: Bit map in which each bit represents one entity at struct * media_entity->internal_idx. * @idx_max: Number of bits in bmap */ struct media_entity_enum { unsigned long *bmap; int idx_max; }; /** * struct media_graph - Media graph traversal state * * @stack: Graph traversal stack; the stack contains information * on the path of media entities to be walked and the * links through which they were reached. * @stack.entity: pointer to &struct media_entity at the graph. * @stack.link: pointer to &struct list_head. * @ent_enum: Visited entities * @top: The top of the stack */ struct media_graph { struct { struct media_entity *entity; struct list_head *link; } stack[MEDIA_ENTITY_ENUM_MAX_DEPTH]; struct media_entity_enum ent_enum; int top; }; /** * struct media_pipeline - Media pipeline related information * * @allocated: Media pipeline allocated and freed by the framework * @mdev: The media device the pipeline is part of * @pads: List of media_pipeline_pad * @start_count: Media pipeline start - stop count */ struct media_pipeline { bool allocated; struct media_device *mdev; struct list_head pads; int start_count; }; /** * struct media_pipeline_pad - A pad part of a media pipeline * * @list: Entry in the media_pad pads list * @pipe: The media_pipeline that the pad is part of * @pad: The media pad * * This structure associates a pad with a media pipeline. Instances of * media_pipeline_pad are created by media_pipeline_start() when it builds the * pipeline, and stored in the &media_pad.pads list. media_pipeline_stop() * removes the entries from the list and deletes them. */ struct media_pipeline_pad { struct list_head list; struct media_pipeline *pipe; struct media_pad *pad; }; /** * struct media_pipeline_pad_iter - Iterator for media_pipeline_for_each_pad * * @cursor: The current element */ struct media_pipeline_pad_iter { struct list_head *cursor; }; /** * struct media_pipeline_entity_iter - Iterator for media_pipeline_for_each_entity * * @ent_enum: The entity enumeration tracker * @cursor: The current element */ struct media_pipeline_entity_iter { struct media_entity_enum ent_enum; struct list_head *cursor; };
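/*
 * Illustrative sketch (not part of the original header): every pad that
 * media_pipeline_start() added to a pipeline hangs off the embedded list
 * node above. Drivers normally use the media_pipeline_for_each_pad()
 * helper named in the iterator kdoc; the raw walk below only illustrates
 * the data layout. The "example_" name is hypothetical.
 */
static inline unsigned int
example_count_pipeline_pads(struct media_pipeline *pipe)
{
	struct media_pipeline_pad *ppad;
	unsigned int n = 0;

	list_for_each_entry(ppad, &pipe->pads, list)
		n++;

	return n;
}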
/** * struct media_link - A link object part of a media graph. * * @graph_obj: Embedded structure containing the media object common data * @list: Linked list associated with an entity or an interface that * owns the link. * @gobj0: Part of a union. Used to get the pointer for the first * graph_object of the link. * @source: Part of a union. Used only if the first object (gobj0) is * a pad. In that case, it represents the source pad. * @intf: Part of a union. Used only if the first object (gobj0) is * an interface. * @gobj1: Part of a union. Used to get the pointer for the second * graph_object of the link. * @sink: Part of a union. Used only if the second object (gobj1) is * a pad. In that case, it represents the sink pad. * @entity: Part of a union. Used only if the second object (gobj1) is * an entity. * @reverse: Pointer to the link for the reverse direction of a pad to pad * link. * @flags: Link flags, as defined in uapi/media.h (MEDIA_LNK_FL_*) * @is_backlink: Indicate if the link is a backlink. */ struct media_link { struct media_gobj graph_obj; struct list_head list; union { struct media_gobj *gobj0; struct media_pad *source; struct media_interface *intf; }; union { struct media_gobj *gobj1; struct media_pad *sink; struct media_entity *entity; }; struct media_link *reverse; unsigned long flags; bool is_backlink; }; /** * enum media_pad_signal_type - type of the signal inside a media pad * * @PAD_SIGNAL_DEFAULT: * Default signal. Use this when all inputs or all outputs are * uniquely identified by the pad number. * @PAD_SIGNAL_ANALOG: * The pad contains an analog signal. It can be Radio Frequency, * Intermediate Frequency, a baseband signal or sub-carriers. * Tuner inputs, IF-PLL demodulators, composite and s-video signals * should use it. * @PAD_SIGNAL_DV: * Contains a digital video signal, which can be a bitstream of samples * taken from an analog TV video source. In such a case, it usually * also contains the VBI data. * @PAD_SIGNAL_AUDIO: * Contains an Intermediate Frequency analog signal from an audio * sub-carrier or an audio bitstream. IF signals are provided by tuners * and consumed by audio AM/FM decoders. Bitstream audio is provided by * an audio decoder. */ enum media_pad_signal_type { PAD_SIGNAL_DEFAULT = 0, PAD_SIGNAL_ANALOG, PAD_SIGNAL_DV, PAD_SIGNAL_AUDIO, }; /** * struct media_pad - A media pad graph object. * * @graph_obj: Embedded structure containing the media object common data * @entity: Entity this pad belongs to * @index: Pad index in the entity pads array, numbered from 0 to n * @num_links: Number of links connected to this pad * @sig_type: Type of the signal inside a media pad * @flags: Pad flags, as defined in * :ref:`include/uapi/linux/media.h <media_header>` * (seek for ``MEDIA_PAD_FL_*``) * @pipe: Pipeline this pad belongs to. Use media_entity_pipeline() to * access this field. */ struct media_pad { struct media_gobj graph_obj; /* must be first field in struct */ struct media_entity *entity; u16 index; u16 num_links; enum media_pad_signal_type sig_type; unsigned long flags; /* * The fields below are private, and should only be accessed via * appropriate functions. */ struct media_pipeline *pipe; };
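/*
 * Illustrative sketch (not part of the original header): a driver declares
 * its pads statically, sets only the direction flags, and then hands the
 * array to media_entity_pads_init() (declared further down in this header),
 * which fills in the remaining fields. The "example_" name is hypothetical.
 */
static inline int example_init_two_pads(struct media_entity *entity,
					struct media_pad pads[2])
{
	pads[0].flags = MEDIA_PAD_FL_SINK;	/* pad 0: input */
	pads[1].flags = MEDIA_PAD_FL_SOURCE;	/* pad 1: output */

	return media_entity_pads_init(entity, 2, pads);
}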
/** * struct media_entity_operations - Media entity operations * @get_fwnode_pad: Return the pad number based on a fwnode endpoint or * a negative value on error. This operation can be used * to map a fwnode to a media pad number. Optional. * @link_setup: Notify the entity of link changes. The operation can * return an error, in which case link setup will be * cancelled. Optional. * @link_validate: Return whether a link is valid from the entity point of * view. The media_pipeline_start() function * validates all links by calling this operation. Optional. * @has_pad_interdep: Return whether two pads of the entity are * interdependent. If two pads are interdependent they are * part of the same pipeline and enabling one of the pads * means that the other pad will become "locked" and * doesn't allow configuration changes. pad0 and pad1 are * guaranteed to not both be sinks or sources. Never call * the .has_pad_interdep() operation directly, always use * media_entity_has_pad_interdep(). * Optional: If the operation isn't implemented all pads * will be considered as interdependent. * * .. note:: * * These callbacks are called with the struct &media_device.graph_mutex * mutex held. */ struct media_entity_operations { int (*get_fwnode_pad)(struct media_entity *entity, struct fwnode_endpoint *endpoint); int (*link_setup)(struct media_entity *entity, const struct media_pad *local, const struct media_pad *remote, u32 flags); int (*link_validate)(struct media_link *link); bool (*has_pad_interdep)(struct media_entity *entity, unsigned int pad0, unsigned int pad1); }; /** * enum media_entity_type - Media entity type * * @MEDIA_ENTITY_TYPE_BASE: * The entity isn't embedded in another subsystem structure. * @MEDIA_ENTITY_TYPE_VIDEO_DEVICE: * The entity is embedded in a struct video_device instance. * @MEDIA_ENTITY_TYPE_V4L2_SUBDEV: * The entity is embedded in a struct v4l2_subdev instance. * * Media entity objects are often not instantiated directly, but the media * entity structure is inherited by (through embedding) other subsystem-specific * structures. The media entity type identifies the type of the subclass * structure that implements a media entity instance. * * This allows runtime type identification of media entities and safe casting to * the correct object type. For instance, a media entity structure instance * embedded in a v4l2_subdev structure instance will have the type * %MEDIA_ENTITY_TYPE_V4L2_SUBDEV and can safely be cast to a &v4l2_subdev * structure using the container_of() macro. */ enum media_entity_type { MEDIA_ENTITY_TYPE_BASE, MEDIA_ENTITY_TYPE_VIDEO_DEVICE, MEDIA_ENTITY_TYPE_V4L2_SUBDEV, };
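/*
 * Illustrative sketch (not part of the original header): a minimal
 * media_entity_operations instance. Returning 0 from .link_validate
 * accepts every link; per the note above, media_pipeline_start() invokes
 * the callback with media_device.graph_mutex held. The "example_" names
 * are hypothetical.
 */
static int example_link_validate(struct media_link *link)
{
	/*
	 * A real driver would compare the formats configured on
	 * link->source and link->sink here.
	 */
	return 0;
}

static const struct media_entity_operations example_entity_ops = {
	.link_validate = example_link_validate,
};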
/** * struct media_entity - A media entity graph object. * * @graph_obj: Embedded structure containing the media object common data. * @name: Entity name. * @obj_type: Type of the object that implements the media_entity. * @function: Entity main function, as defined in * :ref:`include/uapi/linux/media.h <media_header>` * (seek for ``MEDIA_ENT_F_*``) * @flags: Entity flags, as defined in * :ref:`include/uapi/linux/media.h <media_header>` * (seek for ``MEDIA_ENT_FL_*``) * @num_pads: Number of sink and source pads. * @num_links: Total number of links, forward and back, enabled and disabled. * @num_backlinks: Number of backlinks * @internal_idx: A unique internal entity-specific number. The numbers are * re-used if entities are unregistered or registered again. * @pads: Pads array with the size defined by @num_pads. * @links: List of data links. * @ops: Entity operations. * @use_count: Use count for the entity. * @info: Union with devnode information. Kept just for backward * compatibility. * @info.dev: Contains device major and minor info. * @info.dev.major: device node major, if the device is a devnode. * @info.dev.minor: device node minor, if the device is a devnode. * * .. note:: * * The @use_count reference count must never be negative, but is a signed * integer on purpose: a simple ``WARN_ON(<0)`` check can be used to detect * reference count bugs that would make it negative. */ struct media_entity { struct media_gobj graph_obj; /* must be first field in struct */ const char *name; enum media_entity_type obj_type; u32 function; unsigned long flags; u16 num_pads; u16 num_links; u16 num_backlinks; int internal_idx; struct media_pad *pads; struct list_head links; const struct media_entity_operations *ops; int use_count; union { struct { u32 major; u32 minor; } dev; } info; }; /** * media_entity_for_each_pad - Iterate on all pads in an entity * @entity: The entity the pads belong to * @iter: The iterator pad * * Iterate on all pads in a media entity. */ #define media_entity_for_each_pad(entity, iter) \ for (iter = (entity)->pads; \ iter < &(entity)->pads[(entity)->num_pads]; \ ++iter) /** * struct media_interface - A media interface graph object. * * @graph_obj: embedded graph object * @links: List of links pointing to graph entities * @type: Type of the interface as defined in * :ref:`include/uapi/linux/media.h <media_header>` * (seek for ``MEDIA_INTF_T_*``) * @flags: Interface flags as defined in * :ref:`include/uapi/linux/media.h <media_header>` * (seek for ``MEDIA_INTF_FL_*``) * * .. note:: * * Currently, no flags for &media_interface are defined. */ struct media_interface { struct media_gobj graph_obj; struct list_head links; u32 type; u32 flags; }; /** * struct media_intf_devnode - A media interface via a device node. * * @intf: embedded interface object * @major: Major number of a device node * @minor: Minor number of a device node */ struct media_intf_devnode { struct media_interface intf; /* Should match the fields at media_v2_intf_devnode */ u32 major; u32 minor; }; /** * media_entity_id() - return the media entity graph object id * * @entity: pointer to &media_entity */ static inline u32 media_entity_id(struct media_entity *entity) { return entity->graph_obj.id; } /** * media_type() - return the media object type * * @gobj: Pointer to the struct &media_gobj graph object */ static inline enum media_gobj_type media_type(struct media_gobj *gobj) { return gobj->id >> MEDIA_BITS_PER_ID; } /** * media_id() - return the media object ID * * @gobj: Pointer to the struct &media_gobj graph object */ static inline u32 media_id(struct media_gobj *gobj) { return gobj->id & MEDIA_ID_MASK; } /** * media_gobj_gen_id() - encapsulates type and ID into the object ID * * @type: object type as defined in enum &media_gobj_type. * @local_id: next ID, from struct &media_device.id. */ static inline u32 media_gobj_gen_id(enum media_gobj_type type, u64 local_id) { u32 id; id = type << MEDIA_BITS_PER_ID; id |= local_id & MEDIA_ID_MASK; return id; }
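/*
 * Illustrative sketch (not part of the original header): the id helpers
 * above round-trip. With MEDIA_BITS_PER_ID = 24, packing type
 * MEDIA_GRAPH_PAD (1) with local id 5 yields (1 << 24) | 5, and the
 * accessors recover both halves. The "example_" name is hypothetical.
 */
static inline bool example_gobj_id_roundtrip(void)
{
	struct media_gobj gobj = {
		.id = media_gobj_gen_id(MEDIA_GRAPH_PAD, 5),
	};

	return media_type(&gobj) == MEDIA_GRAPH_PAD && media_id(&gobj) == 5;
}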
*/ static inline bool is_media_entity_v4l2_subdev(struct media_entity *entity) { return entity && entity->obj_type == MEDIA_ENTITY_TYPE_V4L2_SUBDEV; } /** * media_entity_enum_init - Initialise an entity enumeration * * @ent_enum: Entity enumeration to be initialised * @mdev: The related media device * * Return: zero on success or a negative error code. */ __must_check int media_entity_enum_init(struct media_entity_enum *ent_enum, struct media_device *mdev); /** * media_entity_enum_cleanup - Release resources of an entity enumeration * * @ent_enum: Entity enumeration to be released */ void media_entity_enum_cleanup(struct media_entity_enum *ent_enum); /** * media_entity_enum_zero - Clear the entire enum * * @ent_enum: Entity enumeration to be cleared */ static inline void media_entity_enum_zero(struct media_entity_enum *ent_enum) { bitmap_zero(ent_enum->bmap, ent_enum->idx_max); } /** * media_entity_enum_set - Mark a single entity in the enum * * @ent_enum: Entity enumeration * @entity: Entity to be marked */ static inline void media_entity_enum_set(struct media_entity_enum *ent_enum, struct media_entity *entity) { if (WARN_ON(entity->internal_idx >= ent_enum->idx_max)) return; __set_bit(entity->internal_idx, ent_enum->bmap); } /** * media_entity_enum_clear - Unmark a single entity in the enum * * @ent_enum: Entity enumeration * @entity: Entity to be unmarked */ static inline void media_entity_enum_clear(struct media_entity_enum *ent_enum, struct media_entity *entity) { if (WARN_ON(entity->internal_idx >= ent_enum->idx_max)) return; __clear_bit(entity->internal_idx, ent_enum->bmap); } /** * media_entity_enum_test - Test whether the entity is marked * * @ent_enum: Entity enumeration * @entity: Entity to be tested * * Returns %true if the entity was marked. */ static inline bool media_entity_enum_test(struct media_entity_enum *ent_enum, struct media_entity *entity) { if (WARN_ON(entity->internal_idx >= ent_enum->idx_max)) return true; return test_bit(entity->internal_idx, ent_enum->bmap); } /** * media_entity_enum_test_and_set - Test whether the entity is marked, * and mark it * * @ent_enum: Entity enumeration * @entity: Entity to be tested * * Returns %true if the entity was already marked, and marks it in any case. */ static inline bool media_entity_enum_test_and_set(struct media_entity_enum *ent_enum, struct media_entity *entity) { if (WARN_ON(entity->internal_idx >= ent_enum->idx_max)) return true; return __test_and_set_bit(entity->internal_idx, ent_enum->bmap); } /** * media_entity_enum_empty - Test whether the entire enum is empty * * @ent_enum: Entity enumeration * * Return: %true if the entity enumeration is empty. */ static inline bool media_entity_enum_empty(struct media_entity_enum *ent_enum) { return bitmap_empty(ent_enum->bmap, ent_enum->idx_max); } /** * media_entity_enum_intersects - Test whether two enums intersect * * @ent_enum1: First entity enumeration * @ent_enum2: Second entity enumeration * * Return: %true if entity enumerations @ent_enum1 and @ent_enum2 intersect, * otherwise %false. */ static inline bool media_entity_enum_intersects( struct media_entity_enum *ent_enum1, struct media_entity_enum *ent_enum2) { WARN_ON(ent_enum1->idx_max != ent_enum2->idx_max); return bitmap_intersects(ent_enum1->bmap, ent_enum2->bmap, min(ent_enum1->idx_max, ent_enum2->idx_max)); }
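/*
 * Typical usage of the entity enumeration API above (an illustrative
 * sketch only, not part of the original header; mdev, entity and the
 * process_entity() helper are hypothetical):
 *
 *	struct media_entity_enum ent_enum;
 *	int ret;
 *
 *	ret = media_entity_enum_init(&ent_enum, mdev);
 *	if (ret)
 *		return ret;
 *
 *	if (!media_entity_enum_test_and_set(&ent_enum, entity))
 *		process_entity(entity);
 *
 *	media_entity_enum_cleanup(&ent_enum);
 */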
/** * gobj_to_entity - returns the struct &media_entity pointer that contains * the @gobj * * @gobj: Pointer to the struct &media_gobj graph object */ #define gobj_to_entity(gobj) \ container_of(gobj, struct media_entity, graph_obj) /** * gobj_to_pad - returns the struct &media_pad pointer that contains the * @gobj * * @gobj: Pointer to the struct &media_gobj graph object */ #define gobj_to_pad(gobj) \ container_of(gobj, struct media_pad, graph_obj) /** * gobj_to_link - returns the struct &media_link pointer that contains the * @gobj * * @gobj: Pointer to the struct &media_gobj graph object */ #define gobj_to_link(gobj) \ container_of(gobj, struct media_link, graph_obj) /** * gobj_to_intf - returns the struct &media_interface pointer that contains * the @gobj * * @gobj: Pointer to the struct &media_gobj graph object */ #define gobj_to_intf(gobj) \ container_of(gobj, struct media_interface, graph_obj) /** * intf_to_devnode - returns the struct media_intf_devnode pointer that * contains the @intf * * @intf: Pointer to struct &media_intf_devnode */ #define intf_to_devnode(intf) \ container_of(intf, struct media_intf_devnode, intf) /** * media_gobj_create - Initialize a graph object * * @mdev: Pointer to the &media_device that contains the object * @type: Type of the object * @gobj: Pointer to the struct &media_gobj graph object * * This routine initializes the embedded struct &media_gobj inside a * media graph object. It is called automatically if ``media_*_create`` * function calls are used. However, if the object (entity, link, pad, * interface) is embedded in some other object, this function should be * called before registering the object at the media controller. */ void media_gobj_create(struct media_device *mdev, enum media_gobj_type type, struct media_gobj *gobj); /** * media_gobj_destroy - Stop using a graph object on a media device * * @gobj: Pointer to the struct &media_gobj graph object * * This should be called by all routines like media_device_unregister() * that remove/destroy media graph objects. */ void media_gobj_destroy(struct media_gobj *gobj); /** * media_entity_pads_init() - Initialize the entity pads * * @entity: entity where the pads belong * @num_pads: total number of sink and source pads * @pads: Array of @num_pads pads. * * The pads array is managed by the entity driver and passed to * media_entity_pads_init() where its pointer will be stored in the * &media_entity structure. * * If no pads are needed, drivers could either directly fill * &media_entity->num_pads with 0 and &media_entity->pads with %NULL or call * this function that will do the same. * * As the number of pads is known in advance, the pads array is not allocated * dynamically but is managed by the entity driver. Most drivers will embed the * pads array in a driver-specific structure, avoiding dynamic allocation. * * Drivers must set the direction of every pad in the pads array before calling * media_entity_pads_init(). The function will initialize the other pads fields. */ int media_entity_pads_init(struct media_entity *entity, u16 num_pads, struct media_pad *pads); /** * media_entity_cleanup() - free resources associated with an entity * * @entity: entity where the pads belong * * This function must be called during the cleanup phase after unregistering * the entity (currently, it does nothing). * * Calling media_entity_cleanup() on a media_entity whose memory has been * zeroed but that has not been initialized with media_entity_pads_init() is * valid and is a no-op.
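 *
 * A typical pairing of media_entity_pads_init() and media_entity_cleanup(),
 * as an illustrative sketch (the driver structure and names are
 * hypothetical, not part of the original header)::
 *
 *	struct my_sensor {
 *		struct media_entity entity;
 *		struct media_pad pad;
 *	};
 *
 *	sensor->pad.flags = MEDIA_PAD_FL_SOURCE;
 *	ret = media_entity_pads_init(&sensor->entity, 1, &sensor->pad);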
*/ #if IS_ENABLED(CONFIG_MEDIA_CONTROLLER) static inline void media_entity_cleanup(struct media_entity *entity) {} #else #define media_entity_cleanup(entity) do { } while (false) #endif /** * media_get_pad_index() - retrieves a pad index from an entity * * @entity: entity where the pads belong * @pad_type: the type of the pad, one of MEDIA_PAD_FL_* pad types * @sig_type: type of signal of the pad to be searched * * This helper function finds the first pad index inside an entity that * satisfies both the @pad_type and @sig_type conditions. * * Return: * * On success, return the pad number. If the pad was not found or the media * entity is a NULL pointer, return -EINVAL. */ int media_get_pad_index(struct media_entity *entity, u32 pad_type, enum media_pad_signal_type sig_type); /** * media_create_pad_link() - creates a link between two entities. * * @source: pointer to &media_entity of the source pad. * @source_pad: number of the source pad in the pads array * @sink: pointer to &media_entity of the sink pad. * @sink_pad: number of the sink pad in the pads array. * @flags: Link flags, as defined in * :ref:`include/uapi/linux/media.h <media_header>` * ( seek for ``MEDIA_LNK_FL_*``) * * Valid values for flags: * * %MEDIA_LNK_FL_ENABLED * Indicates that the link is enabled and can be used to transfer media data. * When two or more links target a sink pad, only one of them can be * enabled at a time. * * %MEDIA_LNK_FL_IMMUTABLE * Indicates that the link enabled state can't be modified at runtime. If * %MEDIA_LNK_FL_IMMUTABLE is set, then %MEDIA_LNK_FL_ENABLED must also be * set, since an immutable link is always enabled. * * .. note:: * * Before calling this function, media_entity_pads_init() and * media_device_register_entity() must already have been called for both ends. */ __must_check int media_create_pad_link(struct media_entity *source, u16 source_pad, struct media_entity *sink, u16 sink_pad, u32 flags);
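/*
 * Sketch of a typical media_create_pad_link() call (illustrative only;
 * the sensor and csi entities are hypothetical and assumed to be already
 * registered, with pad 0 as source and sink respectively):
 *
 *	ret = media_create_pad_link(&sensor->entity, 0, &csi->entity, 0,
 *				    MEDIA_LNK_FL_ENABLED |
 *				    MEDIA_LNK_FL_IMMUTABLE);
 *	if (ret)
 *		return ret;
 */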
/** * media_create_pad_links() - creates links between two entities. * * @mdev: Pointer to the media_device that contains the object * @source_function: Function of the source entities. Used only if @source is * NULL. * @source: pointer to &media_entity of the source pad. If NULL, it will use * all entities that match @source_function. * @source_pad: number of the source pad in the pads array * @sink_function: Function of the sink entities. Used only if @sink is NULL. * @sink: pointer to &media_entity of the sink pad. If NULL, it will use * all entities that match @sink_function. * @sink_pad: number of the sink pad in the pads array. * @flags: Link flags, as defined in include/uapi/linux/media.h. * @allow_both_undefined: if %true, then both @source and @sink can be NULL. * In that case, it will create a crossbar between all entities that * match @source_function and all entities that match @sink_function. * If %false, it will return 0 and won't create any link if both @source * and @sink are NULL. * * Valid values for flags: * * A %MEDIA_LNK_FL_ENABLED flag indicates that the link is enabled and can be * used to transfer media data. If multiple links are created and this * flag is passed as an argument, only the first created link will have * this flag. * * A %MEDIA_LNK_FL_IMMUTABLE flag indicates that the link enabled state can't * be modified at runtime. If %MEDIA_LNK_FL_IMMUTABLE is set, then * %MEDIA_LNK_FL_ENABLED must also be set since an immutable link is * always enabled. * * It is common for some devices to have multiple source and/or sink entities * of the same type that should be linked. While media_create_pad_link() * creates link by link, this function is meant to allow 1:n, n:1 and even * cross-bar (n:n) links. * * .. note:: * * Before calling this function, media_entity_pads_init() and * media_device_register_entity() must already have been called for the * entities to be linked. */ int media_create_pad_links(const struct media_device *mdev, const u32 source_function, struct media_entity *source, const u16 source_pad, const u32 sink_function, struct media_entity *sink, const u16 sink_pad, u32 flags, const bool allow_both_undefined); void __media_entity_remove_links(struct media_entity *entity); /** * media_entity_remove_links() - remove all links associated with an entity * * @entity: pointer to &media_entity * * .. note:: * * This is called automatically when an entity is unregistered via * media_device_unregister_entity(). */ void media_entity_remove_links(struct media_entity *entity); /** * __media_entity_setup_link - Configure a media link without locking * @link: The link being configured * @flags: Link configuration flags * * The bulk of link setup is handled by the two entities connected through the * link. This function notifies both entities of the link configuration change. * * If the link is immutable or if the current and new configuration are * identical, return immediately. * * The caller is expected to hold the media device's &media_device.graph_mutex. * If not, media_entity_setup_link() should be used instead. */ int __media_entity_setup_link(struct media_link *link, u32 flags); /** * media_entity_setup_link() - changes the link flags at runtime * * @link: pointer to &media_link * @flags: the requested new link flags * * The only configurable property is the %MEDIA_LNK_FL_ENABLED link flag * to enable/disable a link. Links marked with the * %MEDIA_LNK_FL_IMMUTABLE link flag cannot be enabled or disabled. * * When a link is enabled or disabled, the media framework calls the * link_setup operation for the two entities at the source and sink of the * link, in that order. If the second link_setup call fails, another * link_setup call is made on the first entity to restore the original link * flags. * * Media device drivers can be notified of link setup operations by setting the * &media_device.link_notify pointer to a callback function. If provided, the * notification callback will be called before enabling and after disabling * links. * * Entity drivers must implement the link_setup operation if any of their links * is non-immutable. The operation must either configure the hardware or store * the configuration information to be applied later. * * Link configuration must not have any side effect on other links. If an * enabled link at a sink pad prevents another link at the same pad from * being enabled, the link_setup operation must return %-EBUSY and can't * implicitly disable the first enabled link. * * .. note:: * * The valid flag values for a link are the same as described in * media_create_pad_link() for pad-to-pad links, and in * media_create_intf_link() for interface-to-entity links. */ int media_entity_setup_link(struct media_link *link, u32 flags); /** * media_entity_find_link - Find a link between two pads * @source: Source pad * @sink: Sink pad * * Return: returns a pointer to the link between the two entities. If no * such link exists, return %NULL.
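 *
 * Usage sketch, combined with media_entity_setup_link() above
 * (illustrative only, not part of the original header)::
 *
 *	link = media_entity_find_link(&source->pads[1], &sink->pads[0]);
 *	if (link && !(link->flags & MEDIA_LNK_FL_ENABLED))
 *		ret = media_entity_setup_link(link, MEDIA_LNK_FL_ENABLED);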
*/ struct media_link *media_entity_find_link(struct media_pad *source, struct media_pad *sink); /** * media_pad_remote_pad_first - Find the first pad at the remote end of a link * @pad: Pad at the local end of the link * * Search for a remote pad connected to the given pad by iterating over all * links originating or terminating at that pad until an enabled link is found. * * Return: returns a pointer to the pad at the remote end of the first found * enabled link, or %NULL if no enabled link has been found. */ struct media_pad *media_pad_remote_pad_first(const struct media_pad *pad); /** * media_pad_remote_pad_unique - Find a remote pad connected to a pad * @pad: The pad * * Search for and return a remote pad connected to @pad through an enabled * link. If multiple (or no) remote pads are found, an error is returned. * * The uniqueness constraint makes this helper function suitable for entities * that support a single active source at a time on a given pad. * * Return: A pointer to the remote pad, or one of the following error pointers * if an error occurs: * * * -ENOTUNIQ - Multiple links are enabled * * -ENOLINK - No connected pad found */ struct media_pad *media_pad_remote_pad_unique(const struct media_pad *pad); /** * media_entity_remote_pad_unique - Find a remote pad connected to an entity * @entity: The entity * @type: The type of pad to find (MEDIA_PAD_FL_SINK or MEDIA_PAD_FL_SOURCE) * * Search for and return a remote pad of @type connected to @entity through an * enabled link. If multiple (or no) remote pads match these criteria, an error * is returned. * * The uniqueness constraint makes this helper function suitable for entities * that support a single active source or sink at a time. * * Return: A pointer to the remote pad, or one of the following error pointers * if an error occurs: * * * -ENOTUNIQ - Multiple links are enabled * * -ENOLINK - No connected pad found */ struct media_pad * media_entity_remote_pad_unique(const struct media_entity *entity, unsigned int type); /** * media_entity_remote_source_pad_unique - Find a remote source pad connected to * an entity * @entity: The entity * * Search for and return a remote source pad connected to @entity through an * enabled link. If multiple (or no) remote pads match these criteria, an error * is returned. * * The uniqueness constraint makes this helper function suitable for entities * that support a single active source at a time. * * Return: A pointer to the remote pad, or one of the following error pointers * if an error occurs: * * * -ENOTUNIQ - Multiple links are enabled * * -ENOLINK - No connected pad found */ static inline struct media_pad * media_entity_remote_source_pad_unique(const struct media_entity *entity) { return media_entity_remote_pad_unique(entity, MEDIA_PAD_FL_SOURCE); } /** * media_pad_is_streaming - Test if a pad is part of a streaming pipeline * @pad: The pad * * Return: True if the pad is part of a pipeline started with the * media_pipeline_start() function, false otherwise. */ static inline bool media_pad_is_streaming(const struct media_pad *pad) { return pad->pipe; } /** * media_entity_is_streaming - Test if an entity is part of a streaming pipeline * @entity: The entity * * Return: True if the entity is part of a pipeline started with the * media_pipeline_start() function, false otherwise. 
*/ static inline bool media_entity_is_streaming(const struct media_entity *entity) { struct media_pad *pad; media_entity_for_each_pad(entity, pad) { if (media_pad_is_streaming(pad)) return true; } return false; } /** * media_entity_pipeline - Get the media pipeline an entity is part of * @entity: The entity * * DEPRECATED: use media_pad_pipeline() instead. * * This function returns the media pipeline that an entity has been associated * with when constructing the pipeline with media_pipeline_start(). The pointer * remains valid until media_pipeline_stop() is called. * * In general, entities can be part of multiple pipelines, when carrying * multiple streams (either on different pads, or on the same pad using * multiplexed streams). This function is to be used only for entities that * do not support multiple pipelines. * * Return: The media_pipeline the entity is part of, or NULL if the entity is * not part of any pipeline. */ struct media_pipeline *media_entity_pipeline(struct media_entity *entity); /** * media_pad_pipeline - Get the media pipeline a pad is part of * @pad: The pad * * This function returns the media pipeline that a pad has been associated * with when constructing the pipeline with media_pipeline_start(). The pointer * remains valid until media_pipeline_stop() is called. * * Return: The media_pipeline the pad is part of, or NULL if the pad is * not part of any pipeline. */ struct media_pipeline *media_pad_pipeline(struct media_pad *pad); /** * media_entity_get_fwnode_pad - Get pad number from fwnode * * @entity: The entity * @fwnode: Pointer to the fwnode_handle which should be used to find the pad * @direction_flags: Expected direction of the pad, as defined in * :ref:`include/uapi/linux/media.h <media_header>` * (seek for ``MEDIA_PAD_FL_*``) * * This function can be used to resolve the media pad number from * a fwnode. This is useful for devices which use more complex * mappings of media pads. * * If the entity does not implement the get_fwnode_pad() operation * then this function searches the entity for the first pad that * matches the @direction_flags. * * Return: returns the pad number on success or a negative error code. */ int media_entity_get_fwnode_pad(struct media_entity *entity, const struct fwnode_handle *fwnode, unsigned long direction_flags); /** * media_graph_walk_init - Allocate resources used by graph walk. * * @graph: Media graph structure that will be used to walk the graph * @mdev: Pointer to the &media_device that contains the object * * This function is deprecated, use media_pipeline_for_each_pad() instead. * * The caller is required to hold the media_device graph_mutex during the graph * walk until the graph state is released. * * Returns zero on success or a negative error code otherwise. */ __must_check int media_graph_walk_init( struct media_graph *graph, struct media_device *mdev); /** * media_graph_walk_cleanup - Release resources used by graph walk. * * @graph: Media graph structure that will be used to walk the graph * * This function is deprecated, use media_pipeline_for_each_pad() instead. */ void media_graph_walk_cleanup(struct media_graph *graph); /** * media_graph_walk_start - Start walking the media graph at a * given entity * * @graph: Media graph structure that will be used to walk the graph * @entity: Starting entity * * This function is deprecated, use media_pipeline_for_each_pad() instead. * * Before using this function, media_graph_walk_init() must be * used to allocate resources used for walking the graph. 
This * function initializes the graph traversal structure to walk the * entities graph starting at the given entity. The traversal * structure must not be modified by the caller during graph * traversal. After the graph walk, the resources must be released * using media_graph_walk_cleanup(). */ void media_graph_walk_start(struct media_graph *graph, struct media_entity *entity); /** * media_graph_walk_next - Get the next entity in the graph * @graph: Media graph structure * * This function is deprecated, use media_pipeline_for_each_pad() instead. * * Perform a depth-first traversal of the given media entities graph. * * The graph structure must have been previously initialized with a call to * media_graph_walk_start(). * * Return: returns the next entity in the graph or %NULL if the whole graph * has been traversed. */ struct media_entity *media_graph_walk_next(struct media_graph *graph); /** * media_pipeline_start - Mark a pipeline as streaming * @origin: Starting pad * @pipe: Media pipeline to be assigned to all pads in the pipeline. * * Mark all pads connected to pad @origin through enabled links, either * directly or indirectly, as streaming. The given pipeline object is assigned * to every pad in the pipeline and stored in the media_pad pipe field. * * Calls to this function can be nested, in which case the same number of * media_pipeline_stop() calls will be required to stop streaming. The * pipeline pointer must be identical for all nested calls to * media_pipeline_start(). */ __must_check int media_pipeline_start(struct media_pad *origin, struct media_pipeline *pipe); /** * __media_pipeline_start - Mark a pipeline as streaming * * @origin: Starting pad * @pipe: Media pipeline to be assigned to all pads in the pipeline. * * .. note:: This is the non-locking version of media_pipeline_start() */ __must_check int __media_pipeline_start(struct media_pad *origin, struct media_pipeline *pipe); /** * media_pipeline_stop - Mark a pipeline as not streaming * @pad: Starting pad * * Mark all pads connected to a given pad through enabled links, either * directly or indirectly, as not streaming. The media_pad pipe field is * reset to %NULL. * * If multiple calls to media_pipeline_start() have been made, the same * number of calls to this function are required to mark the pipeline as not * streaming. */ void media_pipeline_stop(struct media_pad *pad); /** * __media_pipeline_stop - Mark a pipeline as not streaming * * @pad: Starting pad * * .. note:: This is the non-locking version of media_pipeline_stop() */ void __media_pipeline_stop(struct media_pad *pad); struct media_pad * __media_pipeline_pad_iter_next(struct media_pipeline *pipe, struct media_pipeline_pad_iter *iter, struct media_pad *pad); /** * media_pipeline_for_each_pad - Iterate on all pads in a media pipeline * @pipe: The pipeline * @iter: The iterator (struct media_pipeline_pad_iter) * @pad: The iterator pad * * Iterate on all pads in a media pipeline. This is only valid after the * pipeline has been built with media_pipeline_start() and before it gets * destroyed with media_pipeline_stop(). */ #define media_pipeline_for_each_pad(pipe, iter, pad) \ for (pad = __media_pipeline_pad_iter_next((pipe), iter, NULL); \ pad != NULL; \ pad = __media_pipeline_pad_iter_next((pipe), iter, pad))
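/*
 * Sketch of the start/iterate/stop pattern above (illustrative only;
 * "pad" is a hypothetical struct media_pad at the start of an already
 * linked pipeline):
 *
 *	struct media_pipeline pipe = { };
 *	struct media_pipeline_pad_iter iter;
 *	struct media_pad *p;
 *	int ret;
 *
 *	ret = media_pipeline_start(pad, &pipe);
 *	if (ret)
 *		return ret;
 *
 *	media_pipeline_for_each_pad(&pipe, &iter, p)
 *		pr_debug("pad %u of entity %s\n", p->index, p->entity->name);
 *
 *	media_pipeline_stop(pad);
 */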
/** * media_pipeline_entity_iter_init - Initialize a pipeline entity iterator * @pipe: The pipeline * @iter: The iterator * * This function must be called to initialize the iterator before using it in a * media_pipeline_for_each_entity() loop. The iterator must be destroyed by a * call to media_pipeline_entity_iter_cleanup() after the loop (including in * code paths that break from the loop). * * The same iterator can be used in multiple consecutive loops without being * destroyed and reinitialized. * * Return: 0 on success or a negative error code otherwise. */ int media_pipeline_entity_iter_init(struct media_pipeline *pipe, struct media_pipeline_entity_iter *iter); /** * media_pipeline_entity_iter_cleanup - Destroy a pipeline entity iterator * @iter: The iterator * * This function must be called to destroy iterators initialized with * media_pipeline_entity_iter_init(). */ void media_pipeline_entity_iter_cleanup(struct media_pipeline_entity_iter *iter); struct media_entity * __media_pipeline_entity_iter_next(struct media_pipeline *pipe, struct media_pipeline_entity_iter *iter, struct media_entity *entity); /** * media_pipeline_for_each_entity - Iterate on all entities in a media pipeline * @pipe: The pipeline * @iter: The iterator (struct media_pipeline_entity_iter) * @entity: The iterator entity * * Iterate on all entities in a media pipeline. This is only valid after the * pipeline has been built with media_pipeline_start() and before it gets * destroyed with media_pipeline_stop(). The iterator must be initialized with * media_pipeline_entity_iter_init() before iteration, and destroyed with * media_pipeline_entity_iter_cleanup() after (including in code paths that * break from the loop). */ #define media_pipeline_for_each_entity(pipe, iter, entity) \ for (entity = __media_pipeline_entity_iter_next((pipe), iter, NULL); \ entity != NULL; \ entity = __media_pipeline_entity_iter_next((pipe), iter, entity)) /** * media_pipeline_alloc_start - Mark a pipeline as streaming * @pad: Starting pad * * media_pipeline_alloc_start() is similar to media_pipeline_start() but instead * of working on a given pipeline the function will use an existing pipeline if * the pad is already part of a pipeline, or allocate a new pipeline. * * Calls to media_pipeline_alloc_start() must be matched with * media_pipeline_stop(). */ __must_check int media_pipeline_alloc_start(struct media_pad *pad); /** * media_devnode_create() - creates and initializes a device node interface * * @mdev: pointer to struct &media_device * @type: type of the interface, as given by * :ref:`include/uapi/linux/media.h <media_header>` * ( seek for ``MEDIA_INTF_T_*``) macros. * @flags: Interface flags, as defined in * :ref:`include/uapi/linux/media.h <media_header>` * ( seek for ``MEDIA_INTF_FL_*``) * @major: Device node major number. * @minor: Device node minor number. * * Return: on success, returns a pointer to the newly allocated * &media_intf_devnode. * * .. note:: * * Currently, no flags for &media_interface are defined. */ struct media_intf_devnode * __must_check media_devnode_create(struct media_device *mdev, u32 type, u32 flags, u32 major, u32 minor); /** * media_devnode_remove() - removes a device node interface * * @devnode: pointer to &media_intf_devnode to be freed. * * When a device node interface is removed, all links to it are automatically * removed.
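 *
 * Paired with media_devnode_create() above, as a sketch (illustrative
 * only; mdev and the device numbers are hypothetical)::
 *
 *	devnode = media_devnode_create(mdev, MEDIA_INTF_T_V4L_VIDEO, 0,
 *				       VIDEO_MAJOR, minor);
 *	if (!devnode)
 *		return -ENOMEM;
 *	...
 *	media_devnode_remove(devnode);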
*/ void media_devnode_remove(struct media_intf_devnode *devnode); /** * media_create_intf_link() - creates a link between an entity and an interface * * @entity: pointer to &media_entity * @intf: pointer to &media_interface * @flags: Link flags, as defined in * :ref:`include/uapi/linux/media.h <media_header>` * ( seek for ``MEDIA_LNK_FL_*``) * * Valid values for flags: * * %MEDIA_LNK_FL_ENABLED * Indicates that the interface is connected to the entity hardware. * That's the default value for interfaces. An interface may be disabled if * the hardware is busy due to the usage of some other interface that is * currently controlling the hardware. * * A typical example is a hybrid TV device that handles only one type of * stream at a given time. So, when digital TV is streaming, * the V4L2 interfaces won't be enabled, as such a device is not able to * also stream analog TV or radio. * * .. note:: * * Before calling this function, media_devnode_create() should be called for * the interface and media_device_register_entity() should be called for the * entity that will be part of the link. */ struct media_link * __must_check media_create_intf_link(struct media_entity *entity, struct media_interface *intf, u32 flags); /** * __media_remove_intf_link() - remove a single interface link * * @link: pointer to &media_link. * * .. note:: This is an unlocked version of media_remove_intf_link() */ void __media_remove_intf_link(struct media_link *link); /** * media_remove_intf_link() - remove a single interface link * * @link: pointer to &media_link. * * .. note:: Prefer to use this one, instead of __media_remove_intf_link() */ void media_remove_intf_link(struct media_link *link); /** * __media_remove_intf_links() - remove all links associated with an interface * * @intf: pointer to &media_interface * * .. note:: This is an unlocked version of media_remove_intf_links(). */ void __media_remove_intf_links(struct media_interface *intf); /** * media_remove_intf_links() - remove all links associated with an interface * * @intf: pointer to &media_interface * * .. note:: * * #) This is called automatically when an entity is unregistered via * media_device_unregister_entity() and by media_devnode_remove(). * * #) Prefer to use this one, instead of __media_remove_intf_links(). */ void media_remove_intf_links(struct media_interface *intf); /** * media_entity_call - Calls a struct media_entity_operations operation on * an entity * * @entity: entity where the @operation will be called * @operation: type of the operation. Should be the name of a member of * struct &media_entity_operations. * * This helper function will check if @operation is not %NULL. In that case, * it will issue a call to @operation\(@entity, @args\). */ #define media_entity_call(entity, operation, args...) \ (((entity)->ops && (entity)->ops->operation) ? \ (entity)->ops->operation((entity) , ##args) : -ENOIOCTLCMD)
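/*
 * Usage sketch for media_entity_call() (illustrative only; this mirrors
 * how the framework invokes the optional .link_setup() operation, with
 * -ENOIOCTLCMD returned when the entity does not implement it):
 *
 *	ret = media_entity_call(link->sink->entity, link_setup,
 *				link->sink, link->source, flags);
 *	if (ret < 0 && ret != -ENOIOCTLCMD)
 *		return ret;
 */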
/** * media_create_ancillary_link() - create an ancillary link between two * instances of &media_entity * * @primary: pointer to the primary &media_entity * @ancillary: pointer to the ancillary &media_entity * * Create an ancillary link between two entities, indicating that they * represent two connected pieces of hardware that form a single logical unit. * A typical example is a camera lens controller being linked to the sensor that * it is supporting. * * The function sets both MEDIA_LNK_FL_ENABLED and MEDIA_LNK_FL_IMMUTABLE for * the new link. */ struct media_link * media_create_ancillary_link(struct media_entity *primary, struct media_entity *ancillary); /** * __media_entity_next_link() - Iterate through a &media_entity's links * * @entity: pointer to the &media_entity * @link: pointer to a &media_link to hold the iterated values * @link_type: one of the MEDIA_LNK_FL_LINK_TYPE flags * * Return the next link of @entity matching the given link type. This * allows iteration through an entity's links whilst guaranteeing all of the * returned links are of the given type. */ struct media_link *__media_entity_next_link(struct media_entity *entity, struct media_link *link, unsigned long link_type); /** * for_each_media_entity_data_link() - Iterate through an entity's data links * * @entity: pointer to the &media_entity * @link: pointer to a &media_link to hold the iterated values * * Iterate over a &media_entity's data links */ #define for_each_media_entity_data_link(entity, link) \ for (link = __media_entity_next_link(entity, NULL, \ MEDIA_LNK_FL_DATA_LINK); \ link; \ link = __media_entity_next_link(entity, link, \ MEDIA_LNK_FL_DATA_LINK)) #endif
// SPDX-License-Identifier: GPL-2.0-only /* * This file is part of UBIFS. * * Copyright (C) 2006-2008 Nokia Corporation. * * Authors: Artem Bityutskiy (Битюцкий Артём) * Adrian Hunter */ /* * This file implements the UBIFS shrinker which evicts clean znodes from the * TNC tree when the Linux VM needs more RAM. * * We do not implement any LRU lists to find the oldest znodes to free because * it would add additional overhead to the file system fast paths. So the * shrinker just walks the TNC tree when searching for znodes to free. * * If the root of a TNC sub-tree is clean and old enough, then the children are * also clean and old enough. So the shrinker walks the TNC in level order and * dumps entire sub-trees. * * The age of znodes is just the time-stamp when they were last looked at. * The current shrinker first tries to evict old znodes, then young ones. * * Since the shrinker is global, it has to protect against races with FS * un-mounts, which is done by the 'ubifs_infos_lock' and 'c->umount_mutex'. */ #include "ubifs.h" /* List of all UBIFS file-system instances */ LIST_HEAD(ubifs_infos); /* * We number each shrinker run and record the number in the ubifs_info * structure so that we can easily work out which ubifs_info structures have * already been done by the current run. */ static unsigned int shrinker_run_no; /* Protects 'ubifs_infos' list */ DEFINE_SPINLOCK(ubifs_infos_lock); /* Global clean znode counter (for all mounted UBIFS instances) */ atomic_long_t ubifs_clean_zn_cnt; /** * shrink_tnc - shrink TNC tree. * @c: UBIFS file-system description object * @nr: number of znodes to free * @age: the age of znodes to free * @contention: if any contention, this is set to %1 * * This function traverses the TNC tree and frees clean znodes. It does not * free clean znodes which are younger than @age. Returns the number of freed * znodes. */ static int shrink_tnc(struct ubifs_info *c, int nr, int age, int *contention) { int total_freed = 0; struct ubifs_znode *znode, *zprev; time64_t time = ktime_get_seconds(); ubifs_assert(c, mutex_is_locked(&c->umount_mutex)); ubifs_assert(c, mutex_is_locked(&c->tnc_mutex)); if (!c->zroot.znode || atomic_long_read(&c->clean_zn_cnt) == 0) return 0; /* * Traverse the TNC tree in level order, so that it is possible * to destroy large sub-trees.
Indeed, if a znode is old, then all its * children are older or of the same age. * * Note, we are holding 'c->tnc_mutex', so we do not have to lock the * 'c->space_lock' when _reading_ 'c->clean_zn_cnt', because it is * changed only when the 'c->tnc_mutex' is held. */ zprev = NULL; znode = ubifs_tnc_levelorder_next(c, c->zroot.znode, NULL); while (znode && total_freed < nr && atomic_long_read(&c->clean_zn_cnt) > 0) { int freed; /* * If the znode is clean, but it is in the 'c->cnext' list, this * means that this znode has just been written to flash as a * part of commit and was marked clean. Such znodes will be * removed from the list at the end of the commit. We cannot * change the list, because it is not protected by any mutex * (design decision to make commit really independent and * parallel to main I/O). So we just skip these znodes. * * Note, the 'clean_zn_cnt' counters are not updated until * after the commit, so the UBIFS shrinker does not report * the znodes which are in the 'c->cnext' list as freeable. * * Also note, if the root of a sub-tree is not in 'c->cnext', * then the whole sub-tree is not in 'c->cnext' as well, so it * is safe to dump the whole sub-tree. */ if (znode->cnext) { /* * Very soon these znodes will be removed from the list * and become freeable. */ *contention = 1; } else if (!ubifs_zn_dirty(znode) && abs(time - znode->time) >= age) { if (znode->parent) znode->parent->zbranch[znode->iip].znode = NULL; else c->zroot.znode = NULL; freed = ubifs_destroy_tnc_subtree(c, znode); atomic_long_sub(freed, &ubifs_clean_zn_cnt); atomic_long_sub(freed, &c->clean_zn_cnt); total_freed += freed; znode = zprev; } if (unlikely(!c->zroot.znode)) break; zprev = znode; znode = ubifs_tnc_levelorder_next(c, c->zroot.znode, znode); cond_resched(); } return total_freed; } /** * shrink_tnc_trees - shrink UBIFS TNC trees. * @nr: number of znodes to free * @age: the age of znodes to free * @contention: if any contention, this is set to %1 * * This function walks the list of mounted UBIFS file-systems and frees clean * znodes which are older than @age, until at least @nr znodes are freed. * Returns the number of freed znodes. */ static int shrink_tnc_trees(int nr, int age, int *contention) { struct ubifs_info *c; struct list_head *p; unsigned int run_no; int freed = 0; spin_lock(&ubifs_infos_lock); do { run_no = ++shrinker_run_no; } while (run_no == 0); /* Iterate over all mounted UBIFS file-systems and try to shrink them */ p = ubifs_infos.next; while (p != &ubifs_infos) { c = list_entry(p, struct ubifs_info, infos_list); /* * We move the ones we do to the end of the list, so we stop * when we see one we have already done. */ if (c->shrinker_run_no == run_no) break; if (!mutex_trylock(&c->umount_mutex)) { /* Some un-mount is in progress, try next FS */ *contention = 1; p = p->next; continue; } /* * We're holding 'c->umount_mutex', so the file-system won't go * away. */ if (!mutex_trylock(&c->tnc_mutex)) { mutex_unlock(&c->umount_mutex); *contention = 1; p = p->next; continue; } spin_unlock(&ubifs_infos_lock); /* * OK, now we have TNC locked, the file-system cannot go away - * it is safe to reap the cache. */ c->shrinker_run_no = run_no; freed += shrink_tnc(c, nr, age, contention); mutex_unlock(&c->tnc_mutex); spin_lock(&ubifs_infos_lock); /* Get the next list element before we move this one */ p = p->next; /* * Move this one to the end of the list to provide some * fairness.
*/ list_move_tail(&c->infos_list, &ubifs_infos); mutex_unlock(&c->umount_mutex); if (freed >= nr) break; } spin_unlock(&ubifs_infos_lock); return freed; } /** * kick_a_thread - kick a background thread to start commit. * * This function kicks a background thread to start background commit. Returns * %-1 if a thread was kicked or there is another reason to assume the memory * will soon be freed or become freeable. If there are no dirty znodes, returns * %0. */ static int kick_a_thread(void) { int i; struct ubifs_info *c; /* * Iterate over all mounted UBIFS file-systems and find out if there is * already an ongoing commit operation there. If not, then iterate a * second time and initiate background commit. */ spin_lock(&ubifs_infos_lock); for (i = 0; i < 2; i++) { list_for_each_entry(c, &ubifs_infos, infos_list) { long dirty_zn_cnt; if (!mutex_trylock(&c->umount_mutex)) { /* * Some un-mount is in progress, it will * certainly free memory, so just return. */ spin_unlock(&ubifs_infos_lock); return -1; } dirty_zn_cnt = atomic_long_read(&c->dirty_zn_cnt); if (!dirty_zn_cnt || c->cmt_state == COMMIT_BROKEN || c->ro_mount || c->ro_error) { mutex_unlock(&c->umount_mutex); continue; } if (c->cmt_state != COMMIT_RESTING) { spin_unlock(&ubifs_infos_lock); mutex_unlock(&c->umount_mutex); return -1; } if (i == 1) { list_move_tail(&c->infos_list, &ubifs_infos); spin_unlock(&ubifs_infos_lock); ubifs_request_bg_commit(c); mutex_unlock(&c->umount_mutex); return -1; } mutex_unlock(&c->umount_mutex); } } spin_unlock(&ubifs_infos_lock); return 0; } unsigned long ubifs_shrink_count(struct shrinker *shrink, struct shrink_control *sc) { long clean_zn_cnt = atomic_long_read(&ubifs_clean_zn_cnt); /* * Due to the way UBIFS updates the clean znode counter it may * temporarily be negative. */ return clean_zn_cnt >= 0 ? clean_zn_cnt : 1; } unsigned long ubifs_shrink_scan(struct shrinker *shrink, struct shrink_control *sc) { unsigned long nr = sc->nr_to_scan; int contention = 0; unsigned long freed; long clean_zn_cnt = atomic_long_read(&ubifs_clean_zn_cnt); if (!clean_zn_cnt) { /* * No clean znodes, nothing to reap. All we can do in this case * is to kick background threads to start commit, which will * probably make clean znodes which, in turn, will be freeable. * And we return -1, which will make the VM call us again * later. */ dbg_tnc("no clean znodes, kick a thread"); return kick_a_thread(); } freed = shrink_tnc_trees(nr, OLD_ZNODE_AGE, &contention); if (freed >= nr) goto out; dbg_tnc("not enough old znodes, try to free young ones"); freed += shrink_tnc_trees(nr - freed, YOUNG_ZNODE_AGE, &contention); if (freed >= nr) goto out; dbg_tnc("not enough young znodes, free all"); freed += shrink_tnc_trees(nr - freed, 0, &contention); if (!freed && contention) { dbg_tnc("freed nothing, but contention"); return SHRINK_STOP; } out: dbg_tnc("%lu znodes were freed, requested %lu", freed, nr); return freed; }
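/*
 * For context, a sketch of how the two handlers above are typically wired
 * up (illustrative only; in UBIFS the actual registration lives in the
 * superblock code, and the shrinker registration API varies between
 * kernel versions):
 *
 *	static struct shrinker ubifs_shrinker_info = {
 *		.scan_objects = ubifs_shrink_scan,
 *		.count_objects = ubifs_shrink_count,
 *		.seeks = DEFAULT_SEEKS,
 *	};
 *
 *	register_shrinker(&ubifs_shrinker_info, "ubifs-slab");
 */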
// SPDX-License-Identifier: GPL-2.0 /* * Written for linux by Johan Myreen as a translation from * the assembly version by Linus (with diacriticals added) * * Some additional features added by Christoph Niemann (ChN), March 1993 * * Loadable keymaps by Risto Kankkunen, May 1993 * * Diacriticals redone & other small changes, aeb@cwi.nl, June 1993 * Added decr/incr_console, dynamic keymaps, Unicode support, * dynamic function/string keys, led setting, Sept 1994 * `Sticky' modifier keys, 951006. * * 11-11-96: SAK should now work in the raw mode (Martin Mares) * * Modified to provide 'generic' keyboard support by Hamish Macdonald * Merge with the m68k keyboard driver and split-off of the PC low-level * parts by Geert Uytterhoeven, May 1997 * * 27-05-97: Added support for the Magic SysRq Key (Martin Mares) * 30-07-98: Dead keys redone, aeb@cwi.nl. * 21-08-02: Converted to input API, major cleanup. (Vojtech Pavlik) */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/consolemap.h> #include <linux/init.h> #include <linux/input.h> #include <linux/jiffies.h> #include <linux/kbd_diacr.h> #include <linux/kbd_kern.h> #include <linux/leds.h> #include <linux/mm.h> #include <linux/module.h> #include <linux/nospec.h> #include <linux/notifier.h> #include <linux/reboot.h> #include <linux/sched/debug.h> #include <linux/sched/signal.h> #include <linux/slab.h> #include <linux/spinlock.h> #include <linux/string.h> #include <linux/tty_flip.h> #include <linux/tty.h> #include <linux/uaccess.h> #include <linux/vt_kern.h> #include <asm/irq_regs.h> /* * Exported functions/variables */ #define KBD_DEFMODE (BIT(VC_REPEAT) | BIT(VC_META)) #if defined(CONFIG_X86) || defined(CONFIG_PARISC) #include <asm/kbdleds.h> #else static inline int kbd_defleds(void) { return 0; } #endif #define KBD_DEFLOCK 0 /* * Handler Tables. */ #define K_HANDLERS\ k_self, k_fn, k_spec, k_pad,\ k_dead, k_cons, k_cur, k_shift,\ k_meta, k_ascii, k_lock, k_lowercase,\ k_slock, k_dead2, k_brl, k_ignore typedef void (k_handler_fn)(struct vc_data *vc, unsigned char value, char up_flag); static k_handler_fn K_HANDLERS; static k_handler_fn *k_handler[16] = { K_HANDLERS }; #define FN_HANDLERS\ fn_null, fn_enter, fn_show_ptregs, fn_show_mem,\ fn_show_state, fn_send_intr, fn_lastcons, fn_caps_toggle,\ fn_num, fn_hold, fn_scroll_forw, fn_scroll_back,\ fn_boot_it, fn_caps_on, fn_compose, fn_SAK,\ fn_dec_console, fn_inc_console, fn_spawn_con, fn_bare_num typedef void (fn_handler_fn)(struct vc_data *vc); static fn_handler_fn FN_HANDLERS; static fn_handler_fn *fn_handler[] = { FN_HANDLERS }; /* * Variables exported for vt_ioctl.c */ struct vt_spawn_console vt_spawn_con = { .lock = __SPIN_LOCK_UNLOCKED(vt_spawn_con.lock), .pid = NULL, .sig = 0, }; /* * Internal Data.
*/ static struct kbd_struct kbd_table[MAX_NR_CONSOLES]; static struct kbd_struct *kbd = kbd_table; /* maximum values each key_handler can handle */ static const unsigned char max_vals[] = { [ KT_LATIN ] = 255, [ KT_FN ] = ARRAY_SIZE(func_table) - 1, [ KT_SPEC ] = ARRAY_SIZE(fn_handler) - 1, [ KT_PAD ] = NR_PAD - 1, [ KT_DEAD ] = NR_DEAD - 1, [ KT_CONS ] = 255, [ KT_CUR ] = 3, [ KT_SHIFT ] = NR_SHIFT - 1, [ KT_META ] = 255, [ KT_ASCII ] = NR_ASCII - 1, [ KT_LOCK ] = NR_LOCK - 1, [ KT_LETTER ] = 255, [ KT_SLOCK ] = NR_LOCK - 1, [ KT_DEAD2 ] = 255, [ KT_BRL ] = NR_BRL - 1, }; static const int NR_TYPES = ARRAY_SIZE(max_vals); static void kbd_bh(struct tasklet_struct *unused); static DECLARE_TASKLET_DISABLED(keyboard_tasklet, kbd_bh); static struct input_handler kbd_handler; static DEFINE_SPINLOCK(kbd_event_lock); static DEFINE_SPINLOCK(led_lock); static DEFINE_SPINLOCK(func_buf_lock); /* guard 'func_buf' and friends */ static DECLARE_BITMAP(key_down, KEY_CNT); /* keyboard key bitmap */ static unsigned char shift_down[NR_SHIFT]; /* shift state counters.. */ static bool dead_key_next; /* Handles a number being assembled on the number pad */ static bool npadch_active; static unsigned int npadch_value; static unsigned int diacr; static bool rep; /* flag telling character repeat */ static int shift_state = 0; static unsigned int ledstate = -1U; /* undefined */ static unsigned char ledioctl; static bool vt_switch; /* * Notifier list for console keyboard events */ static ATOMIC_NOTIFIER_HEAD(keyboard_notifier_list); int register_keyboard_notifier(struct notifier_block *nb) { return atomic_notifier_chain_register(&keyboard_notifier_list, nb); } EXPORT_SYMBOL_GPL(register_keyboard_notifier); int unregister_keyboard_notifier(struct notifier_block *nb) { return atomic_notifier_chain_unregister(&keyboard_notifier_list, nb); } EXPORT_SYMBOL_GPL(unregister_keyboard_notifier); /* * Translation of scancodes to keycodes. We set them on only the first * keyboard in the list that accepts the scancode and keycode. * Explanation for not choosing the first attached keyboard anymore: * USB keyboards for example have two event devices: one for all "normal" * keys and one for extra function keys (like "volume up", "make coffee", * etc.). So this means that scancodes for the extra function keys won't * be valid for the first event device, but will be for the second. 
*/ struct getset_keycode_data { struct input_keymap_entry ke; int error; }; static int getkeycode_helper(struct input_handle *handle, void *data) { struct getset_keycode_data *d = data; d->error = input_get_keycode(handle->dev, &d->ke); return d->error == 0; /* stop as soon as we successfully get one */ } static int getkeycode(unsigned int scancode) { struct getset_keycode_data d = { .ke = { .flags = 0, .len = sizeof(scancode), .keycode = 0, }, .error = -ENODEV, }; memcpy(d.ke.scancode, &scancode, sizeof(scancode)); input_handler_for_each_handle(&kbd_handler, &d, getkeycode_helper); return d.error ?: d.ke.keycode; } static int setkeycode_helper(struct input_handle *handle, void *data) { struct getset_keycode_data *d = data; d->error = input_set_keycode(handle->dev, &d->ke); return d->error == 0; /* stop as soon as we successfully set one */ } static int setkeycode(unsigned int scancode, unsigned int keycode) { struct getset_keycode_data d = { .ke = { .flags = 0, .len = sizeof(scancode), .keycode = keycode, }, .error = -ENODEV, }; memcpy(d.ke.scancode, &scancode, sizeof(scancode)); input_handler_for_each_handle(&kbd_handler, &d, setkeycode_helper); return d.error; } /* * Making beeps and bells. Note that we prefer beeps to bells, but when * shutting the sound off we do both. */ static int kd_sound_helper(struct input_handle *handle, void *data) { unsigned int *hz = data; struct input_dev *dev = handle->dev; if (test_bit(EV_SND, dev->evbit)) { if (test_bit(SND_TONE, dev->sndbit)) { input_inject_event(handle, EV_SND, SND_TONE, *hz); if (*hz) return 0; } if (test_bit(SND_BELL, dev->sndbit)) input_inject_event(handle, EV_SND, SND_BELL, *hz ? 1 : 0); } return 0; } static void kd_nosound(struct timer_list *unused) { static unsigned int zero; input_handler_for_each_handle(&kbd_handler, &zero, kd_sound_helper); } static DEFINE_TIMER(kd_mksound_timer, kd_nosound); void kd_mksound(unsigned int hz, unsigned int ticks) { del_timer_sync(&kd_mksound_timer); input_handler_for_each_handle(&kbd_handler, &hz, kd_sound_helper); if (hz && ticks) mod_timer(&kd_mksound_timer, jiffies + ticks); } EXPORT_SYMBOL(kd_mksound); /* * Setting the keyboard rate. */ static int kbd_rate_helper(struct input_handle *handle, void *data) { struct input_dev *dev = handle->dev; struct kbd_repeat *rpt = data; if (test_bit(EV_REP, dev->evbit)) { if (rpt[0].delay > 0) input_inject_event(handle, EV_REP, REP_DELAY, rpt[0].delay); if (rpt[0].period > 0) input_inject_event(handle, EV_REP, REP_PERIOD, rpt[0].period); rpt[1].delay = dev->rep[REP_DELAY]; rpt[1].period = dev->rep[REP_PERIOD]; } return 0; } int kbd_rate(struct kbd_repeat *rpt) { struct kbd_repeat data[2] = { *rpt }; input_handler_for_each_handle(&kbd_handler, data, kbd_rate_helper); *rpt = data[1]; /* Copy currently used settings */ return 0; } /* * Helper Functions. */ static void put_queue(struct vc_data *vc, int ch) { tty_insert_flip_char(&vc->port, ch, 0); tty_flip_buffer_push(&vc->port); } static void puts_queue(struct vc_data *vc, const char *cp) { tty_insert_flip_string(&vc->port, cp, strlen(cp)); tty_flip_buffer_push(&vc->port); } static void applkey(struct vc_data *vc, int key, char mode) { static char buf[] = { 0x1b, 'O', 0x00, 0x00 }; buf[1] = (mode ? 'O' : '['); buf[2] = key; puts_queue(vc, buf); } /* * Many other routines do put_queue, but I think either * they produce ASCII, or they produce some user-assigned * string, and in both cases we might assume that it is * in utf-8 already. 
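 * As a worked example, to_utf8() below queues U+00E9 as the byte pair
 * 0xc3 0xa9, and silently drops surrogates (0xD800-0xDFFF) and U+FFFF.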
*/ static void to_utf8(struct vc_data *vc, uint c) { if (c < 0x80) /* 0******* */ put_queue(vc, c); else if (c < 0x800) { /* 110***** 10****** */ put_queue(vc, 0xc0 | (c >> 6)); put_queue(vc, 0x80 | (c & 0x3f)); } else if (c < 0x10000) { if (c >= 0xD800 && c < 0xE000) return; if (c == 0xFFFF) return; /* 1110**** 10****** 10****** */ put_queue(vc, 0xe0 | (c >> 12)); put_queue(vc, 0x80 | ((c >> 6) & 0x3f)); put_queue(vc, 0x80 | (c & 0x3f)); } else if (c < 0x110000) { /* 11110*** 10****** 10****** 10****** */ put_queue(vc, 0xf0 | (c >> 18)); put_queue(vc, 0x80 | ((c >> 12) & 0x3f)); put_queue(vc, 0x80 | ((c >> 6) & 0x3f)); put_queue(vc, 0x80 | (c & 0x3f)); } } /* FIXME: review locking for vt.c callers */ static void set_leds(void) { tasklet_schedule(&keyboard_tasklet); } /* * Called after returning from RAW mode or when changing consoles - recompute * shift_down[] and shift_state from key_down[] maybe called when keymap is * undefined, so that shiftkey release is seen. The caller must hold the * kbd_event_lock. */ static void do_compute_shiftstate(void) { unsigned int k, sym, val; shift_state = 0; memset(shift_down, 0, sizeof(shift_down)); for_each_set_bit(k, key_down, min(NR_KEYS, KEY_CNT)) { sym = U(key_maps[0][k]); if (KTYP(sym) != KT_SHIFT && KTYP(sym) != KT_SLOCK) continue; val = KVAL(sym); if (val == KVAL(K_CAPSSHIFT)) val = KVAL(K_SHIFT); shift_down[val]++; shift_state |= BIT(val); } } /* We still have to export this method to vt.c */ void vt_set_leds_compute_shiftstate(void) { unsigned long flags; /* * When VT is switched, the keyboard led needs to be set once. * Ensure that after the switch is completed, the state of the * keyboard LED is consistent with the state of the keyboard lock. */ vt_switch = true; set_leds(); spin_lock_irqsave(&kbd_event_lock, flags); do_compute_shiftstate(); spin_unlock_irqrestore(&kbd_event_lock, flags); } /* * We have a combining character DIACR here, followed by the character CH. * If the combination occurs in the table, return the corresponding value. * Otherwise, if CH is a space or equals DIACR, return DIACR. * Otherwise, conclude that DIACR was not combining after all, * queue it and return CH. */ static unsigned int handle_diacr(struct vc_data *vc, unsigned int ch) { unsigned int d = diacr; unsigned int i; diacr = 0; if ((d & ~0xff) == BRL_UC_ROW) { if ((ch & ~0xff) == BRL_UC_ROW) return d | ch; } else { for (i = 0; i < accent_table_size; i++) if (accent_table[i].diacr == d && accent_table[i].base == ch) return accent_table[i].result; } if (ch == ' ' || ch == (BRL_UC_ROW|0) || ch == d) return d; if (kbd->kbdmode == VC_UNICODE) to_utf8(vc, d); else { int c = conv_uni_to_8bit(d); if (c != -1) put_queue(vc, c); } return ch; } /* * Special function handlers */ static void fn_enter(struct vc_data *vc) { if (diacr) { if (kbd->kbdmode == VC_UNICODE) to_utf8(vc, diacr); else { int c = conv_uni_to_8bit(diacr); if (c != -1) put_queue(vc, c); } diacr = 0; } put_queue(vc, '\r'); if (vc_kbd_mode(kbd, VC_CRLF)) put_queue(vc, '\n'); } static void fn_caps_toggle(struct vc_data *vc) { if (rep) return; chg_vc_kbd_led(kbd, VC_CAPSLOCK); } static void fn_caps_on(struct vc_data *vc) { if (rep) return; set_vc_kbd_led(kbd, VC_CAPSLOCK); } static void fn_show_ptregs(struct vc_data *vc) { struct pt_regs *regs = get_irq_regs(); if (regs) show_regs(regs); } static void fn_hold(struct vc_data *vc) { struct tty_struct *tty = vc->port.tty; if (rep || !tty) return; /* * Note: SCROLLOCK will be set (cleared) by stop_tty (start_tty); * these routines are also activated by ^S/^Q. 
* (And SCROLLOCK can also be set by the ioctl KDSKBLED.) */ if (tty->flow.stopped) start_tty(tty); else stop_tty(tty); } static void fn_num(struct vc_data *vc) { if (vc_kbd_mode(kbd, VC_APPLIC)) applkey(vc, 'P', 1); else fn_bare_num(vc); } /* * Bind this to Shift-NumLock if you work in application keypad mode * but want to be able to change the NumLock flag. * Bind this to NumLock if you prefer that the NumLock key always * changes the NumLock flag. */ static void fn_bare_num(struct vc_data *vc) { if (!rep) chg_vc_kbd_led(kbd, VC_NUMLOCK); } static void fn_lastcons(struct vc_data *vc) { /* switch to the last used console, ChN */ set_console(last_console); } static void fn_dec_console(struct vc_data *vc) { int i, cur = fg_console; /* Currently switching? Queue this next switch relative to that. */ if (want_console != -1) cur = want_console; for (i = cur - 1; i != cur; i--) { if (i == -1) i = MAX_NR_CONSOLES - 1; if (vc_cons_allocated(i)) break; } set_console(i); } static void fn_inc_console(struct vc_data *vc) { int i, cur = fg_console; /* Currently switching? Queue this next switch relative to that. */ if (want_console != -1) cur = want_console; for (i = cur+1; i != cur; i++) { if (i == MAX_NR_CONSOLES) i = 0; if (vc_cons_allocated(i)) break; } set_console(i); } static void fn_send_intr(struct vc_data *vc) { tty_insert_flip_char(&vc->port, 0, TTY_BREAK); tty_flip_buffer_push(&vc->port); } static void fn_scroll_forw(struct vc_data *vc) { scrollfront(vc, 0); } static void fn_scroll_back(struct vc_data *vc) { scrollback(vc); } static void fn_show_mem(struct vc_data *vc) { show_mem(); } static void fn_show_state(struct vc_data *vc) { show_state(); } static void fn_boot_it(struct vc_data *vc) { ctrl_alt_del(); } static void fn_compose(struct vc_data *vc) { dead_key_next = true; } static void fn_spawn_con(struct vc_data *vc) { spin_lock(&vt_spawn_con.lock); if (vt_spawn_con.pid) if (kill_pid(vt_spawn_con.pid, vt_spawn_con.sig, 1)) { put_pid(vt_spawn_con.pid); vt_spawn_con.pid = NULL; } spin_unlock(&vt_spawn_con.lock); } static void fn_SAK(struct vc_data *vc) { struct work_struct *SAK_work = &vc_cons[fg_console].SAK_work; schedule_work(SAK_work); } static void fn_null(struct vc_data *vc) { do_compute_shiftstate(); } /* * Special key handlers */ static void k_ignore(struct vc_data *vc, unsigned char value, char up_flag) { } static void k_spec(struct vc_data *vc, unsigned char value, char up_flag) { if (up_flag) return; if (value >= ARRAY_SIZE(fn_handler)) return; if ((kbd->kbdmode == VC_RAW || kbd->kbdmode == VC_MEDIUMRAW || kbd->kbdmode == VC_OFF) && value != KVAL(K_SAK)) return; /* SAK is allowed even in raw mode */ fn_handler[value](vc); } static void k_lowercase(struct vc_data *vc, unsigned char value, char up_flag) { pr_err("k_lowercase was called - impossible\n"); } static void k_unicode(struct vc_data *vc, unsigned int value, char up_flag) { if (up_flag) return; /* no action, if this is a key release */ if (diacr) value = handle_diacr(vc, value); if (dead_key_next) { dead_key_next = false; diacr = value; return; } if (kbd->kbdmode == VC_UNICODE) to_utf8(vc, value); else { int c = conv_uni_to_8bit(value); if (c != -1) put_queue(vc, c); } } /* * Handle dead key. Note that we now may have several * dead keys modifying the same character. Very useful * for Vietnamese. */ static void k_deadunicode(struct vc_data *vc, unsigned int value, char up_flag) { if (up_flag) return; diacr = (diacr ? 
handle_diacr(vc, value) : value); } static void k_self(struct vc_data *vc, unsigned char value, char up_flag) { k_unicode(vc, conv_8bit_to_uni(value), up_flag); } static void k_dead2(struct vc_data *vc, unsigned char value, char up_flag) { k_deadunicode(vc, value, up_flag); } /* * Obsolete - for backwards compatibility only */ static void k_dead(struct vc_data *vc, unsigned char value, char up_flag) { static const unsigned char ret_diacr[NR_DEAD] = { '`', /* dead_grave */ '\'', /* dead_acute */ '^', /* dead_circumflex */ '~', /* dead_tilda */ '"', /* dead_diaeresis */ ',', /* dead_cedilla */ '_', /* dead_macron */ 'U', /* dead_breve */ '.', /* dead_abovedot */ '*', /* dead_abovering */ '=', /* dead_doubleacute */ 'c', /* dead_caron */ 'k', /* dead_ogonek */ 'i', /* dead_iota */ '#', /* dead_voiced_sound */ 'o', /* dead_semivoiced_sound */ '!', /* dead_belowdot */ '?', /* dead_hook */ '+', /* dead_horn */ '-', /* dead_stroke */ ')', /* dead_abovecomma */ '(', /* dead_abovereversedcomma */ ':', /* dead_doublegrave */ 'n', /* dead_invertedbreve */ ';', /* dead_belowcomma */ '$', /* dead_currency */ '@', /* dead_greek */ }; k_deadunicode(vc, ret_diacr[value], up_flag); } static void k_cons(struct vc_data *vc, unsigned char value, char up_flag) { if (up_flag) return; set_console(value); } static void k_fn(struct vc_data *vc, unsigned char value, char up_flag) { if (up_flag) return; if ((unsigned)value < ARRAY_SIZE(func_table)) { unsigned long flags; spin_lock_irqsave(&func_buf_lock, flags); if (func_table[value]) puts_queue(vc, func_table[value]); spin_unlock_irqrestore(&func_buf_lock, flags); } else pr_err("k_fn called with value=%d\n", value); } static void k_cur(struct vc_data *vc, unsigned char value, char up_flag) { static const char cur_chars[] = "BDCA"; if (up_flag) return; applkey(vc, cur_chars[value], vc_kbd_mode(kbd, VC_CKMODE)); } static void k_pad(struct vc_data *vc, unsigned char value, char up_flag) { static const char pad_chars[] = "0123456789+-*/\015,.?()#"; static const char app_map[] = "pqrstuvwxylSRQMnnmPQS"; if (up_flag) return; /* no action, if this is a key release */ /* kludge... 
shift forces cursor/number keys */ if (vc_kbd_mode(kbd, VC_APPLIC) && !shift_down[KG_SHIFT]) { applkey(vc, app_map[value], 1); return; } if (!vc_kbd_led(kbd, VC_NUMLOCK)) { switch (value) { case KVAL(K_PCOMMA): case KVAL(K_PDOT): k_fn(vc, KVAL(K_REMOVE), 0); return; case KVAL(K_P0): k_fn(vc, KVAL(K_INSERT), 0); return; case KVAL(K_P1): k_fn(vc, KVAL(K_SELECT), 0); return; case KVAL(K_P2): k_cur(vc, KVAL(K_DOWN), 0); return; case KVAL(K_P3): k_fn(vc, KVAL(K_PGDN), 0); return; case KVAL(K_P4): k_cur(vc, KVAL(K_LEFT), 0); return; case KVAL(K_P6): k_cur(vc, KVAL(K_RIGHT), 0); return; case KVAL(K_P7): k_fn(vc, KVAL(K_FIND), 0); return; case KVAL(K_P8): k_cur(vc, KVAL(K_UP), 0); return; case KVAL(K_P9): k_fn(vc, KVAL(K_PGUP), 0); return; case KVAL(K_P5): applkey(vc, 'G', vc_kbd_mode(kbd, VC_APPLIC)); return; } } put_queue(vc, pad_chars[value]); if (value == KVAL(K_PENTER) && vc_kbd_mode(kbd, VC_CRLF)) put_queue(vc, '\n'); } static void k_shift(struct vc_data *vc, unsigned char value, char up_flag) { int old_state = shift_state; if (rep) return; /* * Mimic typewriter: * a CapsShift key acts like Shift but undoes CapsLock */ if (value == KVAL(K_CAPSSHIFT)) { value = KVAL(K_SHIFT); if (!up_flag) clr_vc_kbd_led(kbd, VC_CAPSLOCK); } if (up_flag) { /* * handle the case that two shift or control * keys are depressed simultaneously */ if (shift_down[value]) shift_down[value]--; } else shift_down[value]++; if (shift_down[value]) shift_state |= BIT(value); else shift_state &= ~BIT(value); /* kludge */ if (up_flag && shift_state != old_state && npadch_active) { if (kbd->kbdmode == VC_UNICODE) to_utf8(vc, npadch_value); else put_queue(vc, npadch_value & 0xff); npadch_active = false; } } static void k_meta(struct vc_data *vc, unsigned char value, char up_flag) { if (up_flag) return; if (vc_kbd_mode(kbd, VC_META)) { put_queue(vc, '\033'); put_queue(vc, value); } else put_queue(vc, value | BIT(7)); } static void k_ascii(struct vc_data *vc, unsigned char value, char up_flag) { unsigned int base; if (up_flag) return; if (value < 10) { /* decimal input of code, while Alt depressed */ base = 10; } else { /* hexadecimal input of code, while AltGr depressed */ value -= 10; base = 16; } if (!npadch_active) { npadch_value = 0; npadch_active = true; } npadch_value = npadch_value * base + value; } static void k_lock(struct vc_data *vc, unsigned char value, char up_flag) { if (up_flag || rep) return; chg_vc_kbd_lock(kbd, value); } static void k_slock(struct vc_data *vc, unsigned char value, char up_flag) { k_shift(vc, value, up_flag); if (up_flag || rep) return; chg_vc_kbd_slock(kbd, value); /* try to make Alt, oops, AltGr and such work */ if (!key_maps[kbd->lockstate ^ kbd->slockstate]) { kbd->slockstate = 0; chg_vc_kbd_slock(kbd, value); } } /* by default, 300ms interval for combination release */ static unsigned brl_timeout = 300; MODULE_PARM_DESC(brl_timeout, "Braille keys release delay in ms (0 for commit on first key release)"); module_param(brl_timeout, uint, 0644); static unsigned brl_nbchords = 1; MODULE_PARM_DESC(brl_nbchords, "Number of chords that produce a braille pattern (0 for dead chords)"); module_param(brl_nbchords, uint, 0644); static void k_brlcommit(struct vc_data *vc, unsigned int pattern, char up_flag) { static unsigned long chords; static unsigned committed; if (!brl_nbchords) k_deadunicode(vc, BRL_UC_ROW | pattern, up_flag); else { committed |= pattern; chords++; if (chords == brl_nbchords) { k_unicode(vc, BRL_UC_ROW | committed, up_flag); chords = 0; committed = 0; } } } static void k_brl(struct 
vc_data *vc, unsigned char value, char up_flag) { static unsigned pressed, committing; static unsigned long releasestart; if (kbd->kbdmode != VC_UNICODE) { if (!up_flag) pr_warn("keyboard mode must be unicode for braille patterns\n"); return; } if (!value) { k_unicode(vc, BRL_UC_ROW, up_flag); return; } if (value > 8) return; if (!up_flag) { pressed |= BIT(value - 1); if (!brl_timeout) committing = pressed; } else if (brl_timeout) { if (!committing || time_after(jiffies, releasestart + msecs_to_jiffies(brl_timeout))) { committing = pressed; releasestart = jiffies; } pressed &= ~BIT(value - 1); if (!pressed && committing) { k_brlcommit(vc, committing, 0); committing = 0; } } else { if (committing) { k_brlcommit(vc, committing, 0); committing = 0; } pressed &= ~BIT(value - 1); } } #if IS_ENABLED(CONFIG_INPUT_LEDS) && IS_ENABLED(CONFIG_LEDS_TRIGGERS) struct kbd_led_trigger { struct led_trigger trigger; unsigned int mask; }; static int kbd_led_trigger_activate(struct led_classdev *cdev) { struct kbd_led_trigger *trigger = container_of(cdev->trigger, struct kbd_led_trigger, trigger); tasklet_disable(&keyboard_tasklet); if (ledstate != -1U) led_set_brightness(cdev, ledstate & trigger->mask ? LED_FULL : LED_OFF); tasklet_enable(&keyboard_tasklet); return 0; } #define KBD_LED_TRIGGER(_led_bit, _name) { \ .trigger = { \ .name = _name, \ .activate = kbd_led_trigger_activate, \ }, \ .mask = BIT(_led_bit), \ } #define KBD_LOCKSTATE_TRIGGER(_led_bit, _name) \ KBD_LED_TRIGGER((_led_bit) + 8, _name) static struct kbd_led_trigger kbd_led_triggers[] = { KBD_LED_TRIGGER(VC_SCROLLOCK, "kbd-scrolllock"), KBD_LED_TRIGGER(VC_NUMLOCK, "kbd-numlock"), KBD_LED_TRIGGER(VC_CAPSLOCK, "kbd-capslock"), KBD_LED_TRIGGER(VC_KANALOCK, "kbd-kanalock"), KBD_LOCKSTATE_TRIGGER(VC_SHIFTLOCK, "kbd-shiftlock"), KBD_LOCKSTATE_TRIGGER(VC_ALTGRLOCK, "kbd-altgrlock"), KBD_LOCKSTATE_TRIGGER(VC_CTRLLOCK, "kbd-ctrllock"), KBD_LOCKSTATE_TRIGGER(VC_ALTLOCK, "kbd-altlock"), KBD_LOCKSTATE_TRIGGER(VC_SHIFTLLOCK, "kbd-shiftllock"), KBD_LOCKSTATE_TRIGGER(VC_SHIFTRLOCK, "kbd-shiftrlock"), KBD_LOCKSTATE_TRIGGER(VC_CTRLLLOCK, "kbd-ctrlllock"), KBD_LOCKSTATE_TRIGGER(VC_CTRLRLOCK, "kbd-ctrlrlock"), }; static void kbd_propagate_led_state(unsigned int old_state, unsigned int new_state) { struct kbd_led_trigger *trigger; unsigned int changed = old_state ^ new_state; int i; for (i = 0; i < ARRAY_SIZE(kbd_led_triggers); i++) { trigger = &kbd_led_triggers[i]; if (changed & trigger->mask) led_trigger_event(&trigger->trigger, new_state & trigger->mask ? 
LED_FULL : LED_OFF); } } static int kbd_update_leds_helper(struct input_handle *handle, void *data) { unsigned int led_state = *(unsigned int *)data; if (test_bit(EV_LED, handle->dev->evbit)) kbd_propagate_led_state(~led_state, led_state); return 0; } static void kbd_init_leds(void) { int error; int i; for (i = 0; i < ARRAY_SIZE(kbd_led_triggers); i++) { error = led_trigger_register(&kbd_led_triggers[i].trigger); if (error) pr_err("error %d while registering trigger %s\n", error, kbd_led_triggers[i].trigger.name); } } #else static int kbd_update_leds_helper(struct input_handle *handle, void *data) { unsigned int leds = *(unsigned int *)data; if (test_bit(EV_LED, handle->dev->evbit)) { input_inject_event(handle, EV_LED, LED_SCROLLL, !!(leds & BIT(0))); input_inject_event(handle, EV_LED, LED_NUML, !!(leds & BIT(1))); input_inject_event(handle, EV_LED, LED_CAPSL, !!(leds & BIT(2))); input_inject_event(handle, EV_SYN, SYN_REPORT, 0); } return 0; } static void kbd_propagate_led_state(unsigned int old_state, unsigned int new_state) { input_handler_for_each_handle(&kbd_handler, &new_state, kbd_update_leds_helper); } static void kbd_init_leds(void) { } #endif /* * The leds display either (i) the status of NumLock, CapsLock, ScrollLock, * or (ii) whatever pattern of lights people want to show using KDSETLED, * or (iii) specified bits of specified words in kernel memory. */ static unsigned char getledstate(void) { return ledstate & 0xff; } void setledstate(struct kbd_struct *kb, unsigned int led) { unsigned long flags; spin_lock_irqsave(&led_lock, flags); if (!(led & ~7)) { ledioctl = led; kb->ledmode = LED_SHOW_IOCTL; } else kb->ledmode = LED_SHOW_FLAGS; set_leds(); spin_unlock_irqrestore(&led_lock, flags); } static inline unsigned char getleds(void) { struct kbd_struct *kb = kbd_table + fg_console; if (kb->ledmode == LED_SHOW_IOCTL) return ledioctl; return kb->ledflagstate; } /** * vt_get_leds - helper for braille console * @console: console to read * @flag: flag we want to check * * Check the status of a keyboard led flag and report it back */ int vt_get_leds(unsigned int console, int flag) { struct kbd_struct *kb = &kbd_table[console]; int ret; unsigned long flags; spin_lock_irqsave(&led_lock, flags); ret = vc_kbd_led(kb, flag); spin_unlock_irqrestore(&led_lock, flags); return ret; } EXPORT_SYMBOL_GPL(vt_get_leds); /** * vt_set_led_state - set LED state of a console * @console: console to set * @leds: LED bits * * Set the LEDs on a console. This is a wrapper for the VT layer * so that we can keep kbd knowledge internal */ void vt_set_led_state(unsigned int console, int leds) { struct kbd_struct *kb = &kbd_table[console]; setledstate(kb, leds); } /** * vt_kbd_con_start - Keyboard side of console start * @console: console * * Handle console start. This is a wrapper for the VT layer * so that we can keep kbd knowledge internal * * FIXME: We eventually need to hold the kbd lock here to protect * the LED updating. We can't do it yet because fn_hold calls stop_tty * and start_tty under the kbd_event_lock, while normal tty paths * don't hold the lock. We probably need to split out an LED lock * but not during an -rc release! */ void vt_kbd_con_start(unsigned int console) { struct kbd_struct *kb = &kbd_table[console]; unsigned long flags; spin_lock_irqsave(&led_lock, flags); clr_vc_kbd_led(kb, VC_SCROLLOCK); set_leds(); spin_unlock_irqrestore(&led_lock, flags); } /** * vt_kbd_con_stop - Keyboard side of console stop * @console: console * * Handle console stop. 
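 * The scroll lock LED is lit below to mirror the stopped state.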
This is a wrapper for the VT layer * so that we can keep kbd knowledge internal */ void vt_kbd_con_stop(unsigned int console) { struct kbd_struct *kb = &kbd_table[console]; unsigned long flags; spin_lock_irqsave(&led_lock, flags); set_vc_kbd_led(kb, VC_SCROLLOCK); set_leds(); spin_unlock_irqrestore(&led_lock, flags); } /* * This is the tasklet that updates LED state of LEDs using standard * keyboard triggers. The reason we use tasklet is that we need to * handle the scenario when keyboard handler is not registered yet * but we already getting updates from the VT to update led state. */ static void kbd_bh(struct tasklet_struct *unused) { unsigned int leds; unsigned long flags; spin_lock_irqsave(&led_lock, flags); leds = getleds(); leds |= (unsigned int)kbd->lockstate << 8; spin_unlock_irqrestore(&led_lock, flags); if (vt_switch) { ledstate = ~leds; vt_switch = false; } if (leds != ledstate) { kbd_propagate_led_state(ledstate, leds); ledstate = leds; } } #if defined(CONFIG_X86) || defined(CONFIG_ALPHA) ||\ defined(CONFIG_MIPS) || defined(CONFIG_PPC) || defined(CONFIG_SPARC) ||\ defined(CONFIG_PARISC) || defined(CONFIG_SUPERH) ||\ (defined(CONFIG_ARM) && defined(CONFIG_KEYBOARD_ATKBD) && !defined(CONFIG_ARCH_RPC)) static inline bool kbd_is_hw_raw(const struct input_dev *dev) { if (!test_bit(EV_MSC, dev->evbit) || !test_bit(MSC_RAW, dev->mscbit)) return false; return dev->id.bustype == BUS_I8042 && dev->id.vendor == 0x0001 && dev->id.product == 0x0001; } static const unsigned short x86_keycodes[256] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84,118, 86, 87, 88,115,120,119,121,112,123, 92, 284,285,309, 0,312, 91,327,328,329,331,333,335,336,337,338,339, 367,288,302,304,350, 89,334,326,267,126,268,269,125,347,348,349, 360,261,262,263,268,376,100,101,321,316,373,286,289,102,351,355, 103,104,105,275,287,279,258,106,274,107,294,364,358,363,362,361, 291,108,381,281,290,272,292,305,280, 99,112,257,306,359,113,114, 264,117,271,374,379,265,266, 93, 94, 95, 85,259,375,260, 90,116, 377,109,111,277,278,282,283,295,296,297,299,300,301,293,303,307, 308,310,313,314,315,317,318,319,320,357,322,323,324,325,276,330, 332,340,365,342,343,344,345,346,356,270,341,368,369,370,371,372 }; #ifdef CONFIG_SPARC static int sparc_l1_a_state; extern void sun_do_break(void); #endif static int emulate_raw(struct vc_data *vc, unsigned int keycode, unsigned char up_flag) { int code; switch (keycode) { case KEY_PAUSE: put_queue(vc, 0xe1); put_queue(vc, 0x1d | up_flag); put_queue(vc, 0x45 | up_flag); break; case KEY_HANGEUL: if (!up_flag) put_queue(vc, 0xf2); break; case KEY_HANJA: if (!up_flag) put_queue(vc, 0xf1); break; case KEY_SYSRQ: /* * Real AT keyboards (that's what we're trying * to emulate here) emit 0xe0 0x2a 0xe0 0x37 when * pressing PrtSc/SysRq alone, but simply 0x54 * when pressing Alt+PrtSc/SysRq. 
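 * key_down is tested below to pick the matching sequence.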
*/ if (test_bit(KEY_LEFTALT, key_down) || test_bit(KEY_RIGHTALT, key_down)) { put_queue(vc, 0x54 | up_flag); } else { put_queue(vc, 0xe0); put_queue(vc, 0x2a | up_flag); put_queue(vc, 0xe0); put_queue(vc, 0x37 | up_flag); } break; default: if (keycode > 255) return -1; code = x86_keycodes[keycode]; if (!code) return -1; if (code & 0x100) put_queue(vc, 0xe0); put_queue(vc, (code & 0x7f) | up_flag); break; } return 0; } #else static inline bool kbd_is_hw_raw(const struct input_dev *dev) { return false; } static int emulate_raw(struct vc_data *vc, unsigned int keycode, unsigned char up_flag) { if (keycode > 127) return -1; put_queue(vc, keycode | up_flag); return 0; } #endif static void kbd_rawcode(unsigned char data) { struct vc_data *vc = vc_cons[fg_console].d; kbd = &kbd_table[vc->vc_num]; if (kbd->kbdmode == VC_RAW) put_queue(vc, data); } static void kbd_keycode(unsigned int keycode, int down, bool hw_raw) { struct vc_data *vc = vc_cons[fg_console].d; unsigned short keysym, *key_map; unsigned char type; bool raw_mode; struct tty_struct *tty; int shift_final; struct keyboard_notifier_param param = { .vc = vc, .value = keycode, .down = down }; int rc; tty = vc->port.tty; if (tty && (!tty->driver_data)) { /* No driver data? Strange. Okay we fix it then. */ tty->driver_data = vc; } kbd = &kbd_table[vc->vc_num]; #ifdef CONFIG_SPARC if (keycode == KEY_STOP) sparc_l1_a_state = down; #endif rep = (down == 2); raw_mode = (kbd->kbdmode == VC_RAW); if (raw_mode && !hw_raw) if (emulate_raw(vc, keycode, !down << 7)) if (keycode < BTN_MISC && printk_ratelimit()) pr_warn("can't emulate rawmode for keycode %d\n", keycode); #ifdef CONFIG_SPARC if (keycode == KEY_A && sparc_l1_a_state) { sparc_l1_a_state = false; sun_do_break(); } #endif if (kbd->kbdmode == VC_MEDIUMRAW) { /* * This is extended medium raw mode, with keys above 127 * encoded as 0, high 7 bits, low 7 bits, with the 0 bearing * the 'up' flag if needed. 0 is reserved, so this shouldn't * interfere with anything else. The two bytes after 0 will * always have the up flag set not to interfere with older * applications. This allows for 16384 different keycodes, * which should be enough. */ if (keycode < 128) { put_queue(vc, keycode | (!down << 7)); } else { put_queue(vc, !down << 7); put_queue(vc, (keycode >> 7) | BIT(7)); put_queue(vc, keycode | BIT(7)); } raw_mode = true; } assign_bit(keycode, key_down, down); if (rep && (!vc_kbd_mode(kbd, VC_REPEAT) || (tty && !L_ECHO(tty) && tty_chars_in_buffer(tty)))) { /* * Don't repeat a key if the input buffers are not empty and the * characters get aren't echoed locally. This makes key repeat * usable with slow applications and under heavy loads. 
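 * (the repeat is dropped outright; nothing is queued for it)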
*/ return; } param.shift = shift_final = (shift_state | kbd->slockstate) ^ kbd->lockstate; param.ledstate = kbd->ledflagstate; key_map = key_maps[shift_final]; rc = atomic_notifier_call_chain(&keyboard_notifier_list, KBD_KEYCODE, ¶m); if (rc == NOTIFY_STOP || !key_map) { atomic_notifier_call_chain(&keyboard_notifier_list, KBD_UNBOUND_KEYCODE, ¶m); do_compute_shiftstate(); kbd->slockstate = 0; return; } if (keycode < NR_KEYS) keysym = key_map[keycode]; else if (keycode >= KEY_BRL_DOT1 && keycode <= KEY_BRL_DOT8) keysym = U(K(KT_BRL, keycode - KEY_BRL_DOT1 + 1)); else return; type = KTYP(keysym); if (type < 0xf0) { param.value = keysym; rc = atomic_notifier_call_chain(&keyboard_notifier_list, KBD_UNICODE, ¶m); if (rc != NOTIFY_STOP) if (down && !raw_mode) k_unicode(vc, keysym, !down); return; } type -= 0xf0; if (type == KT_LETTER) { type = KT_LATIN; if (vc_kbd_led(kbd, VC_CAPSLOCK)) { key_map = key_maps[shift_final ^ BIT(KG_SHIFT)]; if (key_map) keysym = key_map[keycode]; } } param.value = keysym; rc = atomic_notifier_call_chain(&keyboard_notifier_list, KBD_KEYSYM, ¶m); if (rc == NOTIFY_STOP) return; if ((raw_mode || kbd->kbdmode == VC_OFF) && type != KT_SPEC && type != KT_SHIFT) return; (*k_handler[type])(vc, keysym & 0xff, !down); param.ledstate = kbd->ledflagstate; atomic_notifier_call_chain(&keyboard_notifier_list, KBD_POST_KEYSYM, ¶m); if (type != KT_SLOCK) kbd->slockstate = 0; } static void kbd_event(struct input_handle *handle, unsigned int event_type, unsigned int event_code, int value) { /* We are called with interrupts disabled, just take the lock */ spin_lock(&kbd_event_lock); if (event_type == EV_MSC && event_code == MSC_RAW && kbd_is_hw_raw(handle->dev)) kbd_rawcode(value); if (event_type == EV_KEY && event_code <= KEY_MAX) kbd_keycode(event_code, value, kbd_is_hw_raw(handle->dev)); spin_unlock(&kbd_event_lock); tasklet_schedule(&keyboard_tasklet); do_poke_blanked_console = 1; schedule_console_callback(); } static bool kbd_match(struct input_handler *handler, struct input_dev *dev) { if (test_bit(EV_SND, dev->evbit)) return true; if (test_bit(EV_KEY, dev->evbit)) { if (find_next_bit(dev->keybit, BTN_MISC, KEY_RESERVED) < BTN_MISC) return true; if (find_next_bit(dev->keybit, KEY_BRL_DOT10 + 1, KEY_BRL_DOT1) <= KEY_BRL_DOT10) return true; } return false; } /* * When a keyboard (or other input device) is found, the kbd_connect * function is called. The function then looks at the device, and if it * likes it, it can open it and get events from it. In this (kbd_connect) * function, we should decide which VT to bind that keyboard to initially. */ static int kbd_connect(struct input_handler *handler, struct input_dev *dev, const struct input_device_id *id) { struct input_handle *handle; int error; handle = kzalloc(sizeof(struct input_handle), GFP_KERNEL); if (!handle) return -ENOMEM; handle->dev = dev; handle->handler = handler; handle->name = "kbd"; error = input_register_handle(handle); if (error) goto err_free_handle; error = input_open_device(handle); if (error) goto err_unregister_handle; return 0; err_unregister_handle: input_unregister_handle(handle); err_free_handle: kfree(handle); return error; } static void kbd_disconnect(struct input_handle *handle) { input_close_device(handle); input_unregister_handle(handle); kfree(handle); } /* * Start keyboard handler on the new keyboard by refreshing LED state to * match the rest of the system. 
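 * e.g. a keyboard plugged in while CapsLock is active lights its LED at once.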
*/ static void kbd_start(struct input_handle *handle) { tasklet_disable(&keyboard_tasklet); if (ledstate != -1U) kbd_update_leds_helper(handle, &ledstate); tasklet_enable(&keyboard_tasklet); } static const struct input_device_id kbd_ids[] = { { .flags = INPUT_DEVICE_ID_MATCH_EVBIT, .evbit = { BIT_MASK(EV_KEY) }, }, { .flags = INPUT_DEVICE_ID_MATCH_EVBIT, .evbit = { BIT_MASK(EV_SND) }, }, { }, /* Terminating entry */ }; MODULE_DEVICE_TABLE(input, kbd_ids); static struct input_handler kbd_handler = { .event = kbd_event, .match = kbd_match, .connect = kbd_connect, .disconnect = kbd_disconnect, .start = kbd_start, .name = "kbd", .id_table = kbd_ids, }; int __init kbd_init(void) { int i; int error; for (i = 0; i < MAX_NR_CONSOLES; i++) { kbd_table[i].ledflagstate = kbd_defleds(); kbd_table[i].default_ledflagstate = kbd_defleds(); kbd_table[i].ledmode = LED_SHOW_FLAGS; kbd_table[i].lockstate = KBD_DEFLOCK; kbd_table[i].slockstate = 0; kbd_table[i].modeflags = KBD_DEFMODE; kbd_table[i].kbdmode = default_utf8 ? VC_UNICODE : VC_XLATE; } kbd_init_leds(); error = input_register_handler(&kbd_handler); if (error) return error; tasklet_enable(&keyboard_tasklet); tasklet_schedule(&keyboard_tasklet); return 0; } /* Ioctl support code */ /** * vt_do_diacrit - diacritical table updates * @cmd: ioctl request * @udp: pointer to user data for ioctl * @perm: permissions check computed by caller * * Update the diacritical tables atomically and safely. Lock them * against simultaneous keypresses */ int vt_do_diacrit(unsigned int cmd, void __user *udp, int perm) { unsigned long flags; int asize; int ret = 0; switch (cmd) { case KDGKBDIACR: { struct kbdiacrs __user *a = udp; struct kbdiacr *dia; int i; dia = kmalloc_array(MAX_DIACR, sizeof(struct kbdiacr), GFP_KERNEL); if (!dia) return -ENOMEM; /* Lock the diacriticals table, make a copy and then copy it after we unlock */ spin_lock_irqsave(&kbd_event_lock, flags); asize = accent_table_size; for (i = 0; i < asize; i++) { dia[i].diacr = conv_uni_to_8bit( accent_table[i].diacr); dia[i].base = conv_uni_to_8bit( accent_table[i].base); dia[i].result = conv_uni_to_8bit( accent_table[i].result); } spin_unlock_irqrestore(&kbd_event_lock, flags); if (put_user(asize, &a->kb_cnt)) ret = -EFAULT; else if (copy_to_user(a->kbdiacr, dia, asize * sizeof(struct kbdiacr))) ret = -EFAULT; kfree(dia); return ret; } case KDGKBDIACRUC: { struct kbdiacrsuc __user *a = udp; void *buf; buf = kmalloc_array(MAX_DIACR, sizeof(struct kbdiacruc), GFP_KERNEL); if (buf == NULL) return -ENOMEM; /* Lock the diacriticals table, make a copy and then copy it after we unlock */ spin_lock_irqsave(&kbd_event_lock, flags); asize = accent_table_size; memcpy(buf, accent_table, asize * sizeof(struct kbdiacruc)); spin_unlock_irqrestore(&kbd_event_lock, flags); if (put_user(asize, &a->kb_cnt)) ret = -EFAULT; else if (copy_to_user(a->kbdiacruc, buf, asize*sizeof(struct kbdiacruc))) ret = -EFAULT; kfree(buf); return ret; } case KDSKBDIACR: { struct kbdiacrs __user *a = udp; struct kbdiacr *dia = NULL; unsigned int ct; int i; if (!perm) return -EPERM; if (get_user(ct, &a->kb_cnt)) return -EFAULT; if (ct >= MAX_DIACR) return -EINVAL; if (ct) { dia = memdup_array_user(a->kbdiacr, ct, sizeof(struct kbdiacr)); if (IS_ERR(dia)) return PTR_ERR(dia); } spin_lock_irqsave(&kbd_event_lock, flags); accent_table_size = ct; for (i = 0; i < ct; i++) { accent_table[i].diacr = conv_8bit_to_uni(dia[i].diacr); accent_table[i].base = conv_8bit_to_uni(dia[i].base); accent_table[i].result = conv_8bit_to_uni(dia[i].result); } 
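/* accent_table now holds the user's entries, widened from 8-bit to Unicode */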
spin_unlock_irqrestore(&kbd_event_lock, flags); kfree(dia); return 0; } case KDSKBDIACRUC: { struct kbdiacrsuc __user *a = udp; unsigned int ct; void *buf = NULL; if (!perm) return -EPERM; if (get_user(ct, &a->kb_cnt)) return -EFAULT; if (ct >= MAX_DIACR) return -EINVAL; if (ct) { buf = memdup_array_user(a->kbdiacruc, ct, sizeof(struct kbdiacruc)); if (IS_ERR(buf)) return PTR_ERR(buf); } spin_lock_irqsave(&kbd_event_lock, flags); if (ct) memcpy(accent_table, buf, ct * sizeof(struct kbdiacruc)); accent_table_size = ct; spin_unlock_irqrestore(&kbd_event_lock, flags); kfree(buf); return 0; } } return ret; } /** * vt_do_kdskbmode - set keyboard mode ioctl * @console: the console to use * @arg: the requested mode * * Update the keyboard mode bits while holding the correct locks. * Return 0 for success or an error code. */ int vt_do_kdskbmode(unsigned int console, unsigned int arg) { struct kbd_struct *kb = &kbd_table[console]; int ret = 0; unsigned long flags; spin_lock_irqsave(&kbd_event_lock, flags); switch(arg) { case K_RAW: kb->kbdmode = VC_RAW; break; case K_MEDIUMRAW: kb->kbdmode = VC_MEDIUMRAW; break; case K_XLATE: kb->kbdmode = VC_XLATE; do_compute_shiftstate(); break; case K_UNICODE: kb->kbdmode = VC_UNICODE; do_compute_shiftstate(); break; case K_OFF: kb->kbdmode = VC_OFF; break; default: ret = -EINVAL; } spin_unlock_irqrestore(&kbd_event_lock, flags); return ret; } /** * vt_do_kdskbmeta - set keyboard meta state * @console: the console to use * @arg: the requested meta state * * Update the keyboard meta bits while holding the correct locks. * Return 0 for success or an error code. */ int vt_do_kdskbmeta(unsigned int console, unsigned int arg) { struct kbd_struct *kb = &kbd_table[console]; int ret = 0; unsigned long flags; spin_lock_irqsave(&kbd_event_lock, flags); switch(arg) { case K_METABIT: clr_vc_kbd_mode(kb, VC_META); break; case K_ESCPREFIX: set_vc_kbd_mode(kb, VC_META); break; default: ret = -EINVAL; } spin_unlock_irqrestore(&kbd_event_lock, flags); return ret; } int vt_do_kbkeycode_ioctl(int cmd, struct kbkeycode __user *user_kbkc, int perm) { struct kbkeycode tmp; int kc = 0; if (copy_from_user(&tmp, user_kbkc, sizeof(struct kbkeycode))) return -EFAULT; switch (cmd) { case KDGETKEYCODE: kc = getkeycode(tmp.scancode); if (kc >= 0) kc = put_user(kc, &user_kbkc->keycode); break; case KDSETKEYCODE: if (!perm) return -EPERM; kc = setkeycode(tmp.scancode, tmp.keycode); break; } return kc; } static unsigned short vt_kdgkbent(unsigned char kbdmode, unsigned char idx, unsigned char map) { unsigned short *key_map, val; unsigned long flags; /* Ensure another thread doesn't free it under us */ spin_lock_irqsave(&kbd_event_lock, flags); key_map = key_maps[map]; if (key_map) { val = U(key_map[idx]); if (kbdmode != VC_UNICODE && KTYP(val) >= NR_TYPES) val = K_HOLE; } else val = idx ? 
K_HOLE : K_NOSUCHMAP; spin_unlock_irqrestore(&kbd_event_lock, flags); return val; } static int vt_kdskbent(unsigned char kbdmode, unsigned char idx, unsigned char map, unsigned short val) { unsigned long flags; unsigned short *key_map, *new_map, oldval; if (!idx && val == K_NOSUCHMAP) { spin_lock_irqsave(&kbd_event_lock, flags); /* deallocate map */ key_map = key_maps[map]; if (map && key_map) { key_maps[map] = NULL; if (key_map[0] == U(K_ALLOCATED)) { kfree(key_map); keymap_count--; } } spin_unlock_irqrestore(&kbd_event_lock, flags); return 0; } if (KTYP(val) < NR_TYPES) { if (KVAL(val) > max_vals[KTYP(val)]) return -EINVAL; } else if (kbdmode != VC_UNICODE) return -EINVAL; /* ++Geert: non-PC keyboards may generate keycode zero */ #if !defined(__mc68000__) && !defined(__powerpc__) /* assignment to entry 0 only tests validity of args */ if (!idx) return 0; #endif new_map = kmalloc(sizeof(plain_map), GFP_KERNEL); if (!new_map) return -ENOMEM; spin_lock_irqsave(&kbd_event_lock, flags); key_map = key_maps[map]; if (key_map == NULL) { int j; if (keymap_count >= MAX_NR_OF_USER_KEYMAPS && !capable(CAP_SYS_RESOURCE)) { spin_unlock_irqrestore(&kbd_event_lock, flags); kfree(new_map); return -EPERM; } key_maps[map] = new_map; key_map = new_map; key_map[0] = U(K_ALLOCATED); for (j = 1; j < NR_KEYS; j++) key_map[j] = U(K_HOLE); keymap_count++; } else kfree(new_map); oldval = U(key_map[idx]); if (val == oldval) goto out; /* Attention Key */ if ((oldval == K_SAK || val == K_SAK) && !capable(CAP_SYS_ADMIN)) { spin_unlock_irqrestore(&kbd_event_lock, flags); return -EPERM; } key_map[idx] = U(val); if (!map && (KTYP(oldval) == KT_SHIFT || KTYP(val) == KT_SHIFT)) do_compute_shiftstate(); out: spin_unlock_irqrestore(&kbd_event_lock, flags); return 0; } int vt_do_kdsk_ioctl(int cmd, struct kbentry __user *user_kbe, int perm, unsigned int console) { struct kbd_struct *kb = &kbd_table[console]; struct kbentry kbe; if (copy_from_user(&kbe, user_kbe, sizeof(struct kbentry))) return -EFAULT; switch (cmd) { case KDGKBENT: return put_user(vt_kdgkbent(kb->kbdmode, kbe.kb_index, kbe.kb_table), &user_kbe->kb_value); case KDSKBENT: if (!perm || !capable(CAP_SYS_TTY_CONFIG)) return -EPERM; return vt_kdskbent(kb->kbdmode, kbe.kb_index, kbe.kb_table, kbe.kb_value); } return 0; } static char *vt_kdskbsent(char *kbs, unsigned char cur) { static DECLARE_BITMAP(is_kmalloc, MAX_NR_FUNC); char *cur_f = func_table[cur]; if (cur_f && strlen(cur_f) >= strlen(kbs)) { strcpy(cur_f, kbs); return kbs; } func_table[cur] = kbs; return __test_and_set_bit(cur, is_kmalloc) ? cur_f : NULL; } int vt_do_kdgkb_ioctl(int cmd, struct kbsentry __user *user_kdgkb, int perm) { unsigned char kb_func; unsigned long flags; char *kbs; int ret; if (get_user(kb_func, &user_kdgkb->kb_func)) return -EFAULT; kb_func = array_index_nospec(kb_func, MAX_NR_FUNC); switch (cmd) { case KDGKBSENT: { /* size should have been a struct member */ ssize_t len = sizeof(user_kdgkb->kb_string); kbs = kmalloc(len, GFP_KERNEL); if (!kbs) return -ENOMEM; spin_lock_irqsave(&func_buf_lock, flags); len = strscpy(kbs, func_table[kb_func] ? : "", len); spin_unlock_irqrestore(&func_buf_lock, flags); if (len < 0) { ret = -ENOSPC; break; } ret = copy_to_user(user_kdgkb->kb_string, kbs, len + 1) ? 
-EFAULT : 0; break; } case KDSKBSENT: if (!perm || !capable(CAP_SYS_TTY_CONFIG)) return -EPERM; kbs = strndup_user(user_kdgkb->kb_string, sizeof(user_kdgkb->kb_string)); if (IS_ERR(kbs)) return PTR_ERR(kbs); spin_lock_irqsave(&func_buf_lock, flags); kbs = vt_kdskbsent(kbs, kb_func); spin_unlock_irqrestore(&func_buf_lock, flags); ret = 0; break; } kfree(kbs); return ret; } int vt_do_kdskled(unsigned int console, int cmd, unsigned long arg, int perm) { struct kbd_struct *kb = &kbd_table[console]; unsigned long flags; unsigned char ucval; switch(cmd) { /* the ioctls below read/set the flags usually shown in the leds */ /* don't use them - they will go away without warning */ case KDGKBLED: spin_lock_irqsave(&kbd_event_lock, flags); ucval = kb->ledflagstate | (kb->default_ledflagstate << 4); spin_unlock_irqrestore(&kbd_event_lock, flags); return put_user(ucval, (char __user *)arg); case KDSKBLED: if (!perm) return -EPERM; if (arg & ~0x77) return -EINVAL; spin_lock_irqsave(&led_lock, flags); kb->ledflagstate = (arg & 7); kb->default_ledflagstate = ((arg >> 4) & 7); set_leds(); spin_unlock_irqrestore(&led_lock, flags); return 0; /* the ioctls below only set the lights, not the functions */ /* for those, see KDGKBLED and KDSKBLED above */ case KDGETLED: ucval = getledstate(); return put_user(ucval, (char __user *)arg); case KDSETLED: if (!perm) return -EPERM; setledstate(kb, arg); return 0; } return -ENOIOCTLCMD; } int vt_do_kdgkbmode(unsigned int console) { struct kbd_struct *kb = &kbd_table[console]; /* This is a spot read so needs no locking */ switch (kb->kbdmode) { case VC_RAW: return K_RAW; case VC_MEDIUMRAW: return K_MEDIUMRAW; case VC_UNICODE: return K_UNICODE; case VC_OFF: return K_OFF; default: return K_XLATE; } } /** * vt_do_kdgkbmeta - report meta status * @console: console to report * * Report the meta flag status of this console */ int vt_do_kdgkbmeta(unsigned int console) { struct kbd_struct *kb = &kbd_table[console]; /* Again a spot read so no locking */ return vc_kbd_mode(kb, VC_META) ? K_ESCPREFIX : K_METABIT; } /** * vt_reset_unicode - reset the unicode status * @console: console being reset * * Restore the unicode console state to its default */ void vt_reset_unicode(unsigned int console) { unsigned long flags; spin_lock_irqsave(&kbd_event_lock, flags); kbd_table[console].kbdmode = default_utf8 ? VC_UNICODE : VC_XLATE; spin_unlock_irqrestore(&kbd_event_lock, flags); } /** * vt_get_shift_state - shift bit state * * Report the shift bits from the keyboard state. We have to export * this to support some oddities in the vt layer. 
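 * The value is only a transient snapshot, hence the unlocked read below.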
*/
int vt_get_shift_state(void)
{
	/* Don't lock as this is a transient report */
	return shift_state;
}

/**
 * vt_reset_keyboard - reset keyboard state
 * @console: console to reset
 *
 * Reset the keyboard bits for a console as part of a general console
 * reset event
 */
void vt_reset_keyboard(unsigned int console)
{
	struct kbd_struct *kb = &kbd_table[console];
	unsigned long flags;

	spin_lock_irqsave(&kbd_event_lock, flags);
	set_vc_kbd_mode(kb, VC_REPEAT);
	clr_vc_kbd_mode(kb, VC_CKMODE);
	clr_vc_kbd_mode(kb, VC_APPLIC);
	clr_vc_kbd_mode(kb, VC_CRLF);
	kb->lockstate = 0;
	kb->slockstate = 0;
	spin_lock(&led_lock);
	kb->ledmode = LED_SHOW_FLAGS;
	kb->ledflagstate = kb->default_ledflagstate;
	spin_unlock(&led_lock);
	/* do not do set_leds here because this causes an endless tasklet loop
	   when the keyboard hasn't been initialized yet */
	spin_unlock_irqrestore(&kbd_event_lock, flags);
}

/**
 * vt_get_kbd_mode_bit - read keyboard status bits
 * @console: console to read from
 * @bit: mode bit to read
 *
 * Report back a vt mode bit. We do this without locking so the
 * caller must be sure that there are no synchronization needs
 */
int vt_get_kbd_mode_bit(unsigned int console, int bit)
{
	struct kbd_struct *kb = &kbd_table[console];

	return vc_kbd_mode(kb, bit);
}

/**
 * vt_set_kbd_mode_bit - set keyboard status bits
 * @console: console to write to
 * @bit: mode bit to set
 *
 * Set a vt mode bit. The update is done under the keyboard event lock.
 */
void vt_set_kbd_mode_bit(unsigned int console, int bit)
{
	struct kbd_struct *kb = &kbd_table[console];
	unsigned long flags;

	spin_lock_irqsave(&kbd_event_lock, flags);
	set_vc_kbd_mode(kb, bit);
	spin_unlock_irqrestore(&kbd_event_lock, flags);
}

/**
 * vt_clr_kbd_mode_bit - clear keyboard status bits
 * @console: console to write to
 * @bit: mode bit to clear
 *
 * Clear a vt mode bit. The update is done under the keyboard event lock.
 */
void vt_clr_kbd_mode_bit(unsigned int console, int bit)
{
	struct kbd_struct *kb = &kbd_table[console];
	unsigned long flags;

	spin_lock_irqsave(&kbd_event_lock, flags);
	clr_vc_kbd_mode(kb, bit);
	spin_unlock_irqrestore(&kbd_event_lock, flags);
}
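/*
 * A minimal sketch (not part of the kernel sources above) of a module that
 * consumes the keyboard notifier chain exported by keyboard.c via
 * register_keyboard_notifier(); the "example_*" names are hypothetical.
 */
#include <linux/module.h>
#include <linux/notifier.h>
#include <linux/keyboard.h>

static int example_kbd_notify(struct notifier_block *nb,
			      unsigned long action, void *data)
{
	struct keyboard_notifier_param *param = data;

	/* KBD_KEYCODE fires for every press/release, before keysym lookup */
	if (action == KBD_KEYCODE && param->down)
		pr_info("keycode %u down\n", param->value);

	return NOTIFY_OK;	/* let other listeners see the event too */
}

static struct notifier_block example_kbd_nb = {
	.notifier_call = example_kbd_notify,
};

static int __init example_kbd_init(void)
{
	return register_keyboard_notifier(&example_kbd_nb);
}

static void __exit example_kbd_exit(void)
{
	unregister_keyboard_notifier(&example_kbd_nb);
}

module_init(example_kbd_init);
module_exit(example_kbd_exit);
MODULE_DESCRIPTION("hypothetical keyboard notifier example");
MODULE_LICENSE("GPL");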
// SPDX-License-Identifier: GPL-2.0-only
/* DVB USB compliant Linux driver for the AZUREWAVE DVB-S/S2 USB2.0 (AZ6027)
 * receiver.
 *
 * Copyright (C) 2009 Adams.Xu <adams.xu@azwave.com.cn>
 *
 * see Documentation/driver-api/media/drivers/dvb-usb.rst for more information
 */
#include "az6027.h"
#include "stb0899_drv.h"
#include "stb0899_reg.h"
#include "stb0899_cfg.h"
#include "stb6100.h"
#include "stb6100_cfg.h"
#include <media/dvb_ca_en50221.h>

int dvb_usb_az6027_debug;
module_param_named(debug, dvb_usb_az6027_debug, int, 0644);
MODULE_PARM_DESC(debug, "set debugging level (1=info,xfer=2,rc=4 (or-able))." DVB_USB_DEBUG_STATUS);

DVB_DEFINE_MOD_OPT_ADAPTER_NR(adapter_nr);

struct az6027_device_state {
	struct dvb_ca_en50221 ca;
	struct mutex ca_mutex;
	u8 power_state;
};

static const struct stb0899_s1_reg az6027_stb0899_s1_init_1[] = {
	/* 0x0000000b, SYSREG */
	{ STB0899_DEV_ID	, 0x30 },
	{ STB0899_DISCNTRL1	, 0x32 },
	{ STB0899_DISCNTRL2	, 0x80 },
	{ STB0899_DISRX_ST0	, 0x04 },
	{ STB0899_DISRX_ST1	, 0x00 },
	{ STB0899_DISPARITY	, 0x00 },
	{ STB0899_DISSTATUS	, 0x20 },
	{ STB0899_DISF22	, 0x99 },
	{ STB0899_DISF22RX	, 0xa8 },
	/* SYSREG ?
*/ { STB0899_ACRPRESC , 0x11 }, { STB0899_ACRDIV1 , 0x0a }, { STB0899_ACRDIV2 , 0x05 }, { STB0899_DACR1 , 0x00 }, { STB0899_DACR2 , 0x00 }, { STB0899_OUTCFG , 0x00 }, { STB0899_MODECFG , 0x00 }, { STB0899_IRQSTATUS_3 , 0xfe }, { STB0899_IRQSTATUS_2 , 0x03 }, { STB0899_IRQSTATUS_1 , 0x7c }, { STB0899_IRQSTATUS_0 , 0xf4 }, { STB0899_IRQMSK_3 , 0xf3 }, { STB0899_IRQMSK_2 , 0xfc }, { STB0899_IRQMSK_1 , 0xff }, { STB0899_IRQMSK_0 , 0xff }, { STB0899_IRQCFG , 0x00 }, { STB0899_I2CCFG , 0x88 }, { STB0899_I2CRPT , 0x58 }, { STB0899_IOPVALUE5 , 0x00 }, { STB0899_IOPVALUE4 , 0x33 }, { STB0899_IOPVALUE3 , 0x6d }, { STB0899_IOPVALUE2 , 0x90 }, { STB0899_IOPVALUE1 , 0x60 }, { STB0899_IOPVALUE0 , 0x00 }, { STB0899_GPIO00CFG , 0x82 }, { STB0899_GPIO01CFG , 0x82 }, { STB0899_GPIO02CFG , 0x82 }, { STB0899_GPIO03CFG , 0x82 }, { STB0899_GPIO04CFG , 0x82 }, { STB0899_GPIO05CFG , 0x82 }, { STB0899_GPIO06CFG , 0x82 }, { STB0899_GPIO07CFG , 0x82 }, { STB0899_GPIO08CFG , 0x82 }, { STB0899_GPIO09CFG , 0x82 }, { STB0899_GPIO10CFG , 0x82 }, { STB0899_GPIO11CFG , 0x82 }, { STB0899_GPIO12CFG , 0x82 }, { STB0899_GPIO13CFG , 0x82 }, { STB0899_GPIO14CFG , 0x82 }, { STB0899_GPIO15CFG , 0x82 }, { STB0899_GPIO16CFG , 0x82 }, { STB0899_GPIO17CFG , 0x82 }, { STB0899_GPIO18CFG , 0x82 }, { STB0899_GPIO19CFG , 0x82 }, { STB0899_GPIO20CFG , 0x82 }, { STB0899_SDATCFG , 0xb8 }, { STB0899_SCLTCFG , 0xba }, { STB0899_AGCRFCFG , 0x1c }, /* 0x11 */ { STB0899_GPIO22 , 0x82 }, /* AGCBB2CFG */ { STB0899_GPIO21 , 0x91 }, /* AGCBB1CFG */ { STB0899_DIRCLKCFG , 0x82 }, { STB0899_CLKOUT27CFG , 0x7e }, { STB0899_STDBYCFG , 0x82 }, { STB0899_CS0CFG , 0x82 }, { STB0899_CS1CFG , 0x82 }, { STB0899_DISEQCOCFG , 0x20 }, { STB0899_GPIO32CFG , 0x82 }, { STB0899_GPIO33CFG , 0x82 }, { STB0899_GPIO34CFG , 0x82 }, { STB0899_GPIO35CFG , 0x82 }, { STB0899_GPIO36CFG , 0x82 }, { STB0899_GPIO37CFG , 0x82 }, { STB0899_GPIO38CFG , 0x82 }, { STB0899_GPIO39CFG , 0x82 }, { STB0899_NCOARSE , 0x17 }, /* 0x15 = 27 Mhz Clock, F/3 = 198MHz, F/6 = 99MHz */ { STB0899_SYNTCTRL , 0x02 }, /* 0x00 = CLK from CLKI, 0x02 = CLK from XTALI */ { STB0899_FILTCTRL , 0x00 }, { STB0899_SYSCTRL , 0x01 }, { STB0899_STOPCLK1 , 0x20 }, { STB0899_STOPCLK2 , 0x00 }, { STB0899_INTBUFSTATUS , 0x00 }, { STB0899_INTBUFCTRL , 0x0a }, { 0xffff , 0xff }, }; static const struct stb0899_s1_reg az6027_stb0899_s1_init_3[] = { { STB0899_DEMOD , 0x00 }, { STB0899_RCOMPC , 0xc9 }, { STB0899_AGC1CN , 0x01 }, { STB0899_AGC1REF , 0x10 }, { STB0899_RTC , 0x23 }, { STB0899_TMGCFG , 0x4e }, { STB0899_AGC2REF , 0x34 }, { STB0899_TLSR , 0x84 }, { STB0899_CFD , 0xf7 }, { STB0899_ACLC , 0x87 }, { STB0899_BCLC , 0x94 }, { STB0899_EQON , 0x41 }, { STB0899_LDT , 0xf1 }, { STB0899_LDT2 , 0xe3 }, { STB0899_EQUALREF , 0xb4 }, { STB0899_TMGRAMP , 0x10 }, { STB0899_TMGTHD , 0x30 }, { STB0899_IDCCOMP , 0xfd }, { STB0899_QDCCOMP , 0xff }, { STB0899_POWERI , 0x0c }, { STB0899_POWERQ , 0x0f }, { STB0899_RCOMP , 0x6c }, { STB0899_AGCIQIN , 0x80 }, { STB0899_AGC2I1 , 0x06 }, { STB0899_AGC2I2 , 0x00 }, { STB0899_TLIR , 0x30 }, { STB0899_RTF , 0x7f }, { STB0899_DSTATUS , 0x00 }, { STB0899_LDI , 0xbc }, { STB0899_CFRM , 0xea }, { STB0899_CFRL , 0x31 }, { STB0899_NIRM , 0x2b }, { STB0899_NIRL , 0x80 }, { STB0899_ISYMB , 0x1d }, { STB0899_QSYMB , 0xa6 }, { STB0899_SFRH , 0x2f }, { STB0899_SFRM , 0x68 }, { STB0899_SFRL , 0x40 }, { STB0899_SFRUPH , 0x2f }, { STB0899_SFRUPM , 0x68 }, { STB0899_SFRUPL , 0x40 }, { STB0899_EQUAI1 , 0x02 }, { STB0899_EQUAQ1 , 0xff }, { STB0899_EQUAI2 , 0x04 }, { STB0899_EQUAQ2 , 0x05 }, { STB0899_EQUAI3 , 
0x02 }, { STB0899_EQUAQ3 , 0xfd }, { STB0899_EQUAI4 , 0x03 }, { STB0899_EQUAQ4 , 0x07 }, { STB0899_EQUAI5 , 0x08 }, { STB0899_EQUAQ5 , 0xf5 }, { STB0899_DSTATUS2 , 0x00 }, { STB0899_VSTATUS , 0x00 }, { STB0899_VERROR , 0x86 }, { STB0899_IQSWAP , 0x2a }, { STB0899_ECNT1M , 0x00 }, { STB0899_ECNT1L , 0x00 }, { STB0899_ECNT2M , 0x00 }, { STB0899_ECNT2L , 0x00 }, { STB0899_ECNT3M , 0x0a }, { STB0899_ECNT3L , 0xad }, { STB0899_FECAUTO1 , 0x06 }, { STB0899_FECM , 0x01 }, { STB0899_VTH12 , 0xb0 }, { STB0899_VTH23 , 0x7a }, { STB0899_VTH34 , 0x58 }, { STB0899_VTH56 , 0x38 }, { STB0899_VTH67 , 0x34 }, { STB0899_VTH78 , 0x24 }, { STB0899_PRVIT , 0xff }, { STB0899_VITSYNC , 0x19 }, { STB0899_RSULC , 0xb1 }, /* DVB = 0xb1, DSS = 0xa1 */ { STB0899_TSULC , 0x42 }, { STB0899_RSLLC , 0x41 }, { STB0899_TSLPL , 0x12 }, { STB0899_TSCFGH , 0x0c }, { STB0899_TSCFGM , 0x00 }, { STB0899_TSCFGL , 0x00 }, { STB0899_TSOUT , 0x69 }, /* 0x0d for CAM */ { STB0899_RSSYNCDEL , 0x00 }, { STB0899_TSINHDELH , 0x02 }, { STB0899_TSINHDELM , 0x00 }, { STB0899_TSINHDELL , 0x00 }, { STB0899_TSLLSTKM , 0x1b }, { STB0899_TSLLSTKL , 0xb3 }, { STB0899_TSULSTKM , 0x00 }, { STB0899_TSULSTKL , 0x00 }, { STB0899_PCKLENUL , 0xbc }, { STB0899_PCKLENLL , 0xcc }, { STB0899_RSPCKLEN , 0xbd }, { STB0899_TSSTATUS , 0x90 }, { STB0899_ERRCTRL1 , 0xb6 }, { STB0899_ERRCTRL2 , 0x95 }, { STB0899_ERRCTRL3 , 0x8d }, { STB0899_DMONMSK1 , 0x27 }, { STB0899_DMONMSK0 , 0x03 }, { STB0899_DEMAPVIT , 0x5c }, { STB0899_PLPARM , 0x19 }, { STB0899_PDELCTRL , 0x48 }, { STB0899_PDELCTRL2 , 0x00 }, { STB0899_BBHCTRL1 , 0x00 }, { STB0899_BBHCTRL2 , 0x00 }, { STB0899_HYSTTHRESH , 0x77 }, { STB0899_MATCSTM , 0x00 }, { STB0899_MATCSTL , 0x00 }, { STB0899_UPLCSTM , 0x00 }, { STB0899_UPLCSTL , 0x00 }, { STB0899_DFLCSTM , 0x00 }, { STB0899_DFLCSTL , 0x00 }, { STB0899_SYNCCST , 0x00 }, { STB0899_SYNCDCSTM , 0x00 }, { STB0899_SYNCDCSTL , 0x00 }, { STB0899_ISI_ENTRY , 0x00 }, { STB0899_ISI_BIT_EN , 0x00 }, { STB0899_MATSTRM , 0xf0 }, { STB0899_MATSTRL , 0x02 }, { STB0899_UPLSTRM , 0x45 }, { STB0899_UPLSTRL , 0x60 }, { STB0899_DFLSTRM , 0xe3 }, { STB0899_DFLSTRL , 0x00 }, { STB0899_SYNCSTR , 0x47 }, { STB0899_SYNCDSTRM , 0x05 }, { STB0899_SYNCDSTRL , 0x18 }, { STB0899_CFGPDELSTATUS1 , 0x19 }, { STB0899_CFGPDELSTATUS2 , 0x2b }, { STB0899_BBFERRORM , 0x00 }, { STB0899_BBFERRORL , 0x01 }, { STB0899_UPKTERRORM , 0x00 }, { STB0899_UPKTERRORL , 0x00 }, { 0xffff , 0xff }, }; static struct stb0899_config az6027_stb0899_config = { .init_dev = az6027_stb0899_s1_init_1, .init_s2_demod = stb0899_s2_init_2, .init_s1_demod = az6027_stb0899_s1_init_3, .init_s2_fec = stb0899_s2_init_4, .init_tst = stb0899_s1_init_5, .demod_address = 0xd0, /* 0x68, 0xd0 >> 1 */ .xtal_freq = 27000000, .inversion = IQ_SWAP_ON, .lo_clk = 76500000, .hi_clk = 99000000, .esno_ave = STB0899_DVBS2_ESNO_AVE, .esno_quant = STB0899_DVBS2_ESNO_QUANT, .avframes_coarse = STB0899_DVBS2_AVFRAMES_COARSE, .avframes_fine = STB0899_DVBS2_AVFRAMES_FINE, .miss_threshold = STB0899_DVBS2_MISS_THRESHOLD, .uwp_threshold_acq = STB0899_DVBS2_UWP_THRESHOLD_ACQ, .uwp_threshold_track = STB0899_DVBS2_UWP_THRESHOLD_TRACK, .uwp_threshold_sof = STB0899_DVBS2_UWP_THRESHOLD_SOF, .sof_search_timeout = STB0899_DVBS2_SOF_SEARCH_TIMEOUT, .btr_nco_bits = STB0899_DVBS2_BTR_NCO_BITS, .btr_gain_shift_offset = STB0899_DVBS2_BTR_GAIN_SHIFT_OFFSET, .crl_nco_bits = STB0899_DVBS2_CRL_NCO_BITS, .ldpc_max_iter = STB0899_DVBS2_LDPC_MAX_ITER, .tuner_get_frequency = stb6100_get_frequency, .tuner_set_frequency = stb6100_set_frequency, .tuner_set_bandwidth = 
stb6100_set_bandwidth, .tuner_get_bandwidth = stb6100_get_bandwidth, .tuner_set_rfsiggain = NULL, }; static struct stb6100_config az6027_stb6100_config = { .tuner_address = 0xc0, .refclock = 27000000, }; /* check for mutex FIXME */ static int az6027_usb_in_op(struct dvb_usb_device *d, u8 req, u16 value, u16 index, u8 *b, int blen) { int ret = -1; if (mutex_lock_interruptible(&d->usb_mutex)) return -EAGAIN; ret = usb_control_msg(d->udev, usb_rcvctrlpipe(d->udev, 0), req, USB_TYPE_VENDOR | USB_DIR_IN, value, index, b, blen, 2000); if (ret < 0) { warn("usb in operation failed. (%d)", ret); ret = -EIO; } else ret = 0; deb_xfer("in: req. %02x, val: %04x, ind: %04x, buffer: ", req, value, index); debug_dump(b, blen, deb_xfer); mutex_unlock(&d->usb_mutex); return ret; } static int az6027_usb_out_op(struct dvb_usb_device *d, u8 req, u16 value, u16 index, u8 *b, int blen) { int ret; deb_xfer("out: req. %02x, val: %04x, ind: %04x, buffer: ", req, value, index); debug_dump(b, blen, deb_xfer); if (mutex_lock_interruptible(&d->usb_mutex)) return -EAGAIN; ret = usb_control_msg(d->udev, usb_sndctrlpipe(d->udev, 0), req, USB_TYPE_VENDOR | USB_DIR_OUT, value, index, b, blen, 2000); if (ret != blen) { warn("usb out operation failed. (%d)", ret); mutex_unlock(&d->usb_mutex); return -EIO; } else{ mutex_unlock(&d->usb_mutex); return 0; } } static int az6027_streaming_ctrl(struct dvb_usb_adapter *adap, int onoff) { int ret; u8 req; u16 value; u16 index; int blen; deb_info("%s %d", __func__, onoff); req = 0xBC; value = onoff; index = 0; blen = 0; ret = az6027_usb_out_op(adap->dev, req, value, index, NULL, blen); if (ret != 0) warn("usb out operation failed. (%d)", ret); return ret; } /* keys for the enclosed remote control */ static struct rc_map_table rc_map_az6027_table[] = { { 0x01, KEY_1 }, { 0x02, KEY_2 }, }; /* remote control stuff (does not work with my box) */ static int az6027_rc_query(struct dvb_usb_device *d, u32 *event, int *state) { *state = REMOTE_NO_KEY_PRESSED; return 0; } /* int az6027_power_ctrl(struct dvb_usb_device *d, int onoff) { u8 v = onoff; return az6027_usb_out_op(d,0xBC,v,3,NULL,1); } */ static int az6027_ci_read_attribute_mem(struct dvb_ca_en50221 *ca, int slot, int address) { struct dvb_usb_device *d = ca->data; struct az6027_device_state *state = d->priv; int ret; u8 req; u16 value; u16 index; int blen; u8 *b; if (slot != 0) return -EINVAL; b = kmalloc(12, GFP_KERNEL); if (!b) return -ENOMEM; mutex_lock(&state->ca_mutex); req = 0xC1; value = address; index = 0; blen = 1; ret = az6027_usb_in_op(d, req, value, index, b, blen); if (ret < 0) { warn("usb in operation failed. (%d)", ret); ret = -EINVAL; } else { ret = b[0]; } mutex_unlock(&state->ca_mutex); kfree(b); return ret; } static int az6027_ci_write_attribute_mem(struct dvb_ca_en50221 *ca, int slot, int address, u8 value) { struct dvb_usb_device *d = ca->data; struct az6027_device_state *state = d->priv; int ret; u8 req; u16 value1; u16 index; int blen; deb_info("%s %d", __func__, slot); if (slot != 0) return -EINVAL; mutex_lock(&state->ca_mutex); req = 0xC2; value1 = address; index = value; blen = 0; ret = az6027_usb_out_op(d, req, value1, index, NULL, blen); if (ret != 0) warn("usb out operation failed. 
(%d)", ret); mutex_unlock(&state->ca_mutex); return ret; } static int az6027_ci_read_cam_control(struct dvb_ca_en50221 *ca, int slot, u8 address) { struct dvb_usb_device *d = ca->data; struct az6027_device_state *state = d->priv; int ret; u8 req; u16 value; u16 index; int blen; u8 *b; if (slot != 0) return -EINVAL; b = kmalloc(12, GFP_KERNEL); if (!b) return -ENOMEM; mutex_lock(&state->ca_mutex); req = 0xC3; value = address; index = 0; blen = 2; ret = az6027_usb_in_op(d, req, value, index, b, blen); if (ret < 0) { warn("usb in operation failed. (%d)", ret); ret = -EINVAL; } else { if (b[0] == 0) warn("Read CI IO error"); ret = b[1]; deb_info("read cam data = %x from 0x%x", b[1], value); } mutex_unlock(&state->ca_mutex); kfree(b); return ret; } static int az6027_ci_write_cam_control(struct dvb_ca_en50221 *ca, int slot, u8 address, u8 value) { struct dvb_usb_device *d = ca->data; struct az6027_device_state *state = d->priv; int ret; u8 req; u16 value1; u16 index; int blen; if (slot != 0) return -EINVAL; mutex_lock(&state->ca_mutex); req = 0xC4; value1 = address; index = value; blen = 0; ret = az6027_usb_out_op(d, req, value1, index, NULL, blen); if (ret != 0) { warn("usb out operation failed. (%d)", ret); goto failed; } failed: mutex_unlock(&state->ca_mutex); return ret; } static int CI_CamReady(struct dvb_ca_en50221 *ca, int slot) { struct dvb_usb_device *d = ca->data; int ret; u8 req; u16 value; u16 index; int blen; u8 *b; b = kmalloc(12, GFP_KERNEL); if (!b) return -ENOMEM; req = 0xC8; value = 0; index = 0; blen = 1; ret = az6027_usb_in_op(d, req, value, index, b, blen); if (ret < 0) { warn("usb in operation failed. (%d)", ret); ret = -EIO; } else{ ret = b[0]; } kfree(b); return ret; } static int az6027_ci_slot_reset(struct dvb_ca_en50221 *ca, int slot) { struct dvb_usb_device *d = ca->data; struct az6027_device_state *state = d->priv; int ret, i; u8 req; u16 value; u16 index; int blen; mutex_lock(&state->ca_mutex); req = 0xC6; value = 1; index = 0; blen = 0; ret = az6027_usb_out_op(d, req, value, index, NULL, blen); if (ret != 0) { warn("usb out operation failed. (%d)", ret); goto failed; } msleep(500); req = 0xC6; value = 0; index = 0; blen = 0; ret = az6027_usb_out_op(d, req, value, index, NULL, blen); if (ret != 0) { warn("usb out operation failed. (%d)", ret); goto failed; } for (i = 0; i < 15; i++) { msleep(100); if (CI_CamReady(ca, slot)) { deb_info("CAM Ready"); break; } } msleep(5000); failed: mutex_unlock(&state->ca_mutex); return ret; } static int az6027_ci_slot_shutdown(struct dvb_ca_en50221 *ca, int slot) { return 0; } static int az6027_ci_slot_ts_enable(struct dvb_ca_en50221 *ca, int slot) { struct dvb_usb_device *d = ca->data; struct az6027_device_state *state = d->priv; int ret; u8 req; u16 value; u16 index; int blen; deb_info("%s", __func__); mutex_lock(&state->ca_mutex); req = 0xC7; value = 1; index = 0; blen = 0; ret = az6027_usb_out_op(d, req, value, index, NULL, blen); if (ret != 0) { warn("usb out operation failed. (%d)", ret); goto failed; } failed: mutex_unlock(&state->ca_mutex); return ret; } static int az6027_ci_poll_slot_status(struct dvb_ca_en50221 *ca, int slot, int open) { struct dvb_usb_device *d = ca->data; struct az6027_device_state *state = d->priv; int ret; u8 req; u16 value; u16 index; int blen; u8 *b; b = kmalloc(12, GFP_KERNEL); if (!b) return -ENOMEM; mutex_lock(&state->ca_mutex); req = 0xC5; value = 0; index = 0; blen = 1; ret = az6027_usb_in_op(d, req, value, index, b, blen); if (ret < 0) { warn("usb in operation failed. 
(%d)", ret); ret = -EIO; } else ret = 0; if (!ret && b[0] == 1) { ret = DVB_CA_EN50221_POLL_CAM_PRESENT | DVB_CA_EN50221_POLL_CAM_READY; } mutex_unlock(&state->ca_mutex); kfree(b); return ret; } static void az6027_ci_uninit(struct dvb_usb_device *d) { struct az6027_device_state *state; deb_info("%s", __func__); if (NULL == d) return; state = d->priv; if (NULL == state) return; if (NULL == state->ca.data) return; dvb_ca_en50221_release(&state->ca); memset(&state->ca, 0, sizeof(state->ca)); } static int az6027_ci_init(struct dvb_usb_adapter *a) { struct dvb_usb_device *d = a->dev; struct az6027_device_state *state = d->priv; int ret; deb_info("%s", __func__); mutex_init(&state->ca_mutex); state->ca.owner = THIS_MODULE; state->ca.read_attribute_mem = az6027_ci_read_attribute_mem; state->ca.write_attribute_mem = az6027_ci_write_attribute_mem; state->ca.read_cam_control = az6027_ci_read_cam_control; state->ca.write_cam_control = az6027_ci_write_cam_control; state->ca.slot_reset = az6027_ci_slot_reset; state->ca.slot_shutdown = az6027_ci_slot_shutdown; state->ca.slot_ts_enable = az6027_ci_slot_ts_enable; state->ca.poll_slot_status = az6027_ci_poll_slot_status; state->ca.data = d; ret = dvb_ca_en50221_init(&a->dvb_adap, &state->ca, 0, /* flags */ 1);/* n_slots */ if (ret != 0) { err("Cannot initialize CI: Error %d.", ret); memset(&state->ca, 0, sizeof(state->ca)); return ret; } deb_info("CI initialized."); return 0; } /* static int az6027_read_mac_addr(struct dvb_usb_device *d, u8 mac[6]) { az6027_usb_in_op(d, 0xb7, 6, 0, &mac[0], 6); return 0; } */ static int az6027_set_voltage(struct dvb_frontend *fe, enum fe_sec_voltage voltage) { u8 buf; struct dvb_usb_adapter *adap = fe->dvb->priv; struct i2c_msg i2c_msg = { .addr = 0x99, .flags = 0, .buf = &buf, .len = 1 }; /* * 2 --18v * 1 --13v * 0 --off */ switch (voltage) { case SEC_VOLTAGE_13: buf = 1; i2c_transfer(&adap->dev->i2c_adap, &i2c_msg, 1); break; case SEC_VOLTAGE_18: buf = 2; i2c_transfer(&adap->dev->i2c_adap, &i2c_msg, 1); break; case SEC_VOLTAGE_OFF: buf = 0; i2c_transfer(&adap->dev->i2c_adap, &i2c_msg, 1); break; default: return -EINVAL; } return 0; } static int az6027_frontend_poweron(struct dvb_usb_adapter *adap) { int ret; u8 req; u16 value; u16 index; int blen; req = 0xBC; value = 1; /* power on */ index = 3; blen = 0; ret = az6027_usb_out_op(adap->dev, req, value, index, NULL, blen); if (ret != 0) return -EIO; return 0; } static int az6027_frontend_reset(struct dvb_usb_adapter *adap) { int ret; u8 req; u16 value; u16 index; int blen; /* reset demodulator */ req = 0xC0; value = 1; /* high */ index = 3; blen = 0; ret = az6027_usb_out_op(adap->dev, req, value, index, NULL, blen); if (ret != 0) return -EIO; req = 0xC0; value = 0; /* low */ index = 3; blen = 0; msleep_interruptible(200); ret = az6027_usb_out_op(adap->dev, req, value, index, NULL, blen); if (ret != 0) return -EIO; msleep_interruptible(200); req = 0xC0; value = 1; /*high */ index = 3; blen = 0; ret = az6027_usb_out_op(adap->dev, req, value, index, NULL, blen); if (ret != 0) return -EIO; msleep_interruptible(200); return 0; } static int az6027_frontend_tsbypass(struct dvb_usb_adapter *adap, int onoff) { int ret; u8 req; u16 value; u16 index; int blen; /* TS passthrough */ req = 0xC7; value = onoff; index = 0; blen = 0; ret = az6027_usb_out_op(adap->dev, req, value, index, NULL, blen); if (ret != 0) return -EIO; return 0; } static int az6027_frontend_attach(struct dvb_usb_adapter *adap) { az6027_frontend_poweron(adap); az6027_frontend_reset(adap); deb_info("adap = %p, dev = 
%p\n", adap, adap->dev); adap->fe_adap[0].fe = stb0899_attach(&az6027_stb0899_config, &adap->dev->i2c_adap); if (adap->fe_adap[0].fe) { deb_info("found STB0899 DVB-S/DVB-S2 frontend @0x%02x", az6027_stb0899_config.demod_address); if (stb6100_attach(adap->fe_adap[0].fe, &az6027_stb6100_config, &adap->dev->i2c_adap)) { deb_info("found STB6100 DVB-S/DVB-S2 frontend @0x%02x", az6027_stb6100_config.tuner_address); adap->fe_adap[0].fe->ops.set_voltage = az6027_set_voltage; az6027_ci_init(adap); } else { adap->fe_adap[0].fe = NULL; } } else warn("no front-end attached\n"); az6027_frontend_tsbypass(adap, 0); return 0; } static struct dvb_usb_device_properties az6027_properties; static void az6027_usb_disconnect(struct usb_interface *intf) { struct dvb_usb_device *d = usb_get_intfdata(intf); az6027_ci_uninit(d); dvb_usb_device_exit(intf); } static int az6027_usb_probe(struct usb_interface *intf, const struct usb_device_id *id) { return dvb_usb_device_init(intf, &az6027_properties, THIS_MODULE, NULL, adapter_nr); } /* I2C */ static int az6027_i2c_xfer(struct i2c_adapter *adap, struct i2c_msg msg[], int num) { struct dvb_usb_device *d = i2c_get_adapdata(adap); int i = 0, j = 0, len = 0; u16 index; u16 value; int length; u8 req; u8 *data; data = kmalloc(256, GFP_KERNEL); if (!data) return -ENOMEM; if (mutex_lock_interruptible(&d->i2c_mutex) < 0) { kfree(data); return -EAGAIN; } if (num > 2) warn("more than 2 i2c messages at a time is not handled yet. TODO."); for (i = 0; i < num; i++) { if (msg[i].addr == 0x99) { req = 0xBE; index = 0; if (msg[i].len < 1) { i = -EOPNOTSUPP; break; } value = msg[i].buf[0] & 0x00ff; length = 1; az6027_usb_out_op(d, req, value, index, data, length); } if (msg[i].addr == 0xd0) { /* write/read request */ if (i + 1 < num && (msg[i + 1].flags & I2C_M_RD)) { req = 0xB9; if (msg[i].len < 1) { i = -EOPNOTSUPP; break; } index = (((msg[i].buf[0] << 8) & 0xff00) | (msg[i].buf[1] & 0x00ff)); value = msg[i].addr + (msg[i].len << 8); length = msg[i + 1].len + 6; az6027_usb_in_op(d, req, value, index, data, length); len = msg[i + 1].len; for (j = 0; j < len; j++) msg[i + 1].buf[j] = data[j + 5]; i++; } else { /* demod 16bit addr */ req = 0xBD; if (msg[i].len < 1) { i = -EOPNOTSUPP; break; } index = (((msg[i].buf[0] << 8) & 0xff00) | (msg[i].buf[1] & 0x00ff)); value = msg[i].addr + (2 << 8); length = msg[i].len - 2; len = msg[i].len - 2; for (j = 0; j < len; j++) data[j] = msg[i].buf[j + 2]; az6027_usb_out_op(d, req, value, index, data, length); } } if (msg[i].addr == 0xc0) { if (msg[i].flags & I2C_M_RD) { req = 0xB9; index = 0x0; value = msg[i].addr; length = msg[i].len + 6; az6027_usb_in_op(d, req, value, index, data, length); len = msg[i].len; for (j = 0; j < len; j++) msg[i].buf[j] = data[j + 5]; } else { req = 0xBD; if (msg[i].len < 1) { i = -EOPNOTSUPP; break; } index = msg[i].buf[0] & 0x00FF; value = msg[i].addr + (1 << 8); length = msg[i].len - 1; len = msg[i].len - 1; for (j = 0; j < len; j++) data[j] = msg[i].buf[j + 1]; az6027_usb_out_op(d, req, value, index, data, length); } } } mutex_unlock(&d->i2c_mutex); kfree(data); return i; } static u32 az6027_i2c_func(struct i2c_adapter *adapter) { return I2C_FUNC_I2C; } static struct i2c_algorithm az6027_i2c_algo = { .master_xfer = az6027_i2c_xfer, .functionality = az6027_i2c_func, }; static int az6027_identify_state(struct usb_device *udev, const struct dvb_usb_device_properties *props, const struct dvb_usb_device_description **desc, int *cold) { u8 *b; s16 ret; b = kmalloc(16, GFP_KERNEL); if (!b) return -ENOMEM; ret = 
usb_control_msg(udev, usb_rcvctrlpipe(udev, 0), 0xb7, USB_TYPE_VENDOR | USB_DIR_IN, 6, 0, b, 6, USB_CTRL_GET_TIMEOUT); *cold = ret <= 0; kfree(b); deb_info("cold: %d\n", *cold); return 0; } enum { AZUREWAVE_AZ6027, TERRATEC_DVBS2CI_V1, TERRATEC_DVBS2CI_V2, TECHNISAT_USB2_HDCI_V1, TECHNISAT_USB2_HDCI_V2, ELGATO_EYETV_SAT, ELGATO_EYETV_SAT_V2, ELGATO_EYETV_SAT_V3, }; static struct usb_device_id az6027_usb_table[] = { DVB_USB_DEV(AZUREWAVE, AZUREWAVE_AZ6027), DVB_USB_DEV(TERRATEC, TERRATEC_DVBS2CI_V1), DVB_USB_DEV(TERRATEC, TERRATEC_DVBS2CI_V2), DVB_USB_DEV(TECHNISAT, TECHNISAT_USB2_HDCI_V1), DVB_USB_DEV(TECHNISAT, TECHNISAT_USB2_HDCI_V2), DVB_USB_DEV(ELGATO, ELGATO_EYETV_SAT), DVB_USB_DEV(ELGATO, ELGATO_EYETV_SAT_V2), DVB_USB_DEV(ELGATO, ELGATO_EYETV_SAT_V3), { } }; MODULE_DEVICE_TABLE(usb, az6027_usb_table); static struct dvb_usb_device_properties az6027_properties = { .caps = DVB_USB_IS_AN_I2C_ADAPTER, .usb_ctrl = CYPRESS_FX2, .firmware = "dvb-usb-az6027-03.fw", .no_reconnect = 1, .size_of_priv = sizeof(struct az6027_device_state), .identify_state = az6027_identify_state, .num_adapters = 1, .adapter = { { .num_frontends = 1, .fe = {{ .streaming_ctrl = az6027_streaming_ctrl, .frontend_attach = az6027_frontend_attach, /* parameter for the MPEG2-data transfer */ .stream = { .type = USB_BULK, .count = 10, .endpoint = 0x02, .u = { .bulk = { .buffersize = 4096, } } }, }}, } }, /* .power_ctrl = az6027_power_ctrl, .read_mac_address = az6027_read_mac_addr, */ .rc.legacy = { .rc_map_table = rc_map_az6027_table, .rc_map_size = ARRAY_SIZE(rc_map_az6027_table), .rc_interval = 400, .rc_query = az6027_rc_query, }, .i2c_algo = &az6027_i2c_algo, .num_device_descs = 8, .devices = { { .name = "AZUREWAVE DVB-S/S2 USB2.0 (AZ6027)", .cold_ids = { &az6027_usb_table[AZUREWAVE_AZ6027], NULL }, .warm_ids = { NULL }, }, { .name = "TERRATEC S7", .cold_ids = { &az6027_usb_table[TERRATEC_DVBS2CI_V1], NULL }, .warm_ids = { NULL }, }, { .name = "TERRATEC S7 MKII", .cold_ids = { &az6027_usb_table[TERRATEC_DVBS2CI_V2], NULL }, .warm_ids = { NULL }, }, { .name = "Technisat SkyStar USB 2 HD CI", .cold_ids = { &az6027_usb_table[TECHNISAT_USB2_HDCI_V1], NULL }, .warm_ids = { NULL }, }, { .name = "Technisat SkyStar USB 2 HD CI", .cold_ids = { &az6027_usb_table[TECHNISAT_USB2_HDCI_V2], NULL }, .warm_ids = { NULL }, }, { .name = "Elgato EyeTV Sat", .cold_ids = { &az6027_usb_table[ELGATO_EYETV_SAT], NULL }, .warm_ids = { NULL }, }, { .name = "Elgato EyeTV Sat", .cold_ids = { &az6027_usb_table[ELGATO_EYETV_SAT_V2], NULL }, .warm_ids = { NULL }, }, { .name = "Elgato EyeTV Sat", .cold_ids = { &az6027_usb_table[ELGATO_EYETV_SAT_V3], NULL }, .warm_ids = { NULL }, }, { NULL }, } }; /* usb specific object needed to register this driver with the usb subsystem */ static struct usb_driver az6027_usb_driver = { .name = "dvb_usb_az6027", .probe = az6027_usb_probe, .disconnect = az6027_usb_disconnect, .id_table = az6027_usb_table, }; module_usb_driver(az6027_usb_driver); MODULE_AUTHOR("Adams Xu <Adams.xu@azwave.com.cn>"); MODULE_DESCRIPTION("Driver for AZUREWAVE DVB-S/S2 USB2.0 (AZ6027)"); MODULE_VERSION("1.0"); MODULE_LICENSE("GPL"); |
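/*
 * Illustrative sketch (not part of the driver): the vendor control-transfer
 * pattern az6027 uses throughout. az6027_usb_in_op() and az6027_usb_out_op()
 * above wrap usb_control_msg() with USB_TYPE_VENDOR requests; the 6-byte read
 * via request 0xb7 done in az6027_identify_state() reduces to the call below.
 * The helper name az6027_read_id_block is hypothetical.
 */
static int az6027_read_id_block(struct usb_device *udev, u8 *buf)
{
	/* vendor IN transfer: request 0xb7, value 6, index 0, 6 bytes */
	return usb_control_msg(udev, usb_rcvctrlpipe(udev, 0), 0xb7,
			       USB_TYPE_VENDOR | USB_DIR_IN, 6, 0,
			       buf, 6, USB_CTRL_GET_TIMEOUT);
}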
// SPDX-License-Identifier: GPL-2.0-or-later /* * Devmem TCP * * Authors: Mina Almasry <almasrymina@google.com> * Willem de Bruijn <willemdebruijn.kernel@gmail.com> * Kaiyuan Zhang <kaiyuanz@google.com> */ #include <linux/dma-buf.h> #include <linux/ethtool_netlink.h> #include <linux/genalloc.h> #include <linux/mm.h> #include <linux/netdevice.h> #include <linux/types.h> #include <net/netdev_queues.h> #include <net/netdev_rx_queue.h> #include <net/page_pool/helpers.h> #include <trace/events/page_pool.h> #include "devmem.h" #include "mp_dmabuf_devmem.h" #include "page_pool_priv.h" /* Device memory support */ /* Protected by rtnl_lock() */ static DEFINE_XARRAY_FLAGS(net_devmem_dmabuf_bindings, XA_FLAGS_ALLOC1); static void net_devmem_dmabuf_free_chunk_owner(struct gen_pool *genpool, struct gen_pool_chunk *chunk, void *not_used) { struct dmabuf_genpool_chunk_owner *owner = chunk->owner; kvfree(owner->niovs); kfree(owner); } static dma_addr_t net_devmem_get_dma_addr(const struct net_iov *niov) { struct dmabuf_genpool_chunk_owner *owner = net_iov_owner(niov); return owner->base_dma_addr + ((dma_addr_t)net_iov_idx(niov) << PAGE_SHIFT); } void __net_devmem_dmabuf_binding_free(struct net_devmem_dmabuf_binding *binding) { size_t size, avail; gen_pool_for_each_chunk(binding->chunk_pool, net_devmem_dmabuf_free_chunk_owner, NULL); size = gen_pool_size(binding->chunk_pool); avail = gen_pool_avail(binding->chunk_pool); if (!WARN(size != avail, "can't destroy genpool. 
size=%zu, avail=%zu", size, avail)) gen_pool_destroy(binding->chunk_pool); dma_buf_unmap_attachment_unlocked(binding->attachment, binding->sgt, DMA_FROM_DEVICE); dma_buf_detach(binding->dmabuf, binding->attachment); dma_buf_put(binding->dmabuf); xa_destroy(&binding->bound_rxqs); kfree(binding); } struct net_iov * net_devmem_alloc_dmabuf(struct net_devmem_dmabuf_binding *binding) { struct dmabuf_genpool_chunk_owner *owner; unsigned long dma_addr; struct net_iov *niov; ssize_t offset; ssize_t index; dma_addr = gen_pool_alloc_owner(binding->chunk_pool, PAGE_SIZE, (void **)&owner); if (!dma_addr) return NULL; offset = dma_addr - owner->base_dma_addr; index = offset / PAGE_SIZE; niov = &owner->niovs[index]; niov->pp_magic = 0; niov->pp = NULL; atomic_long_set(&niov->pp_ref_count, 0); return niov; } void net_devmem_free_dmabuf(struct net_iov *niov) { struct net_devmem_dmabuf_binding *binding = net_iov_binding(niov); unsigned long dma_addr = net_devmem_get_dma_addr(niov); if (WARN_ON(!gen_pool_has_addr(binding->chunk_pool, dma_addr, PAGE_SIZE))) return; gen_pool_free(binding->chunk_pool, dma_addr, PAGE_SIZE); } void net_devmem_unbind_dmabuf(struct net_devmem_dmabuf_binding *binding) { struct netdev_rx_queue *rxq; unsigned long xa_idx; unsigned int rxq_idx; if (binding->list.next) list_del(&binding->list); xa_for_each(&binding->bound_rxqs, xa_idx, rxq) { WARN_ON(rxq->mp_params.mp_priv != binding); rxq->mp_params.mp_priv = NULL; rxq_idx = get_netdev_rx_queue_index(rxq); WARN_ON(netdev_rx_queue_restart(binding->dev, rxq_idx)); } xa_erase(&net_devmem_dmabuf_bindings, binding->id); net_devmem_dmabuf_binding_put(binding); } int net_devmem_bind_dmabuf_to_queue(struct net_device *dev, u32 rxq_idx, struct net_devmem_dmabuf_binding *binding, struct netlink_ext_ack *extack) { struct netdev_rx_queue *rxq; u32 xa_idx; int err; if (rxq_idx >= dev->real_num_rx_queues) { NL_SET_ERR_MSG(extack, "rx queue index out of range"); return -ERANGE; } if (dev->cfg->hds_config != ETHTOOL_TCP_DATA_SPLIT_ENABLED) { NL_SET_ERR_MSG(extack, "tcp-data-split is disabled"); return -EINVAL; } if (dev->cfg->hds_thresh) { NL_SET_ERR_MSG(extack, "hds-thresh is not zero"); return -EINVAL; } rxq = __netif_get_rx_queue(dev, rxq_idx); if (rxq->mp_params.mp_priv) { NL_SET_ERR_MSG(extack, "designated queue already memory provider bound"); return -EEXIST; } #ifdef CONFIG_XDP_SOCKETS if (rxq->pool) { NL_SET_ERR_MSG(extack, "designated queue already in use by AF_XDP"); return -EBUSY; } #endif err = xa_alloc(&binding->bound_rxqs, &xa_idx, rxq, xa_limit_32b, GFP_KERNEL); if (err) return err; rxq->mp_params.mp_priv = binding; err = netdev_rx_queue_restart(dev, rxq_idx); if (err) goto err_xa_erase; return 0; err_xa_erase: rxq->mp_params.mp_priv = NULL; xa_erase(&binding->bound_rxqs, xa_idx); return err; } struct net_devmem_dmabuf_binding * net_devmem_bind_dmabuf(struct net_device *dev, unsigned int dmabuf_fd, struct netlink_ext_ack *extack) { struct net_devmem_dmabuf_binding *binding; static u32 id_alloc_next; struct scatterlist *sg; struct dma_buf *dmabuf; unsigned int sg_idx, i; unsigned long virtual; int err; dmabuf = dma_buf_get(dmabuf_fd); if (IS_ERR(dmabuf)) return ERR_CAST(dmabuf); binding = kzalloc_node(sizeof(*binding), GFP_KERNEL, dev_to_node(&dev->dev)); if (!binding) { err = -ENOMEM; goto err_put_dmabuf; } binding->dev = dev; err = xa_alloc_cyclic(&net_devmem_dmabuf_bindings, &binding->id, binding, xa_limit_32b, &id_alloc_next, GFP_KERNEL); if (err < 0) goto err_free_binding; xa_init_flags(&binding->bound_rxqs, XA_FLAGS_ALLOC); 
refcount_set(&binding->ref, 1); binding->dmabuf = dmabuf; binding->attachment = dma_buf_attach(binding->dmabuf, dev->dev.parent); if (IS_ERR(binding->attachment)) { err = PTR_ERR(binding->attachment); NL_SET_ERR_MSG(extack, "Failed to bind dmabuf to device"); goto err_free_id; } binding->sgt = dma_buf_map_attachment_unlocked(binding->attachment, DMA_FROM_DEVICE); if (IS_ERR(binding->sgt)) { err = PTR_ERR(binding->sgt); NL_SET_ERR_MSG(extack, "Failed to map dmabuf attachment"); goto err_detach; } /* For simplicity we expect to make PAGE_SIZE allocations, but the * binding can be much more flexible than that. We may be able to * allocate MTU sized chunks here. Leave that for future work... */ binding->chunk_pool = gen_pool_create(PAGE_SHIFT, dev_to_node(&dev->dev)); if (!binding->chunk_pool) { err = -ENOMEM; goto err_unmap; } virtual = 0; for_each_sgtable_dma_sg(binding->sgt, sg, sg_idx) { dma_addr_t dma_addr = sg_dma_address(sg); struct dmabuf_genpool_chunk_owner *owner; size_t len = sg_dma_len(sg); struct net_iov *niov; owner = kzalloc_node(sizeof(*owner), GFP_KERNEL, dev_to_node(&dev->dev)); if (!owner) { err = -ENOMEM; goto err_free_chunks; } owner->base_virtual = virtual; owner->base_dma_addr = dma_addr; owner->num_niovs = len / PAGE_SIZE; owner->binding = binding; err = gen_pool_add_owner(binding->chunk_pool, dma_addr, dma_addr, len, dev_to_node(&dev->dev), owner); if (err) { kfree(owner); err = -EINVAL; goto err_free_chunks; } owner->niovs = kvmalloc_array(owner->num_niovs, sizeof(*owner->niovs), GFP_KERNEL); if (!owner->niovs) { err = -ENOMEM; goto err_free_chunks; } for (i = 0; i < owner->num_niovs; i++) { niov = &owner->niovs[i]; niov->owner = owner; page_pool_set_dma_addr_netmem(net_iov_to_netmem(niov), net_devmem_get_dma_addr(niov)); } virtual += len; } return binding; err_free_chunks: gen_pool_for_each_chunk(binding->chunk_pool, net_devmem_dmabuf_free_chunk_owner, NULL); gen_pool_destroy(binding->chunk_pool); err_unmap: dma_buf_unmap_attachment_unlocked(binding->attachment, binding->sgt, DMA_FROM_DEVICE); err_detach: dma_buf_detach(dmabuf, binding->attachment); err_free_id: xa_erase(&net_devmem_dmabuf_bindings, binding->id); err_free_binding: kfree(binding); err_put_dmabuf: dma_buf_put(dmabuf); return ERR_PTR(err); } void dev_dmabuf_uninstall(struct net_device *dev) { struct net_devmem_dmabuf_binding *binding; struct netdev_rx_queue *rxq; unsigned long xa_idx; unsigned int i; for (i = 0; i < dev->real_num_rx_queues; i++) { binding = dev->_rx[i].mp_params.mp_priv; if (!binding) continue; xa_for_each(&binding->bound_rxqs, xa_idx, rxq) if (rxq == &dev->_rx[i]) { xa_erase(&binding->bound_rxqs, xa_idx); break; } } } /*** "Dmabuf devmem memory provider" ***/ int mp_dmabuf_devmem_init(struct page_pool *pool) { struct net_devmem_dmabuf_binding *binding = pool->mp_priv; if (!binding) return -EINVAL; /* dma-buf dma addresses do not need and should not be used with * dma_sync_for_cpu/device. Force disable dma_sync. 
*/ pool->dma_sync = false; pool->dma_sync_for_cpu = false; if (pool->p.order != 0) return -E2BIG; net_devmem_dmabuf_binding_get(binding); return 0; } netmem_ref mp_dmabuf_devmem_alloc_netmems(struct page_pool *pool, gfp_t gfp) { struct net_devmem_dmabuf_binding *binding = pool->mp_priv; struct net_iov *niov; netmem_ref netmem; niov = net_devmem_alloc_dmabuf(binding); if (!niov) return 0; netmem = net_iov_to_netmem(niov); page_pool_set_pp_info(pool, netmem); pool->pages_state_hold_cnt++; trace_page_pool_state_hold(pool, netmem, pool->pages_state_hold_cnt); return netmem; } void mp_dmabuf_devmem_destroy(struct page_pool *pool) { struct net_devmem_dmabuf_binding *binding = pool->mp_priv; net_devmem_dmabuf_binding_put(binding); } bool mp_dmabuf_devmem_release_page(struct page_pool *pool, netmem_ref netmem) { long refcount = atomic_long_read(netmem_get_pp_ref_count_ref(netmem)); if (WARN_ON_ONCE(!netmem_is_net_iov(netmem))) return false; if (WARN_ON_ONCE(refcount != 1)) return false; page_pool_clear_pp_info(netmem); net_devmem_free_dmabuf(netmem_to_net_iov(netmem)); /* We don't want the page pool put_page()ing our net_iovs. */ return false; } |
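/*
 * Illustrative sketch (an assumption, not part of this file): a minimal
 * allocate/free round trip against a bound dma-buf using only the helpers
 * defined above. net_devmem_alloc_dmabuf() hands out one PAGE_SIZE chunk
 * from the binding's genpool as a net_iov, and net_devmem_free_dmabuf()
 * returns it. The function name devmem_alloc_roundtrip is hypothetical.
 */
static int devmem_alloc_roundtrip(struct net_devmem_dmabuf_binding *binding)
{
	struct net_iov *niov;

	niov = net_devmem_alloc_dmabuf(binding);
	if (!niov)
		return -ENOMEM;	/* genpool exhausted */

	/* a real provider would convert this to netmem via net_iov_to_netmem() */
	net_devmem_free_dmabuf(niov);
	return 0;
}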
// SPDX-License-Identifier: GPL-2.0-only /* * sd.c Copyright (C) 1992 Drew Eckhardt * Copyright (C) 1993, 1994, 1995, 1999 Eric Youngdale * * Linux scsi disk driver * 
Initial versions: Drew Eckhardt * Subsequent revisions: Eric Youngdale * Modification history: * - Drew Eckhardt <drew@colorado.edu> original * - Eric Youngdale <eric@andante.org> add scatter-gather, multiple * outstanding request, and other enhancements. * Support loadable low-level scsi drivers. * - Jirka Hanika <geo@ff.cuni.cz> support more scsi disks using * eight major numbers. * - Richard Gooch <rgooch@atnf.csiro.au> support devfs. * - Torben Mathiasen <tmm@image.dk> Resource allocation fixes in * sd_init and cleanups. * - Alex Davis <letmein@erols.com> Fix problem where partition info * not being read in sd_open. Fix problem where removable media * could be ejected after sd_open. * - Douglas Gilbert <dgilbert@interlog.com> cleanup for lk 2.5.x * - Badari Pulavarty <pbadari@us.ibm.com>, Matthew Wilcox * <willy@debian.org>, Kurt Garloff <garloff@suse.de>: * Support 32k/1M disks. * * Logging policy (needs CONFIG_SCSI_LOGGING defined): * - setting up transfer: SCSI_LOG_HLQUEUE levels 1 and 2 * - end of transfer (bh + scsi_lib): SCSI_LOG_HLCOMPLETE level 1 * - entering sd_ioctl: SCSI_LOG_IOCTL level 1 * - entering other commands: SCSI_LOG_HLQUEUE level 3 * Note: when the logging level is set by the user, it must be greater * than the level indicated above to trigger output. */ #include <linux/bio-integrity.h> #include <linux/module.h> #include <linux/fs.h> #include <linux/kernel.h> #include <linux/mm.h> #include <linux/hdreg.h> #include <linux/errno.h> #include <linux/idr.h> #include <linux/interrupt.h> #include <linux/init.h> #include <linux/blkdev.h> #include <linux/blkpg.h> #include <linux/blk-pm.h> #include <linux/delay.h> #include <linux/rw_hint.h> #include <linux/major.h> #include <linux/mutex.h> #include <linux/string_helpers.h> #include <linux/slab.h> #include <linux/sed-opal.h> #include <linux/pm_runtime.h> #include <linux/pr.h> #include <linux/t10-pi.h> #include <linux/uaccess.h> #include <linux/unaligned.h> #include <scsi/scsi.h> #include <scsi/scsi_cmnd.h> #include <scsi/scsi_dbg.h> #include <scsi/scsi_device.h> #include <scsi/scsi_devinfo.h> #include <scsi/scsi_driver.h> #include <scsi/scsi_eh.h> #include <scsi/scsi_host.h> #include <scsi/scsi_ioctl.h> #include <scsi/scsicam.h> #include <scsi/scsi_common.h> #include "sd.h" #include "scsi_priv.h" #include "scsi_logging.h" MODULE_AUTHOR("Eric Youngdale"); MODULE_DESCRIPTION("SCSI disk (sd) driver"); MODULE_LICENSE("GPL"); MODULE_ALIAS_BLOCKDEV_MAJOR(SCSI_DISK0_MAJOR); MODULE_ALIAS_BLOCKDEV_MAJOR(SCSI_DISK1_MAJOR); MODULE_ALIAS_BLOCKDEV_MAJOR(SCSI_DISK2_MAJOR); MODULE_ALIAS_BLOCKDEV_MAJOR(SCSI_DISK3_MAJOR); MODULE_ALIAS_BLOCKDEV_MAJOR(SCSI_DISK4_MAJOR); MODULE_ALIAS_BLOCKDEV_MAJOR(SCSI_DISK5_MAJOR); MODULE_ALIAS_BLOCKDEV_MAJOR(SCSI_DISK6_MAJOR); MODULE_ALIAS_BLOCKDEV_MAJOR(SCSI_DISK7_MAJOR); MODULE_ALIAS_BLOCKDEV_MAJOR(SCSI_DISK8_MAJOR); MODULE_ALIAS_BLOCKDEV_MAJOR(SCSI_DISK9_MAJOR); MODULE_ALIAS_BLOCKDEV_MAJOR(SCSI_DISK10_MAJOR); MODULE_ALIAS_BLOCKDEV_MAJOR(SCSI_DISK11_MAJOR); MODULE_ALIAS_BLOCKDEV_MAJOR(SCSI_DISK12_MAJOR); MODULE_ALIAS_BLOCKDEV_MAJOR(SCSI_DISK13_MAJOR); MODULE_ALIAS_BLOCKDEV_MAJOR(SCSI_DISK14_MAJOR); MODULE_ALIAS_BLOCKDEV_MAJOR(SCSI_DISK15_MAJOR); MODULE_ALIAS_SCSI_DEVICE(TYPE_DISK); MODULE_ALIAS_SCSI_DEVICE(TYPE_MOD); MODULE_ALIAS_SCSI_DEVICE(TYPE_RBC); MODULE_ALIAS_SCSI_DEVICE(TYPE_ZBC); #define SD_MINORS 16 static void sd_config_discard(struct scsi_disk *sdkp, struct queue_limits *lim, unsigned int mode); static void sd_config_write_same(struct scsi_disk *sdkp, struct queue_limits *lim); static int 
sd_revalidate_disk(struct gendisk *); static void sd_unlock_native_capacity(struct gendisk *disk); static void sd_shutdown(struct device *); static void scsi_disk_release(struct device *cdev); static DEFINE_IDA(sd_index_ida); static mempool_t *sd_page_pool; static struct lock_class_key sd_bio_compl_lkclass; static const char *sd_cache_types[] = { "write through", "none", "write back", "write back, no read (daft)" }; static void sd_set_flush_flag(struct scsi_disk *sdkp, struct queue_limits *lim) { if (sdkp->WCE) { lim->features |= BLK_FEAT_WRITE_CACHE; if (sdkp->DPOFUA) lim->features |= BLK_FEAT_FUA; else lim->features &= ~BLK_FEAT_FUA; } else { lim->features &= ~(BLK_FEAT_WRITE_CACHE | BLK_FEAT_FUA); } } static ssize_t cache_type_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { int ct, rcd, wce, sp; struct scsi_disk *sdkp = to_scsi_disk(dev); struct scsi_device *sdp = sdkp->device; char buffer[64]; char *buffer_data; struct scsi_mode_data data; struct scsi_sense_hdr sshdr; static const char temp[] = "temporary "; int len, ret; if (sdp->type != TYPE_DISK && sdp->type != TYPE_ZBC) /* no cache control on RBC devices; theoretically they * can do it, but there's probably so many exceptions * it's not worth the risk */ return -EINVAL; if (strncmp(buf, temp, sizeof(temp) - 1) == 0) { buf += sizeof(temp) - 1; sdkp->cache_override = 1; } else { sdkp->cache_override = 0; } ct = sysfs_match_string(sd_cache_types, buf); if (ct < 0) return -EINVAL; rcd = ct & 0x01 ? 1 : 0; wce = (ct & 0x02) && !sdkp->write_prot ? 1 : 0; if (sdkp->cache_override) { struct queue_limits lim; sdkp->WCE = wce; sdkp->RCD = rcd; lim = queue_limits_start_update(sdkp->disk->queue); sd_set_flush_flag(sdkp, &lim); ret = queue_limits_commit_update_frozen(sdkp->disk->queue, &lim); if (ret) return ret; return count; } if (scsi_mode_sense(sdp, 0x08, 8, 0, buffer, sizeof(buffer), SD_TIMEOUT, sdkp->max_retries, &data, NULL)) return -EINVAL; len = min_t(size_t, sizeof(buffer), data.length - data.header_length - data.block_descriptor_length); buffer_data = buffer + data.header_length + data.block_descriptor_length; buffer_data[2] &= ~0x05; buffer_data[2] |= wce << 2 | rcd; sp = buffer_data[0] & 0x80 ? 1 : 0; buffer_data[0] &= ~0x80; /* * Ensure WP, DPOFUA, and RESERVED fields are cleared in * received mode parameter buffer before doing MODE SELECT. 
*/ data.device_specific = 0; ret = scsi_mode_select(sdp, 1, sp, buffer_data, len, SD_TIMEOUT, sdkp->max_retries, &data, &sshdr); if (ret) { if (ret > 0 && scsi_sense_valid(&sshdr)) sd_print_sense_hdr(sdkp, &sshdr); return -EINVAL; } sd_revalidate_disk(sdkp->disk); return count; } static ssize_t manage_start_stop_show(struct device *dev, struct device_attribute *attr, char *buf) { struct scsi_disk *sdkp = to_scsi_disk(dev); struct scsi_device *sdp = sdkp->device; return sysfs_emit(buf, "%u\n", sdp->manage_system_start_stop && sdp->manage_runtime_start_stop && sdp->manage_shutdown); } static DEVICE_ATTR_RO(manage_start_stop); static ssize_t manage_system_start_stop_show(struct device *dev, struct device_attribute *attr, char *buf) { struct scsi_disk *sdkp = to_scsi_disk(dev); struct scsi_device *sdp = sdkp->device; return sysfs_emit(buf, "%u\n", sdp->manage_system_start_stop); } static ssize_t manage_system_start_stop_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct scsi_disk *sdkp = to_scsi_disk(dev); struct scsi_device *sdp = sdkp->device; bool v; if (!capable(CAP_SYS_ADMIN)) return -EACCES; if (kstrtobool(buf, &v)) return -EINVAL; sdp->manage_system_start_stop = v; return count; } static DEVICE_ATTR_RW(manage_system_start_stop); static ssize_t manage_runtime_start_stop_show(struct device *dev, struct device_attribute *attr, char *buf) { struct scsi_disk *sdkp = to_scsi_disk(dev); struct scsi_device *sdp = sdkp->device; return sysfs_emit(buf, "%u\n", sdp->manage_runtime_start_stop); } static ssize_t manage_runtime_start_stop_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct scsi_disk *sdkp = to_scsi_disk(dev); struct scsi_device *sdp = sdkp->device; bool v; if (!capable(CAP_SYS_ADMIN)) return -EACCES; if (kstrtobool(buf, &v)) return -EINVAL; sdp->manage_runtime_start_stop = v; return count; } static DEVICE_ATTR_RW(manage_runtime_start_stop); static ssize_t manage_shutdown_show(struct device *dev, struct device_attribute *attr, char *buf) { struct scsi_disk *sdkp = to_scsi_disk(dev); struct scsi_device *sdp = sdkp->device; return sysfs_emit(buf, "%u\n", sdp->manage_shutdown); } static ssize_t manage_shutdown_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct scsi_disk *sdkp = to_scsi_disk(dev); struct scsi_device *sdp = sdkp->device; bool v; if (!capable(CAP_SYS_ADMIN)) return -EACCES; if (kstrtobool(buf, &v)) return -EINVAL; sdp->manage_shutdown = v; return count; } static DEVICE_ATTR_RW(manage_shutdown); static ssize_t allow_restart_show(struct device *dev, struct device_attribute *attr, char *buf) { struct scsi_disk *sdkp = to_scsi_disk(dev); return sprintf(buf, "%u\n", sdkp->device->allow_restart); } static ssize_t allow_restart_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { bool v; struct scsi_disk *sdkp = to_scsi_disk(dev); struct scsi_device *sdp = sdkp->device; if (!capable(CAP_SYS_ADMIN)) return -EACCES; if (sdp->type != TYPE_DISK && sdp->type != TYPE_ZBC) return -EINVAL; if (kstrtobool(buf, &v)) return -EINVAL; sdp->allow_restart = v; return count; } static DEVICE_ATTR_RW(allow_restart); static ssize_t cache_type_show(struct device *dev, struct device_attribute *attr, char *buf) { struct scsi_disk *sdkp = to_scsi_disk(dev); int ct = sdkp->RCD + 2*sdkp->WCE; return sprintf(buf, "%s\n", sd_cache_types[ct]); } static DEVICE_ATTR_RW(cache_type); static ssize_t FUA_show(struct device *dev, struct 
device_attribute *attr, char *buf) { struct scsi_disk *sdkp = to_scsi_disk(dev); return sprintf(buf, "%u\n", sdkp->DPOFUA); } static DEVICE_ATTR_RO(FUA); static ssize_t protection_type_show(struct device *dev, struct device_attribute *attr, char *buf) { struct scsi_disk *sdkp = to_scsi_disk(dev); return sprintf(buf, "%u\n", sdkp->protection_type); } static ssize_t protection_type_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct scsi_disk *sdkp = to_scsi_disk(dev); unsigned int val; int err; if (!capable(CAP_SYS_ADMIN)) return -EACCES; err = kstrtouint(buf, 10, &val); if (err) return err; if (val <= T10_PI_TYPE3_PROTECTION) sdkp->protection_type = val; return count; } static DEVICE_ATTR_RW(protection_type); static ssize_t protection_mode_show(struct device *dev, struct device_attribute *attr, char *buf) { struct scsi_disk *sdkp = to_scsi_disk(dev); struct scsi_device *sdp = sdkp->device; unsigned int dif, dix; dif = scsi_host_dif_capable(sdp->host, sdkp->protection_type); dix = scsi_host_dix_capable(sdp->host, sdkp->protection_type); if (!dix && scsi_host_dix_capable(sdp->host, T10_PI_TYPE0_PROTECTION)) { dif = 0; dix = 1; } if (!dif && !dix) return sprintf(buf, "none\n"); return sprintf(buf, "%s%u\n", dix ? "dix" : "dif", dif); } static DEVICE_ATTR_RO(protection_mode); static ssize_t app_tag_own_show(struct device *dev, struct device_attribute *attr, char *buf) { struct scsi_disk *sdkp = to_scsi_disk(dev); return sprintf(buf, "%u\n", sdkp->ATO); } static DEVICE_ATTR_RO(app_tag_own); static ssize_t thin_provisioning_show(struct device *dev, struct device_attribute *attr, char *buf) { struct scsi_disk *sdkp = to_scsi_disk(dev); return sprintf(buf, "%u\n", sdkp->lbpme); } static DEVICE_ATTR_RO(thin_provisioning); /* sysfs_match_string() requires dense arrays */ static const char *lbp_mode[] = { [SD_LBP_FULL] = "full", [SD_LBP_UNMAP] = "unmap", [SD_LBP_WS16] = "writesame_16", [SD_LBP_WS10] = "writesame_10", [SD_LBP_ZERO] = "writesame_zero", [SD_LBP_DISABLE] = "disabled", }; static ssize_t provisioning_mode_show(struct device *dev, struct device_attribute *attr, char *buf) { struct scsi_disk *sdkp = to_scsi_disk(dev); return sprintf(buf, "%s\n", lbp_mode[sdkp->provisioning_mode]); } static ssize_t provisioning_mode_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct scsi_disk *sdkp = to_scsi_disk(dev); struct scsi_device *sdp = sdkp->device; struct queue_limits lim; int mode, err; if (!capable(CAP_SYS_ADMIN)) return -EACCES; if (sdp->type != TYPE_DISK) return -EINVAL; mode = sysfs_match_string(lbp_mode, buf); if (mode < 0) return -EINVAL; lim = queue_limits_start_update(sdkp->disk->queue); sd_config_discard(sdkp, &lim, mode); err = queue_limits_commit_update_frozen(sdkp->disk->queue, &lim); if (err) return err; return count; } static DEVICE_ATTR_RW(provisioning_mode); /* sysfs_match_string() requires dense arrays */ static const char *zeroing_mode[] = { [SD_ZERO_WRITE] = "write", [SD_ZERO_WS] = "writesame", [SD_ZERO_WS16_UNMAP] = "writesame_16_unmap", [SD_ZERO_WS10_UNMAP] = "writesame_10_unmap", }; static ssize_t zeroing_mode_show(struct device *dev, struct device_attribute *attr, char *buf) { struct scsi_disk *sdkp = to_scsi_disk(dev); return sprintf(buf, "%s\n", zeroing_mode[sdkp->zeroing_mode]); } static ssize_t zeroing_mode_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct scsi_disk *sdkp = to_scsi_disk(dev); int mode; if (!capable(CAP_SYS_ADMIN)) 
return -EACCES; mode = sysfs_match_string(zeroing_mode, buf); if (mode < 0) return -EINVAL; sdkp->zeroing_mode = mode; return count; } static DEVICE_ATTR_RW(zeroing_mode); static ssize_t max_medium_access_timeouts_show(struct device *dev, struct device_attribute *attr, char *buf) { struct scsi_disk *sdkp = to_scsi_disk(dev); return sprintf(buf, "%u\n", sdkp->max_medium_access_timeouts); } static ssize_t max_medium_access_timeouts_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct scsi_disk *sdkp = to_scsi_disk(dev); int err; if (!capable(CAP_SYS_ADMIN)) return -EACCES; err = kstrtouint(buf, 10, &sdkp->max_medium_access_timeouts); return err ? err : count; } static DEVICE_ATTR_RW(max_medium_access_timeouts); static ssize_t max_write_same_blocks_show(struct device *dev, struct device_attribute *attr, char *buf) { struct scsi_disk *sdkp = to_scsi_disk(dev); return sprintf(buf, "%u\n", sdkp->max_ws_blocks); } static ssize_t max_write_same_blocks_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct scsi_disk *sdkp = to_scsi_disk(dev); struct scsi_device *sdp = sdkp->device; struct queue_limits lim; unsigned long max; int err; if (!capable(CAP_SYS_ADMIN)) return -EACCES; if (sdp->type != TYPE_DISK && sdp->type != TYPE_ZBC) return -EINVAL; err = kstrtoul(buf, 10, &max); if (err) return err; if (max == 0) sdp->no_write_same = 1; else if (max <= SD_MAX_WS16_BLOCKS) { sdp->no_write_same = 0; sdkp->max_ws_blocks = max; } lim = queue_limits_start_update(sdkp->disk->queue); sd_config_write_same(sdkp, &lim); err = queue_limits_commit_update_frozen(sdkp->disk->queue, &lim); if (err) return err; return count; } static DEVICE_ATTR_RW(max_write_same_blocks); static ssize_t zoned_cap_show(struct device *dev, struct device_attribute *attr, char *buf) { struct scsi_disk *sdkp = to_scsi_disk(dev); if (sdkp->device->type == TYPE_ZBC) return sprintf(buf, "host-managed\n"); if (sdkp->zoned == 1) return sprintf(buf, "host-aware\n"); if (sdkp->zoned == 2) return sprintf(buf, "drive-managed\n"); return sprintf(buf, "none\n"); } static DEVICE_ATTR_RO(zoned_cap); static ssize_t max_retries_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct scsi_disk *sdkp = to_scsi_disk(dev); struct scsi_device *sdev = sdkp->device; int retries, err; err = kstrtoint(buf, 10, &retries); if (err) return err; if (retries == SCSI_CMD_RETRIES_NO_LIMIT || retries <= SD_MAX_RETRIES) { sdkp->max_retries = retries; return count; } sdev_printk(KERN_ERR, sdev, "max_retries must be between -1 and %d\n", SD_MAX_RETRIES); return -EINVAL; } static ssize_t max_retries_show(struct device *dev, struct device_attribute *attr, char *buf) { struct scsi_disk *sdkp = to_scsi_disk(dev); return sprintf(buf, "%d\n", sdkp->max_retries); } static DEVICE_ATTR_RW(max_retries); static struct attribute *sd_disk_attrs[] = { &dev_attr_cache_type.attr, &dev_attr_FUA.attr, &dev_attr_allow_restart.attr, &dev_attr_manage_start_stop.attr, &dev_attr_manage_system_start_stop.attr, &dev_attr_manage_runtime_start_stop.attr, &dev_attr_manage_shutdown.attr, &dev_attr_protection_type.attr, &dev_attr_protection_mode.attr, &dev_attr_app_tag_own.attr, &dev_attr_thin_provisioning.attr, &dev_attr_provisioning_mode.attr, &dev_attr_zeroing_mode.attr, &dev_attr_max_write_same_blocks.attr, &dev_attr_max_medium_access_timeouts.attr, &dev_attr_zoned_cap.attr, &dev_attr_max_retries.attr, NULL, }; ATTRIBUTE_GROUPS(sd_disk); static struct class sd_disk_class = 
{ .name = "scsi_disk", .dev_release = scsi_disk_release, .dev_groups = sd_disk_groups, }; /* * Don't request a new module, as that could deadlock in multipath * environment. */ static void sd_default_probe(dev_t devt) { } /* * Device number to disk mapping: * * major disc2 disc p1 * |............|.............|....|....| <- dev_t * 31 20 19 8 7 4 3 0 * * Inside a major, we have 16k disks, however mapped non- * contiguously. The first 16 disks are for major0, the next * ones with major1, ... Disk 256 is for major0 again, disk 272 * for major1, ... * As we stay compatible with our numbering scheme, we can reuse * the well-known SCSI majors 8, 65--71, 136--143. */ static int sd_major(int major_idx) { switch (major_idx) { case 0: return SCSI_DISK0_MAJOR; case 1 ... 7: return SCSI_DISK1_MAJOR + major_idx - 1; case 8 ... 15: return SCSI_DISK8_MAJOR + major_idx - 8; default: BUG(); return 0; /* shut up gcc */ } } #ifdef CONFIG_BLK_SED_OPAL static int sd_sec_submit(void *data, u16 spsp, u8 secp, void *buffer, size_t len, bool send) { struct scsi_disk *sdkp = data; struct scsi_device *sdev = sdkp->device; u8 cdb[12] = { 0, }; const struct scsi_exec_args exec_args = { .req_flags = BLK_MQ_REQ_PM, }; int ret; cdb[0] = send ? SECURITY_PROTOCOL_OUT : SECURITY_PROTOCOL_IN; cdb[1] = secp; put_unaligned_be16(spsp, &cdb[2]); put_unaligned_be32(len, &cdb[6]); ret = scsi_execute_cmd(sdev, cdb, send ? REQ_OP_DRV_OUT : REQ_OP_DRV_IN, buffer, len, SD_TIMEOUT, sdkp->max_retries, &exec_args); return ret <= 0 ? ret : -EIO; } #endif /* CONFIG_BLK_SED_OPAL */ /* * Look up the DIX operation based on whether the command is read or * write and whether dix and dif are enabled. */ static unsigned int sd_prot_op(bool write, bool dix, bool dif) { /* Lookup table: bit 2 (write), bit 1 (dix), bit 0 (dif) */ static const unsigned int ops[] = { /* wrt dix dif */ SCSI_PROT_NORMAL, /* 0 0 0 */ SCSI_PROT_READ_STRIP, /* 0 0 1 */ SCSI_PROT_READ_INSERT, /* 0 1 0 */ SCSI_PROT_READ_PASS, /* 0 1 1 */ SCSI_PROT_NORMAL, /* 1 0 0 */ SCSI_PROT_WRITE_INSERT, /* 1 0 1 */ SCSI_PROT_WRITE_STRIP, /* 1 1 0 */ SCSI_PROT_WRITE_PASS, /* 1 1 1 */ }; return ops[write << 2 | dix << 1 | dif]; } /* * Returns a mask of the protection flags that are valid for a given DIX * operation. 
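* For example, the mask for SCSI_PROT_READ_INSERT below omits SCSI_PROT_TRANSFER_PI, because no PI crosses the wire for an insert operation (the HBA generates it), while the *_PASS operations retain every flag.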
*/ static unsigned int sd_prot_flag_mask(unsigned int prot_op) { static const unsigned int flag_mask[] = { [SCSI_PROT_NORMAL] = 0, [SCSI_PROT_READ_STRIP] = SCSI_PROT_TRANSFER_PI | SCSI_PROT_GUARD_CHECK | SCSI_PROT_REF_CHECK | SCSI_PROT_REF_INCREMENT, [SCSI_PROT_READ_INSERT] = SCSI_PROT_REF_INCREMENT | SCSI_PROT_IP_CHECKSUM, [SCSI_PROT_READ_PASS] = SCSI_PROT_TRANSFER_PI | SCSI_PROT_GUARD_CHECK | SCSI_PROT_REF_CHECK | SCSI_PROT_REF_INCREMENT | SCSI_PROT_IP_CHECKSUM, [SCSI_PROT_WRITE_INSERT] = SCSI_PROT_TRANSFER_PI | SCSI_PROT_REF_INCREMENT, [SCSI_PROT_WRITE_STRIP] = SCSI_PROT_GUARD_CHECK | SCSI_PROT_REF_CHECK | SCSI_PROT_REF_INCREMENT | SCSI_PROT_IP_CHECKSUM, [SCSI_PROT_WRITE_PASS] = SCSI_PROT_TRANSFER_PI | SCSI_PROT_GUARD_CHECK | SCSI_PROT_REF_CHECK | SCSI_PROT_REF_INCREMENT | SCSI_PROT_IP_CHECKSUM, }; return flag_mask[prot_op]; } static unsigned char sd_setup_protect_cmnd(struct scsi_cmnd *scmd, unsigned int dix, unsigned int dif) { struct request *rq = scsi_cmd_to_rq(scmd); struct bio *bio = rq->bio; unsigned int prot_op = sd_prot_op(rq_data_dir(rq), dix, dif); unsigned int protect = 0; if (dix) { /* DIX Type 0, 1, 2, 3 */ if (bio_integrity_flagged(bio, BIP_IP_CHECKSUM)) scmd->prot_flags |= SCSI_PROT_IP_CHECKSUM; if (bio_integrity_flagged(bio, BIP_CHECK_GUARD)) scmd->prot_flags |= SCSI_PROT_GUARD_CHECK; } if (dif != T10_PI_TYPE3_PROTECTION) { /* DIX/DIF Type 0, 1, 2 */ scmd->prot_flags |= SCSI_PROT_REF_INCREMENT; if (bio_integrity_flagged(bio, BIP_CHECK_REFTAG)) scmd->prot_flags |= SCSI_PROT_REF_CHECK; } if (dif) { /* DIX/DIF Type 1, 2, 3 */ scmd->prot_flags |= SCSI_PROT_TRANSFER_PI; if (bio_integrity_flagged(bio, BIP_DISK_NOCHECK)) protect = 3 << 5; /* Disable target PI checking */ else protect = 1 << 5; /* Enable target PI checking */ } scsi_set_prot_op(scmd, prot_op); scsi_set_prot_type(scmd, dif); scmd->prot_flags &= sd_prot_flag_mask(prot_op); return protect; } static void sd_disable_discard(struct scsi_disk *sdkp) { sdkp->provisioning_mode = SD_LBP_DISABLE; blk_queue_disable_discard(sdkp->disk->queue); } static void sd_config_discard(struct scsi_disk *sdkp, struct queue_limits *lim, unsigned int mode) { unsigned int logical_block_size = sdkp->device->sector_size; unsigned int max_blocks = 0; lim->discard_alignment = sdkp->unmap_alignment * logical_block_size; lim->discard_granularity = max(sdkp->physical_block_size, sdkp->unmap_granularity * logical_block_size); sdkp->provisioning_mode = mode; switch (mode) { case SD_LBP_FULL: case SD_LBP_DISABLE: break; case SD_LBP_UNMAP: max_blocks = min_not_zero(sdkp->max_unmap_blocks, (u32)SD_MAX_WS16_BLOCKS); break; case SD_LBP_WS16: if (sdkp->device->unmap_limit_for_ws) max_blocks = sdkp->max_unmap_blocks; else max_blocks = sdkp->max_ws_blocks; max_blocks = min_not_zero(max_blocks, (u32)SD_MAX_WS16_BLOCKS); break; case SD_LBP_WS10: if (sdkp->device->unmap_limit_for_ws) max_blocks = sdkp->max_unmap_blocks; else max_blocks = sdkp->max_ws_blocks; max_blocks = min_not_zero(max_blocks, (u32)SD_MAX_WS10_BLOCKS); break; case SD_LBP_ZERO: max_blocks = min_not_zero(sdkp->max_ws_blocks, (u32)SD_MAX_WS10_BLOCKS); break; } lim->max_hw_discard_sectors = max_blocks * (logical_block_size >> SECTOR_SHIFT); } static void *sd_set_special_bvec(struct request *rq, unsigned int data_len) { struct page *page; page = mempool_alloc(sd_page_pool, GFP_ATOMIC); if (!page) return NULL; clear_highpage(page); bvec_set_page(&rq->special_vec, page, data_len, 0); rq->rq_flags |= RQF_SPECIAL_PAYLOAD; return bvec_virt(&rq->special_vec); } static blk_status_t 
sd_setup_unmap_cmnd(struct scsi_cmnd *cmd) { struct scsi_device *sdp = cmd->device; struct request *rq = scsi_cmd_to_rq(cmd); struct scsi_disk *sdkp = scsi_disk(rq->q->disk); u64 lba = sectors_to_logical(sdp, blk_rq_pos(rq)); u32 nr_blocks = sectors_to_logical(sdp, blk_rq_sectors(rq)); unsigned int data_len = 24; char *buf; buf = sd_set_special_bvec(rq, data_len); if (!buf) return BLK_STS_RESOURCE; cmd->cmd_len = 10; cmd->cmnd[0] = UNMAP; cmd->cmnd[8] = 24; put_unaligned_be16(6 + 16, &buf[0]); put_unaligned_be16(16, &buf[2]); put_unaligned_be64(lba, &buf[8]); put_unaligned_be32(nr_blocks, &buf[16]); cmd->allowed = sdkp->max_retries; cmd->transfersize = data_len; rq->timeout = SD_TIMEOUT; return scsi_alloc_sgtables(cmd); } static void sd_config_atomic(struct scsi_disk *sdkp, struct queue_limits *lim) { unsigned int logical_block_size = sdkp->device->sector_size, physical_block_size_sectors, max_atomic, unit_min, unit_max; if ((!sdkp->max_atomic && !sdkp->max_atomic_with_boundary) || sdkp->protection_type == T10_PI_TYPE2_PROTECTION) return; physical_block_size_sectors = sdkp->physical_block_size / sdkp->device->sector_size; unit_min = rounddown_pow_of_two(sdkp->atomic_granularity ? sdkp->atomic_granularity : physical_block_size_sectors); /* * Only use atomic boundary when we have the odd scenario of * sdkp->max_atomic == 0, which the spec does permit. */ if (sdkp->max_atomic) { max_atomic = sdkp->max_atomic; unit_max = rounddown_pow_of_two(sdkp->max_atomic); sdkp->use_atomic_write_boundary = 0; } else { max_atomic = sdkp->max_atomic_with_boundary; unit_max = rounddown_pow_of_two(sdkp->max_atomic_boundary); sdkp->use_atomic_write_boundary = 1; } /* * Ensure compliance with granularity and alignment. For now, keep it * simple and just don't support atomic writes for values mismatched * with max_{boundary}atomic, physical block size, and * atomic_granularity itself. * * We're really being distrustful by checking unit_max also... */ if (sdkp->atomic_granularity > 1) { if (unit_min > 1 && unit_min % sdkp->atomic_granularity) return; if (unit_max > 1 && unit_max % sdkp->atomic_granularity) return; } if (sdkp->atomic_alignment > 1) { if (unit_min > 1 && unit_min % sdkp->atomic_alignment) return; if (unit_max > 1 && unit_max % sdkp->atomic_alignment) return; } lim->atomic_write_hw_max = max_atomic * logical_block_size; lim->atomic_write_hw_boundary = 0; lim->atomic_write_hw_unit_min = unit_min * logical_block_size; lim->atomic_write_hw_unit_max = unit_max * logical_block_size; lim->features |= BLK_FEAT_ATOMIC_WRITES; } static blk_status_t sd_setup_write_same16_cmnd(struct scsi_cmnd *cmd, bool unmap) { struct scsi_device *sdp = cmd->device; struct request *rq = scsi_cmd_to_rq(cmd); struct scsi_disk *sdkp = scsi_disk(rq->q->disk); u64 lba = sectors_to_logical(sdp, blk_rq_pos(rq)); u32 nr_blocks = sectors_to_logical(sdp, blk_rq_sectors(rq)); u32 data_len = sdp->sector_size; if (!sd_set_special_bvec(rq, data_len)) return BLK_STS_RESOURCE; cmd->cmd_len = 16; cmd->cmnd[0] = WRITE_SAME_16; if (unmap) cmd->cmnd[1] = 0x8; /* UNMAP */ put_unaligned_be64(lba, &cmd->cmnd[2]); put_unaligned_be32(nr_blocks, &cmd->cmnd[10]); cmd->allowed = sdkp->max_retries; cmd->transfersize = data_len; rq->timeout = unmap ? 
SD_TIMEOUT : SD_WRITE_SAME_TIMEOUT; return scsi_alloc_sgtables(cmd); } static blk_status_t sd_setup_write_same10_cmnd(struct scsi_cmnd *cmd, bool unmap) { struct scsi_device *sdp = cmd->device; struct request *rq = scsi_cmd_to_rq(cmd); struct scsi_disk *sdkp = scsi_disk(rq->q->disk); u64 lba = sectors_to_logical(sdp, blk_rq_pos(rq)); u32 nr_blocks = sectors_to_logical(sdp, blk_rq_sectors(rq)); u32 data_len = sdp->sector_size; if (!sd_set_special_bvec(rq, data_len)) return BLK_STS_RESOURCE; cmd->cmd_len = 10; cmd->cmnd[0] = WRITE_SAME; if (unmap) cmd->cmnd[1] = 0x8; /* UNMAP */ put_unaligned_be32(lba, &cmd->cmnd[2]); put_unaligned_be16(nr_blocks, &cmd->cmnd[7]); cmd->allowed = sdkp->max_retries; cmd->transfersize = data_len; rq->timeout = unmap ? SD_TIMEOUT : SD_WRITE_SAME_TIMEOUT; return scsi_alloc_sgtables(cmd); } static blk_status_t sd_setup_write_zeroes_cmnd(struct scsi_cmnd *cmd) { struct request *rq = scsi_cmd_to_rq(cmd); struct scsi_device *sdp = cmd->device; struct scsi_disk *sdkp = scsi_disk(rq->q->disk); u64 lba = sectors_to_logical(sdp, blk_rq_pos(rq)); u32 nr_blocks = sectors_to_logical(sdp, blk_rq_sectors(rq)); if (!(rq->cmd_flags & REQ_NOUNMAP)) { switch (sdkp->zeroing_mode) { case SD_ZERO_WS16_UNMAP: return sd_setup_write_same16_cmnd(cmd, true); case SD_ZERO_WS10_UNMAP: return sd_setup_write_same10_cmnd(cmd, true); } } if (sdp->no_write_same) { rq->rq_flags |= RQF_QUIET; return BLK_STS_TARGET; } if (sdkp->ws16 || lba > 0xffffffff || nr_blocks > 0xffff) return sd_setup_write_same16_cmnd(cmd, false); return sd_setup_write_same10_cmnd(cmd, false); } static void sd_disable_write_same(struct scsi_disk *sdkp) { sdkp->device->no_write_same = 1; sdkp->max_ws_blocks = 0; blk_queue_disable_write_zeroes(sdkp->disk->queue); } static void sd_config_write_same(struct scsi_disk *sdkp, struct queue_limits *lim) { unsigned int logical_block_size = sdkp->device->sector_size; if (sdkp->device->no_write_same) { sdkp->max_ws_blocks = 0; goto out; } /* Some devices cannot handle block counts above 0xffff despite * supporting WRITE SAME(16). Consequently we default to 64k * blocks per I/O unless the device explicitly advertises a * bigger limit. */ if (sdkp->max_ws_blocks > SD_MAX_WS10_BLOCKS) sdkp->max_ws_blocks = min_not_zero(sdkp->max_ws_blocks, (u32)SD_MAX_WS16_BLOCKS); else if (sdkp->ws16 || sdkp->ws10 || sdkp->device->no_report_opcodes) sdkp->max_ws_blocks = min_not_zero(sdkp->max_ws_blocks, (u32)SD_MAX_WS10_BLOCKS); else { sdkp->device->no_write_same = 1; sdkp->max_ws_blocks = 0; } if (sdkp->lbprz && sdkp->lbpws) sdkp->zeroing_mode = SD_ZERO_WS16_UNMAP; else if (sdkp->lbprz && sdkp->lbpws10) sdkp->zeroing_mode = SD_ZERO_WS10_UNMAP; else if (sdkp->max_ws_blocks) sdkp->zeroing_mode = SD_ZERO_WS; else sdkp->zeroing_mode = SD_ZERO_WRITE; if (sdkp->max_ws_blocks && sdkp->physical_block_size > logical_block_size) { /* * Reporting a maximum number of blocks that is not aligned * on the device physical size would cause a large write same * request to be split into physically unaligned chunks by * __blkdev_issue_write_zeroes() even if the caller of this * function took care to align the large request. So make sure * the maximum reported is aligned to the device physical block * size. This is only an optional optimization for regular * disks, but this is mandatory to avoid failure of large write * same requests directed at sequential write required zones of * host-managed ZBC disks. 
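* E.g. with 512-byte logical and 4096-byte physical blocks, max_ws_blocks is rounded down to a multiple of 8 logical blocks.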
*/ sdkp->max_ws_blocks = round_down(sdkp->max_ws_blocks, bytes_to_logical(sdkp->device, sdkp->physical_block_size)); } out: lim->max_write_zeroes_sectors = sdkp->max_ws_blocks * (logical_block_size >> SECTOR_SHIFT); } static blk_status_t sd_setup_flush_cmnd(struct scsi_cmnd *cmd) { struct request *rq = scsi_cmd_to_rq(cmd); struct scsi_disk *sdkp = scsi_disk(rq->q->disk); /* flush requests don't perform I/O, zero the S/G table */ memset(&cmd->sdb, 0, sizeof(cmd->sdb)); if (cmd->device->use_16_for_sync) { cmd->cmnd[0] = SYNCHRONIZE_CACHE_16; cmd->cmd_len = 16; } else { cmd->cmnd[0] = SYNCHRONIZE_CACHE; cmd->cmd_len = 10; } cmd->transfersize = 0; cmd->allowed = sdkp->max_retries; rq->timeout = rq->q->rq_timeout * SD_FLUSH_TIMEOUT_MULTIPLIER; return BLK_STS_OK; } /** * sd_group_number() - Compute the GROUP NUMBER field * @cmd: SCSI command for which to compute the value of the six-bit GROUP NUMBER * field. * * From SBC-5 r05 (https://www.t10.org/cgi-bin/ac.pl?t=f&f=sbc5r05.pdf): * 0: no relative lifetime. * 1: shortest relative lifetime. * 2: second shortest relative lifetime. * 3 - 0x3d: intermediate relative lifetimes. * 0x3e: second longest relative lifetime. * 0x3f: longest relative lifetime. */ static u8 sd_group_number(struct scsi_cmnd *cmd) { const struct request *rq = scsi_cmd_to_rq(cmd); struct scsi_disk *sdkp = scsi_disk(rq->q->disk); if (!sdkp->rscs) return 0; return min3((u32)rq->bio->bi_write_hint, (u32)sdkp->permanent_stream_count, 0x3fu); } static blk_status_t sd_setup_rw32_cmnd(struct scsi_cmnd *cmd, bool write, sector_t lba, unsigned int nr_blocks, unsigned char flags, unsigned int dld) { cmd->cmd_len = SD_EXT_CDB_SIZE; cmd->cmnd[0] = VARIABLE_LENGTH_CMD; cmd->cmnd[6] = sd_group_number(cmd); cmd->cmnd[7] = 0x18; /* Additional CDB len */ cmd->cmnd[9] = write ? WRITE_32 : READ_32; cmd->cmnd[10] = flags; cmd->cmnd[11] = dld & 0x07; put_unaligned_be64(lba, &cmd->cmnd[12]); put_unaligned_be32(lba, &cmd->cmnd[20]); /* Expected Indirect LBA */ put_unaligned_be32(nr_blocks, &cmd->cmnd[28]); return BLK_STS_OK; } static blk_status_t sd_setup_rw16_cmnd(struct scsi_cmnd *cmd, bool write, sector_t lba, unsigned int nr_blocks, unsigned char flags, unsigned int dld) { cmd->cmd_len = 16; cmd->cmnd[0] = write ? WRITE_16 : READ_16; cmd->cmnd[1] = flags | ((dld >> 2) & 0x01); cmd->cmnd[14] = ((dld & 0x03) << 6) | sd_group_number(cmd); cmd->cmnd[15] = 0; put_unaligned_be64(lba, &cmd->cmnd[2]); put_unaligned_be32(nr_blocks, &cmd->cmnd[10]); return BLK_STS_OK; } static blk_status_t sd_setup_rw10_cmnd(struct scsi_cmnd *cmd, bool write, sector_t lba, unsigned int nr_blocks, unsigned char flags) { cmd->cmd_len = 10; cmd->cmnd[0] = write ? WRITE_10 : READ_10; cmd->cmnd[1] = flags; cmd->cmnd[6] = sd_group_number(cmd); cmd->cmnd[9] = 0; put_unaligned_be32(lba, &cmd->cmnd[2]); put_unaligned_be16(nr_blocks, &cmd->cmnd[7]); return BLK_STS_OK; } static blk_status_t sd_setup_rw6_cmnd(struct scsi_cmnd *cmd, bool write, sector_t lba, unsigned int nr_blocks, unsigned char flags) { /* Avoid that 0 blocks gets translated into 256 blocks. */ if (WARN_ON_ONCE(nr_blocks == 0)) return BLK_STS_IOERR; if (unlikely(flags & 0x8)) { /* * This happens only if this drive failed 10byte rw * command with ILLEGAL_REQUEST during operation and * thus turned off use_10_for_rw. */ scmd_printk(KERN_ERR, cmd, "FUA write on READ/WRITE(6) drive\n"); return BLK_STS_IOERR; } cmd->cmd_len = 6; cmd->cmnd[0] = write ? 
WRITE_6 : READ_6; cmd->cmnd[1] = (lba >> 16) & 0x1f; cmd->cmnd[2] = (lba >> 8) & 0xff; cmd->cmnd[3] = lba & 0xff; cmd->cmnd[4] = nr_blocks; cmd->cmnd[5] = 0; return BLK_STS_OK; } /* * Check if a command has a duration limit set. If it does, and the target * device supports CDL and the feature is enabled, return the limit * descriptor index to use. Return 0 (no limit) otherwise. */ static int sd_cdl_dld(struct scsi_disk *sdkp, struct scsi_cmnd *scmd) { struct scsi_device *sdp = sdkp->device; int hint; if (!sdp->cdl_supported || !sdp->cdl_enable) return 0; /* * Use "no limit" if the request ioprio does not specify a duration * limit hint. */ hint = IOPRIO_PRIO_HINT(req_get_ioprio(scsi_cmd_to_rq(scmd))); if (hint < IOPRIO_HINT_DEV_DURATION_LIMIT_1 || hint > IOPRIO_HINT_DEV_DURATION_LIMIT_7) return 0; return (hint - IOPRIO_HINT_DEV_DURATION_LIMIT_1) + 1; } static blk_status_t sd_setup_atomic_cmnd(struct scsi_cmnd *cmd, sector_t lba, unsigned int nr_blocks, bool boundary, unsigned char flags) { cmd->cmd_len = 16; cmd->cmnd[0] = WRITE_ATOMIC_16; cmd->cmnd[1] = flags; put_unaligned_be64(lba, &cmd->cmnd[2]); put_unaligned_be16(nr_blocks, &cmd->cmnd[12]); if (boundary) put_unaligned_be16(nr_blocks, &cmd->cmnd[10]); else put_unaligned_be16(0, &cmd->cmnd[10]); cmd->cmnd[14] = 0; cmd->cmnd[15] = 0; return BLK_STS_OK; } static blk_status_t sd_setup_read_write_cmnd(struct scsi_cmnd *cmd) { struct request *rq = scsi_cmd_to_rq(cmd); struct scsi_device *sdp = cmd->device; struct scsi_disk *sdkp = scsi_disk(rq->q->disk); sector_t lba = sectors_to_logical(sdp, blk_rq_pos(rq)); sector_t threshold; unsigned int nr_blocks = sectors_to_logical(sdp, blk_rq_sectors(rq)); unsigned int mask = logical_to_sectors(sdp, 1) - 1; bool write = rq_data_dir(rq) == WRITE; unsigned char protect, fua; unsigned int dld; blk_status_t ret; unsigned int dif; bool dix; ret = scsi_alloc_sgtables(cmd); if (ret != BLK_STS_OK) return ret; ret = BLK_STS_IOERR; if (!scsi_device_online(sdp) || sdp->changed) { scmd_printk(KERN_ERR, cmd, "device offline or changed\n"); goto fail; } if (blk_rq_pos(rq) + blk_rq_sectors(rq) > get_capacity(rq->q->disk)) { scmd_printk(KERN_ERR, cmd, "access beyond end of device\n"); goto fail; } if ((blk_rq_pos(rq) & mask) || (blk_rq_sectors(rq) & mask)) { scmd_printk(KERN_ERR, cmd, "request not aligned to the logical block size\n"); goto fail; } /* * Some SD card readers can't handle accesses which touch the * last one or two logical blocks. Split accesses as needed. */ threshold = sdkp->capacity - SD_LAST_BUGGY_SECTORS; if (unlikely(sdp->last_sector_bug && lba + nr_blocks > threshold)) { if (lba < threshold) { /* Access up to the threshold but not beyond */ nr_blocks = threshold - lba; } else { /* Access only a single logical block */ nr_blocks = 1; } } fua = rq->cmd_flags & REQ_FUA ? 
0x8 : 0; dix = scsi_prot_sg_count(cmd); dif = scsi_host_dif_capable(cmd->device->host, sdkp->protection_type); dld = sd_cdl_dld(sdkp, cmd); if (dif || dix) protect = sd_setup_protect_cmnd(cmd, dix, dif); else protect = 0; if (protect && sdkp->protection_type == T10_PI_TYPE2_PROTECTION) { ret = sd_setup_rw32_cmnd(cmd, write, lba, nr_blocks, protect | fua, dld); } else if (rq->cmd_flags & REQ_ATOMIC) { ret = sd_setup_atomic_cmnd(cmd, lba, nr_blocks, sdkp->use_atomic_write_boundary, protect | fua); } else if (sdp->use_16_for_rw || (nr_blocks > 0xffff)) { ret = sd_setup_rw16_cmnd(cmd, write, lba, nr_blocks, protect | fua, dld); } else if ((nr_blocks > 0xff) || (lba > 0x1fffff) || sdp->use_10_for_rw || protect || rq->bio->bi_write_hint) { ret = sd_setup_rw10_cmnd(cmd, write, lba, nr_blocks, protect | fua); } else { ret = sd_setup_rw6_cmnd(cmd, write, lba, nr_blocks, protect | fua); } if (unlikely(ret != BLK_STS_OK)) goto fail; /* * We shouldn't disconnect in the middle of a sector, so with a dumb * host adapter, it's safe to assume that we can at least transfer * this many bytes between each connect / disconnect. */ cmd->transfersize = sdp->sector_size; cmd->underflow = nr_blocks << 9; cmd->allowed = sdkp->max_retries; cmd->sdb.length = nr_blocks * sdp->sector_size; SCSI_LOG_HLQUEUE(1, scmd_printk(KERN_INFO, cmd, "%s: block=%llu, count=%d\n", __func__, (unsigned long long)blk_rq_pos(rq), blk_rq_sectors(rq))); SCSI_LOG_HLQUEUE(2, scmd_printk(KERN_INFO, cmd, "%s %d/%u 512 byte blocks.\n", write ? "writing" : "reading", nr_blocks, blk_rq_sectors(rq))); /* * This indicates that the command is ready from our end to be queued. */ return BLK_STS_OK; fail: scsi_free_sgtables(cmd); return ret; } static blk_status_t sd_init_command(struct scsi_cmnd *cmd) { struct request *rq = scsi_cmd_to_rq(cmd); switch (req_op(rq)) { case REQ_OP_DISCARD: switch (scsi_disk(rq->q->disk)->provisioning_mode) { case SD_LBP_UNMAP: return sd_setup_unmap_cmnd(cmd); case SD_LBP_WS16: return sd_setup_write_same16_cmnd(cmd, true); case SD_LBP_WS10: return sd_setup_write_same10_cmnd(cmd, true); case SD_LBP_ZERO: return sd_setup_write_same10_cmnd(cmd, false); default: return BLK_STS_TARGET; } case REQ_OP_WRITE_ZEROES: return sd_setup_write_zeroes_cmnd(cmd); case REQ_OP_FLUSH: return sd_setup_flush_cmnd(cmd); case REQ_OP_READ: case REQ_OP_WRITE: return sd_setup_read_write_cmnd(cmd); case REQ_OP_ZONE_RESET: return sd_zbc_setup_zone_mgmt_cmnd(cmd, ZO_RESET_WRITE_POINTER, false); case REQ_OP_ZONE_RESET_ALL: return sd_zbc_setup_zone_mgmt_cmnd(cmd, ZO_RESET_WRITE_POINTER, true); case REQ_OP_ZONE_OPEN: return sd_zbc_setup_zone_mgmt_cmnd(cmd, ZO_OPEN_ZONE, false); case REQ_OP_ZONE_CLOSE: return sd_zbc_setup_zone_mgmt_cmnd(cmd, ZO_CLOSE_ZONE, false); case REQ_OP_ZONE_FINISH: return sd_zbc_setup_zone_mgmt_cmnd(cmd, ZO_FINISH_ZONE, false); default: WARN_ON_ONCE(1); return BLK_STS_NOTSUPP; } } static void sd_uninit_command(struct scsi_cmnd *SCpnt) { struct request *rq = scsi_cmd_to_rq(SCpnt); if (rq->rq_flags & RQF_SPECIAL_PAYLOAD) mempool_free(rq->special_vec.bv_page, sd_page_pool); } static bool sd_need_revalidate(struct gendisk *disk, struct scsi_disk *sdkp) { if (sdkp->device->removable || sdkp->write_prot) { if (disk_check_media_change(disk)) return true; } /* * Force a full rescan after ioctl(BLKRRPART). While the disk state has * nothing to do with partitions, BLKRRPART is used to force a full * revalidate after things like a format for historical reasons. 
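* (Userspace sketch, assuming fd is an open descriptor on the whole-disk node: ioctl(fd, BLKRRPART), as issued by blockdev --rereadpt, sets GD_NEED_PART_SCAN and so forces the rescan tested below.)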
*/ return test_bit(GD_NEED_PART_SCAN, &disk->state); } /** * sd_open - open a scsi disk device * @disk: disk to open * @mode: open mode * * Returns 0 if successful. Returns a negated errno value in case * of error. * * Note: This can be called from a user context (e.g. fsck(1) ) * or from within the kernel (e.g. as a result of a mount(1) ). * In the latter case @inode and @filp carry an abridged amount * of information as noted above. * * Locking: called with disk->open_mutex held. **/ static int sd_open(struct gendisk *disk, blk_mode_t mode) { struct scsi_disk *sdkp = scsi_disk(disk); struct scsi_device *sdev = sdkp->device; int retval; if (scsi_device_get(sdev)) return -ENXIO; SCSI_LOG_HLQUEUE(3, sd_printk(KERN_INFO, sdkp, "sd_open\n")); /* * If the device is in error recovery, wait until it is done. * If the device is offline, then disallow any access to it. */ retval = -ENXIO; if (!scsi_block_when_processing_errors(sdev)) goto error_out; if (sd_need_revalidate(disk, sdkp)) sd_revalidate_disk(disk); /* * If the drive is empty, just let the open fail. */ retval = -ENOMEDIUM; if (sdev->removable && !sdkp->media_present && !(mode & BLK_OPEN_NDELAY)) goto error_out; /* * If the device has the write protect tab set, have the open fail * if the user expects to be able to write to the thing. */ retval = -EROFS; if (sdkp->write_prot && (mode & BLK_OPEN_WRITE)) goto error_out; /* * It is possible that the disk changing stuff resulted in * the device being taken offline. If this is the case, * report this to the user, and don't pretend that the * open actually succeeded. */ retval = -ENXIO; if (!scsi_device_online(sdev)) goto error_out; if ((atomic_inc_return(&sdkp->openers) == 1) && sdev->removable) { if (scsi_block_when_processing_errors(sdev)) scsi_set_medium_removal(sdev, SCSI_REMOVAL_PREVENT); } return 0; error_out: scsi_device_put(sdev); return retval; } /** * sd_release - invoked when the (last) close(2) is called on this * scsi disk. * @disk: disk to release * * Returns 0. * * Note: may block (uninterruptible) if error recovery is underway * on this disk. * * Locking: called with disk->open_mutex held. **/ static void sd_release(struct gendisk *disk) { struct scsi_disk *sdkp = scsi_disk(disk); struct scsi_device *sdev = sdkp->device; SCSI_LOG_HLQUEUE(3, sd_printk(KERN_INFO, sdkp, "sd_release\n")); if (atomic_dec_return(&sdkp->openers) == 0 && sdev->removable) { if (scsi_block_when_processing_errors(sdev)) scsi_set_medium_removal(sdev, SCSI_REMOVAL_ALLOW); } scsi_device_put(sdev); } static int sd_getgeo(struct block_device *bdev, struct hd_geometry *geo) { struct scsi_disk *sdkp = scsi_disk(bdev->bd_disk); struct scsi_device *sdp = sdkp->device; struct Scsi_Host *host = sdp->host; sector_t capacity = logical_to_sectors(sdp, sdkp->capacity); int diskinfo[4]; /* default to most commonly used values */ diskinfo[0] = 0x40; /* 1 << 6 */ diskinfo[1] = 0x20; /* 1 << 5 */ diskinfo[2] = capacity >> 11; /* override with calculated, extended default, or driver values */ if (host->hostt->bios_param) host->hostt->bios_param(sdp, bdev, capacity, diskinfo); else scsicam_bios_param(bdev, capacity, diskinfo); geo->heads = diskinfo[0]; geo->sectors = diskinfo[1]; geo->cylinders = diskinfo[2]; return 0; } /** * sd_ioctl - process an ioctl * @bdev: target block device * @mode: open mode * @cmd: ioctl command number * @arg: this is third argument given to ioctl(2) system call. * Often contains a pointer. * * Returns 0 if successful (some ioctls return positive numbers on * success as well). 
Returns a negated errno value in case of error. * * Note: most ioctls are forwarded onto the block subsystem or further * down in the scsi subsystem. **/ static int sd_ioctl(struct block_device *bdev, blk_mode_t mode, unsigned int cmd, unsigned long arg) { struct gendisk *disk = bdev->bd_disk; struct scsi_disk *sdkp = scsi_disk(disk); struct scsi_device *sdp = sdkp->device; void __user *p = (void __user *)arg; int error; SCSI_LOG_IOCTL(1, sd_printk(KERN_INFO, sdkp, "sd_ioctl: disk=%s, " "cmd=0x%x\n", disk->disk_name, cmd)); if (bdev_is_partition(bdev) && !capable(CAP_SYS_RAWIO)) return -ENOIOCTLCMD; /* * If we are in the middle of error recovery, don't let anyone * else try and use this device. Also, if error recovery fails, it * may try and take the device offline, in which case all further * access to the device is prohibited. */ error = scsi_ioctl_block_when_processing_errors(sdp, cmd, (mode & BLK_OPEN_NDELAY)); if (error) return error; if (is_sed_ioctl(cmd)) return sed_ioctl(sdkp->opal_dev, cmd, p); return scsi_ioctl(sdp, mode & BLK_OPEN_WRITE, cmd, p); } static void set_media_not_present(struct scsi_disk *sdkp) { if (sdkp->media_present) sdkp->device->changed = 1; if (sdkp->device->removable) { sdkp->media_present = 0; sdkp->capacity = 0; } } static int media_not_present(struct scsi_disk *sdkp, struct scsi_sense_hdr *sshdr) { if (!scsi_sense_valid(sshdr)) return 0; /* not invoked for commands that could return deferred errors */ switch (sshdr->sense_key) { case UNIT_ATTENTION: case NOT_READY: /* medium not present */ if (sshdr->asc == 0x3A) { set_media_not_present(sdkp); return 1; } } return 0; } /** * sd_check_events - check media events * @disk: kernel device descriptor * @clearing: disk events currently being cleared * * Returns mask of DISK_EVENT_*. * * Note: this function is invoked from the block subsystem. **/ static unsigned int sd_check_events(struct gendisk *disk, unsigned int clearing) { struct scsi_disk *sdkp = disk->private_data; struct scsi_device *sdp; int retval; bool disk_changed; if (!sdkp) return 0; sdp = sdkp->device; SCSI_LOG_HLQUEUE(3, sd_printk(KERN_INFO, sdkp, "sd_check_events\n")); /* * If the device is offline, don't send any commands - just pretend as * if the command failed. If the device ever comes back online, we * can deal with it then. It is only because of unrecoverable errors * that we would ever take a device offline in the first place. */ if (!scsi_device_online(sdp)) { set_media_not_present(sdkp); goto out; } /* * Using TEST_UNIT_READY enables differentiation between drive with * no cartridge loaded - NOT READY, drive with changed cartridge - * UNIT ATTENTION, or with same cartridge - GOOD STATUS. * * Drives that auto spin down, e.g. the iomega jaz 1G, will be started * by sd_spinup_disk() from sd_revalidate_disk(), which happens whenever * the disk is revalidated. */ if (scsi_block_when_processing_errors(sdp)) { struct scsi_sense_hdr sshdr = { 0, }; retval = scsi_test_unit_ready(sdp, SD_TIMEOUT, sdkp->max_retries, &sshdr); /* failed to execute TUR, assume media not present */ if (retval < 0 || host_byte(retval)) { set_media_not_present(sdkp); goto out; } if (media_not_present(sdkp, &sshdr)) goto out; } /* * For a removable scsi disk we have to recognise the presence * of a disk in the drive. */ if (!sdkp->media_present) sdp->changed = 1; sdkp->media_present = 1; out: /* * sdp->changed is set under the following conditions: * * Medium present state has changed in either direction. * Device has indicated UNIT_ATTENTION. 
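* The flag is consumed below, so each change is reported as a media change event at most once.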
*/ disk_changed = sdp->changed; sdp->changed = 0; return disk_changed ? DISK_EVENT_MEDIA_CHANGE : 0; } static int sd_sync_cache(struct scsi_disk *sdkp) { int res; struct scsi_device *sdp = sdkp->device; const int timeout = sdp->request_queue->rq_timeout * SD_FLUSH_TIMEOUT_MULTIPLIER; /* Leave the rest of the command zero to indicate flush everything. */ const unsigned char cmd[16] = { sdp->use_16_for_sync ? SYNCHRONIZE_CACHE_16 : SYNCHRONIZE_CACHE }; struct scsi_sense_hdr sshdr; struct scsi_failure failure_defs[] = { { .allowed = 3, .result = SCMD_FAILURE_RESULT_ANY, }, {} }; struct scsi_failures failures = { .failure_definitions = failure_defs, }; const struct scsi_exec_args exec_args = { .req_flags = BLK_MQ_REQ_PM, .sshdr = &sshdr, .failures = &failures, }; if (!scsi_device_online(sdp)) return -ENODEV; res = scsi_execute_cmd(sdp, cmd, REQ_OP_DRV_IN, NULL, 0, timeout, sdkp->max_retries, &exec_args); if (res) { sd_print_result(sdkp, "Synchronize Cache(10) failed", res); if (res < 0) return res; if (scsi_status_is_check_condition(res) && scsi_sense_valid(&sshdr)) { sd_print_sense_hdr(sdkp, &sshdr); /* we need to evaluate the error return */ if (sshdr.asc == 0x3a || /* medium not present */ sshdr.asc == 0x20 || /* invalid command */ (sshdr.asc == 0x74 && sshdr.ascq == 0x71)) /* drive is password locked */ /* this is no error here */ return 0; /* * If a format is in progress or if the drive does not * support sync, there is not much we can do because * this is called during shutdown or suspend so just * return success so those operations can proceed. */ if ((sshdr.asc == 0x04 && sshdr.ascq == 0x04) || sshdr.sense_key == ILLEGAL_REQUEST) return 0; } switch (host_byte(res)) { /* ignore errors due to racing a disconnection */ case DID_BAD_TARGET: case DID_NO_CONNECT: return 0; /* signal the upper layer it might try again */ case DID_BUS_BUSY: case DID_IMM_RETRY: case DID_REQUEUE: case DID_SOFT_ERROR: return -EBUSY; default: return -EIO; } } return 0; } static void sd_rescan(struct device *dev) { struct scsi_disk *sdkp = dev_get_drvdata(dev); sd_revalidate_disk(sdkp->disk); } static int sd_get_unique_id(struct gendisk *disk, u8 id[16], enum blk_unique_id type) { struct scsi_device *sdev = scsi_disk(disk)->device; const struct scsi_vpd *vpd; const unsigned char *d; int ret = -ENXIO, len; rcu_read_lock(); vpd = rcu_dereference(sdev->vpd_pg83); if (!vpd) goto out_unlock; ret = -EINVAL; for (d = vpd->data + 4; d < vpd->data + vpd->len; d += d[3] + 4) { /* we only care about designators with LU association */ if (((d[1] >> 4) & 0x3) != 0x00) continue; if ((d[1] & 0xf) != type) continue; /* * Only exit early if a 16-byte descriptor was found. Otherwise * keep looking as one with more entropy might still show up. 
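* (Lengths of 8, 12 or 16 bytes are the valid EUI-64/NAA designator sizes; anything else is skipped by the length check below.)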
*/ len = d[3]; if (len != 8 && len != 12 && len != 16) continue; ret = len; memcpy(id, d + 4, len); if (len == 16) break; } out_unlock: rcu_read_unlock(); return ret; } static int sd_scsi_to_pr_err(struct scsi_sense_hdr *sshdr, int result) { switch (host_byte(result)) { case DID_TRANSPORT_MARGINAL: case DID_TRANSPORT_DISRUPTED: case DID_BUS_BUSY: return PR_STS_RETRY_PATH_FAILURE; case DID_NO_CONNECT: return PR_STS_PATH_FAILED; case DID_TRANSPORT_FAILFAST: return PR_STS_PATH_FAST_FAILED; } switch (status_byte(result)) { case SAM_STAT_RESERVATION_CONFLICT: return PR_STS_RESERVATION_CONFLICT; case SAM_STAT_CHECK_CONDITION: if (!scsi_sense_valid(sshdr)) return PR_STS_IOERR; if (sshdr->sense_key == ILLEGAL_REQUEST && (sshdr->asc == 0x26 || sshdr->asc == 0x24)) return -EINVAL; fallthrough; default: return PR_STS_IOERR; } } static int sd_pr_in_command(struct block_device *bdev, u8 sa, unsigned char *data, int data_len) { struct scsi_disk *sdkp = scsi_disk(bdev->bd_disk); struct scsi_device *sdev = sdkp->device; struct scsi_sense_hdr sshdr; u8 cmd[10] = { PERSISTENT_RESERVE_IN, sa }; struct scsi_failure failure_defs[] = { { .sense = UNIT_ATTENTION, .asc = SCMD_FAILURE_ASC_ANY, .ascq = SCMD_FAILURE_ASCQ_ANY, .allowed = 5, .result = SAM_STAT_CHECK_CONDITION, }, {} }; struct scsi_failures failures = { .failure_definitions = failure_defs, }; const struct scsi_exec_args exec_args = { .sshdr = &sshdr, .failures = &failures, }; int result; put_unaligned_be16(data_len, &cmd[7]); result = scsi_execute_cmd(sdev, cmd, REQ_OP_DRV_IN, data, data_len, SD_TIMEOUT, sdkp->max_retries, &exec_args); if (scsi_status_is_check_condition(result) && scsi_sense_valid(&sshdr)) { sdev_printk(KERN_INFO, sdev, "PR command failed: %d\n", result); scsi_print_sense_hdr(sdev, NULL, &sshdr); } if (result <= 0) return result; return sd_scsi_to_pr_err(&sshdr, result); } static int sd_pr_read_keys(struct block_device *bdev, struct pr_keys *keys_info) { int result, i, data_offset, num_copy_keys; u32 num_keys = keys_info->num_keys; int data_len = num_keys * 8 + 8; u8 *data; data = kzalloc(data_len, GFP_KERNEL); if (!data) return -ENOMEM; result = sd_pr_in_command(bdev, READ_KEYS, data, data_len); if (result) goto free_data; keys_info->generation = get_unaligned_be32(&data[0]); keys_info->num_keys = get_unaligned_be32(&data[4]) / 8; data_offset = 8; num_copy_keys = min(num_keys, keys_info->num_keys); for (i = 0; i < num_copy_keys; i++) { keys_info->keys[i] = get_unaligned_be64(&data[data_offset]); data_offset += 8; } free_data: kfree(data); return result; } static int sd_pr_read_reservation(struct block_device *bdev, struct pr_held_reservation *rsv) { struct scsi_disk *sdkp = scsi_disk(bdev->bd_disk); struct scsi_device *sdev = sdkp->device; u8 data[24] = { }; int result, len; result = sd_pr_in_command(bdev, READ_RESERVATION, data, sizeof(data)); if (result) return result; len = get_unaligned_be32(&data[4]); if (!len) return 0; /* Make sure we have at least the key and type */ if (len < 14) { sdev_printk(KERN_INFO, sdev, "READ RESERVATION failed due to short return buffer of %d bytes\n", len); return -EINVAL; } rsv->generation = get_unaligned_be32(&data[0]); rsv->key = get_unaligned_be64(&data[8]); rsv->type = scsi_pr_type_to_block(data[21] & 0x0f); return 0; } static int sd_pr_out_command(struct block_device *bdev, u8 sa, u64 key, u64 sa_key, enum scsi_pr_type type, u8 flags) { struct scsi_disk *sdkp = scsi_disk(bdev->bd_disk); struct scsi_device *sdev = sdkp->device; struct scsi_sense_hdr sshdr; struct scsi_failure failure_defs[] = { 
{ .sense = UNIT_ATTENTION, .asc = SCMD_FAILURE_ASC_ANY, .ascq = SCMD_FAILURE_ASCQ_ANY, .allowed = 5, .result = SAM_STAT_CHECK_CONDITION, }, {} }; struct scsi_failures failures = { .failure_definitions = failure_defs, }; const struct scsi_exec_args exec_args = { .sshdr = &sshdr, .failures = &failures, }; int result; u8 cmd[16] = { 0, }; u8 data[24] = { 0, }; cmd[0] = PERSISTENT_RESERVE_OUT; cmd[1] = sa; cmd[2] = type; put_unaligned_be32(sizeof(data), &cmd[5]); put_unaligned_be64(key, &data[0]); put_unaligned_be64(sa_key, &data[8]); data[20] = flags; result = scsi_execute_cmd(sdev, cmd, REQ_OP_DRV_OUT, &data, sizeof(data), SD_TIMEOUT, sdkp->max_retries, &exec_args); if (scsi_status_is_check_condition(result) && scsi_sense_valid(&sshdr)) { sdev_printk(KERN_INFO, sdev, "PR command failed: %d\n", result); scsi_print_sense_hdr(sdev, NULL, &sshdr); } if (result <= 0) return result; return sd_scsi_to_pr_err(&sshdr, result); } static int sd_pr_register(struct block_device *bdev, u64 old_key, u64 new_key, u32 flags) { if (flags & ~PR_FL_IGNORE_KEY) return -EOPNOTSUPP; return sd_pr_out_command(bdev, (flags & PR_FL_IGNORE_KEY) ? 0x06 : 0x00, old_key, new_key, 0, (1 << 0) /* APTPL */); } static int sd_pr_reserve(struct block_device *bdev, u64 key, enum pr_type type, u32 flags) { if (flags) return -EOPNOTSUPP; return sd_pr_out_command(bdev, 0x01, key, 0, block_pr_type_to_scsi(type), 0); } static int sd_pr_release(struct block_device *bdev, u64 key, enum pr_type type) { return sd_pr_out_command(bdev, 0x02, key, 0, block_pr_type_to_scsi(type), 0); } static int sd_pr_preempt(struct block_device *bdev, u64 old_key, u64 new_key, enum pr_type type, bool abort) { return sd_pr_out_command(bdev, abort ? 0x05 : 0x04, old_key, new_key, block_pr_type_to_scsi(type), 0); } static int sd_pr_clear(struct block_device *bdev, u64 key) { return sd_pr_out_command(bdev, 0x03, key, 0, 0, 0); } static const struct pr_ops sd_pr_ops = { .pr_register = sd_pr_register, .pr_reserve = sd_pr_reserve, .pr_release = sd_pr_release, .pr_preempt = sd_pr_preempt, .pr_clear = sd_pr_clear, .pr_read_keys = sd_pr_read_keys, .pr_read_reservation = sd_pr_read_reservation, }; static void scsi_disk_free_disk(struct gendisk *disk) { struct scsi_disk *sdkp = scsi_disk(disk); put_device(&sdkp->disk_dev); } static const struct block_device_operations sd_fops = { .owner = THIS_MODULE, .open = sd_open, .release = sd_release, .ioctl = sd_ioctl, .getgeo = sd_getgeo, .compat_ioctl = blkdev_compat_ptr_ioctl, .check_events = sd_check_events, .unlock_native_capacity = sd_unlock_native_capacity, .report_zones = sd_zbc_report_zones, .get_unique_id = sd_get_unique_id, .free_disk = scsi_disk_free_disk, .pr_ops = &sd_pr_ops, }; /** * sd_eh_reset - reset error handling callback * @scmd: sd-issued command that has failed * * This function is called by the SCSI midlayer before starting * SCSI EH. When counting medium access failures we have to be * careful to register it only once per device and SCSI EH run; * there might be several timed out commands which would otherwise cause the * 'max_medium_access_timeouts' counter to trigger after the first * SCSI EH run and set the device offline. * So this function resets the internal counter before starting SCSI EH. 
**/ static void sd_eh_reset(struct scsi_cmnd *scmd) { struct scsi_disk *sdkp = scsi_disk(scsi_cmd_to_rq(scmd)->q->disk); /* New SCSI EH run, reset gate variable */ sdkp->ignore_medium_access_errors = false; } /** * sd_eh_action - error handling callback * @scmd: sd-issued command that has failed * @eh_disp: The recovery disposition suggested by the midlayer * * This function is called by the SCSI midlayer upon completion of an * error test command (currently TEST UNIT READY). The result of sending * the eh command is passed in eh_disp. We're looking for devices that * fail medium access commands but are OK with non access commands like * test unit ready (and so would wrongly be seen as having made a successful * recovery) **/ static int sd_eh_action(struct scsi_cmnd *scmd, int eh_disp) { struct scsi_disk *sdkp = scsi_disk(scsi_cmd_to_rq(scmd)->q->disk); struct scsi_device *sdev = scmd->device; if (!scsi_device_online(sdev) || !scsi_medium_access_command(scmd) || host_byte(scmd->result) != DID_TIME_OUT || eh_disp != SUCCESS) return eh_disp; /* * The device has timed out executing a medium access command. * However, the TEST UNIT READY command sent during error * handling completed successfully. Either the device is in the * process of recovering or it has suffered an internal failure * that prevents access to the storage medium. */ if (!sdkp->ignore_medium_access_errors) { sdkp->medium_access_timed_out++; sdkp->ignore_medium_access_errors = true; } /* * If the device keeps failing read/write commands but TEST UNIT * READY always completes successfully we assume that medium * access is no longer possible and take the device offline. */ if (sdkp->medium_access_timed_out >= sdkp->max_medium_access_timeouts) { scmd_printk(KERN_ERR, scmd, "Medium access timeout failure. Offlining disk!\n"); mutex_lock(&sdev->state_mutex); scsi_device_set_state(sdev, SDEV_OFFLINE); mutex_unlock(&sdev->state_mutex); return SUCCESS; } return eh_disp; } static unsigned int sd_completed_bytes(struct scsi_cmnd *scmd) { struct request *req = scsi_cmd_to_rq(scmd); struct scsi_device *sdev = scmd->device; unsigned int transferred, good_bytes; u64 start_lba, end_lba, bad_lba; /* * Some commands have a payload smaller than the device logical * block size (e.g. INQUIRY on a 4K disk). */ if (scsi_bufflen(scmd) <= sdev->sector_size) return 0; /* Check if we have 'bad_lba' information */ if (!scsi_get_sense_info_fld(scmd->sense_buffer, SCSI_SENSE_BUFFERSIZE, &bad_lba)) return 0; /* * If the bad lba was reported incorrectly, we have no idea where * the error is. */ start_lba = sectors_to_logical(sdev, blk_rq_pos(req)); end_lba = start_lba + bytes_to_logical(sdev, scsi_bufflen(scmd)); if (bad_lba < start_lba || bad_lba >= end_lba) return 0; /* * resid is optional but mostly filled in. When it's unused, * its value is zero, so we assume the whole buffer transferred */ transferred = scsi_bufflen(scmd) - scsi_get_resid(scmd); /* This computation should always be done in terms of the * resolution of the device's medium. */ good_bytes = logical_to_bytes(sdev, bad_lba - start_lba); return min(good_bytes, transferred); } /** * sd_done - bottom half handler: called when the lower level * driver has completed (successfully or otherwise) a scsi command. * @SCpnt: mid-level's per command structure. * * Note: potentially run from within an ISR. Must not block. **/ static int sd_done(struct scsi_cmnd *SCpnt) { int result = SCpnt->result; unsigned int good_bytes = result ? 
0 : scsi_bufflen(SCpnt); unsigned int sector_size = SCpnt->device->sector_size; unsigned int resid; struct scsi_sense_hdr sshdr; struct request *req = scsi_cmd_to_rq(SCpnt); struct scsi_disk *sdkp = scsi_disk(req->q->disk); int sense_valid = 0; int sense_deferred = 0; switch (req_op(req)) { case REQ_OP_DISCARD: case REQ_OP_WRITE_ZEROES: case REQ_OP_ZONE_RESET: case REQ_OP_ZONE_RESET_ALL: case REQ_OP_ZONE_OPEN: case REQ_OP_ZONE_CLOSE: case REQ_OP_ZONE_FINISH: if (!result) { good_bytes = blk_rq_bytes(req); scsi_set_resid(SCpnt, 0); } else { good_bytes = 0; scsi_set_resid(SCpnt, blk_rq_bytes(req)); } break; default: /* * In case of bogus fw or device, we could end up having * an unaligned partial completion. Check this here and force * alignment. */ resid = scsi_get_resid(SCpnt); if (resid & (sector_size - 1)) { sd_printk(KERN_INFO, sdkp, "Unaligned partial completion (resid=%u, sector_sz=%u)\n", resid, sector_size); scsi_print_command(SCpnt); resid = min(scsi_bufflen(SCpnt), round_up(resid, sector_size)); scsi_set_resid(SCpnt, resid); } } if (result) { sense_valid = scsi_command_normalize_sense(SCpnt, &sshdr); if (sense_valid) sense_deferred = scsi_sense_is_deferred(&sshdr); } sdkp->medium_access_timed_out = 0; if (!scsi_status_is_check_condition(result) && (!sense_valid || sense_deferred)) goto out; switch (sshdr.sense_key) { case HARDWARE_ERROR: case MEDIUM_ERROR: good_bytes = sd_completed_bytes(SCpnt); break; case RECOVERED_ERROR: good_bytes = scsi_bufflen(SCpnt); break; case NO_SENSE: /* This indicates a false check condition, so ignore it. An * unknown amount of data was transferred so treat it as an * error. */ SCpnt->result = 0; memset(SCpnt->sense_buffer, 0, SCSI_SENSE_BUFFERSIZE); break; case ABORTED_COMMAND: if (sshdr.asc == 0x10) /* DIF: Target detected corruption */ good_bytes = sd_completed_bytes(SCpnt); break; case ILLEGAL_REQUEST: switch (sshdr.asc) { case 0x10: /* DIX: Host detected corruption */ good_bytes = sd_completed_bytes(SCpnt); break; case 0x20: /* INVALID COMMAND OPCODE */ case 0x24: /* INVALID FIELD IN CDB */ switch (SCpnt->cmnd[0]) { case UNMAP: sd_disable_discard(sdkp); break; case WRITE_SAME_16: case WRITE_SAME: if (SCpnt->cmnd[1] & 8) { /* UNMAP */ sd_disable_discard(sdkp); } else { sd_disable_write_same(sdkp); req->rq_flags |= RQF_QUIET; } break; } } break; default: break; } out: if (sdkp->device->type == TYPE_ZBC) good_bytes = sd_zbc_complete(SCpnt, good_bytes, &sshdr); SCSI_LOG_HLCOMPLETE(1, scmd_printk(KERN_INFO, SCpnt, "sd_done: completed %d of %d bytes\n", good_bytes, scsi_bufflen(SCpnt))); return good_bytes; } /* * spinup disk - called only in sd_revalidate_disk() */ static void sd_spinup_disk(struct scsi_disk *sdkp) { static const u8 cmd[10] = { TEST_UNIT_READY }; unsigned long spintime_expire = 0; int spintime, sense_valid = 0; unsigned int the_result; struct scsi_sense_hdr sshdr; struct scsi_failure failure_defs[] = { /* Do not retry Medium Not Present */ { .sense = UNIT_ATTENTION, .asc = 0x3A, .ascq = SCMD_FAILURE_ASCQ_ANY, .result = SAM_STAT_CHECK_CONDITION, }, { .sense = NOT_READY, .asc = 0x3A, .ascq = SCMD_FAILURE_ASCQ_ANY, .result = SAM_STAT_CHECK_CONDITION, }, /* Retry when scsi_status_is_good would return false 3 times */ { .result = SCMD_FAILURE_STAT_ANY, .allowed = 3, }, {} }; struct scsi_failures failures = { .failure_definitions = failure_defs, }; const struct scsi_exec_args exec_args = { .sshdr = &sshdr, .failures = &failures, }; spintime = 0; /* Spin up drives, as required. 
Only do this at boot time */ /* Spinup needs to be done for module loads too. */ do { bool media_was_present = sdkp->media_present; scsi_failures_reset_retries(&failures); the_result = scsi_execute_cmd(sdkp->device, cmd, REQ_OP_DRV_IN, NULL, 0, SD_TIMEOUT, sdkp->max_retries, &exec_args); if (the_result > 0) { /* * If the drive has indicated to us that it doesn't * have any media in it, don't bother with any more * polling. */ if (media_not_present(sdkp, &sshdr)) { if (media_was_present) sd_printk(KERN_NOTICE, sdkp, "Media removed, stopped polling\n"); return; } sense_valid = scsi_sense_valid(&sshdr); } if (!scsi_status_is_check_condition(the_result)) { /* no sense, TUR either succeeded or failed * with a status error */ if(!spintime && !scsi_status_is_good(the_result)) { sd_print_result(sdkp, "Test Unit Ready failed", the_result); } break; } /* * The device does not want the automatic start to be issued. */ if (sdkp->device->no_start_on_add) break; if (sense_valid && sshdr.sense_key == NOT_READY) { if (sshdr.asc == 4 && sshdr.ascq == 3) break; /* manual intervention required */ if (sshdr.asc == 4 && sshdr.ascq == 0xb) break; /* standby */ if (sshdr.asc == 4 && sshdr.ascq == 0xc) break; /* unavailable */ if (sshdr.asc == 4 && sshdr.ascq == 0x1b) break; /* sanitize in progress */ if (sshdr.asc == 4 && sshdr.ascq == 0x24) break; /* depopulation in progress */ if (sshdr.asc == 4 && sshdr.ascq == 0x25) break; /* depopulation restoration in progress */ /* * Issue command to spin up drive when not ready */ if (!spintime) { /* Return immediately and start spin cycle */ const u8 start_cmd[10] = { [0] = START_STOP, [1] = 1, [4] = sdkp->device->start_stop_pwr_cond ? 0x11 : 1, }; sd_printk(KERN_NOTICE, sdkp, "Spinning up disk..."); scsi_execute_cmd(sdkp->device, start_cmd, REQ_OP_DRV_IN, NULL, 0, SD_TIMEOUT, sdkp->max_retries, &exec_args); spintime_expire = jiffies + 100 * HZ; spintime = 1; } /* Wait 1 second for next try */ msleep(1000); printk(KERN_CONT "."); /* * Wait for USB flash devices with slow firmware. * Yes, this sense key/ASC combination shouldn't * occur here. It's characteristic of these devices. */ } else if (sense_valid && sshdr.sense_key == UNIT_ATTENTION && sshdr.asc == 0x28) { if (!spintime) { spintime_expire = jiffies + 5 * HZ; spintime = 1; } /* Wait 1 second for next try */ msleep(1000); } else { /* we don't understand the sense code, so it's * probably pointless to loop */ if(!spintime) { sd_printk(KERN_NOTICE, sdkp, "Unit Not Ready\n"); sd_print_sense_hdr(sdkp, &sshdr); } break; } } while (spintime && time_before_eq(jiffies, spintime_expire)); if (spintime) { if (scsi_status_is_good(the_result)) printk(KERN_CONT "ready\n"); else printk(KERN_CONT "not responding...\n"); } } /* * Determine whether disk supports Data Integrity Field. */ static int sd_read_protection_type(struct scsi_disk *sdkp, unsigned char *buffer) { struct scsi_device *sdp = sdkp->device; u8 type; if (scsi_device_protection(sdp) == 0 || (buffer[12] & 1) == 0) { sdkp->protection_type = 0; return 0; } type = ((buffer[12] >> 1) & 7) + 1; /* P_TYPE 0 = Type 1 */ if (type > T10_PI_TYPE3_PROTECTION) { sd_printk(KERN_ERR, sdkp, "formatted with unsupported" \ " protection type %u. 
Disabling disk!\n", type); sdkp->protection_type = 0; return -ENODEV; } sdkp->protection_type = type; return 0; } static void sd_config_protection(struct scsi_disk *sdkp, struct queue_limits *lim) { struct scsi_device *sdp = sdkp->device; if (IS_ENABLED(CONFIG_BLK_DEV_INTEGRITY)) sd_dif_config_host(sdkp, lim); if (!sdkp->protection_type) return; if (!scsi_host_dif_capable(sdp->host, sdkp->protection_type)) { sd_first_printk(KERN_NOTICE, sdkp, "Disabling DIF Type %u protection\n", sdkp->protection_type); sdkp->protection_type = 0; } sd_first_printk(KERN_NOTICE, sdkp, "Enabling DIF Type %u protection\n", sdkp->protection_type); } static void read_capacity_error(struct scsi_disk *sdkp, struct scsi_device *sdp, struct scsi_sense_hdr *sshdr, int sense_valid, int the_result) { if (sense_valid) sd_print_sense_hdr(sdkp, sshdr); else sd_printk(KERN_NOTICE, sdkp, "Sense not available.\n"); /* * Set dirty bit for removable devices if not ready - * sometimes drives will not report this properly. */ if (sdp->removable && sense_valid && sshdr->sense_key == NOT_READY) set_media_not_present(sdkp); /* * We used to set media_present to 0 here to indicate no media * in the drive, but some drives fail read capacity even with * media present, so we can't do that. */ sdkp->capacity = 0; /* unknown mapped to zero - as usual */ } #define RC16_LEN 32 #if RC16_LEN > SD_BUF_SIZE #error RC16_LEN must not be more than SD_BUF_SIZE #endif #define READ_CAPACITY_RETRIES_ON_RESET 10 static int read_capacity_16(struct scsi_disk *sdkp, struct scsi_device *sdp, struct queue_limits *lim, unsigned char *buffer) { unsigned char cmd[16]; struct scsi_sense_hdr sshdr; const struct scsi_exec_args exec_args = { .sshdr = &sshdr, }; int sense_valid = 0; int the_result; int retries = 3, reset_retries = READ_CAPACITY_RETRIES_ON_RESET; unsigned int alignment; unsigned long long lba; unsigned sector_size; if (sdp->no_read_capacity_16) return -EINVAL; do { memset(cmd, 0, 16); cmd[0] = SERVICE_ACTION_IN_16; cmd[1] = SAI_READ_CAPACITY_16; cmd[13] = RC16_LEN; memset(buffer, 0, RC16_LEN); the_result = scsi_execute_cmd(sdp, cmd, REQ_OP_DRV_IN, buffer, RC16_LEN, SD_TIMEOUT, sdkp->max_retries, &exec_args); if (the_result > 0) { if (media_not_present(sdkp, &sshdr)) return -ENODEV; sense_valid = scsi_sense_valid(&sshdr); if (sense_valid && sshdr.sense_key == ILLEGAL_REQUEST && (sshdr.asc == 0x20 || sshdr.asc == 0x24) && sshdr.ascq == 0x00) /* Invalid Command Operation Code or * Invalid Field in CDB, just retry * silently with RC10 */ return -EINVAL; if (sense_valid && sshdr.sense_key == UNIT_ATTENTION && sshdr.asc == 0x29 && sshdr.ascq == 0x00) /* Device reset might occur several times, * give it one more chance */ if (--reset_retries > 0) continue; } retries--; } while (the_result && retries); if (the_result) { sd_print_result(sdkp, "Read Capacity(16) failed", the_result); read_capacity_error(sdkp, sdp, &sshdr, sense_valid, the_result); return -EINVAL; } sector_size = get_unaligned_be32(&buffer[8]); lba = get_unaligned_be64(&buffer[0]); if (sd_read_protection_type(sdkp, buffer) < 0) { sdkp->capacity = 0; return -ENODEV; } /* Logical blocks per physical block exponent */ sdkp->physical_block_size = (1 << (buffer[13] & 0xf)) * sector_size; /* RC basis */ sdkp->rc_basis = (buffer[12] >> 4) & 0x3; /* Lowest aligned logical block */ alignment = ((buffer[14] & 0x3f) << 8 | buffer[15]) * sector_size; lim->alignment_offset = alignment; if (alignment && sdkp->first_scan) sd_printk(KERN_NOTICE, sdkp, "physical block alignment offset: %u\n", alignment); if 
(buffer[14] & 0x80) { /* LBPME */ sdkp->lbpme = 1; if (buffer[14] & 0x40) /* LBPRZ */ sdkp->lbprz = 1; } sdkp->capacity = lba + 1; return sector_size; } static int read_capacity_10(struct scsi_disk *sdkp, struct scsi_device *sdp, unsigned char *buffer) { static const u8 cmd[10] = { READ_CAPACITY }; struct scsi_sense_hdr sshdr; struct scsi_failure failure_defs[] = { /* Do not retry Medium Not Present */ { .sense = UNIT_ATTENTION, .asc = 0x3A, .result = SAM_STAT_CHECK_CONDITION, }, { .sense = NOT_READY, .asc = 0x3A, .result = SAM_STAT_CHECK_CONDITION, }, /* Device reset might occur several times so retry a lot */ { .sense = UNIT_ATTENTION, .asc = 0x29, .allowed = READ_CAPACITY_RETRIES_ON_RESET, .result = SAM_STAT_CHECK_CONDITION, }, /* Any other error not listed above retry 3 times */ { .result = SCMD_FAILURE_RESULT_ANY, .allowed = 3, }, {} }; struct scsi_failures failures = { .failure_definitions = failure_defs, }; const struct scsi_exec_args exec_args = { .sshdr = &sshdr, .failures = &failures, }; int sense_valid = 0; int the_result; sector_t lba; unsigned sector_size; memset(buffer, 0, 8); the_result = scsi_execute_cmd(sdp, cmd, REQ_OP_DRV_IN, buffer, 8, SD_TIMEOUT, sdkp->max_retries, &exec_args); if (the_result > 0) { sense_valid = scsi_sense_valid(&sshdr); if (media_not_present(sdkp, &sshdr)) return -ENODEV; } if (the_result) { sd_print_result(sdkp, "Read Capacity(10) failed", the_result); read_capacity_error(sdkp, sdp, &sshdr, sense_valid, the_result); return -EINVAL; } sector_size = get_unaligned_be32(&buffer[4]); lba = get_unaligned_be32(&buffer[0]); if (sdp->no_read_capacity_16 && (lba == 0xffffffff)) { /* Some buggy (usb cardreader) devices return an lba of 0xffffffff when they want to report a size of 0 (with which they really mean no media is present) */ sdkp->capacity = 0; sdkp->physical_block_size = sector_size; return sector_size; } sdkp->capacity = lba + 1; sdkp->physical_block_size = sector_size; return sector_size; } static int sd_try_rc16_first(struct scsi_device *sdp) { if (sdp->host->max_cmd_len < 16) return 0; if (sdp->try_rc_10_first) return 0; if (sdp->scsi_level > SCSI_SPC_2) return 1; if (scsi_device_protection(sdp)) return 1; return 0; } /* * read disk capacity */ static void sd_read_capacity(struct scsi_disk *sdkp, struct queue_limits *lim, unsigned char *buffer) { int sector_size; struct scsi_device *sdp = sdkp->device; if (sd_try_rc16_first(sdp)) { sector_size = read_capacity_16(sdkp, sdp, lim, buffer); if (sector_size == -EOVERFLOW) goto got_data; if (sector_size == -ENODEV) return; if (sector_size < 0) sector_size = read_capacity_10(sdkp, sdp, buffer); if (sector_size < 0) return; } else { sector_size = read_capacity_10(sdkp, sdp, buffer); if (sector_size == -EOVERFLOW) goto got_data; if (sector_size < 0) return; if ((sizeof(sdkp->capacity) > 4) && (sdkp->capacity > 0xffffffffULL)) { int old_sector_size = sector_size; sd_printk(KERN_NOTICE, sdkp, "Very big device. " "Trying to use READ CAPACITY(16).\n"); sector_size = read_capacity_16(sdkp, sdp, lim, buffer); if (sector_size < 0) { sd_printk(KERN_NOTICE, sdkp, "Using 0xffffffff as device size\n"); sdkp->capacity = 1 + (sector_t) 0xffffffff; sector_size = old_sector_size; goto got_data; } /* Remember that READ CAPACITY(16) succeeded */ sdp->try_rc_10_first = 0; } } /* Some devices are known to return the total number of blocks, * not the highest block number. Some devices have versions * which do this and others which do not. Some devices we might * suspect of doing this but we don't know for certain.
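* (sdp->fix_capacity and sdp->guess_capacity are not set by sd itself;
* the transport sets them from per-device quirk tables before the disk
* is scanned, e.g. the USB storage US_FL_FIX_CAPACITY and
* US_FL_CAPACITY_HEURISTICS flags.)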
* * If we know the reported capacity is wrong, decrement it. If * we can only guess, then assume the number of blocks is even * (usually true but not always) and err on the side of lowering * the capacity. */ if (sdp->fix_capacity || (sdp->guess_capacity && (sdkp->capacity & 0x01))) { sd_printk(KERN_INFO, sdkp, "Adjusting the sector count " "from its reported value: %llu\n", (unsigned long long) sdkp->capacity); --sdkp->capacity; } got_data: if (sector_size == 0) { sector_size = 512; sd_printk(KERN_NOTICE, sdkp, "Sector size 0 reported, " "assuming 512.\n"); } if (sector_size != 512 && sector_size != 1024 && sector_size != 2048 && sector_size != 4096) { sd_printk(KERN_NOTICE, sdkp, "Unsupported sector size %d.\n", sector_size); /* * The user might want to re-format the drive with * a supported sectorsize. Once this happens, it * would be relatively trivial to set the thing up. * For this reason, we leave the thing in the table. */ sdkp->capacity = 0; /* * set a bogus sector size so the normal read/write * logic in the block layer will eventually refuse any * request on this device without tripping over power * of two sector size assumptions */ sector_size = 512; } lim->logical_block_size = sector_size; lim->physical_block_size = sdkp->physical_block_size; sdkp->device->sector_size = sector_size; if (sdkp->capacity > 0xffffffff) sdp->use_16_for_rw = 1; } /* * Print disk capacity */ static void sd_print_capacity(struct scsi_disk *sdkp, sector_t old_capacity) { int sector_size = sdkp->device->sector_size; char cap_str_2[10], cap_str_10[10]; if (!sdkp->first_scan && old_capacity == sdkp->capacity) return; string_get_size(sdkp->capacity, sector_size, STRING_UNITS_2, cap_str_2, sizeof(cap_str_2)); string_get_size(sdkp->capacity, sector_size, STRING_UNITS_10, cap_str_10, sizeof(cap_str_10)); sd_printk(KERN_NOTICE, sdkp, "%llu %d-byte logical blocks: (%s/%s)\n", (unsigned long long)sdkp->capacity, sector_size, cap_str_10, cap_str_2); if (sdkp->physical_block_size != sector_size) sd_printk(KERN_NOTICE, sdkp, "%u-byte physical blocks\n", sdkp->physical_block_size); } /* called with buffer of length 512 */ static inline int sd_do_mode_sense(struct scsi_disk *sdkp, int dbd, int modepage, unsigned char *buffer, int len, struct scsi_mode_data *data, struct scsi_sense_hdr *sshdr) { /* * If we must use MODE SENSE(10), make sure that the buffer length * is at least 8 bytes so that the mode sense header fits. */ if (sdkp->device->use_10_for_ms && len < 8) len = 8; return scsi_mode_sense(sdkp->device, dbd, modepage, 0, buffer, len, SD_TIMEOUT, sdkp->max_retries, data, sshdr); } /* * read write protect setting, if possible - called only in sd_revalidate_disk() * called with buffer of length SD_BUF_SIZE */ static void sd_read_write_protect_flag(struct scsi_disk *sdkp, unsigned char *buffer) { int res; struct scsi_device *sdp = sdkp->device; struct scsi_mode_data data; int old_wp = sdkp->write_prot; set_disk_ro(sdkp->disk, 0); if (sdp->skip_ms_page_3f) { sd_first_printk(KERN_NOTICE, sdkp, "Assuming Write Enabled\n"); return; } if (sdp->use_192_bytes_for_3f) { res = sd_do_mode_sense(sdkp, 0, 0x3F, buffer, 192, &data, NULL); } else { /* * First attempt: ask for all pages (0x3F), but only 4 bytes. * We have to start carefully: some devices hang if we ask * for more than is available. 
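* Four bytes covers just the MODE SENSE(6) mode parameter header (mode
* data length, medium type, device-specific parameter, block descriptor
* length); sd_do_mode_sense() bumps this to 8 when MODE SENSE(10) is in
* use, so the header always fits.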
*/ res = sd_do_mode_sense(sdkp, 0, 0x3F, buffer, 4, &data, NULL); /* * Second attempt: ask for page 0 When only page 0 is * implemented, a request for page 3F may return Sense Key * 5: Illegal Request, Sense Code 24: Invalid field in * CDB. */ if (res < 0) res = sd_do_mode_sense(sdkp, 0, 0, buffer, 4, &data, NULL); /* * Third attempt: ask 255 bytes, as we did earlier. */ if (res < 0) res = sd_do_mode_sense(sdkp, 0, 0x3F, buffer, 255, &data, NULL); } if (res < 0) { sd_first_printk(KERN_WARNING, sdkp, "Test WP failed, assume Write Enabled\n"); } else { sdkp->write_prot = ((data.device_specific & 0x80) != 0); set_disk_ro(sdkp->disk, sdkp->write_prot); if (sdkp->first_scan || old_wp != sdkp->write_prot) { sd_printk(KERN_NOTICE, sdkp, "Write Protect is %s\n", sdkp->write_prot ? "on" : "off"); sd_printk(KERN_DEBUG, sdkp, "Mode Sense: %4ph\n", buffer); } } } /* * sd_read_cache_type - called only from sd_revalidate_disk() * called with buffer of length SD_BUF_SIZE */ static void sd_read_cache_type(struct scsi_disk *sdkp, unsigned char *buffer) { int len = 0, res; struct scsi_device *sdp = sdkp->device; int dbd; int modepage; int first_len; struct scsi_mode_data data; struct scsi_sense_hdr sshdr; int old_wce = sdkp->WCE; int old_rcd = sdkp->RCD; int old_dpofua = sdkp->DPOFUA; if (sdkp->cache_override) return; first_len = 4; if (sdp->skip_ms_page_8) { if (sdp->type == TYPE_RBC) goto defaults; else { if (sdp->skip_ms_page_3f) goto defaults; modepage = 0x3F; if (sdp->use_192_bytes_for_3f) first_len = 192; dbd = 0; } } else if (sdp->type == TYPE_RBC) { modepage = 6; dbd = 8; } else { modepage = 8; dbd = 0; } /* cautiously ask */ res = sd_do_mode_sense(sdkp, dbd, modepage, buffer, first_len, &data, &sshdr); if (res < 0) goto bad_sense; if (!data.header_length) { modepage = 6; first_len = 0; sd_first_printk(KERN_ERR, sdkp, "Missing header in MODE_SENSE response\n"); } /* that went OK, now ask for the proper length */ len = data.length; /* * We're only interested in the first three bytes, actually. * But the data cache page is defined for the first 20. */ if (len < 3) goto bad_sense; else if (len > SD_BUF_SIZE) { sd_first_printk(KERN_NOTICE, sdkp, "Truncating mode parameter " "data from %d to %d bytes\n", len, SD_BUF_SIZE); len = SD_BUF_SIZE; } if (modepage == 0x3F && sdp->use_192_bytes_for_3f) len = 192; /* Get the data */ if (len > first_len) res = sd_do_mode_sense(sdkp, dbd, modepage, buffer, len, &data, &sshdr); if (!res) { int offset = data.header_length + data.block_descriptor_length; while (offset < len) { u8 page_code = buffer[offset] & 0x3F; u8 spf = buffer[offset] & 0x40; if (page_code == 8 || page_code == 6) { /* We're interested only in the first 3 bytes. 
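* Byte 2 of the Caching page (0x08) carries WCE in bit 2 and RCD in
* bit 0; the RBC Device Parameters page (0x06) instead carries a Write
* Cache Disable bit in bit 0, hence the inverted test at Page_found
* below.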
*/ if (len - offset <= 2) { sd_first_printk(KERN_ERR, sdkp, "Incomplete mode parameter " "data\n"); goto defaults; } else { modepage = page_code; goto Page_found; } } else { /* Go to the next page */ if (spf && len - offset > 3) offset += 4 + (buffer[offset+2] << 8) + buffer[offset+3]; else if (!spf && len - offset > 1) offset += 2 + buffer[offset+1]; else { sd_first_printk(KERN_ERR, sdkp, "Incomplete mode " "parameter data\n"); goto defaults; } } } sd_first_printk(KERN_WARNING, sdkp, "No Caching mode page found\n"); goto defaults; Page_found: if (modepage == 8) { sdkp->WCE = ((buffer[offset + 2] & 0x04) != 0); sdkp->RCD = ((buffer[offset + 2] & 0x01) != 0); } else { sdkp->WCE = ((buffer[offset + 2] & 0x01) == 0); sdkp->RCD = 0; } sdkp->DPOFUA = (data.device_specific & 0x10) != 0; if (sdp->broken_fua) { sd_first_printk(KERN_NOTICE, sdkp, "Disabling FUA\n"); sdkp->DPOFUA = 0; } else if (sdkp->DPOFUA && !sdkp->device->use_10_for_rw && !sdkp->device->use_16_for_rw) { sd_first_printk(KERN_NOTICE, sdkp, "Uses READ/WRITE(6), disabling FUA\n"); sdkp->DPOFUA = 0; } /* No cache flush allowed for write protected devices */ if (sdkp->WCE && sdkp->write_prot) sdkp->WCE = 0; if (sdkp->first_scan || old_wce != sdkp->WCE || old_rcd != sdkp->RCD || old_dpofua != sdkp->DPOFUA) sd_printk(KERN_NOTICE, sdkp, "Write cache: %s, read cache: %s, %s\n", sdkp->WCE ? "enabled" : "disabled", sdkp->RCD ? "disabled" : "enabled", sdkp->DPOFUA ? "supports DPO and FUA" : "doesn't support DPO or FUA"); return; } bad_sense: if (res == -EIO && scsi_sense_valid(&sshdr) && sshdr.sense_key == ILLEGAL_REQUEST && sshdr.asc == 0x24 && sshdr.ascq == 0x0) /* Invalid field in CDB */ sd_first_printk(KERN_NOTICE, sdkp, "Cache data unavailable\n"); else sd_first_printk(KERN_ERR, sdkp, "Asking for cache data failed\n"); defaults: if (sdp->wce_default_on) { sd_first_printk(KERN_NOTICE, sdkp, "Assuming drive cache: write back\n"); sdkp->WCE = 1; } else { sd_first_printk(KERN_WARNING, sdkp, "Assuming drive cache: write through\n"); sdkp->WCE = 0; } sdkp->RCD = 0; sdkp->DPOFUA = 0; } static bool sd_is_perm_stream(struct scsi_disk *sdkp, unsigned int stream_id) { u8 cdb[16] = { SERVICE_ACTION_IN_16, SAI_GET_STREAM_STATUS }; struct { struct scsi_stream_status_header h; struct scsi_stream_status s; } buf; struct scsi_device *sdev = sdkp->device; struct scsi_sense_hdr sshdr; const struct scsi_exec_args exec_args = { .sshdr = &sshdr, }; int res; put_unaligned_be16(stream_id, &cdb[4]); put_unaligned_be32(sizeof(buf), &cdb[10]); res = scsi_execute_cmd(sdev, cdb, REQ_OP_DRV_IN, &buf, sizeof(buf), SD_TIMEOUT, sdkp->max_retries, &exec_args); if (res < 0) return false; if (scsi_status_is_check_condition(res) && scsi_sense_valid(&sshdr)) sd_print_sense_hdr(sdkp, &sshdr); if (res) return false; if (get_unaligned_be32(&buf.h.len) < sizeof(struct scsi_stream_status)) return false; return buf.h.stream_status[0].perm; } static void sd_read_io_hints(struct scsi_disk *sdkp, unsigned char *buffer) { struct scsi_device *sdp = sdkp->device; const struct scsi_io_group_descriptor *desc, *start, *end; u16 permanent_stream_count_old; struct scsi_sense_hdr sshdr; struct scsi_mode_data data; int res; if (sdp->sdev_bflags & BLIST_SKIP_IO_HINTS) return; res = scsi_mode_sense(sdp, /*dbd=*/0x8, /*modepage=*/0x0a, /*subpage=*/0x05, buffer, SD_BUF_SIZE, SD_TIMEOUT, sdkp->max_retries, &data, &sshdr); if (res < 0) return; start = (void *)buffer + data.header_length + 16; end = (void *)buffer + ALIGN_DOWN(data.header_length + data.length, sizeof(*end)); /* * From "SBC-5 
Constrained Streams with Data Lifetimes": Device servers * should assign the lowest numbered stream identifiers to permanent * streams. */ for (desc = start; desc < end; desc++) if (!desc->st_enble || !sd_is_perm_stream(sdkp, desc - start)) break; permanent_stream_count_old = sdkp->permanent_stream_count; sdkp->permanent_stream_count = desc - start; if (sdkp->rscs && sdkp->permanent_stream_count < 2) sd_printk(KERN_INFO, sdkp, "Unexpected: RSCS has been set and the permanent stream count is %u\n", sdkp->permanent_stream_count); else if (sdkp->permanent_stream_count != permanent_stream_count_old) sd_printk(KERN_INFO, sdkp, "permanent stream count = %d\n", sdkp->permanent_stream_count); } /* * The ATO bit indicates whether the DIF application tag is available * for use by the operating system. */ static void sd_read_app_tag_own(struct scsi_disk *sdkp, unsigned char *buffer) { int res, offset; struct scsi_device *sdp = sdkp->device; struct scsi_mode_data data; struct scsi_sense_hdr sshdr; if (sdp->type != TYPE_DISK && sdp->type != TYPE_ZBC) return; if (sdkp->protection_type == 0) return; res = scsi_mode_sense(sdp, 1, 0x0a, 0, buffer, 36, SD_TIMEOUT, sdkp->max_retries, &data, &sshdr); if (res < 0 || !data.header_length || data.length < 6) { sd_first_printk(KERN_WARNING, sdkp, "getting Control mode page failed, assume no ATO\n"); if (res == -EIO && scsi_sense_valid(&sshdr)) sd_print_sense_hdr(sdkp, &sshdr); return; } offset = data.header_length + data.block_descriptor_length; if ((buffer[offset] & 0x3f) != 0x0a) { sd_first_printk(KERN_ERR, sdkp, "ATO Got wrong page\n"); return; } if ((buffer[offset + 5] & 0x80) == 0) return; sdkp->ATO = 1; return; } static unsigned int sd_discard_mode(struct scsi_disk *sdkp) { if (!sdkp->lbpme) return SD_LBP_FULL; if (!sdkp->lbpvpd) { /* LBP VPD page not provided */ if (sdkp->max_unmap_blocks) return SD_LBP_UNMAP; return SD_LBP_WS16; } /* LBP VPD page tells us what to use */ if (sdkp->lbpu && sdkp->max_unmap_blocks) return SD_LBP_UNMAP; if (sdkp->lbpws) return SD_LBP_WS16; if (sdkp->lbpws10) return SD_LBP_WS10; return SD_LBP_DISABLE; } /* * Query disk device for preferred I/O sizes.
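* The limits come straight from the Block Limits VPD page (0xb0):
* OPTIMAL TRANSFER LENGTH GRANULARITY at bytes 6-7, MAXIMUM TRANSFER
* LENGTH at bytes 8-11 and OPTIMAL TRANSFER LENGTH at bytes 12-15, all
* expressed in logical blocks.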
*/ static void sd_read_block_limits(struct scsi_disk *sdkp, struct queue_limits *lim) { struct scsi_vpd *vpd; rcu_read_lock(); vpd = rcu_dereference(sdkp->device->vpd_pgb0); if (!vpd || vpd->len < 16) goto out; sdkp->min_xfer_blocks = get_unaligned_be16(&vpd->data[6]); sdkp->max_xfer_blocks = get_unaligned_be32(&vpd->data[8]); sdkp->opt_xfer_blocks = get_unaligned_be32(&vpd->data[12]); if (vpd->len >= 64) { unsigned int lba_count, desc_count; sdkp->max_ws_blocks = (u32)get_unaligned_be64(&vpd->data[36]); if (!sdkp->lbpme) goto config_atomic; lba_count = get_unaligned_be32(&vpd->data[20]); desc_count = get_unaligned_be32(&vpd->data[24]); if (lba_count && desc_count) sdkp->max_unmap_blocks = lba_count; sdkp->unmap_granularity = get_unaligned_be32(&vpd->data[28]); if (vpd->data[32] & 0x80) sdkp->unmap_alignment = get_unaligned_be32(&vpd->data[32]) & ~(1 << 31); config_atomic: sdkp->max_atomic = get_unaligned_be32(&vpd->data[44]); sdkp->atomic_alignment = get_unaligned_be32(&vpd->data[48]); sdkp->atomic_granularity = get_unaligned_be32(&vpd->data[52]); sdkp->max_atomic_with_boundary = get_unaligned_be32(&vpd->data[56]); sdkp->max_atomic_boundary = get_unaligned_be32(&vpd->data[60]); sd_config_atomic(sdkp, lim); } out: rcu_read_unlock(); } /* Parse the Block Limits Extension VPD page (0xb7) */ static void sd_read_block_limits_ext(struct scsi_disk *sdkp) { struct scsi_vpd *vpd; rcu_read_lock(); vpd = rcu_dereference(sdkp->device->vpd_pgb7); if (vpd && vpd->len >= 2) sdkp->rscs = vpd->data[5] & 1; rcu_read_unlock(); } /* Query block device characteristics */ static void sd_read_block_characteristics(struct scsi_disk *sdkp, struct queue_limits *lim) { struct scsi_vpd *vpd; u16 rot; rcu_read_lock(); vpd = rcu_dereference(sdkp->device->vpd_pgb1); if (!vpd || vpd->len <= 8) { rcu_read_unlock(); return; } rot = get_unaligned_be16(&vpd->data[4]); sdkp->zoned = (vpd->data[8] >> 4) & 3; rcu_read_unlock(); if (rot == 1) lim->features &= ~(BLK_FEAT_ROTATIONAL | BLK_FEAT_ADD_RANDOM); if (!sdkp->first_scan) return; if (sdkp->device->type == TYPE_ZBC) sd_printk(KERN_NOTICE, sdkp, "Host-managed zoned block device\n"); else if (sdkp->zoned == 1) sd_printk(KERN_NOTICE, sdkp, "Host-aware SMR disk used as regular disk\n"); else if (sdkp->zoned == 2) sd_printk(KERN_NOTICE, sdkp, "Drive-managed SMR disk\n"); } /** * sd_read_block_provisioning - Query provisioning VPD page * @sdkp: disk to query */ static void sd_read_block_provisioning(struct scsi_disk *sdkp) { struct scsi_vpd *vpd; if (sdkp->lbpme == 0) return; rcu_read_lock(); vpd = rcu_dereference(sdkp->device->vpd_pgb2); if (!vpd || vpd->len < 8) { rcu_read_unlock(); return; } sdkp->lbpvpd = 1; sdkp->lbpu = (vpd->data[5] >> 7) & 1; /* UNMAP */ sdkp->lbpws = (vpd->data[5] >> 6) & 1; /* WRITE SAME(16) w/ UNMAP */ sdkp->lbpws10 = (vpd->data[5] >> 5) & 1; /* WRITE SAME(10) w/ UNMAP */ rcu_read_unlock(); } static void sd_read_write_same(struct scsi_disk *sdkp, unsigned char *buffer) { struct scsi_device *sdev = sdkp->device; if (sdev->host->no_write_same) { sdev->no_write_same = 1; return; } if (scsi_report_opcode(sdev, buffer, SD_BUF_SIZE, INQUIRY, 0) < 0) { struct scsi_vpd *vpd; sdev->no_report_opcodes = 1; /* Disable WRITE SAME if REPORT SUPPORTED OPERATION * CODES is unsupported and the device has an ATA * Information VPD page (SAT). 
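* (VPD page 0x89 is defined by SAT and only shows up behind a
* SCSI-to-ATA translation layer, where WRITE SAME is typically not
* usable.)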
*/ rcu_read_lock(); vpd = rcu_dereference(sdev->vpd_pg89); if (vpd) sdev->no_write_same = 1; rcu_read_unlock(); } if (scsi_report_opcode(sdev, buffer, SD_BUF_SIZE, WRITE_SAME_16, 0) == 1) sdkp->ws16 = 1; if (scsi_report_opcode(sdev, buffer, SD_BUF_SIZE, WRITE_SAME, 0) == 1) sdkp->ws10 = 1; } static void sd_read_security(struct scsi_disk *sdkp, unsigned char *buffer) { struct scsi_device *sdev = sdkp->device; if (!sdev->security_supported) return; if (scsi_report_opcode(sdev, buffer, SD_BUF_SIZE, SECURITY_PROTOCOL_IN, 0) == 1 && scsi_report_opcode(sdev, buffer, SD_BUF_SIZE, SECURITY_PROTOCOL_OUT, 0) == 1) sdkp->security = 1; } static inline sector_t sd64_to_sectors(struct scsi_disk *sdkp, u8 *buf) { return logical_to_sectors(sdkp->device, get_unaligned_be64(buf)); } /** * sd_read_cpr - Query concurrent positioning ranges * @sdkp: disk to query */ static void sd_read_cpr(struct scsi_disk *sdkp) { struct blk_independent_access_ranges *iars = NULL; unsigned char *buffer = NULL; unsigned int nr_cpr = 0; int i, vpd_len, buf_len = SD_BUF_SIZE; u8 *desc; /* * We need to have the capacity set first for the block layer to be * able to check the ranges. */ if (sdkp->first_scan) return; if (!sdkp->capacity) goto out; /* * Concurrent Positioning Ranges VPD: there can be at most 256 ranges, * leading to a maximum page size of 64 + 256*32 bytes. */ buf_len = 64 + 256*32; buffer = kmalloc(buf_len, GFP_KERNEL); if (!buffer || scsi_get_vpd_page(sdkp->device, 0xb9, buffer, buf_len)) goto out; /* We must have at least a 64B header and one 32B range descriptor */ vpd_len = get_unaligned_be16(&buffer[2]) + 4; if (vpd_len > buf_len || vpd_len < 64 + 32 || (vpd_len & 31)) { sd_printk(KERN_ERR, sdkp, "Invalid Concurrent Positioning Ranges VPD page\n"); goto out; } nr_cpr = (vpd_len - 64) / 32; if (nr_cpr == 1) { nr_cpr = 0; goto out; } iars = disk_alloc_independent_access_ranges(sdkp->disk, nr_cpr); if (!iars) { nr_cpr = 0; goto out; } desc = &buffer[64]; for (i = 0; i < nr_cpr; i++, desc += 32) { if (desc[0] != i) { sd_printk(KERN_ERR, sdkp, "Invalid Concurrent Positioning Range number\n"); nr_cpr = 0; break; } iars->ia_range[i].sector = sd64_to_sectors(sdkp, desc + 8); iars->ia_range[i].nr_sectors = sd64_to_sectors(sdkp, desc + 16); } out: disk_set_independent_access_ranges(sdkp->disk, iars); if (nr_cpr && sdkp->nr_actuators != nr_cpr) { sd_printk(KERN_NOTICE, sdkp, "%u concurrent positioning ranges\n", nr_cpr); sdkp->nr_actuators = nr_cpr; } kfree(buffer); } static bool sd_validate_min_xfer_size(struct scsi_disk *sdkp) { struct scsi_device *sdp = sdkp->device; unsigned int min_xfer_bytes = logical_to_bytes(sdp, sdkp->min_xfer_blocks); if (sdkp->min_xfer_blocks == 0) return false; if (min_xfer_bytes & (sdkp->physical_block_size - 1)) { sd_first_printk(KERN_WARNING, sdkp, "Preferred minimum I/O size %u bytes not a " \ "multiple of physical block size (%u bytes)\n", min_xfer_bytes, sdkp->physical_block_size); sdkp->min_xfer_blocks = 0; return false; } sd_first_printk(KERN_INFO, sdkp, "Preferred minimum I/O size %u bytes\n", min_xfer_bytes); return true; } /* * Determine the device's preferred I/O size for reads and writes * unless the reported value is unreasonably small, large, not a * multiple of the physical block size, or simply garbage. 
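* Concretely, the reported value is used only if it fits within both
* dev_max and SD_DEF_XFER_BLOCKS, is at least PAGE_SIZE, and is a
* multiple of both the preferred minimum I/O size and the physical
* block size.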
*/ static bool sd_validate_opt_xfer_size(struct scsi_disk *sdkp, unsigned int dev_max) { struct scsi_device *sdp = sdkp->device; unsigned int opt_xfer_bytes = logical_to_bytes(sdp, sdkp->opt_xfer_blocks); unsigned int min_xfer_bytes = logical_to_bytes(sdp, sdkp->min_xfer_blocks); if (sdkp->opt_xfer_blocks == 0) return false; if (sdkp->opt_xfer_blocks > dev_max) { sd_first_printk(KERN_WARNING, sdkp, "Optimal transfer size %u logical blocks " \ "> dev_max (%u logical blocks)\n", sdkp->opt_xfer_blocks, dev_max); return false; } if (sdkp->opt_xfer_blocks > SD_DEF_XFER_BLOCKS) { sd_first_printk(KERN_WARNING, sdkp, "Optimal transfer size %u logical blocks " \ "> sd driver limit (%u logical blocks)\n", sdkp->opt_xfer_blocks, SD_DEF_XFER_BLOCKS); return false; } if (opt_xfer_bytes < PAGE_SIZE) { sd_first_printk(KERN_WARNING, sdkp, "Optimal transfer size %u bytes < " \ "PAGE_SIZE (%u bytes)\n", opt_xfer_bytes, (unsigned int)PAGE_SIZE); return false; } if (min_xfer_bytes && opt_xfer_bytes % min_xfer_bytes) { sd_first_printk(KERN_WARNING, sdkp, "Optimal transfer size %u bytes not a " \ "multiple of preferred minimum block " \ "size (%u bytes)\n", opt_xfer_bytes, min_xfer_bytes); return false; } if (opt_xfer_bytes & (sdkp->physical_block_size - 1)) { sd_first_printk(KERN_WARNING, sdkp, "Optimal transfer size %u bytes not a " \ "multiple of physical block size (%u bytes)\n", opt_xfer_bytes, sdkp->physical_block_size); return false; } sd_first_printk(KERN_INFO, sdkp, "Optimal transfer size %u bytes\n", opt_xfer_bytes); return true; } static void sd_read_block_zero(struct scsi_disk *sdkp) { struct scsi_device *sdev = sdkp->device; unsigned int buf_len = sdev->sector_size; u8 *buffer, cmd[16] = { }; buffer = kmalloc(buf_len, GFP_KERNEL); if (!buffer) return; if (sdev->use_16_for_rw) { cmd[0] = READ_16; put_unaligned_be64(0, &cmd[2]); /* Logical block address 0 */ put_unaligned_be32(1, &cmd[10]);/* Transfer 1 logical block */ } else { cmd[0] = READ_10; put_unaligned_be32(0, &cmd[2]); /* Logical block address 0 */ put_unaligned_be16(1, &cmd[7]); /* Transfer 1 logical block */ } scsi_execute_cmd(sdkp->device, cmd, REQ_OP_DRV_IN, buffer, buf_len, SD_TIMEOUT, sdkp->max_retries, NULL); kfree(buffer); } /** * sd_revalidate_disk - called the first time a new disk is seen, * performs disk spin up, read_capacity, etc. * @disk: struct gendisk we care about **/ static int sd_revalidate_disk(struct gendisk *disk) { struct scsi_disk *sdkp = scsi_disk(disk); struct scsi_device *sdp = sdkp->device; sector_t old_capacity = sdkp->capacity; struct queue_limits lim; unsigned char *buffer; unsigned int dev_max; int err; SCSI_LOG_HLQUEUE(3, sd_printk(KERN_INFO, sdkp, "sd_revalidate_disk\n")); /* * If the device is offline, don't try and read capacity or any * of the other niceties. */ if (!scsi_device_online(sdp)) goto out; buffer = kmalloc(SD_BUF_SIZE, GFP_KERNEL); if (!buffer) { sd_printk(KERN_WARNING, sdkp, "sd_revalidate_disk: Memory " "allocation failure.\n"); goto out; } sd_spinup_disk(sdkp); lim = queue_limits_start_update(sdkp->disk->queue); /* * Without media there is no reason to ask; moreover, some devices * react badly if we do. */ if (sdkp->media_present) { sd_read_capacity(sdkp, &lim, buffer); /* * Some USB/UAS devices return generic values for mode pages * until the media has been accessed. Trigger a READ operation * to force the device to populate mode pages. */ if (sdp->read_before_ms) sd_read_block_zero(sdkp); /* * set the default to rotational. 
All non-rotational devices * support the block characteristics VPD page, which will * cause this to be updated correctly and any device which * doesn't support it should be treated as rotational. */ lim.features |= (BLK_FEAT_ROTATIONAL | BLK_FEAT_ADD_RANDOM); if (scsi_device_supports_vpd(sdp)) { sd_read_block_provisioning(sdkp); sd_read_block_limits(sdkp, &lim); sd_read_block_limits_ext(sdkp); sd_read_block_characteristics(sdkp, &lim); sd_zbc_read_zones(sdkp, &lim, buffer); } sd_config_discard(sdkp, &lim, sd_discard_mode(sdkp)); sd_print_capacity(sdkp, old_capacity); sd_read_write_protect_flag(sdkp, buffer); sd_read_cache_type(sdkp, buffer); sd_read_io_hints(sdkp, buffer); sd_read_app_tag_own(sdkp, buffer); sd_read_write_same(sdkp, buffer); sd_read_security(sdkp, buffer); sd_config_protection(sdkp, &lim); } /* * We now have all cache related info, determine how we deal * with flush requests. */ sd_set_flush_flag(sdkp, &lim); /* Initial block count limit based on CDB TRANSFER LENGTH field size. */ dev_max = sdp->use_16_for_rw ? SD_MAX_XFER_BLOCKS : SD_DEF_XFER_BLOCKS; /* Some devices report a maximum block count for READ/WRITE requests. */ dev_max = min_not_zero(dev_max, sdkp->max_xfer_blocks); lim.max_dev_sectors = logical_to_sectors(sdp, dev_max); if (sd_validate_min_xfer_size(sdkp)) lim.io_min = logical_to_bytes(sdp, sdkp->min_xfer_blocks); else lim.io_min = 0; /* * Limit default to SCSI host optimal sector limit if set. There may be * an impact on performance for when the size of a request exceeds this * host limit. */ lim.io_opt = sdp->host->opt_sectors << SECTOR_SHIFT; if (sd_validate_opt_xfer_size(sdkp, dev_max)) { lim.io_opt = min_not_zero(lim.io_opt, logical_to_bytes(sdp, sdkp->opt_xfer_blocks)); } sdkp->first_scan = 0; set_capacity_and_notify(disk, logical_to_sectors(sdp, sdkp->capacity)); sd_config_write_same(sdkp, &lim); kfree(buffer); err = queue_limits_commit_update_frozen(sdkp->disk->queue, &lim); if (err) return err; /* * Query concurrent positioning ranges after * queue_limits_commit_update() unlocked q->limits_lock to avoid * deadlock with q->sysfs_dir_lock and q->sysfs_lock. */ if (sdkp->media_present && scsi_device_supports_vpd(sdp)) sd_read_cpr(sdkp); /* * For a zoned drive, revalidating the zones can be done only once * the gendisk capacity is set. So if this fails, set back the gendisk * capacity to 0. */ if (sd_zbc_revalidate_zones(sdkp)) set_capacity_and_notify(disk, 0); out: return 0; } /** * sd_unlock_native_capacity - unlock native capacity * @disk: struct gendisk to set capacity for * * Block layer calls this function if it detects that partitions * on @disk reach beyond the end of the device. If the SCSI host * implements ->unlock_native_capacity() method, it's invoked to * give it a chance to adjust the device capacity. * * CONTEXT: * Defined by block layer. Might sleep. */ static void sd_unlock_native_capacity(struct gendisk *disk) { struct scsi_device *sdev = scsi_disk(disk)->device; if (sdev->host->hostt->unlock_native_capacity) sdev->host->hostt->unlock_native_capacity(sdev); } /** * sd_format_disk_name - format disk name * @prefix: name prefix - ie. "sd" for SCSI disks * @index: index of the disk to format name for * @buf: output buffer * @buflen: length of the output buffer * * SCSI disk names starts at sda. The 26th device is sdz and the * 27th is sdaa. The last one for two lettered suffix is sdzz * which is followed by sdaaa. 
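* For example: index 0 maps to "sda", index 25 to "sdz", index 26 to
* "sdaa", index 701 to "sdzz" and index 702 to "sdaaa".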
* * This is basically 26 base counting with one extra 'nil' entry * at the beginning from the second digit on and can be * determined using similar method as 26 base conversion with the * index shifted -1 after each digit is computed. * * CONTEXT: * Don't care. * * RETURNS: * 0 on success, -errno on failure. */ static int sd_format_disk_name(char *prefix, int index, char *buf, int buflen) { const int base = 'z' - 'a' + 1; char *begin = buf + strlen(prefix); char *end = buf + buflen; char *p; int unit; p = end - 1; *p = '\0'; unit = base; do { if (p == begin) return -EINVAL; *--p = 'a' + (index % unit); index = (index / unit) - 1; } while (index >= 0); memmove(begin, p, end - p); memcpy(buf, prefix, strlen(prefix)); return 0; } /** * sd_probe - called during driver initialization and whenever a * new scsi device is attached to the system. It is called once * for each scsi device (not just disks) present. * @dev: pointer to device object * * Returns 0 if successful (or not interested in this scsi device * (e.g. scanner)); 1 when there is an error. * * Note: this function is invoked from the scsi mid-level. * This function sets up the mapping between a given * <host,channel,id,lun> (found in sdp) and new device name * (e.g. /dev/sda). More precisely it is the block device major * and minor number that is chosen here. * * Assume sd_probe is not re-entrant (for time being) * Also think about sd_probe() and sd_remove() running coincidentally. **/ static int sd_probe(struct device *dev) { struct scsi_device *sdp = to_scsi_device(dev); struct scsi_disk *sdkp; struct gendisk *gd; int index; int error; scsi_autopm_get_device(sdp); error = -ENODEV; if (sdp->type != TYPE_DISK && sdp->type != TYPE_ZBC && sdp->type != TYPE_MOD && sdp->type != TYPE_RBC) goto out; if (!IS_ENABLED(CONFIG_BLK_DEV_ZONED) && sdp->type == TYPE_ZBC) { sdev_printk(KERN_WARNING, sdp, "Unsupported ZBC host-managed device.\n"); goto out; } SCSI_LOG_HLQUEUE(3, sdev_printk(KERN_INFO, sdp, "sd_probe\n")); error = -ENOMEM; sdkp = kzalloc(sizeof(*sdkp), GFP_KERNEL); if (!sdkp) goto out; gd = blk_mq_alloc_disk_for_queue(sdp->request_queue, &sd_bio_compl_lkclass); if (!gd) goto out_free; index = ida_alloc(&sd_index_ida, GFP_KERNEL); if (index < 0) { sdev_printk(KERN_WARNING, sdp, "sd_probe: memory exhausted.\n"); goto out_put; } error = sd_format_disk_name("sd", index, gd->disk_name, DISK_NAME_LEN); if (error) { sdev_printk(KERN_WARNING, sdp, "SCSI disk (sd) name length exceeded.\n"); goto out_free_index; } sdkp->device = sdp; sdkp->disk = gd; sdkp->index = index; sdkp->max_retries = SD_MAX_RETRIES; atomic_set(&sdkp->openers, 0); atomic_set(&sdkp->device->ioerr_cnt, 0); if (!sdp->request_queue->rq_timeout) { if (sdp->type != TYPE_MOD) blk_queue_rq_timeout(sdp->request_queue, SD_TIMEOUT); else blk_queue_rq_timeout(sdp->request_queue, SD_MOD_TIMEOUT); } device_initialize(&sdkp->disk_dev); sdkp->disk_dev.parent = get_device(dev); sdkp->disk_dev.class = &sd_disk_class; dev_set_name(&sdkp->disk_dev, "%s", dev_name(dev)); error = device_add(&sdkp->disk_dev); if (error) { put_device(&sdkp->disk_dev); goto out; } dev_set_drvdata(dev, sdkp); gd->major = sd_major((index & 0xf0) >> 4); gd->first_minor = ((index & 0xf) << 4) | (index & 0xfff00); gd->minors = SD_MINORS; gd->fops = &sd_fops; gd->private_data = sdkp; /* defaults, until the device tells us otherwise */ sdp->sector_size = 512; sdkp->capacity = 0; sdkp->media_present = 1; sdkp->write_prot = 0; sdkp->cache_override = 0; sdkp->WCE = 0; sdkp->RCD = 0; sdkp->ATO = 0; sdkp->first_scan = 1; 
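/*
 * These defaults are placeholders: the sd_revalidate_disk() call below
 * replaces them with whatever the device actually reports.
 */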
sdkp->max_medium_access_timeouts = SD_MAX_MEDIUM_TIMEOUTS; sd_revalidate_disk(gd); if (sdp->removable) { gd->flags |= GENHD_FL_REMOVABLE; gd->events |= DISK_EVENT_MEDIA_CHANGE; gd->event_flags = DISK_EVENT_FLAG_POLL | DISK_EVENT_FLAG_UEVENT; } blk_pm_runtime_init(sdp->request_queue, dev); if (sdp->rpm_autosuspend) { pm_runtime_set_autosuspend_delay(dev, sdp->host->rpm_autosuspend_delay); } error = device_add_disk(dev, gd, NULL); if (error) { device_unregister(&sdkp->disk_dev); put_disk(gd); goto out; } if (sdkp->security) { sdkp->opal_dev = init_opal_dev(sdkp, &sd_sec_submit); if (sdkp->opal_dev) sd_printk(KERN_NOTICE, sdkp, "supports TCG Opal\n"); } sd_printk(KERN_NOTICE, sdkp, "Attached SCSI %sdisk\n", sdp->removable ? "removable " : ""); scsi_autopm_put_device(sdp); return 0; out_free_index: ida_free(&sd_index_ida, index); out_put: put_disk(gd); out_free: kfree(sdkp); out: scsi_autopm_put_device(sdp); return error; } /** * sd_remove - called whenever a scsi disk (previously recognized by * sd_probe) is detached from the system. It is called (potentially * multiple times) during sd module unload. * @dev: pointer to device object * * Note: this function is invoked from the scsi mid-level. * This function potentially frees up a device name (e.g. /dev/sdc) * that could be re-used by a subsequent sd_probe(). * This function is not called when the built-in sd driver is "exit-ed". **/ static int sd_remove(struct device *dev) { struct scsi_disk *sdkp = dev_get_drvdata(dev); scsi_autopm_get_device(sdkp->device); device_del(&sdkp->disk_dev); del_gendisk(sdkp->disk); if (!sdkp->suspended) sd_shutdown(dev); put_disk(sdkp->disk); return 0; } static void scsi_disk_release(struct device *dev) { struct scsi_disk *sdkp = to_scsi_disk(dev); ida_free(&sd_index_ida, sdkp->index); put_device(&sdkp->device->sdev_gendev); free_opal_dev(sdkp->opal_dev); kfree(sdkp); } static int sd_start_stop_device(struct scsi_disk *sdkp, int start) { unsigned char cmd[6] = { START_STOP }; /* START_VALID */ struct scsi_sense_hdr sshdr; struct scsi_failure failure_defs[] = { { /* Power on, reset, or bus device reset occurred */ .sense = UNIT_ATTENTION, .asc = 0x29, .ascq = 0, .result = SAM_STAT_CHECK_CONDITION, }, { /* Power on occurred */ .sense = UNIT_ATTENTION, .asc = 0x29, .ascq = 1, .result = SAM_STAT_CHECK_CONDITION, }, { /* SCSI bus reset */ .sense = UNIT_ATTENTION, .asc = 0x29, .ascq = 2, .result = SAM_STAT_CHECK_CONDITION, }, {} }; struct scsi_failures failures = { .total_allowed = 3, .failure_definitions = failure_defs, }; const struct scsi_exec_args exec_args = { .sshdr = &sshdr, .req_flags = BLK_MQ_REQ_PM, .failures = &failures, }; struct scsi_device *sdp = sdkp->device; int res; if (start) cmd[4] |= 1; /* START */ if (sdp->start_stop_pwr_cond) cmd[4] |= start ? 1 << 4 : 3 << 4; /* Active or Standby */ if (!scsi_device_online(sdp)) return -ENODEV; res = scsi_execute_cmd(sdp, cmd, REQ_OP_DRV_IN, NULL, 0, SD_TIMEOUT, sdkp->max_retries, &exec_args); if (res) { sd_print_result(sdkp, "Start/Stop Unit failed", res); if (res > 0 && scsi_sense_valid(&sshdr)) { sd_print_sense_hdr(sdkp, &sshdr); /* 0x3a is medium not present */ if (sshdr.asc == 0x3a) res = 0; } } /* SCSI error codes must not go to the generic layer */ if (res) return -EIO; return 0; } /* * Send a SYNCHRONIZE CACHE instruction down to the device through * the normal SCSI command structure. Wait for the command to * complete. 
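* Note that sd_shutdown() below may also issue START STOP UNIT to spin
* the drive down when the device is configured to let sd manage system
* or shutdown power transitions.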
*/ static void sd_shutdown(struct device *dev) { struct scsi_disk *sdkp = dev_get_drvdata(dev); if (!sdkp) return; /* this can happen */ if (pm_runtime_suspended(dev)) return; if (sdkp->WCE && sdkp->media_present) { sd_printk(KERN_NOTICE, sdkp, "Synchronizing SCSI cache\n"); sd_sync_cache(sdkp); } if ((system_state != SYSTEM_RESTART && sdkp->device->manage_system_start_stop) || (system_state == SYSTEM_POWER_OFF && sdkp->device->manage_shutdown)) { sd_printk(KERN_NOTICE, sdkp, "Stopping disk\n"); sd_start_stop_device(sdkp, 0); } } static inline bool sd_do_start_stop(struct scsi_device *sdev, bool runtime) { return (sdev->manage_system_start_stop && !runtime) || (sdev->manage_runtime_start_stop && runtime); } static int sd_suspend_common(struct device *dev, bool runtime) { struct scsi_disk *sdkp = dev_get_drvdata(dev); int ret = 0; if (!sdkp) /* E.g.: runtime suspend following sd_remove() */ return 0; if (sdkp->WCE && sdkp->media_present) { if (!sdkp->device->silence_suspend) sd_printk(KERN_NOTICE, sdkp, "Synchronizing SCSI cache\n"); ret = sd_sync_cache(sdkp); /* ignore OFFLINE device */ if (ret == -ENODEV) return 0; if (ret) return ret; } if (sd_do_start_stop(sdkp->device, runtime)) { if (!sdkp->device->silence_suspend) sd_printk(KERN_NOTICE, sdkp, "Stopping disk\n"); /* an error is not worth aborting a system sleep */ ret = sd_start_stop_device(sdkp, 0); if (!runtime) ret = 0; } if (!ret) sdkp->suspended = true; return ret; } static int sd_suspend_system(struct device *dev) { if (pm_runtime_suspended(dev)) return 0; return sd_suspend_common(dev, false); } static int sd_suspend_runtime(struct device *dev) { return sd_suspend_common(dev, true); } static int sd_resume(struct device *dev) { struct scsi_disk *sdkp = dev_get_drvdata(dev); sd_printk(KERN_NOTICE, sdkp, "Starting disk\n"); if (opal_unlock_from_suspend(sdkp->opal_dev)) { sd_printk(KERN_NOTICE, sdkp, "OPAL unlock failed\n"); return -EIO; } return 0; } static int sd_resume_common(struct device *dev, bool runtime) { struct scsi_disk *sdkp = dev_get_drvdata(dev); int ret; if (!sdkp) /* E.g.: runtime resume at the start of sd_probe() */ return 0; if (!sd_do_start_stop(sdkp->device, runtime)) { sdkp->suspended = false; return 0; } sd_printk(KERN_NOTICE, sdkp, "Starting disk\n"); ret = sd_start_stop_device(sdkp, 1); if (!ret) { sd_resume(dev); sdkp->suspended = false; } return ret; } static int sd_resume_system(struct device *dev) { if (pm_runtime_suspended(dev)) { struct scsi_disk *sdkp = dev_get_drvdata(dev); struct scsi_device *sdp = sdkp ? 
sdkp->device : NULL; if (sdp && sdp->force_runtime_start_on_system_start) pm_request_resume(dev); return 0; } return sd_resume_common(dev, false); } static int sd_resume_runtime(struct device *dev) { struct scsi_disk *sdkp = dev_get_drvdata(dev); struct scsi_device *sdp; if (!sdkp) /* E.g.: runtime resume at the start of sd_probe() */ return 0; sdp = sdkp->device; if (sdp->ignore_media_change) { /* clear the device's sense data */ static const u8 cmd[10] = { REQUEST_SENSE }; const struct scsi_exec_args exec_args = { .req_flags = BLK_MQ_REQ_PM, }; if (scsi_execute_cmd(sdp, cmd, REQ_OP_DRV_IN, NULL, 0, sdp->request_queue->rq_timeout, 1, &exec_args)) sd_printk(KERN_NOTICE, sdkp, "Failed to clear sense data\n"); } return sd_resume_common(dev, true); } static const struct dev_pm_ops sd_pm_ops = { .suspend = sd_suspend_system, .resume = sd_resume_system, .poweroff = sd_suspend_system, .restore = sd_resume_system, .runtime_suspend = sd_suspend_runtime, .runtime_resume = sd_resume_runtime, }; static struct scsi_driver sd_template = { .gendrv = { .name = "sd", .probe = sd_probe, .probe_type = PROBE_PREFER_ASYNCHRONOUS, .remove = sd_remove, .shutdown = sd_shutdown, .pm = &sd_pm_ops, }, .rescan = sd_rescan, .resume = sd_resume, .init_command = sd_init_command, .uninit_command = sd_uninit_command, .done = sd_done, .eh_action = sd_eh_action, .eh_reset = sd_eh_reset, }; /** * init_sd - entry point for this driver (both when built in or when * a module). * * Note: this function registers this driver with the scsi mid-level. **/ static int __init init_sd(void) { int majors = 0, i, err; SCSI_LOG_HLQUEUE(3, printk("init_sd: sd driver entry point\n")); for (i = 0; i < SD_MAJORS; i++) { if (__register_blkdev(sd_major(i), "sd", sd_default_probe)) continue; majors++; } if (!majors) return -ENODEV; err = class_register(&sd_disk_class); if (err) goto err_out; sd_page_pool = mempool_create_page_pool(SD_MEMPOOL_SIZE, 0); if (!sd_page_pool) { printk(KERN_ERR "sd: can't init discard page pool\n"); err = -ENOMEM; goto err_out_class; } err = scsi_register_driver(&sd_template.gendrv); if (err) goto err_out_driver; return 0; err_out_driver: mempool_destroy(sd_page_pool); err_out_class: class_unregister(&sd_disk_class); err_out: for (i = 0; i < SD_MAJORS; i++) unregister_blkdev(sd_major(i), "sd"); return err; } /** * exit_sd - exit point for this driver (when it is a module). * * Note: this function unregisters this driver from the scsi mid-level. **/ static void __exit exit_sd(void) { int i; SCSI_LOG_HLQUEUE(3, printk("exit_sd: exiting sd driver\n")); scsi_unregister_driver(&sd_template.gendrv); mempool_destroy(sd_page_pool); class_unregister(&sd_disk_class); for (i = 0; i < SD_MAJORS; i++) unregister_blkdev(sd_major(i), "sd"); } module_init(init_sd); module_exit(exit_sd); void sd_print_sense_hdr(struct scsi_disk *sdkp, struct scsi_sense_hdr *sshdr) { scsi_print_sense_hdr(sdkp->device, sdkp->disk ? sdkp->disk->disk_name : NULL, sshdr); } void sd_print_result(const struct scsi_disk *sdkp, const char *msg, int result) { const char *hb_string = scsi_hostbyte_string(result); if (hb_string) sd_printk(KERN_INFO, sdkp, "%s: Result: hostbyte=%s driverbyte=%s\n", msg, hb_string ? hb_string : "invalid", "DRIVER_OK"); else sd_printk(KERN_INFO, sdkp, "%s: Result: hostbyte=0x%02x driverbyte=%s\n", msg, host_byte(result), "DRIVER_OK"); } |
/* SPDX-License-Identifier: GPL-2.0 */ #undef TRACE_SYSTEM #define TRACE_SYSTEM bcachefs #if !defined(_TRACE_BCACHEFS_H) || defined(TRACE_HEADER_MULTI_READ) #include <linux/tracepoint.h> #define TRACE_BPOS_entries(name) \ __field(u64, name##_inode ) \ __field(u64, name##_offset ) \ __field(u32, name##_snapshot ) #define TRACE_BPOS_assign(dst, src) \ __entry->dst##_inode = (src).inode; \ __entry->dst##_offset = (src).offset; \ __entry->dst##_snapshot = (src).snapshot DECLARE_EVENT_CLASS(bpos, TP_PROTO(const struct bpos *p), TP_ARGS(p), TP_STRUCT__entry( TRACE_BPOS_entries(p) ), TP_fast_assign( TRACE_BPOS_assign(p, *p); ), TP_printk("%llu:%llu:%u", __entry->p_inode, __entry->p_offset, __entry->p_snapshot) ); DECLARE_EVENT_CLASS(fs_str, TP_PROTO(struct bch_fs *c, const char *str), TP_ARGS(c, str), TP_STRUCT__entry( __field(dev_t, dev ) __string(str, str ) ), TP_fast_assign( __entry->dev = c->dev; __assign_str(str); ), TP_printk("%d,%d\n%s", MAJOR(__entry->dev), MINOR(__entry->dev), __get_str(str)) ); DECLARE_EVENT_CLASS(trans_str, TP_PROTO(struct btree_trans *trans, unsigned long caller_ip, const char *str), TP_ARGS(trans, caller_ip, str), TP_STRUCT__entry( __field(dev_t, dev ) __array(char, trans_fn, 32 ) __field(unsigned long, caller_ip ) __string(str, str ) ), TP_fast_assign( __entry->dev = trans->c->dev; strscpy(__entry->trans_fn, trans->fn, sizeof(__entry->trans_fn)); __entry->caller_ip = caller_ip; __assign_str(str); ), TP_printk("%d,%d %s %pS %s", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->trans_fn, (void *) __entry->caller_ip, __get_str(str)) ); DECLARE_EVENT_CLASS(trans_str_nocaller, TP_PROTO(struct btree_trans *trans, const char *str), TP_ARGS(trans, str), TP_STRUCT__entry( __field(dev_t, dev ) __array(char, trans_fn, 32 ) __string(str, str ) ), TP_fast_assign( __entry->dev = trans->c->dev; strscpy(__entry->trans_fn,
trans->fn, sizeof(__entry->trans_fn)); __assign_str(str); ), TP_printk("%d,%d %s %s", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->trans_fn, __get_str(str)) ); DECLARE_EVENT_CLASS(btree_node_nofs, TP_PROTO(struct bch_fs *c, struct btree *b), TP_ARGS(c, b), TP_STRUCT__entry( __field(dev_t, dev ) __field(u8, level ) __field(u8, btree_id ) TRACE_BPOS_entries(pos) ), TP_fast_assign( __entry->dev = c->dev; __entry->level = b->c.level; __entry->btree_id = b->c.btree_id; TRACE_BPOS_assign(pos, b->key.k.p); ), TP_printk("%d,%d %u %s %llu:%llu:%u", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->level, bch2_btree_id_str(__entry->btree_id), __entry->pos_inode, __entry->pos_offset, __entry->pos_snapshot) ); DECLARE_EVENT_CLASS(btree_node, TP_PROTO(struct btree_trans *trans, struct btree *b), TP_ARGS(trans, b), TP_STRUCT__entry( __field(dev_t, dev ) __array(char, trans_fn, 32 ) __field(u8, level ) __field(u8, btree_id ) TRACE_BPOS_entries(pos) ), TP_fast_assign( __entry->dev = trans->c->dev; strscpy(__entry->trans_fn, trans->fn, sizeof(__entry->trans_fn)); __entry->level = b->c.level; __entry->btree_id = b->c.btree_id; TRACE_BPOS_assign(pos, b->key.k.p); ), TP_printk("%d,%d %s %u %s %llu:%llu:%u", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->trans_fn, __entry->level, bch2_btree_id_str(__entry->btree_id), __entry->pos_inode, __entry->pos_offset, __entry->pos_snapshot) ); DECLARE_EVENT_CLASS(bch_fs, TP_PROTO(struct bch_fs *c), TP_ARGS(c), TP_STRUCT__entry( __field(dev_t, dev ) ), TP_fast_assign( __entry->dev = c->dev; ), TP_printk("%d,%d", MAJOR(__entry->dev), MINOR(__entry->dev)) ); DECLARE_EVENT_CLASS(btree_trans, TP_PROTO(struct btree_trans *trans), TP_ARGS(trans), TP_STRUCT__entry( __field(dev_t, dev ) __array(char, trans_fn, 32 ) ), TP_fast_assign( __entry->dev = trans->c->dev; strscpy(__entry->trans_fn, trans->fn, sizeof(__entry->trans_fn)); ), TP_printk("%d,%d %s", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->trans_fn) ); DECLARE_EVENT_CLASS(bio, TP_PROTO(struct bio *bio), TP_ARGS(bio), TP_STRUCT__entry( __field(dev_t, dev ) __field(sector_t, sector ) __field(unsigned int, nr_sector ) __array(char, rwbs, 6 ) ), TP_fast_assign( __entry->dev = bio->bi_bdev ? 
bio_dev(bio) : 0; __entry->sector = bio->bi_iter.bi_sector; __entry->nr_sector = bio->bi_iter.bi_size >> 9; blk_fill_rwbs(__entry->rwbs, bio->bi_opf); ), TP_printk("%d,%d %s %llu + %u", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs, (unsigned long long)__entry->sector, __entry->nr_sector) ); /* disk_accounting.c */ TRACE_EVENT(accounting_mem_insert, TP_PROTO(struct bch_fs *c, const char *acc), TP_ARGS(c, acc), TP_STRUCT__entry( __field(dev_t, dev ) __field(unsigned, new_nr ) __string(acc, acc ) ), TP_fast_assign( __entry->dev = c->dev; __entry->new_nr = c->accounting.k.nr; __assign_str(acc); ), TP_printk("%d,%d entries %u added %s", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->new_nr, __get_str(acc)) ); /* fs.c: */ TRACE_EVENT(bch2_sync_fs, TP_PROTO(struct super_block *sb, int wait), TP_ARGS(sb, wait), TP_STRUCT__entry( __field( dev_t, dev ) __field( int, wait ) ), TP_fast_assign( __entry->dev = sb->s_dev; __entry->wait = wait; ), TP_printk("dev %d,%d wait %d", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->wait) ); /* fs-io.c: */ TRACE_EVENT(bch2_fsync, TP_PROTO(struct file *file, int datasync), TP_ARGS(file, datasync), TP_STRUCT__entry( __field( dev_t, dev ) __field( ino_t, ino ) __field( ino_t, parent ) __field( int, datasync ) ), TP_fast_assign( struct dentry *dentry = file->f_path.dentry; __entry->dev = dentry->d_sb->s_dev; __entry->ino = d_inode(dentry)->i_ino; __entry->parent = d_inode(dentry->d_parent)->i_ino; __entry->datasync = datasync; ), TP_printk("dev %d,%d ino %lu parent %lu datasync %d ", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->ino, (unsigned long) __entry->parent, __entry->datasync) ); /* super-io.c: */ TRACE_EVENT(write_super, TP_PROTO(struct bch_fs *c, unsigned long ip), TP_ARGS(c, ip), TP_STRUCT__entry( __field(dev_t, dev ) __field(unsigned long, ip ) ), TP_fast_assign( __entry->dev = c->dev; __entry->ip = ip; ), TP_printk("%d,%d for %pS", MAJOR(__entry->dev), MINOR(__entry->dev), (void *) __entry->ip) ); /* io.c: */ DEFINE_EVENT(bio, read_promote, TP_PROTO(struct bio *bio), TP_ARGS(bio) ); TRACE_EVENT(read_nopromote, TP_PROTO(struct bch_fs *c, int ret), TP_ARGS(c, ret), TP_STRUCT__entry( __field(dev_t, dev ) __array(char, ret, 32 ) ), TP_fast_assign( __entry->dev = c->dev; strscpy(__entry->ret, bch2_err_str(ret), sizeof(__entry->ret)); ), TP_printk("%d,%d ret %s", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->ret) ); DEFINE_EVENT(bio, read_bounce, TP_PROTO(struct bio *bio), TP_ARGS(bio) ); DEFINE_EVENT(bio, read_split, TP_PROTO(struct bio *bio), TP_ARGS(bio) ); DEFINE_EVENT(bio, read_retry, TP_PROTO(struct bio *bio), TP_ARGS(bio) ); DEFINE_EVENT(bio, read_reuse_race, TP_PROTO(struct bio *bio), TP_ARGS(bio) ); /* Journal */ DEFINE_EVENT(bch_fs, journal_full, TP_PROTO(struct bch_fs *c), TP_ARGS(c) ); DEFINE_EVENT(fs_str, journal_entry_full, TP_PROTO(struct bch_fs *c, const char *str), TP_ARGS(c, str) ); DEFINE_EVENT(fs_str, journal_entry_close, TP_PROTO(struct bch_fs *c, const char *str), TP_ARGS(c, str) ); DEFINE_EVENT(bio, journal_write, TP_PROTO(struct bio *bio), TP_ARGS(bio) ); TRACE_EVENT(journal_reclaim_start, TP_PROTO(struct bch_fs *c, bool direct, bool kicked, u64 min_nr, u64 min_key_cache, u64 btree_cache_dirty, u64 btree_cache_total, u64 btree_key_cache_dirty, u64 btree_key_cache_total), TP_ARGS(c, direct, kicked, min_nr, min_key_cache, btree_cache_dirty, btree_cache_total, btree_key_cache_dirty, btree_key_cache_total), TP_STRUCT__entry( __field(dev_t, dev ) __field(bool, direct ) __field(bool, kicked ) 
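/* reclaim targets and cache occupancy sampled when reclaim starts */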
TRACE_EVENT(journal_reclaim_start,
	TP_PROTO(struct bch_fs *c, bool direct, bool kicked,
		 u64 min_nr, u64 min_key_cache,
		 u64 btree_cache_dirty, u64 btree_cache_total,
		 u64 btree_key_cache_dirty, u64 btree_key_cache_total),
	TP_ARGS(c, direct, kicked, min_nr, min_key_cache,
		btree_cache_dirty, btree_cache_total,
		btree_key_cache_dirty, btree_key_cache_total),

	TP_STRUCT__entry(
		__field(dev_t,		dev			)
		__field(bool,		direct			)
		__field(bool,		kicked			)
		__field(u64,		min_nr			)
		__field(u64,		min_key_cache		)
		__field(u64,		btree_cache_dirty	)
		__field(u64,		btree_cache_total	)
		__field(u64,		btree_key_cache_dirty	)
		__field(u64,		btree_key_cache_total	)
	),

	TP_fast_assign(
		__entry->dev			= c->dev;
		__entry->direct			= direct;
		__entry->kicked			= kicked;
		__entry->min_nr			= min_nr;
		__entry->min_key_cache		= min_key_cache;
		__entry->btree_cache_dirty	= btree_cache_dirty;
		__entry->btree_cache_total	= btree_cache_total;
		__entry->btree_key_cache_dirty	= btree_key_cache_dirty;
		__entry->btree_key_cache_total	= btree_key_cache_total;
	),

	TP_printk("%d,%d direct %u kicked %u min %llu key cache %llu btree cache %llu/%llu key cache %llu/%llu",
		  MAJOR(__entry->dev), MINOR(__entry->dev),
		  __entry->direct,
		  __entry->kicked,
		  __entry->min_nr,
		  __entry->min_key_cache,
		  __entry->btree_cache_dirty,
		  __entry->btree_cache_total,
		  __entry->btree_key_cache_dirty,
		  __entry->btree_key_cache_total)
);

TRACE_EVENT(journal_reclaim_finish,
	TP_PROTO(struct bch_fs *c, u64 nr_flushed),
	TP_ARGS(c, nr_flushed),

	TP_STRUCT__entry(
		__field(dev_t,		dev			)
		__field(u64,		nr_flushed		)
	),

	TP_fast_assign(
		__entry->dev		= c->dev;
		__entry->nr_flushed	= nr_flushed;
	),

	TP_printk("%d,%d flushed %llu",
		  MAJOR(__entry->dev), MINOR(__entry->dev),
		  __entry->nr_flushed)
);

/* bset.c: */

DEFINE_EVENT(bpos, bkey_pack_pos_fail,
	TP_PROTO(const struct bpos *p),
	TP_ARGS(p)
);

/* Btree cache: */

TRACE_EVENT(btree_cache_scan,
	TP_PROTO(long nr_to_scan, long can_free, long ret),
	TP_ARGS(nr_to_scan, can_free, ret),

	TP_STRUCT__entry(
		__field(long,		nr_to_scan		)
		__field(long,		can_free		)
		__field(long,		ret			)
	),

	TP_fast_assign(
		__entry->nr_to_scan	= nr_to_scan;
		__entry->can_free	= can_free;
		__entry->ret		= ret;
	),

	TP_printk("scanned for %li nodes, can free %li, ret %li",
		  __entry->nr_to_scan, __entry->can_free, __entry->ret)
);

DEFINE_EVENT(btree_node_nofs, btree_cache_reap,
	TP_PROTO(struct bch_fs *c, struct btree *b),
	TP_ARGS(c, b)
);

DEFINE_EVENT(btree_trans, btree_cache_cannibalize_lock_fail,
	TP_PROTO(struct btree_trans *trans),
	TP_ARGS(trans)
);

DEFINE_EVENT(btree_trans, btree_cache_cannibalize_lock,
	TP_PROTO(struct btree_trans *trans),
	TP_ARGS(trans)
);

DEFINE_EVENT(btree_trans, btree_cache_cannibalize,
	TP_PROTO(struct btree_trans *trans),
	TP_ARGS(trans)
);

DEFINE_EVENT(btree_trans, btree_cache_cannibalize_unlock,
	TP_PROTO(struct btree_trans *trans),
	TP_ARGS(trans)
);

/* Btree */

DEFINE_EVENT(btree_node, btree_node_read,
	TP_PROTO(struct btree_trans *trans, struct btree *b),
	TP_ARGS(trans, b)
);

TRACE_EVENT(btree_node_write,
	TP_PROTO(struct btree *b, unsigned bytes, unsigned sectors),
	TP_ARGS(b, bytes, sectors),

	TP_STRUCT__entry(
		__field(enum btree_node_type,	type		)
		__field(unsigned,	bytes			)
		__field(unsigned,	sectors			)
	),

	TP_fast_assign(
		__entry->type		= btree_node_type(b);
		__entry->bytes		= bytes;
		__entry->sectors	= sectors;
	),

	TP_printk("bkey type %u bytes %u sectors %u",
		  __entry->type, __entry->bytes, __entry->sectors)
);

DEFINE_EVENT(btree_node, btree_node_alloc,
	TP_PROTO(struct btree_trans *trans, struct btree *b),
	TP_ARGS(trans, b)
);

DEFINE_EVENT(btree_node, btree_node_free,
	TP_PROTO(struct btree_trans *trans, struct btree *b),
	TP_ARGS(trans, b)
);

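/*
 * Fires when a btree node reserve could not be filled (@required is the
 * requested reserve size); records the transaction fn, the caller's
 * instruction pointer and the error rendered as a string by
 * bch2_err_str(), so the failure can be attributed without a stack trace.
 */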
TRACE_EVENT(btree_reserve_get_fail,
	TP_PROTO(const char *trans_fn,
		 unsigned long caller_ip,
		 size_t required,
		 int ret),
	TP_ARGS(trans_fn, caller_ip, required, ret),

	TP_STRUCT__entry(
		__array(char,		trans_fn, 32		)
		__field(unsigned long,	caller_ip		)
		__field(size_t,		required		)
		__array(char,		ret, 32			)
	),

	TP_fast_assign(
		strscpy(__entry->trans_fn, trans_fn, sizeof(__entry->trans_fn));
		__entry->caller_ip	= caller_ip;
		__entry->required	= required;
		strscpy(__entry->ret, bch2_err_str(ret), sizeof(__entry->ret));
	),

	TP_printk("%s %pS required %zu ret %s",
		  __entry->trans_fn,
		  (void *) __entry->caller_ip,
		  __entry->required,
		  __entry->ret)
);

DEFINE_EVENT(btree_node, btree_node_compact,
	TP_PROTO(struct btree_trans *trans, struct btree *b),
	TP_ARGS(trans, b)
);

DEFINE_EVENT(btree_node, btree_node_merge,
	TP_PROTO(struct btree_trans *trans, struct btree *b),
	TP_ARGS(trans, b)
);

DEFINE_EVENT(btree_node, btree_node_split,
	TP_PROTO(struct btree_trans *trans, struct btree *b),
	TP_AR